|
kusano |
2b45e8 |
#define REALNAME ASMNAME
|
|
kusano |
2b45e8 |
#define ASSEMBLER
|
|
kusano |
2b45e8 |
#include "common.h"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * FETCH expands to a plain 64-bit load (ld).  Every use in this file
 * targets $0, so the loaded value is discarded; presumably it serves as a
 * software prefetch of the addressed line -- TODO confirm.
 */
#define FETCH ld
|
|
kusano |
2b45e8 |
/*
 * gsLQC1(base, fq, ft, offset): emit a hand-encoded instruction as a raw
 * .word.  Field layout as built below: 0x32 at bit 26, base at bit 21,
 * ft at bit 16, bit 15 set, offset at bit 6, bit 5 set, fq in the low bits.
 * The arguments are pasted unparenthesized into the expression, so pass
 * plain numeric register indices / literals only.
 * NOTE(review): presumably the Loongson quad-word FP load -- confirm the
 * encoding against the Loongson manual.
 */
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
|
kusano |
2b45e8 |
/*
 * gsSQC1(base, fq, ft, offset): emit a hand-encoded instruction as a raw
 * .word.  Field layout as built below: 0x3A at bit 26, base at bit 21,
 * ft at bit 16, bit 15 set, offset at bit 6, bit 5 set, fq in the low bits.
 * The arguments are pasted unparenthesized into the expression, so pass
 * plain numeric register indices / literals only.
 * NOTE(review): presumably the Loongson quad-word FP store -- confirm the
 * encoding against the Loongson manual.
 */
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Integer registers read at entry as the kernel arguments:
 * M, N, K (sizes), A, B, C (pointers) and LDC.
 * M, N and K are copied to MCO/NCO/KCO at entry and then reused as loop
 * counters; A, B and C are advanced as running pointers; LDC is scaled
 * in place by BASE_SHIFT.
 */
#define M $4
|
|
kusano |
2b45e8 |
#define N $5
|
|
kusano |
2b45e8 |
#define K $6
|
|
kusano |
2b45e8 |
#define A $8
|
|
kusano |
2b45e8 |
#define B $9
|
|
kusano |
2b45e8 |
#define C $10
|
|
kusano |
2b45e8 |
#define LDC $11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * AO / BO: saved base addresses of A and B.  The running pointers A and B
 * are reset from these (move A,AO / move B,BO).
 */
#define AO $12
|
|
kusano |
2b45e8 |
#define BO $13
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * CO1..CO4: four output pointers into C, spaced LDC apart
 * (CO1 = C, CO2 = CO1 + LDC, CO3 = CO2 + LDC, CO4 = CO3 + LDC).
 */
#define CO1 $14
|
|
kusano |
2b45e8 |
#define CO2 $15
|
|
kusano |
2b45e8 |
#define CO3 $16
|
|
kusano |
2b45e8 |
#define CO4 $17
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * KCO / MCO / NCO: copies of K, M and N taken at entry, so that K, M and N
 * themselves can be overwritten and used as loop counters.
 */
#define KCO $18
|
|
kusano |
2b45e8 |
#define MCO $19
|
|
kusano |
2b45e8 |
#define NCO $20
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Addresses and byte offsets used by the FETCH instructions:
 *   SPANB = KCO << (2 + BASE_SHIFT),  PREB = BO + SPANB
 *   SPANA = KCO << (1 + BASE_SHIFT),  PREA = AO + SPANA
 * PREA and PREB are advanced alongside A and B inside the K loops.
 */
#define SPANB $21
|
|
kusano |
2b45e8 |
#define PREB $23
|
|
kusano |
2b45e8 |
#define PREA $24
|
|
kusano |
2b45e8 |
#define SPANA $25
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ALPHA lives in $f15, which is also b7.  The code stores ALPHA to
 * 152($sp) at entry and reloads it before the write-back, presumably
 * because the K loops overwrite $f15 through b7.
 */
#define ALPHA $f15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Extra integer registers used only in the TRMM build:
 *   OFFSET - loaded from 160($sp) with LDARG
 *   KK     - derived from OFFSET (negated, or copied when LEFT is defined)
 *   TEMP   - scratch for the per-tile K length and pointer offsets
 */
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
#define OFFSET $2
|
|
kusano |
2b45e8 |
#define KK $3
|
|
kusano |
2b45e8 |
#define TEMP $7
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * R8..R17: bare numeric integer-register indices (no `$` prefix).
 * Not referenced in the visible part of this file; presumably intended as
 * the `base` argument of gsLQC1/gsSQC1 -- TODO confirm.
 */
#define R8 8
|
|
kusano |
2b45e8 |
#define R9 9
|
|
kusano |
2b45e8 |
#define R14 14
|
|
kusano |
2b45e8 |
#define R15 15
|
|
kusano |
2b45e8 |
#define R16 16
|
|
kusano |
2b45e8 |
#define R17 17
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * tMN: sixteen FP accumulators ($f16..$f31) for the 4x4 tile.
 * The first digit pairs with the A operand and the second with the B
 * operand, e.g. t21 += a1*b0 and t12 += a0*b1 in the K loops.
 * NOTE(review): the visible prologue saves $f20..$f28 but not $f29..$f31,
 * which are used here as t41, t11 and t21 -- confirm against the target
 * ABI's callee-saved FP register set.
 */
#define t11 $f30
|
|
kusano |
2b45e8 |
#define t21 $f31
|
|
kusano |
2b45e8 |
#define t31 $f28
|
|
kusano |
2b45e8 |
#define t41 $f29
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define t12 $f26
|
|
kusano |
2b45e8 |
#define t22 $f27
|
|
kusano |
2b45e8 |
#define t32 $f24
|
|
kusano |
2b45e8 |
#define t42 $f25
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define t13 $f22
|
|
kusano |
2b45e8 |
#define t23 $f23
|
|
kusano |
2b45e8 |
#define t33 $f20
|
|
kusano |
2b45e8 |
#define t43 $f21
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define t14 $f18
|
|
kusano |
2b45e8 |
#define t24 $f19
|
|
kusano |
2b45e8 |
#define t34 $f16
|
|
kusano |
2b45e8 |
#define t44 $f17
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * cMN: registers that receive the existing values of C (loaded from
 * CO1..CO4) in the GEMM write-back, before being combined with tMN and
 * ALPHA.  They occupy $f0..$f14, i.e. the same registers as the a0..a7 and
 * b0..b6 operand macros.
 */
#define c11 $f0
|
|
kusano |
2b45e8 |
#define c21 $f1
|
|
kusano |
2b45e8 |
#define c31 $f2
|
|
kusano |
2b45e8 |
#define c41 $f3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define c12 $f4
|
|
kusano |
2b45e8 |
#define c22 $f5
|
|
kusano |
2b45e8 |
#define c32 $f6
|
|
kusano |
2b45e8 |
#define c42 $f7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define c13 $f8
|
|
kusano |
2b45e8 |
#define c23 $f9
|
|
kusano |
2b45e8 |
#define c33 $f10
|
|
kusano |
2b45e8 |
#define c43 $f11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define c14 $f12
|
|
kusano |
2b45e8 |
#define c24 $f13
|
|
kusano |
2b45e8 |
#define c34 $f14
|
|
kusano |
2b45e8 |
/*
 * c44 shares $f0 with c11 ($f15 is taken by ALPHA).  In the visible GEMM
 * write-back c11 has already been consumed by its MADD before c44 is
 * loaded, so the reuse does not clobber a live value there.
 */
#define c44 $f0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * a0..a7 / b0..b7: FP registers holding the values loaded from A and B in
 * the K loops.  The loops alternate between two operand sets
 * (a0..a3,b0..b3 and a4..a7,b4..b7).  They occupy $f0..$f15, overlapping
 * the cMN macros and ALPHA (b7 is $f15).
 */
#define a0 $f0
|
|
kusano |
2b45e8 |
#define a1 $f1
|
|
kusano |
2b45e8 |
#define a2 $f2
|
|
kusano |
2b45e8 |
#define a3 $f3
|
|
kusano |
2b45e8 |
#define a4 $f4
|
|
kusano |
2b45e8 |
#define a5 $f5
|
|
kusano |
2b45e8 |
#define a6 $f6
|
|
kusano |
2b45e8 |
#define a7 $f7
|
|
kusano |
2b45e8 |
#define b0 $f8
|
|
kusano |
2b45e8 |
#define b1 $f9
|
|
kusano |
2b45e8 |
#define b2 $f10
|
|
kusano |
2b45e8 |
#define b3 $f11
|
|
kusano |
2b45e8 |
#define b4 $f12
|
|
kusano |
2b45e8 |
#define b5 $f13
|
|
kusano |
2b45e8 |
#define b6 $f14
|
|
kusano |
2b45e8 |
#define b7 $f15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * F0..F31: bare numeric FP-register indices (no `$f` prefix).
 * Not referenced in the visible part of this file; presumably intended as
 * the `fq`/`ft` arguments of gsLQC1/gsSQC1 -- TODO confirm.
 */
#define F31 31
|
|
kusano |
2b45e8 |
#define F30 30
|
|
kusano |
2b45e8 |
#define F29 29
|
|
kusano |
2b45e8 |
#define F28 28
|
|
kusano |
2b45e8 |
#define F27 27
|
|
kusano |
2b45e8 |
#define F26 26
|
|
kusano |
2b45e8 |
#define F25 25
|
|
kusano |
2b45e8 |
#define F24 24
|
|
kusano |
2b45e8 |
#define F23 23
|
|
kusano |
2b45e8 |
#define F22 22
|
|
kusano |
2b45e8 |
#define F21 21
|
|
kusano |
2b45e8 |
#define F20 20
|
|
kusano |
2b45e8 |
#define F19 19
|
|
kusano |
2b45e8 |
#define F18 18
|
|
kusano |
2b45e8 |
#define F17 17
|
|
kusano |
2b45e8 |
#define F16 16
|
|
kusano |
2b45e8 |
#define F15 15
|
|
kusano |
2b45e8 |
#define F14 14
|
|
kusano |
2b45e8 |
#define F13 13
|
|
kusano |
2b45e8 |
#define F12 12
|
|
kusano |
2b45e8 |
#define F11 11
|
|
kusano |
2b45e8 |
#define F10 10
|
|
kusano |
2b45e8 |
#define F9 9
|
|
kusano |
2b45e8 |
#define F8 8
|
|
kusano |
2b45e8 |
#define F7 7
|
|
kusano |
2b45e8 |
#define F6 6
|
|
kusano |
2b45e8 |
#define F5 5
|
|
kusano |
2b45e8 |
#define F4 4
|
|
kusano |
2b45e8 |
#define F3 3
|
|
kusano |
2b45e8 |
#define F2 2
|
|
kusano |
2b45e8 |
#define F1 1
|
|
kusano |
2b45e8 |
#define F0 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PROLOGUE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, -160
|
|
kusano |
2b45e8 |
sd $16, 0($sp)
|
|
kusano |
2b45e8 |
sd $17, 8($sp)
|
|
kusano |
2b45e8 |
sd $18, 16($sp)
|
|
kusano |
2b45e8 |
sd $19, 24($sp)
|
|
kusano |
2b45e8 |
sd $20, 32($sp)
|
|
kusano |
2b45e8 |
sd $21, 40($sp)
|
|
kusano |
2b45e8 |
sd $22, 48($sp)
|
|
kusano |
2b45e8 |
ST $f24, 56($sp)
|
|
kusano |
2b45e8 |
ST $f25, 64($sp)
|
|
kusano |
2b45e8 |
ST $f26, 72($sp)
|
|
kusano |
2b45e8 |
ST $f27, 80($sp)
|
|
kusano |
2b45e8 |
ST $f28, 88($sp)
|
|
kusano |
2b45e8 |
sd $23, 96($sp)
|
|
kusano |
2b45e8 |
sd $24, 104($sp)
|
|
kusano |
2b45e8 |
sd $25, 112($sp)
|
|
kusano |
2b45e8 |
ST $f20,120($sp)
|
|
kusano |
2b45e8 |
ST $f21,128($sp)
|
|
kusano |
2b45e8 |
ST $f22,136($sp)
|
|
kusano |
2b45e8 |
ST $f23,144($sp)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
.L0_N4: # Loop N
|
|
kusano |
2b45e8 |
ST ALPHA,152($sp) # Backup ALPHA
|
|
kusano |
2b45e8 |
move MCO,M # Backup M
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
move NCO,N # Backup N
|
|
kusano |
2b45e8 |
move KCO,K # Backup K
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
move AO,A # Backup A_addr
|
|
kusano |
2b45e8 |
dsra N,NCO,2 # N=NCO/4 (arithmetic shift right by 2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
dsll LDC,LDC,BASE_SHIFT # LDC*8Byte
|
|
kusano |
2b45e8 |
dsll SPANB,KCO,2+BASE_SHIFT # SPANB=KC*4nr*8Byte=KC*2^5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
LDARG OFFSET,160($sp) # OFFSET is loaded from the first slot above this routine's 160-byte frame
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && !defined(LEFT)
|
|
kusano |
2b45e8 |
neg KK,OFFSET
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
move BO,B # Backup B_addr
|
|
kusano |
2b45e8 |
beq N,$0,.L0_N2 # N=0,NCO<4
|
|
kusano |
2b45e8 |
dsll SPANA,KCO,1+BASE_SHIFT # SPANA = KCO*2mr*8Byte
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L0_N4_Lb: # mr=4,nr=4
|
|
kusano |
2b45e8 |
move CO1,C
|
|
kusano |
2b45e8 |
dsra M,MCO,2 # M=MCO/4 (arithmetic shift right by 2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
move A,AO # Reset A
|
|
kusano |
2b45e8 |
daddu CO2,C,LDC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddu PREB,BO,SPANB # PreB point next panelB
|
|
kusano |
2b45e8 |
daddu CO3,CO2,LDC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddu PREA,AO,SPANA
|
|
kusano |
2b45e8 |
daddu CO4,CO3,LDC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
move KK,OFFSET
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz M,.L14_M2
|
|
kusano |
2b45e8 |
daddu C,CO4,LDC # move C to next panel Cj
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L10:
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
move B,BO # (SIDE=L and UPLO=L) or (SIZE=R and UPLO=U)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
dsll K,KK,2 + BASE_SHIFT # KK is the length that needs to span to the data part
|
|
kusano |
2b45e8 |
dsll TEMP,KK,2 + BASE_SHIFT
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddu A,A,K # move A B to data part
|
|
kusano |
2b45e8 |
daddu B,BO,TEMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MTC $0,t11 # GEMM part NR=4,MR=4
|
|
kusano |
2b45e8 |
LD a0,0(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t21,t11
|
|
kusano |
2b45e8 |
MOV t31,t11
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t41,t11
|
|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
LD b0,0(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t22,t11
|
|
kusano |
2b45e8 |
MOV t32,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t42,t11
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t13,t11
|
|
kusano |
2b45e8 |
MOV t23,t11
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t33,t11
|
|
kusano |
2b45e8 |
MOV t43,t11
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t14,t11
|
|
kusano |
2b45e8 |
MOV t24,t11
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP,KCO,KK # temp is the length of the data part
|
|
kusano |
2b45e8 |
#elif defined(LEFT)
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 4 # S=L,U=L
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 4 # S=R,U=U,for this two situation KK is the length of the data part
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsra K,TEMP,2 # K=TEMP/4
|
|
kusano |
2b45e8 |
MOV t34,t11
|
|
kusano |
2b45e8 |
beqz K,.L15
|
|
kusano |
2b45e8 |
MOV t44,t11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
move B,BO # Reset B
|
|
kusano |
2b45e8 |
MTC $0,t11 # GEMM part NR=4,MR=4
|
|
kusano |
2b45e8 |
LD a0,0(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t21,t11
|
|
kusano |
2b45e8 |
MOV t31,t11
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t41,t11
|
|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
LD b0,0(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t22,t11
|
|
kusano |
2b45e8 |
MOV t32,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t42,t11
|
|
kusano |
2b45e8 |
dsra K,KCO,2 # K=KCO/4
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t13,t11
|
|
kusano |
2b45e8 |
MOV t23,t11
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t33,t11
|
|
kusano |
2b45e8 |
MOV t43,t11
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t14,t11
|
|
kusano |
2b45e8 |
MOV t24,t11
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t34,t11
|
|
kusano |
2b45e8 |
beqz K,.L15
|
|
kusano |
2b45e8 |
MOV t44,t11 # clear 16 results registers
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
.L11: # kr=4
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a4,4*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD a5,5*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |
LD b4,4*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |
LD b5,5*SIZE(B)
|
|
kusano |
2b45e8 |
FETCH $0,(PREB)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |
LD a6,6*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
FETCH $0,(PREA)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t33,t33,a2,b2
|
|
kusano |
2b45e8 |
MADD t43,t43,a3,b2
|
|
kusano |
2b45e8 |
LD a7,7*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a2,b3
|
|
kusano |
2b45e8 |
MADD t44,t44,a3,b3
|
|
kusano |
2b45e8 |
LD b7,7*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L12:
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a0,8*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD a1,9*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t31,t31,a6,b4
|
|
kusano |
2b45e8 |
MADD t41,t41,a7,b4
|
|
kusano |
2b45e8 |
LD b0,8*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t32,t32,a6,b5
|
|
kusano |
2b45e8 |
MADD t42,t42,a7,b5
|
|
kusano |
2b45e8 |
LD b1,9*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(PREB)
|
|
kusano |
2b45e8 |
MADD t13,t13,a4,b6
|
|
kusano |
2b45e8 |
MADD t23,t23,a5,b6
|
|
kusano |
2b45e8 |
LD a2,10*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a4,b7
|
|
kusano |
2b45e8 |
MADD t24,t24,a5,b7
|
|
kusano |
2b45e8 |
LD b2,10*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t33,t33,a6,b6
|
|
kusano |
2b45e8 |
MADD t43,t43,a7,b6
|
|
kusano |
2b45e8 |
LD a3,11*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a6,b7
|
|
kusano |
2b45e8 |
MADD t44,t44,a7,b7
|
|
kusano |
2b45e8 |
LD b3,11*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L13:
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a4,12*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD a5,13*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |
LD b4,12*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |
LD b5,13*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(PREB)
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |
LD a6,14*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
daddu A,A,16*SIZE # 4mr*4kr
|
|
kusano |
2b45e8 |
LD b6,14*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t33,t33,a2,b2
|
|
kusano |
2b45e8 |
MADD t43,t43,a3,b2
|
|
kusano |
2b45e8 |
daddu B,B,16*SIZE # 4nr*4kr
|
|
kusano |
2b45e8 |
LD a7,-1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a2,b3
|
|
kusano |
2b45e8 |
MADD t44,t44,a3,b3
|
|
kusano |
2b45e8 |
LD b7,-1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L14:
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a0,0(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t31,t31,a6,b4
|
|
kusano |
2b45e8 |
MADD t41,t41,a7,b4
|
|
kusano |
2b45e8 |
daddiu K,K,-1
|
|
kusano |
2b45e8 |
LD b0,0(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t32,t32,a6,b5
|
|
kusano |
2b45e8 |
MADD t42,t42,a7,b5
|
|
kusano |
2b45e8 |
daddu PREA,PREA,16*SIZE
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,12*SIZE(PREB)
|
|
kusano |
2b45e8 |
MADD t13,t13,a4,b6
|
|
kusano |
2b45e8 |
MADD t23,t23,a5,b6
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,-4*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t14,t14,a4,b7
|
|
kusano |
2b45e8 |
MADD t24,t24,a5,b7
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t33,t33,a6,b6
|
|
kusano |
2b45e8 |
MADD t43,t43,a7,b6
|
|
kusano |
2b45e8 |
daddu PREB,PREB,16*SIZE
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a6,b7
|
|
kusano |
2b45e8 |
MADD t44,t44,a7,b7
|
|
kusano |
2b45e8 |
bnez K,.L11
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L15: # kr=2
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP, 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L18
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L16:
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a4,4*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD a5,5*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |
LD b4,4*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,0(PREA)
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |
LD b5,5*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,0(PREB)
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |
LD a6,6*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
daddu A,A,8*SIZE # 4mr*2kr
|
|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t33,t33,a2,b2
|
|
kusano |
2b45e8 |
MADD t43,t43,a3,b2
|
|
kusano |
2b45e8 |
daddu B,B,8*SIZE # 4nr*2kr
|
|
kusano |
2b45e8 |
LD a7,-1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a2,b3
|
|
kusano |
2b45e8 |
MADD t44,t44,a3,b3
|
|
kusano |
2b45e8 |
LD b7,-1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L17:
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t31,t31,a6,b4
|
|
kusano |
2b45e8 |
MADD t41,t41,a7,b4
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t32,t32,a6,b5
|
|
kusano |
2b45e8 |
MADD t42,t42,a7,b5
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(PREB)
|
|
kusano |
2b45e8 |
MADD t13,t13,a4,b6
|
|
kusano |
2b45e8 |
MADD t23,t23,a5,b6
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t14,t14,a4,b7
|
|
kusano |
2b45e8 |
MADD t24,t24,a5,b7
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t33,t33,a6,b6
|
|
kusano |
2b45e8 |
MADD t43,t43,a7,b6
|
|
kusano |
2b45e8 |
daddu PREA,PREA,8*SIZE
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a6,b7
|
|
kusano |
2b45e8 |
MADD t44,t44,a7,b7
|
|
kusano |
2b45e8 |
daddu PREB,PREB,8*SIZE
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L18: # kr=1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L19
|
|
kusano |
2b45e8 |
LD ALPHA,152($sp) # Get ALPHA
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,0(PREB)
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
daddu A,A,4*SIZE # 4mr*kr
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
daddu B,B,4*SIZE # 4nr*kr
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,0(PREA)
|
|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |
daddu PREB,PREB,4*SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |
daddu PREA,PREA,4*SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t33,t33,a2,b2
|
|
kusano |
2b45e8 |
MADD t43,t43,a3,b2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t34,t34,a2,b3
|
|
kusano |
2b45e8 |
MADD t44,t44,a3,b3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L19: # Write Back to C
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c11,0(CO1) # GEMM write part
|
|
kusano |
2b45e8 |
LD c21,1*SIZE(CO1) # get 16 C
|
|
kusano |
2b45e8 |
LD c31,2*SIZE(CO1)
|
|
kusano |
2b45e8 |
LD c41,3*SIZE(CO1)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD c12,0(CO2)
|
|
kusano |
2b45e8 |
MADD t11,c11,t11,ALPHA
|
|
kusano |
2b45e8 |
LD c22,1*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t21,c21,t21,ALPHA
|
|
kusano |
2b45e8 |
LD c32,2*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t31,c31,t31,ALPHA
|
|
kusano |
2b45e8 |
LD c42,3*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t41,c41,t41,ALPHA
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD c13,0(CO3)
|
|
kusano |
2b45e8 |
MADD t12,c12,t12,ALPHA
|
|
kusano |
2b45e8 |
LD c23,1*SIZE(CO3)
|
|
kusano |
2b45e8 |
MADD t22,c22,t22,ALPHA
|
|
kusano |
2b45e8 |
LD c33,2*SIZE(CO3)
|
|
kusano |
2b45e8 |
MADD t32,c32,t32,ALPHA
|
|
kusano |
2b45e8 |
LD c43,3*SIZE(CO3)
|
|
kusano |
2b45e8 |
MADD t42,c42,t42,ALPHA
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD c14,0(CO4)
|
|
kusano |
2b45e8 |
MADD t13,c13,t13,ALPHA
|
|
kusano |
2b45e8 |
LD c24,1*SIZE(CO4)
|
|
kusano |
2b45e8 |
MADD t23,c23,t23,ALPHA
|
|
kusano |
2b45e8 |
LD c34,2*SIZE(CO4)
|
|
kusano |
2b45e8 |
MADD t33,c33,t33,ALPHA
|
|
kusano |
2b45e8 |
LD c44,3*SIZE(CO4)
|
|
kusano |
2b45e8 |
MADD t43,c43,t43,ALPHA
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t11,0(CO1)
|
|
kusano |
2b45e8 |
MADD t14,c14,t14,ALPHA
|
|
kusano |
2b45e8 |
ST t21,1*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t24,c24,t24,ALPHA
|
|
kusano |
2b45e8 |
ST t31,2*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t34,c34,t34,ALPHA
|
|
kusano |
2b45e8 |
ST t41,3*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t44,c44,t44,ALPHA
|
|
kusano |
2b45e8 |
daddiu M,M,-1 # M--
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t12,0(CO2)
|
|
kusano |
2b45e8 |
ST t22,1*SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t32,2*SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t42,3*SIZE(CO2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t13,0(CO3)
|
|
kusano |
2b45e8 |
ST t23,1*SIZE(CO3)
|
|
kusano |
2b45e8 |
ST t33,2*SIZE(CO3)
|
|
kusano |
2b45e8 |
ST t43,3*SIZE(CO3)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO1)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO3)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(CO1)
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(CO3)
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(CO4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t14,0(CO4)
|
|
kusano |
2b45e8 |
daddu CO1,CO1,4*SIZE # COi += 4
|
|
kusano |
2b45e8 |
ST t24,1*SIZE(CO4)
|
|
kusano |
2b45e8 |
daddu CO2,CO2,4*SIZE
|
|
kusano |
2b45e8 |
ST t34,2*SIZE(CO4)
|
|
kusano |
2b45e8 |
daddu CO3,CO3,4*SIZE
|
|
kusano |
2b45e8 |
ST t44,3*SIZE(CO4)
|
|
kusano |
2b45e8 |
daddu PREB,BO,SPANB
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bnez M,.L10
|
|
kusano |
2b45e8 |
daddu CO4,CO4,4*SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
MUL t11, ALPHA, t11 # TRMM write back part
|
|
kusano |
2b45e8 |
MUL t21, ALPHA, t21
|
|
kusano |
2b45e8 |
MUL t31, ALPHA, t31
|
|
kusano |
2b45e8 |
MUL t41, ALPHA, t41
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t11, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t12, ALPHA, t12
|
|
kusano |
2b45e8 |
ST t21, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t22, ALPHA, t22
|
|
kusano |
2b45e8 |
ST t31, 2 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t32, ALPHA, t32
|
|
kusano |
2b45e8 |
ST t41, 3 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t42, ALPHA, t42
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t12, 0 * SIZE(CO2)
|
|
kusano |
2b45e8 |
MUL t13, ALPHA, t13
|
|
kusano |
2b45e8 |
ST t22, 1 * SIZE(CO2)
|
|
kusano |
2b45e8 |
MUL t23, ALPHA, t23
|
|
kusano |
2b45e8 |
ST t32, 2 * SIZE(CO2)
|
|
kusano |
2b45e8 |
MUL t33, ALPHA, t33
|
|
kusano |
2b45e8 |
ST t42, 3 * SIZE(CO2)
|
|
kusano |
2b45e8 |
MUL t43, ALPHA, t43
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t13, 0 * SIZE(CO3)
|
|
kusano |
2b45e8 |
MUL t14, ALPHA, t14
|
|
kusano |
2b45e8 |
ST t23, 1 * SIZE(CO3)
|
|
kusano |
2b45e8 |
MUL t24, ALPHA, t24
|
|
kusano |
2b45e8 |
ST t33, 2 * SIZE(CO3)
|
|
kusano |
2b45e8 |
MUL t34, ALPHA, t34
|
|
kusano |
2b45e8 |
ST t43, 3 * SIZE(CO3)
|
|
kusano |
2b45e8 |
MUL t44, ALPHA, t44
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t14, 0 * SIZE(CO4)
|
|
kusano |
2b45e8 |
daddiu M,M,-1 # M--
|
|
kusano |
2b45e8 |
ST t24, 1 * SIZE(CO4)
|
|
kusano |
2b45e8 |
ST t34, 2 * SIZE(CO4)
|
|
kusano |
2b45e8 |
ST t44, 3 * SIZE(CO4)
|
|
kusano |
2b45e8 |
daddiu CO1,CO1, 4 * SIZE
|
|
kusano |
2b45e8 |
daddiu CO2,CO2, 4 * SIZE
|
|
kusano |
2b45e8 |
daddiu CO3,CO3, 4 * SIZE
|
|
kusano |
2b45e8 |
daddiu CO4,CO4, 4 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO1)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO3)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
FETCH $0,0(CO1)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO3)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP,KCO,KK
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu TEMP,TEMP, -4
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP,TEMP, -4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsll K,TEMP,2 + BASE_SHIFT
|
|
kusano |
2b45e8 |
dsll TEMP,TEMP,2 + BASE_SHIFT
|
|
kusano |
2b45e8 |
daddu A,A,K # mov A to the end of panel Ai
|
|
kusano |
2b45e8 |
daddu B,B,TEMP # mov B to the end of panel Bj
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu KK, KK,4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
bnez M,.L10
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 3
|
|
kusano |
2b45e8 |
.L14_M2:
|
|
kusano |
2b45e8 |
andi M, MCO, 2 # nr=4,mr=2
|
|
kusano |
2b45e8 |
beqz M,.L14_M1
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L20:
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
move B,BO # Reset B
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
dsll K,KK,1 + BASE_SHIFT # mr=2
|
|
kusano |
2b45e8 |
dsll TEMP,KK,2 + BASE_SHIFT # nr=4
|
|
kusano |
2b45e8 |
daddu A,A,K
|
|
kusano |
2b45e8 |
daddu B,BO,TEMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
MTC $0,t11
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t21,t11
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t22,t11
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t13,t11
|
|
kusano |
2b45e8 |
MOV t23,t11
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP,KCO,KK
|
|
kusano |
2b45e8 |
#elif defined(LEFT)
|
|
kusano |
2b45e8 |
daddiu TEMP,KK,2 # left part,controlled by mr, mr=2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP,KK,4 # right part,controlled by nr,nr=4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsra K,TEMP,2
|
|
kusano |
2b45e8 |
MOV t14,t11
|
|
kusano |
2b45e8 |
beqz K,.L25
|
|
kusano |
2b45e8 |
MOV t24,t11 # clear 2*4=8 results registers
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
move B,BO # Reset B
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
MTC $0,t11
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t21,t11
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t22,t11
|
|
kusano |
2b45e8 |
dsra K,KCO,2
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t13,t11
|
|
kusano |
2b45e8 |
MOV t23,t11
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MOV t14,t11
|
|
kusano |
2b45e8 |
beqz K,.L25
|
|
kusano |
2b45e8 |
MOV t24,t11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L21: # nr=4,mr=2,kr=4
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0
|
|
kusano |
2b45e8 |
LD a4,2*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a5,3*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
LD b4,4*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD b5,5*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |
LD b7,7*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
LD a2,4*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a3,5*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
LD b0,8*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD b1,9*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t13,t13,a4,b6
|
|
kusano |
2b45e8 |
LD b2,10*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t23,t23,a5,b6
|
|
kusano |
2b45e8 |
LD b3,11*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a4,b7
|
|
kusano |
2b45e8 |
MADD t24,t24,a5,b7
|
|
kusano |
2b45e8 |
daddiu K,K,-1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t11,t11,a2,b0
|
|
kusano |
2b45e8 |
LD a6,6*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a3,b0
|
|
kusano |
2b45e8 |
LD a7,7*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a2,b1
|
|
kusano |
2b45e8 |
LD b4,12*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a3,b1
|
|
kusano |
2b45e8 |
LD b5,13*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t13,t13,a2,b2
|
|
kusano |
2b45e8 |
LD b6,14*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t23,t23,a3,b2
|
|
kusano |
2b45e8 |
LD b7,15*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a2,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a3,b3
|
|
kusano |
2b45e8 |
daddu A,A,8*SIZE # 2mr*4kr
|
|
kusano |
2b45e8 |
daddu B,B,16*SIZE # 4nr*4kr
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t11,t11,a6,b4
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a7,b4
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t12,t12,a6,b5
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a7,b5
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t13,t13,a6,b6
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t23,t23,a7,b6
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t14,t14,a6,b7
|
|
kusano |
2b45e8 |
bnez K,.L21
|
|
kusano |
2b45e8 |
MADD t24,t24,a7,b7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L25: # kr=2 tail of the mr=2,nr=4 tile: two k-steps (.L26, .L27) run when bit 1 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,2 # kr=2: K = KCO & 2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,2 # TRMM build: same test on TEMP (k-count, set up outside this section)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L28 # bit clear: skip to the kr=1 tail
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L26: # k-step 1: multiply a0,a1 by b0..b3 while loading a4,a5,b4..b7 for k-step 2
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b, macro from common.h)
|
|
kusano |
2b45e8 |
LD a4,2*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a5,3*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
LD b4,4*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD b5,5*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |
LD b7,7*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
daddu A,A,4*SIZE # 2mr*2kr: A advances past both k-steps
|
|
kusano |
2b45e8 |
daddu B,B,8*SIZE # 4nr*2kr: B advances past both k-steps
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L27: # k-step 2: multiply a4,a5 by b4..b7 while reloading a0,a1,b0..b3 from the advanced pointers
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A) # operands for the kr=1 tail at .L28
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t13,t13,a4,b6
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t23,t23,a5,b6
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t14,t14,a4,b7
|
|
kusano |
2b45e8 |
MADD t24,t24,a5,b7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L28: # kr=1 tail of the mr=2,nr=4 tile: one last k-step when bit 0 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,1 # K = KCO & 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,1 # TRMM build: same test on TEMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L29 # nothing left: go to write-back
|
|
kusano |
2b45e8 |
LD ALPHA,152($sp) # Get ALPHA; sits right after the branch (delay slot), and .L29 uses ALPHA on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
daddu A,A,2*SIZE # 2mr*kr
|
|
kusano |
2b45e8 |
daddu B,B,4*SIZE # 4nr*kr
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t23,t23,a1,b2
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
MADD t24,t24,a1,b3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L29: # Write Back to C: store the 2x4 accumulator tile t11..t24 through CO1..CO4
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c11,0(CO1) # GEMM write back part: load the old C value, then C = C + t*ALPHA
|
|
kusano |
2b45e8 |
LD c21,1*SIZE(CO1)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD c12,0(CO2)
|
|
kusano |
2b45e8 |
LD c22,1*SIZE(CO2)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD c13,0(CO3)
|
|
kusano |
2b45e8 |
MADD t11,c11,t11,ALPHA # t11 = c11 + t11*ALPHA (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
LD c23,1*SIZE(CO3)
|
|
kusano |
2b45e8 |
MADD t21,c21,t21,ALPHA
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD c14,0(CO4)
|
|
kusano |
2b45e8 |
MADD t12,c12,t12,ALPHA
|
|
kusano |
2b45e8 |
LD c24,1*SIZE(CO4)
|
|
kusano |
2b45e8 |
MADD t22,c22,t22,ALPHA
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t11,0(CO1)
|
|
kusano |
2b45e8 |
MADD t13,c13,t13,ALPHA
|
|
kusano |
2b45e8 |
ST t21,1*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t23,c23,t23,ALPHA
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t12,0(CO2)
|
|
kusano |
2b45e8 |
MADD t14,c14,t14,ALPHA
|
|
kusano |
2b45e8 |
ST t22,1*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t24,c24,t24,ALPHA
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t13,0(CO3)
|
|
kusano |
2b45e8 |
daddu CO1,CO1,2*SIZE # COi += 2 elements (interleaved with the remaining stores)
|
|
kusano |
2b45e8 |
ST t23,1*SIZE(CO3)
|
|
kusano |
2b45e8 |
daddu CO2,CO2,2*SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t14,0(CO4)
|
|
kusano |
2b45e8 |
daddu CO3,CO3,2*SIZE
|
|
kusano |
2b45e8 |
ST t24,1*SIZE(CO4)
|
|
kusano |
2b45e8 |
daddu CO4,CO4,2*SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,0(CO1) # FETCH is ld into $0: value discarded, presumably a prefetch of the advanced C position
|
|
kusano |
2b45e8 |
FETCH $0,0(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO3)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO4)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
MUL t11, ALPHA, t11 # TRMM write back part: t = ALPHA*t, old C is not read
|
|
kusano |
2b45e8 |
MUL t21, ALPHA, t21
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t11, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t12, ALPHA, t12
|
|
kusano |
2b45e8 |
ST t21, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t22, ALPHA, t22
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t12, 0 * SIZE(CO2)
|
|
kusano |
2b45e8 |
MUL t13, ALPHA, t13
|
|
kusano |
2b45e8 |
ST t22, 1 * SIZE(CO2)
|
|
kusano |
2b45e8 |
MUL t23, ALPHA, t23
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t13, 0 * SIZE(CO3)
|
|
kusano |
2b45e8 |
MUL t14, ALPHA, t14
|
|
kusano |
2b45e8 |
ST t23, 1 * SIZE(CO3)
|
|
kusano |
2b45e8 |
MUL t24, ALPHA, t24
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t14, 0 * SIZE(CO4)
|
|
kusano |
2b45e8 |
ST t24, 1 * SIZE(CO4)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddiu CO1,CO1, 2 * SIZE # COi += 2 elements
|
|
kusano |
2b45e8 |
daddiu CO2,CO2, 2 * SIZE
|
|
kusano |
2b45e8 |
daddiu CO3,CO3, 2 * SIZE
|
|
kusano |
2b45e8 |
daddiu CO4,CO4, 2 * SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,0(CO1) # ld into $0 at the advanced C position (presumably a prefetch)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO3)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO4)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP,KCO,KK # TEMP = KCO - KK
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu TEMP,TEMP,-2 # LEFT: subtract mr (2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP,TEMP,-4 # otherwise: subtract nr (4)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsll K,TEMP,1 + BASE_SHIFT # K = TEMP*2 elements scaled by BASE_SHIFT (presumably log2 of the element size)
|
|
kusano |
2b45e8 |
dsll TEMP,TEMP,2 + BASE_SHIFT # TEMP = TEMP*4 elements, same scaling
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A,A,K # move A to next panel Ai
|
|
kusano |
2b45e8 |
daddu B,B,TEMP # move B to next panel Bj
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu KK, KK, 2 # LEFT: KK advances by mr (2)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 3
|
|
kusano |
2b45e8 |
.L14_M1: # mr=1 remainder of the nr=4 panel
|
|
kusano |
2b45e8 |
andi M,MCO,1 # mr=1: M = MCO & 1
|
|
kusano |
2b45e8 |
beqz M,.L0_N4_Loop # M = 0, finishing one panel Bj
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L30: # 1x4 tile setup: position A/B, zero t11..t14, preload a0 and b0..b3, K = k-count >> 2
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
move B,BO # Reset B
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
dsll K,KK, BASE_SHIFT # K = KK*1 element (mr=1), scaled by BASE_SHIFT
|
|
kusano |
2b45e8 |
dsll TEMP,KK,2 + BASE_SHIFT # TEMP = KK*4 elements (nr=4), same scaling
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A,A,K # skip KK k-steps of A
|
|
kusano |
2b45e8 |
daddu B,BO,TEMP # B = BO + KK k-steps of B
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD a0, 0 * SIZE(A) # a0
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MTC $0,t11 # t11 = 0 (moves integer register $0 into the accumulator)
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t12,t11 # remaining accumulators copied from the zeroed t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t13,t11
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t14,t11
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP, KCO, KK # TEMP = KCO - KK
|
|
kusano |
2b45e8 |
#elif defined(LEFT)
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 1 # TEMP = KK + mr (1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 4 # TEMP = KK + nr (4)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsra K,TEMP, 2 # K = TEMP >> 2: iterations of the 4-step loop .L31
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
beqz K,.L35 # no full iteration: go to the kr=2 tail
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
move B,BO # Reset B, GEMM part
|
|
kusano |
2b45e8 |
dsra K,KCO,2 # K = KCO >> 2 (KCO/4): iterations of the 4-step loop .L31
|
|
kusano |
2b45e8 |
LD a0, 0 * SIZE(A) # a0
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MTC $0,t11 # t11 = 0
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t13,t11
|
|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t14,t11
|
|
kusano |
2b45e8 |
beqz K,.L35 # no full iteration: go to the kr=2 tail
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B) # placed after the branch (delay slot): b3 is needed on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L31: # nr=4,mr=1,kr=4: main loop, four k-steps per iteration; loads for the next step are interleaved with the MADDs
|
|
kusano |
2b45e8 |
LD a1, 1*SIZE(A) # load a1
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # k-step 1: t1j += a0*b(j-1) (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b4,4*SIZE(B)
|
|
kusano |
2b45e8 |
LD b5,5*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
LD b7,7*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD a2, 2*SIZE(A) # a2
|
|
kusano |
2b45e8 |
MADD t11,t11,a1,b4 # k-step 2: a1 with b4..b7
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b0,8*SIZE(B)
|
|
kusano |
2b45e8 |
LD b1,9*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t12,t12,a1,b5
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b2,10*SIZE(B)
|
|
kusano |
2b45e8 |
LD b3,11*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t13,t13,a1,b6
|
|
kusano |
2b45e8 |
MADD t14,t14,a1,b7
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD a3, 3*SIZE(A) # a3
|
|
kusano |
2b45e8 |
MADD t11,t11,a2,b0 # k-step 3: a2 with b0..b3 (loaded from B[8..11])
|
|
kusano |
2b45e8 |
daddiu K,K,-1 # one loop iteration consumed
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b4,12*SIZE(B)
|
|
kusano |
2b45e8 |
LD b5,13*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t12,t12,a2,b1
|
|
kusano |
2b45e8 |
daddu A,A,4*SIZE # 1mr*4kr
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b6,14*SIZE(B)
|
|
kusano |
2b45e8 |
LD b7,15*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t13,t13,a2,b2
|
|
kusano |
2b45e8 |
MADD t14,t14,a2,b3
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD a0, 0*SIZE(A) # a0 for the next iteration (A already advanced)
|
|
kusano |
2b45e8 |
daddu B,B,16*SIZE # 4nr*4kr
|
|
kusano |
2b45e8 |
MADD t11,t11,a3,b4 # k-step 4: a3 with b4..b7 (loaded from B[12..15])
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b0,0*SIZE(B) # b0..b3 for the next iteration (B already advanced)
|
|
kusano |
2b45e8 |
MADD t12,t12,a3,b5
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t13,t13,a3,b6
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t14,t14,a3,b7
|
|
kusano |
2b45e8 |
bnez K,.L31 # loop while iterations remain
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B) # placed after the branch (delay slot): b3 is needed by the loop and by the tails
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L35: # kr=2 tail of the mr=1,nr=4 tile: two k-steps (.L36, .L37) when bit 1 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,2 # K = KCO & 2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,2 # TRMM build: same test on TEMP (k-count computed at .L30)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L38 # bit clear: skip to the kr=1 tail
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L36: # k-step 1: a0 with b0..b3, while loading a1 and b4..b7 for k-step 2
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A) # load a1
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b4,4*SIZE(B)
|
|
kusano |
2b45e8 |
LD b5,5*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
daddu A,A,2*SIZE # mr*2kr
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b7,7*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
daddu B,B,8*SIZE # 4nr*2kr
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L37: # k-step 2: a1 with b4..b7, while reloading a0 and b0..b3 from the advanced pointers
|
|
kusano |
2b45e8 |
LD a0,0(A) # operand for the kr=1 tail at .L38
|
|
kusano |
2b45e8 |
MADD t11,t11,a1,b4
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t12,t12,a1,b5
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD b2,2*SIZE(B)
|
|
kusano |
2b45e8 |
LD b3,3*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t13,t13,a1,b6
|
|
kusano |
2b45e8 |
MADD t14,t14,a1,b7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L38: # kr=1 tail of the mr=1,nr=4 tile: one last k-step when bit 0 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,1 # K = KCO & 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,1 # TRMM build: same test on TEMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L39 # nothing left: go to write-back
|
|
kusano |
2b45e8 |
LD ALPHA,152($sp) # Get ALPHA; sits right after the branch (delay slot), and .L39 uses ALPHA on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
daddu A,A,1*SIZE # A advances by mr*kr = 1 element
|
|
kusano |
2b45e8 |
daddu B,B,4*SIZE # B advances by nr*kr = 4 elements
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t13,t13,a0,b2
|
|
kusano |
2b45e8 |
MADD t14,t14,a0,b3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L39: # Write Back: store the 1x4 accumulator tile t11..t14 through CO1..CO4 (CO pointers are not advanced in this section)
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c11,0(CO1) # GEMM: load the old C values
|
|
kusano |
2b45e8 |
LD c12,0(CO2)
|
|
kusano |
2b45e8 |
LD c13,0(CO3)
|
|
kusano |
2b45e8 |
LD c14,0(CO4)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,c11,t11,ALPHA # t11 = c11 + t11*ALPHA (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
MADD t12,c12,t12,ALPHA
|
|
kusano |
2b45e8 |
MADD t13,c13,t13,ALPHA
|
|
kusano |
2b45e8 |
MADD t14,c14,t14,ALPHA
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t11,0(CO1)
|
|
kusano |
2b45e8 |
ST t12,0(CO2)
|
|
kusano |
2b45e8 |
ST t13,0(CO3)
|
|
kusano |
2b45e8 |
ST t14,0(CO4)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
MUL t11, ALPHA, t11 # TRMM: t = ALPHA*t, old C is not read
|
|
kusano |
2b45e8 |
MUL t12, ALPHA, t12
|
|
kusano |
2b45e8 |
MUL t13, ALPHA, t13
|
|
kusano |
2b45e8 |
MUL t14, ALPHA, t14
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t11, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
ST t12, 0 * SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t13, 0 * SIZE(CO3)
|
|
kusano |
2b45e8 |
ST t14, 0 * SIZE(CO4)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP, KCO, KK # TEMP = KCO - KK
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu TEMP, TEMP, -1 # LEFT: subtract mr (1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP, TEMP, -4 # otherwise: subtract nr (4)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
dsll K,TEMP, BASE_SHIFT # K = TEMP*1 element scaled by BASE_SHIFT (presumably log2 of the element size)
|
|
kusano |
2b45e8 |
dsll TEMP,TEMP, 2 + BASE_SHIFT # TEMP = TEMP*4 elements, same scaling
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A,A,K # skip the remaining k-steps of A
|
|
kusano |
2b45e8 |
daddu B,B,TEMP # skip the remaining k-steps of B
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu KK, KK, 1 # LEFT: KK advances by mr (1)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 3
|
|
kusano |
2b45e8 |
.L0_N4_Loop: # mc finished: bottom of the nr=4 panel loop
|
|
kusano |
2b45e8 |
daddiu N,N,-1 # N--
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && !defined(LEFT)
|
|
kusano |
2b45e8 |
daddiu KK, KK,4 # TRMM, not LEFT: KK advances by nr (4)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
bnez N,.L0_N4_Lb # more nr=4 panels remain
|
|
kusano |
2b45e8 |
move BO,B # Set BO point to next panel Bj; placed after the branch (delay slot)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
.L0_N2: # nr=2 remainder panel
|
|
kusano |
2b45e8 |
andi N,NCO,2 # nr = 2: N = NCO & 2
|
|
kusano |
2b45e8 |
beqz N,.L0_N1 # bit clear: go to .L0_N1
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L0_N2_Lb: # panel setup: column pointers, M loop count, A and its prefetch pointer
|
|
kusano |
2b45e8 |
move CO1,C # CO1 = C
|
|
kusano |
2b45e8 |
daddu CO2,C,LDC # CO2 = C + LDC
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
dsra M,MCO,2 # M = MCO >> 2: number of mr=4 tiles
|
|
kusano |
2b45e8 |
move A,AO # Reset A
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu PREA,AO,SPANA # PREA = AO + SPANA (pointer used by the FETCH instructions in .L41..)
|
|
kusano |
2b45e8 |
daddu C,CO2,LDC # C = CO2 + LDC: moves C past the two columns of this panel
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
move KK, OFFSET # TRMM LEFT: KK restarts from OFFSET for this panel
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz M,.L12_M2 # no mr=4 tile: go to the mr=2 remainder
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L40: # 4x2 tile setup: position A/B, zero t11..t42, preload a0..a3 and b0,b1, K = k-count >> 2
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
move B,BO # Reset B
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
dsll K,KK, 2 + BASE_SHIFT # K = KK*4 elements (mr=4), scaled by BASE_SHIFT
|
|
kusano |
2b45e8 |
dsll TEMP, KK,1 + BASE_SHIFT # TEMP = KK*2 elements (nr=2), same scaling
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A,A,K # skip KK k-steps of A
|
|
kusano |
2b45e8 |
daddu B,BO,TEMP # B = BO + KK k-steps of B
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
MTC $0,t11 # gemm part: t11 = 0 (moves integer register $0 into the accumulator)
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t21,t11 # remaining accumulators copied from the zeroed t11
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MOV t31,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t41,t11
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
MOV t22,t11
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP,KCO,KK # TEMP = KCO - KK
|
|
kusano |
2b45e8 |
#elif defined(LEFT)
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 4 # TEMP = KK + mr (4)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 2 # TEMP = KK + nr (2)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsra K,TEMP,2 # K = TEMP >> 2: iterations of the 4-step loop .L41
|
|
kusano |
2b45e8 |
MOV t32,t11
|
|
kusano |
2b45e8 |
beqz K,.L45 # no full iteration: go to the kr=2 tail
|
|
kusano |
2b45e8 |
MOV t42,t11 # placed after the branch (delay slot): t42 must be zeroed on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
move B,BO # Reset B
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
MTC $0,t11 # gemm part: t11 = 0
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t21,t11
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MOV t31,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t41,t11
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
dsra K,KCO,2 # K = KCO >> 2 (KCO/4): iterations of the 4-step loop .L41
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
MOV t22,t11
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t32,t11
|
|
kusano |
2b45e8 |
beqz K,.L45 # no full iteration: go to the kr=2 tail
|
|
kusano |
2b45e8 |
MOV t42,t11 # placed after the branch (delay slot): t42 must be zeroed on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L41: # nr=2,mr=kr=4: main loop, four k-steps per iteration (.L41-.L44); this is k-step 1 (a0..a3 with b0,b1)
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
LD a4,4*SIZE(A) # a4..a7, b4,b5: operands of k-step 2
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a5,5*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
LD b4,2*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD b5,3*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
LD a6,6*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |
LD a7,7*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,(PREA) # FETCH is ld into $0: value discarded, presumably a prefetch through PREA
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L42: # k-step 2: a4..a7 with b4,b5; loads a0..a3 from A[8..11] and b2,b3 from B[4..5]
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
LD a0,8*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a1,9*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
LD b2,4*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD b3,5*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a6,b4
|
|
kusano |
2b45e8 |
LD a2,10*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t41,t41,a7,b4
|
|
kusano |
2b45e8 |
LD a3,11*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,4*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t32,t32,a6,b5
|
|
kusano |
2b45e8 |
MADD t42,t42,a7,b5
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L43: # k-step 3: a0..a3 with b2,b3; loads a4..a7 from A[12..15] and b6,b7 from B[6..7]
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b2
|
|
kusano |
2b45e8 |
LD a4,12*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b2
|
|
kusano |
2b45e8 |
LD a5,13*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b3
|
|
kusano |
2b45e8 |
LD b6,6*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b3
|
|
kusano |
2b45e8 |
LD b7,7*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a2,b2
|
|
kusano |
2b45e8 |
LD a6,14*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b2
|
|
kusano |
2b45e8 |
LD a7,15*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,8*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b3
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b3
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A,A,16*SIZE # 4mr*4kr
|
|
kusano |
2b45e8 |
daddu B,B,8*SIZE # 2nr*4kr
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L44: # k-step 4: a4..a7 with b6,b7; reloads a0..a3 and b0,b1 from the advanced pointers
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b6
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b6
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a4,b7
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b7
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddiu K,K,-1 # one loop iteration consumed
|
|
kusano |
2b45e8 |
daddu PREA,PREA,16*SIZE # prefetch pointer advances by the same amount as A
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a6,b6
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t41,t41,a7,b6
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,-4*SIZE(PREA) # PREA was already advanced by 16*SIZE, so this is old PREA + 12*SIZE
|
|
kusano |
2b45e8 |
MADD t32,t32,a6,b7
|
|
kusano |
2b45e8 |
bnez K,.L41 # loop while iterations remain
|
|
kusano |
2b45e8 |
MADD t42,t42,a7,b7 # placed after the branch (delay slot): last MADD of k-step 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L45: # kr=2 tail of the mr=4,nr=2 tile: two k-steps (.L46, .L47) when bit 1 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,2 # K = KCO & 2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,2 # TRMM build: same test on TEMP (k-count computed at .L40)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L48 # bit clear: skip to the kr=1 tail
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L46: # k-step 1: a0..a3 with b0,b1, while loading a4..a7 and b4,b5 for k-step 2
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
LD a4,4*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD a5,5*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
LD b4,2*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD b5,3*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
LD a6,6*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |
LD a7,7*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,0(PREA) # ld into $0 through PREA (value discarded, presumably a prefetch)
|
|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
daddu B,B,4*SIZE # B+=2(nr)*2(kr)*8Byte=32
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |
daddu A,A,8*SIZE # A+=4(mr)*2(kr)*8Byte=8*SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L47: # k-step 2: a4..a7 with b4,b5, while reloading a0..a3 and b0,b1 from the advanced pointers
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A) # operands for the kr=1 tail at .L48
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a6,b4
|
|
kusano |
2b45e8 |
LD a2,2*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t41,t41,a7,b4
|
|
kusano |
2b45e8 |
LD a3,3*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,4*SIZE(PREA)
|
|
kusano |
2b45e8 |
MADD t32,t32,a6,b5
|
|
kusano |
2b45e8 |
MADD t42,t42,a7,b5
|
|
kusano |
2b45e8 |
daddu PREA,PREA,8*SIZE # prefetch pointer advances by the same amount as A
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L48: # kr=1 tail of the mr=4,nr=2 tile: one last k-step when bit 0 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,1 # K = KCO & 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,1 # TRMM build: same test on TEMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L49 # nothing left: go to write-back
|
|
kusano |
2b45e8 |
LD ALPHA,152($sp) # Get ALPHA; sits right after the branch (delay slot), and .L49 uses ALPHA on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,0(PREA) # ld into $0 through PREA (value discarded, presumably a prefetch)
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
daddu A,A,4*SIZE # A+=4(mr)*1(kr)*8Byte=32
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
daddu B,B,2*SIZE # B advances by nr*kr = 2 elements
|
|
kusano |
2b45e8 |
daddu PREA,PREA,4*SIZE # prefetch pointer advances by the same amount as A
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t31,t31,a2,b0
|
|
kusano |
2b45e8 |
MADD t41,t41,a3,b0
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t32,t32,a2,b1
|
|
kusano |
2b45e8 |
MADD t42,t42,a3,b1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L49: # Write Back: store the 4x2 accumulator tile t11..t42 through CO1/CO2, then loop to .L40 while M != 0
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c11,0(CO1) # gemm write back part: load the 8 old C values of the tile
|
|
kusano |
2b45e8 |
LD c21,1*SIZE(CO1)
|
|
kusano |
2b45e8 |
LD c31,2*SIZE(CO1)
|
|
kusano |
2b45e8 |
LD c41,3*SIZE(CO1)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
LD c12,0(CO2)
|
|
kusano |
2b45e8 |
MADD t11,c11,t11,ALPHA # t11 = c11 + t11*ALPHA (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
LD c22,1*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t21,c21,t21,ALPHA
|
|
kusano |
2b45e8 |
LD c32,2*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t31,c31,t31,ALPHA
|
|
kusano |
2b45e8 |
LD c42,3*SIZE(CO2)
|
|
kusano |
2b45e8 |
MADD t41,c41,t41,ALPHA
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t11,0(CO1)
|
|
kusano |
2b45e8 |
MADD t12,c12,t12,ALPHA
|
|
kusano |
2b45e8 |
ST t21,1*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t22,c22,t22,ALPHA
|
|
kusano |
2b45e8 |
ST t31,2*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t32,c32,t32,ALPHA
|
|
kusano |
2b45e8 |
ST t41,3*SIZE(CO1)
|
|
kusano |
2b45e8 |
MADD t42,c42,t42,ALPHA
|
|
kusano |
2b45e8 |
daddiu M,M,-1 # one mr=4 tile done
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t12,0(CO2)
|
|
kusano |
2b45e8 |
ST t22,1*SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t32,2*SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t42,3*SIZE(CO2)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO1) # FETCH is ld into $0: value discarded, presumably a prefetch of the next tile of C
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(CO1)
|
|
kusano |
2b45e8 |
FETCH $0,8*SIZE(CO2)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu CO1,CO1,4*SIZE # COi += 4 elements
|
|
kusano |
2b45e8 |
bnez M,.L40 # more mr=4 tiles in this panel
|
|
kusano |
2b45e8 |
daddu CO2,CO2,4*SIZE # placed after the branch (delay slot): CO2 advances on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
MUL t11, ALPHA, t11 # TRMM: t = ALPHA*t, old C is not read
|
|
kusano |
2b45e8 |
MUL t21, ALPHA, t21
|
|
kusano |
2b45e8 |
MUL t31, ALPHA, t31
|
|
kusano |
2b45e8 |
MUL t41, ALPHA, t41
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MUL t12, ALPHA, t12
|
|
kusano |
2b45e8 |
ST t11, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t22, ALPHA, t22
|
|
kusano |
2b45e8 |
ST t21, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t32, ALPHA, t32
|
|
kusano |
2b45e8 |
ST t31, 2 * SIZE(CO1)
|
|
kusano |
2b45e8 |
MUL t42, ALPHA, t42
|
|
kusano |
2b45e8 |
ST t41, 3 * SIZE(CO1)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
ST t12, 0 * SIZE(CO2)
|
|
kusano |
2b45e8 |
daddiu M,M,-1 # one mr=4 tile done
|
|
kusano |
2b45e8 |
ST t22, 1 * SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t32, 2 * SIZE(CO2)
|
|
kusano |
2b45e8 |
ST t42, 3 * SIZE(CO2)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddiu CO1,CO1, 4*SIZE # COi += 4 elements
|
|
kusano |
2b45e8 |
daddiu CO2,CO2, 4*SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
FETCH $0,0(CO1) # touch the next tile of C (CO already advanced)
|
|
kusano |
2b45e8 |
FETCH $0,0(CO2)
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO1) # offset is in elements (was byte offset 4: unaligned for ld, same cache line as 0(CO1)); matches the GEMM path
|
|
kusano |
2b45e8 |
FETCH $0,4*SIZE(CO2) # same correction for the second column
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if ( defined(LEFT) && defined(TRANSA)) ||(!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP, KCO, KK # TEMP = KCO - KK
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu TEMP, TEMP, -4 # LEFT: subtract mr (4)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP, TEMP, -2 # otherwise: subtract nr (2)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsll K,TEMP, 2 + BASE_SHIFT # K = TEMP*4 elements scaled by BASE_SHIFT (presumably log2 of the element size)
|
|
kusano |
2b45e8 |
dsll TEMP, TEMP, 1 + BASE_SHIFT # TEMP = TEMP*2 elements, same scaling
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A,A,K # skip the remaining k-steps of A
|
|
kusano |
2b45e8 |
daddu B,B,TEMP # skip the remaining k-steps of B
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
daddiu KK, KK, 4 # LEFT: KK advances by mr (4)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
bnez M,.L40 # more mr=4 tiles in this panel
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 3
|
|
kusano |
2b45e8 |
.L12_M2: # mr=2 remainder of the nr=2 panel
|
|
kusano |
2b45e8 |
andi M,MCO,2 # mr = 2: M = MCO & 2
|
|
kusano |
2b45e8 |
beqz M,.L12_M1 # bit clear: go to the mr=1 remainder
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L50: # 2x2 tile setup: position A/B, zero t11,t21,t12,t22, preload a0,a1,b0,b1, K = k-count >> 2
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL)
|
|
kusano |
2b45e8 |
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
move B,BO # reset B to the start of the current panel
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
dsll K, KK, 1 + BASE_SHIFT #mr=2: K = KK*2 elements, scaled by BASE_SHIFT
|
|
kusano |
2b45e8 |
dsll TEMP, KK, 1 + BASE_SHIFT #nr=2: TEMP = KK*2 elements, same scaling
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
daddu A, A, K # skip KK k-steps of A
|
|
kusano |
2b45e8 |
daddu B, BO, TEMP # B = BO + KK k-steps of B
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MTC $0,t11 # t11 = 0 (moves integer register $0 into the accumulator)
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MOV t21,t11 # remaining accumulators copied from the zeroed t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
|
|
kusano |
2b45e8 |
dsubu TEMP, KCO, KK # TEMP = KCO - KK
|
|
kusano |
2b45e8 |
#elif defined(LEFT)
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 2 # TEMP = KK + mr (2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu TEMP, KK, 2 # TEMP = KK + nr (2)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
dsra K,TEMP,2 # K = TEMP >> 2: iterations of the 4-step loop .L51
|
|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
beqz K,.L55 # no full iteration: go to the kr=2 tail
|
|
kusano |
2b45e8 |
MOV t22,t11 # placed after the branch (delay slot): t22 must be zeroed on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
move B,BO # GEMM: reset B to the start of the current panel
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A)
|
|
kusano |
2b45e8 |
dsra K,KCO,2 # K = KCO >> 2 (KCO/4): iterations of the 4-step loop .L51
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MTC $0,t11 # t11 = 0
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |
MOV t21,t11
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MOV t12,t11
|
|
kusano |
2b45e8 |
beqz K,.L55 # no full iteration: go to the kr=2 tail
|
|
kusano |
2b45e8 |
MOV t22,t11 # placed after the branch (delay slot): t22 must be zeroed on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L51: # nr=2 mr=2,kr=4: main loop, four k-steps per iteration; loads for the next step are interleaved with the MADDs
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # k-step 1: a0,a1 with b0,b1 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
LD a4,2*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
LD b4,2*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
LD a5,3*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD b5,3*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,t11,a4,b4 # k-step 2: a4,a5 with b4,b5
|
|
kusano |
2b45e8 |
LD a2,4*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD b2,4*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
LD a3,5*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
daddiu K,K,-1 # one loop iteration consumed
|
|
kusano |
2b45e8 |
LD b3,5*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,t11,a2,b2 # k-step 3: a2,a3 with b2,b3
|
|
kusano |
2b45e8 |
LD a6,6*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a3,b2
|
|
kusano |
2b45e8 |
daddu A,A,8*SIZE # A+=2(mr)*4(kr)*8Byte=8*SIZE
|
|
kusano |
2b45e8 |
LD b6,6*SIZE(B) # B is not advanced yet, so the offset is still relative to the old B
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a2,b3
|
|
kusano |
2b45e8 |
daddu B,B,8*SIZE # B += 2(nr)*4(kr) elements = 8*SIZE
|
|
kusano |
2b45e8 |
LD a7,-1*SIZE(A) # A already advanced: this is old A + 7*SIZE
|
|
kusano |
2b45e8 |
MADD t22,t22,a3,b3
|
|
kusano |
2b45e8 |
LD b7,-1*SIZE(B) # B already advanced: this is old B + 7*SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,t11,a6,b6 # k-step 4: a6,a7 with b6,b7
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A) # a0,a1,b0,b1 for the next iteration or the tails
|
|
kusano |
2b45e8 |
MADD t21,t21,a7,b6
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a6,b7
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t22,t22,a7,b7
|
|
kusano |
2b45e8 |
bnez K,.L51 # loop while iterations remain
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B) # placed after the branch (delay slot): b1 is needed by the loop and by the tails
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L55: # kr=2 tail of the mr=2,nr=2 tile: two k-steps (.L56, .L57) when bit 1 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,2 # K = KCO & 2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP,2 # TRMM build: same test on TEMP (k-count computed at .L50)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L58 # bit clear: skip to the kr=1 tail
|
|
kusano |
2b45e8 |
nop # fills the branch delay slot
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L56: # k-step 1: a0,a1 with b0,b1, while loading a4,a5,b4,b5 for k-step 2
|
|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
LD a4,2*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
daddu A,A,4*SIZE # A+=2(mr)*2(kr)*8Byte=32
|
|
kusano |
2b45e8 |
LD b4,2*SIZE(B) # B is not advanced yet
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
daddu B,B,4*SIZE # 2nr*2kr
|
|
kusano |
2b45e8 |
LD a5,-1*SIZE(A) # A already advanced: this is old A + 3*SIZE
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
LD b5,-1*SIZE(B) # B already advanced: this is old B + 3*SIZE
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
.L57: # k-step 2: a4,a5 with b4,b5, while reloading a0,a1,b0,b1 from the advanced pointers
|
|
kusano |
2b45e8 |
MADD t11,t11,a4,b4
|
|
kusano |
2b45e8 |
LD a0,0*SIZE(A) # operands for the kr=1 tail at .L58
|
|
kusano |
2b45e8 |
MADD t21,t21,a5,b4
|
|
kusano |
2b45e8 |
LD b0,0*SIZE(B)
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a4,b5
|
|
kusano |
2b45e8 |
LD a1,1*SIZE(A)
|
|
kusano |
2b45e8 |
MADD t22,t22,a5,b5
|
|
kusano |
2b45e8 |
LD b1,1*SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L58: # kr=1 tail of the mr=2,nr=2 tile: one last k-step when bit 0 of the k-count is set
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
andi K,KCO,1 # K = KCO & 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
andi K,TEMP, 1 # TRMM build: same test on TEMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
beqz K,.L59 # nothing left: go to write-back
|
|
kusano |
2b45e8 |
LD ALPHA,152($sp) # Get ALPHA; sits right after the branch (delay slot), so it is loaded on both paths
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t11,t11,a0,b0 # t11 += a0*b0 (MADD d,c,a,b presumably d = c + a*b)
|
|
kusano |
2b45e8 |
MADD t21,t21,a1,b0
|
|
kusano |
2b45e8 |
daddu A,A,2*SIZE # A+=2(mr)*1(kr)*8Byte=16
|
|
kusano |
2b45e8 |
daddu B,B,2*SIZE # 2nr*kr
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
MADD t12,t12,a0,b1
|
|
kusano |
2b45e8 |
MADD t22,t22,a1,b1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 2x2 tile write-back.
 *
 * GEMM build: loads the four C values addressed through CO1 and CO2,
 * combines each with its accumulator and ALPHA via MADD (presumably
 * c + t*ALPHA -- MADD is defined outside this chunk), stores them and
 * advances both column pointers by two elements.
 * TRMM build: scales the accumulators by ALPHA with MUL and stores them
 * without reading C.  CO1 and CO2 are advanced first, hence the negative
 * store offsets.  It then skips A and B over the k range not used by this
 * tile and, for LEFT, advances KK by the tile height.
 * FETCH is defined as ld at the top of the file; with $0 as destination
 * the loaded value is discarded, so these look like cache-touch
 * prefetches of the next C locations.
 */
.L59:						# Write Back
#ifndef TRMMKERNEL
	LD	c11,0(CO1)			# fetch the 4 C values of this tile
	LD	c21,1*SIZE(CO1)
	LD	c12,0(CO2)
	LD	c22,1*SIZE(CO2)

	MADD	t11,c11,t11,ALPHA
	MADD	t21,c21,t21,ALPHA
	MADD	t12,c12,t12,ALPHA
	MADD	t22,c22,t22,ALPHA

	ST	t11,0(CO1)
	ST	t21,1*SIZE(CO1)
	ST	t12,0(CO2)
	ST	t22,1*SIZE(CO2)

	daddu	CO1,CO1,2*SIZE
	daddu	CO2,CO2,2*SIZE

	FETCH	$0,0(CO1)
	FETCH	$0,0(CO2)
#else
	/* NOTE(review): M is recomputed from MCO at .L12_M1 right after this
	   block, so this decrement looks dead -- confirm before removing. */
	daddiu	M, M, -1
	daddiu	CO1,CO1, 2 * SIZE
	daddiu	CO2,CO2, 2 * SIZE
	MUL	t11, ALPHA, t11
	MUL	t21, ALPHA, t21
	MUL	t12, ALPHA, t12
	MUL	t22, ALPHA, t22

	ST	t11, -2 * SIZE(CO1)
	ST	t21, -1 * SIZE(CO1)
	ST	t12, -2 * SIZE(CO2)
	ST	t22, -1 * SIZE(CO2)

	FETCH	$0,0(CO1)
	FETCH	$0,0(CO2)

#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, KCO, KK
	/* Both arms subtract 2 because mr and nr are both 2 for this tile. */
#ifdef LEFT
	daddiu	TEMP, TEMP, -2
#else
	daddiu	TEMP, TEMP, -2
#endif

	dsll	K, TEMP, 1 + BASE_SHIFT		# byte offset: TEMP * 2(mr) elements
	dsll	TEMP, TEMP, 1 + BASE_SHIFT	# byte offset: TEMP * 2(nr) elements

	daddu	A, A, K
	daddu	B, B, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 2
#endif
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x2 tile (mr=1, nr=2): entry test and setup.
 *
 * .L12_M1 checks bit 0 of MCO; when clear there is no single leftover
 * row and control goes to .L0_N2_Loop.
 * .L60 positions B (and, for some TRMM variants, A) at the start of the
 * k range, preloads a0,b0,b1, zeroes the accumulators (MTC moves integer
 * register $0 into t11, then MOV copies it), and sets K to the number of
 * 4-step loop passes (count >> 2).  If that is zero the main loop .L61 is
 * skipped in favour of the remainder code at .L65.
 */
	.align	3
.L12_M1:
	andi	M,MCO,1				# mr = 1
	beqz	M,.L0_N2_Loop
	nop

.L60:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	B,BO				# Reset B
#else
	dsll	K, KK, 0 + BASE_SHIFT		# byte offset: KK * 1(mr) elements
	dsll	TEMP, KK, 1 + BASE_SHIFT	# byte offset: KK * 2(nr) elements

	daddu	A, A, K
	daddu	B, BO, TEMP
#endif
	LD	a0,0*SIZE(A)

	MTC	$0,t11
	MOV	t21,t11
	LD	b0,0*SIZE(B)

	MOV	t12,t11
	LD	b1,1*SIZE(B)

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, KCO, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1			# KK + mr
#else
	daddiu	TEMP, KK, 2			# KK + nr
#endif
	dsra	K,TEMP,2			# number of 4-step passes
	MOV	t22,t11
	beqz	K,.L65
	nop

#else
	dsra	K,KCO,2				# number of 4-step passes
	move	B,BO				# Reset B
	LD	a0,0*SIZE(A)

	MTC	$0,t11
	MOV	t21,t11
	LD	b0,0*SIZE(B)

	MOV	t12,t11
	LD	b1,1*SIZE(B)
	beqz	K,.L65
	MOV	t22,t11				# delay slot

#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x2 tile main loop: four k steps per pass (nr=2, mr=1, kr=4).
 *
 * Each pass reads 4 elements of A and 8 of B, accumulating into t11 and
 * t12.  Loads for the next step are interleaved with the MADDs of the
 * current one, and a0,b0,b1 are reloaded from the advanced pointers
 * before the loop branch so the next pass (or the remainder code) finds
 * them ready.  The last MADD sits in the branch delay slot.
 */
.L61:						# nr=2,mr=1,kr=4
	LD	a4, 1*SIZE(A)			# 2nd A element of this pass
	LD	b4, 2*SIZE(B)
	MADD	t11,t11,a0,b0

	LD	b5,3*SIZE(B)
	MADD	t12,t12,a0,b1

	LD	a2, 2*SIZE(A)			# 3rd A element of this pass
	LD	b2,4*SIZE(B)
	MADD	t11,t11,a4,b4

	LD	b3,5*SIZE(B)
	MADD	t12,t12,a4,b5

	LD	a6, 3*SIZE(A)			# 4th A element of this pass
	daddiu	K,K,-1
	LD	b6,6*SIZE(B)
	MADD	t11,t11,a2,b2

	LD	b7,7*SIZE(B)
	MADD	t12,t12,a2,b3
	daddu	A,A,4*SIZE			# A+=1(mr)*4(kr)*8Byte=32

	LD	a0, 0*SIZE(A)
	daddu	B,B,8*SIZE			# B+=2(nr)*4(kr)*8Byte=8*SIZE

	LD	b0,0*SIZE(B)
	MADD	t11,t11,a6,b6

	LD	b1,1*SIZE(B)
	bnez	K,.L61
	MADD	t12,t12,a6,b7			# delay slot
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x2 tile, k remainder of two (kr=2).
 *
 * Skips to .L68 when bit 1 of the k count is clear.  Otherwise .L66 uses
 * the preloaded a0,b0,b1 and loads a4,b4,b5 for the second step; .L67
 * uses those and reloads a0,b0,b1 from the advanced pointers.
 */
.L65:						# kr=2
#ifndef TRMMKERNEL
	andi	K,KCO,2
#else
	andi	K,TEMP,2
#endif
	beqz	K,.L68
	nop

.L66:
	LD	a4, 1*SIZE(A)			# second A element
	MADD	t11,t11,a0,b0
	LD	b4,2*SIZE(B)
	daddu	A,A,2*SIZE			# A+=1(mr)*2(kr)*8Byte=16

	LD	b5,3*SIZE(B)
	MADD	t12,t12,a0,b1
	daddu	B,B,4*SIZE			# B+=2(nr)*2(kr) elements

.L67:
	LD	a0,0(A)				# reload for the next step
	LD	b0,0*SIZE(B)
	MADD	t11,t11,a4,b4

	LD	b1,1*SIZE(B)
	MADD	t12,t12,a4,b5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x2 tile, final single k step (kr=1).
 *
 * The ALPHA load follows the branch directly and therefore runs whether
 * or not the branch is taken (delay slot, assuming noreorder mode).
 */
.L68:						# kr=1
#ifndef TRMMKERNEL
	andi	K,KCO,1
#else
	andi	K,TEMP,1
#endif
	beqz	K,.L69
	LD	ALPHA,152($sp)			# Get ALPHA (delay slot)

	MADD	t11,t11,a0,b0
	MADD	t12,t12,a0,b1
	daddu	A,A,1*SIZE			# A+=1(mr)*1(kr)*8Byte=8
	daddu	B,B,2*SIZE			# B+=2(nr)*1(kr) elements
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x2 tile write-back.
 *
 * GEMM build: reads one C value from each of the two columns, combines
 * it with the accumulator and ALPHA via MADD (presumably c + t*ALPHA),
 * and stores it back.
 * TRMM build: stores ALPHA times the accumulator without reading C, then
 * skips A and B past the unused part of the k range and, for LEFT,
 * advances KK by one row.
 * Both builds advance CO1 and CO2 by one element.
 */
.L69:						# Write Back
#ifndef TRMMKERNEL
	LD	c11,0(CO1)			# fetch the 2 C values of this tile
	LD	c12,0(CO2)

	MADD	t11,c11,t11,ALPHA
	MADD	t12,c12,t12,ALPHA

	ST	t11,0(CO1)
	ST	t12,0(CO2)

	daddu	CO1,CO1,1*SIZE
	daddu	CO2,CO2,1*SIZE

#else
	MUL	t11, ALPHA, t11
	MUL	t12, ALPHA, t12

	ST	t11, 0 * SIZE(CO1)
	ST	t12, 0 * SIZE(CO2)

	daddu	CO1,CO1,1*SIZE
	daddu	CO2,CO2,1*SIZE

#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, KCO, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -1			# minus mr
#else
	daddiu	TEMP, TEMP, -2			# minus nr
#endif

	dsll	K, TEMP, 0 + BASE_SHIFT		# byte offset: TEMP * 1(mr) elements
	dsll	TEMP, TEMP, 1 + BASE_SHIFT	# byte offset: TEMP * 2(nr) elements

	daddu	A, A, K
	daddu	B, B, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 1
#endif
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * End of the two-column (nr=2) panel.
 * For a right-side TRMM build KK moves on by the two columns just done.
 * BO is then set to the current B, which becomes the base that the
 * following panel resets B to.
 */
.L0_N2_Loop:
#if defined(TRMMKERNEL) && !defined(LEFT)
	daddiu	KK, KK, 2
#endif
	move	BO, B
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Single leftover column (nr=1): entry test and panel setup.
 *
 * Leaves through .L999 when bit 0 of NCO is clear.  Otherwise CO1 points
 * at the current C column, M is the number of 4-row tiles (MCO >> 2),
 * A is reset to AO and PREA is set SPANA bytes ahead of it.  A left-side
 * TRMM build restarts KK from OFFSET.  With no 4-row tile the code goes
 * on to the 2-row case at .L11_M2.  The C += LDC update follows the
 * branch directly, so it runs on both paths (delay slot).
 */
	.align	5
.L0_N1:
	andi	N,NCO,1				# nr = 1
	beqz	N,.L999
	nop

	move	CO1,C
	dsra	M,MCO,2				# number of 4-row tiles

	move	A,AO				# Reset A
	daddu	PREA,AO,SPANA
#if defined(TRMMKERNEL) && defined(LEFT)
	move	KK, OFFSET
#endif

	beqz	M,.L11_M2
	daddu	C,CO1,LDC			# delay slot
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 4x1 tile (mr=4, nr=1): per-tile setup.
 *
 * Positions B (and A for some TRMM variants) at the start of the k
 * range, preloads b0 and a0..a3, zeroes the four accumulators
 * t11,t21,t31,t41 (MTC moves integer register $0 into t11, MOV copies
 * it), and sets K to the number of 4-step loop passes.  If that is zero
 * the main loop .L71 is skipped in favour of the remainder code at .L75.
 */
.L70:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	B, BO				# Reset B
#else
	dsll	K, KK, 2 + BASE_SHIFT		# byte offset: KK * 4(mr) elements
	dsll	TEMP, KK, 0 + BASE_SHIFT	# byte offset: KK * 1(nr) elements

	daddu	A, A, K
	daddu	B, BO, TEMP
#endif
	LD	b0, 0*SIZE(B)

	MTC	$0,t11
	LD	a0,0*SIZE(A)
	MOV	t21,t11
	LD	a1,1*SIZE(A)

	MOV	t31,t11
	LD	a2,2*SIZE(A)
	MOV	t41,t11
	LD	a3,3*SIZE(A)


#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, KCO, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 4			# KK + mr
#else
	daddiu	TEMP, KK, 1			# KK + nr
#endif
	dsra	K,TEMP,2			# number of 4-step passes
	beqz	K,.L75
	nop
#else
	move	B, BO				# Reset B
	dsra	K,KCO,2				# number of 4-step passes
	LD	b0, 0*SIZE(B)

	MTC	$0,t11
	LD	a0,0*SIZE(A)
	MOV	t21,t11
	LD	a1,1*SIZE(A)

	MOV	t31,t11
	LD	a2,2*SIZE(A)
	MOV	t41,t11
	beqz	K,.L75
	LD	a3,3*SIZE(A)			# delay slot

#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 4x1 tile main loop: four k steps per pass (nr=1, mr=kr=4).
 *
 * Labels .L71 to .L74 mark the four steps.  Each step multiplies four A
 * values by one B value into t11,t21,t31,t41 while loading the operands
 * of the following step.  A advances by 16 elements, B by 4 and PREA by
 * 16 elements per pass.  FETCH is defined as ld at the top of the file;
 * with $0 as destination the value is discarded, so these look like
 * cache-touch prefetches ahead of A.  The final FETCH sits in the
 * branch delay slot.
 */
.L71:						# nr=1,mr=kr=4
	LD	b4, 1*SIZE(B)			# B value for step 2
	MADD	t11,t11,a0,b0

	LD	a4, 4*SIZE(A)
	MADD	t21,t21,a1,b0

	LD	a5, 5*SIZE(A)
	FETCH	$0,(PREA)

	LD	a6,6*SIZE(A)
	MADD	t31,t31,a2,b0

	LD	a7,7*SIZE(A)
	MADD	t41,t41,a3,b0

.L72:
	LD	b2, 2*SIZE(B)			# B value for step 3
	MADD	t11,t11,a4,b4

	LD	a0,8*SIZE(A)
	MADD	t21,t21,a5,b4

	LD	a1,9*SIZE(A)
	FETCH	$0,4*SIZE(PREA)

	LD	a2,10*SIZE(A)
	MADD	t31,t31,a6,b4

	LD	a3,11*SIZE(A)
	MADD	t41,t41,a7,b4

.L73:
	LD	b6, 3*SIZE(B)			# B value for step 4
	MADD	t11,t11,a0,b2

	LD	a4,12*SIZE(A)
	daddu	B,B,4*SIZE			# B+=1(nr)*4(kr)*8Byte=32

	LD	a5,13*SIZE(A)
	MADD	t21,t21,a1,b2

	LD	a6,14*SIZE(A)
	FETCH	$0,8*SIZE(PREA)
	MADD	t31,t31,a2,b2

	LD	a7,15*SIZE(A)
	MADD	t41,t41,a3,b2
	daddu	A,A,16*SIZE			# A+=4(mr)*4(kr)*8Byte=16*SIZE

.L74:
	LD	b0, 0*SIZE(B)			# operands for the next pass
	MADD	t11,t11,a4,b6

	LD	a0,0*SIZE(A)
	daddu	PREA,PREA,16*SIZE

	LD	a1,1*SIZE(A)
	MADD	t21,t21,a5,b6

	LD	a2,2*SIZE(A)
	daddiu	K,K,-1
	MADD	t31,t31,a6,b6

	LD	a3,3*SIZE(A)
	MADD	t41,t41,a7,b6
	bnez	K,.L71
	FETCH	$0,-32(PREA)			# delay slot
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 4x1 tile, k remainder of two (kr=2).
 *
 * Skips to .L78 when bit 1 of the k count is clear.  Otherwise .L76 uses
 * the preloaded a0..a3,b0 and loads a4..a7,b4; .L77 uses those and
 * reloads a0..a3,b0 from the advanced pointers.  PREA advances by
 * 8 elements, matching the advance of A.
 */
.L75:						# kr=2
#ifndef TRMMKERNEL
	andi	K,KCO,2
#else
	andi	K,TEMP,2
#endif
	beqz	K,.L78
	nop

.L76:
	LD	b4, 1*SIZE(B)
	MADD	t11,t11,a0,b0

	LD	a4,4*SIZE(A)
	daddu	B,B,2*SIZE			# B+=1(nr)*2(kr)*8Byte=16

	LD	a5,5*SIZE(A)
	MADD	t21,t21,a1,b0
	FETCH	$0,0(PREA)

	LD	a6,6*SIZE(A)
	MADD	t31,t31,a2,b0

	LD	a7,7*SIZE(A)
	MADD	t41,t41,a3,b0
	daddu	A,A,8*SIZE			# A+=4(mr)*2(kr)*8Byte=8*SIZE

.L77:
	LD	b0,0(B)
	MADD	t11,t11,a4,b4

	LD	a0,0*SIZE(A)
	MADD	t21,t21,a5,b4
	FETCH	$0,4*SIZE(PREA)

	LD	a1,1*SIZE(A)
	MADD	t31,t31,a6,b4

	LD	a2,2*SIZE(A)
	MADD	t41,t41,a7,b4

	LD	a3,3*SIZE(A)
	daddu	PREA,PREA,8*SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 4x1 tile, final single k step (kr=1).
 *
 * The ALPHA load follows the branch directly and therefore runs whether
 * or not the branch is taken (delay slot, assuming noreorder mode).
 */
.L78:						# kr=1
#ifndef TRMMKERNEL
	andi	K,KCO,1
#else
	andi	K,TEMP,1
#endif
	beqz	K,.L79
	LD	ALPHA,152($sp)			# Get ALPHA (delay slot)

	FETCH	$0,0(PREA)
	MADD	t11,t11,a0,b0
	MADD	t21,t21,a1,b0
	daddu	A,A,4*SIZE			# A+=4(mr)*1(kr)*8Byte=32

	MADD	t31,t31,a2,b0
	MADD	t41,t41,a3,b0
	daddu	B,B,1*SIZE			# B+=1(nr)*1(kr) elements
	daddu	PREA,PREA,4*SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 4x1 tile write-back and loop over the 4-row tiles.
 *
 * GEMM build: reads four consecutive C values through CO1, combines each
 * with its accumulator and ALPHA via MADD (presumably c + t*ALPHA),
 * stores them, and loops to .L70 while M is non-zero.  The CO1 advance
 * is in the branch delay slot, so it happens on both paths.
 * TRMM build: stores ALPHA times each accumulator without reading C,
 * advances CO1, skips A and B past the unused part of the k range,
 * advances KK by four rows for LEFT, then loops to .L70 while M is
 * non-zero.
 * The FETCH lines load into $0 (value discarded); presumably they touch
 * the C locations ahead of CO1.
 */
.L79:						# Write Back
#ifndef TRMMKERNEL
	LD	c11,0(CO1)			# fetch the 4 C values of this tile
	LD	c21,1*SIZE(CO1)
	LD	c31,2*SIZE(CO1)
	LD	c41,3*SIZE(CO1)

	MADD	t11,c11,t11,ALPHA
	MADD	t21,c21,t21,ALPHA
	MADD	t31,c31,t31,ALPHA
	MADD	t41,c41,t41,ALPHA

	ST	t11,0(CO1)
	ST	t21,1*SIZE(CO1)
	ST	t31,2*SIZE(CO1)
	ST	t41,3*SIZE(CO1)
	daddiu	M,M,-1				# M--

	FETCH	$0,4*SIZE(CO1)
	FETCH	$0,8*SIZE(CO1)

	bnez	M,.L70				# M!=0
	daddu	CO1,CO1,4*SIZE			# COx += 4*8Byte (delay slot)
#else
	daddiu	M,M,-1				# M--
	MUL	t11, ALPHA, t11
	MUL	t21, ALPHA, t21
	MUL	t31, ALPHA, t31
	MUL	t41, ALPHA, t41

	ST	t11,0(CO1)
	ST	t21,1*SIZE(CO1)
	ST	t31,2*SIZE(CO1)
	ST	t41,3*SIZE(CO1)

	FETCH	$0,4*SIZE(CO1)
	FETCH	$0,8*SIZE(CO1)

	daddu	CO1,CO1,4*SIZE
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, KCO, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -4			# minus mr
#else
	daddiu	TEMP, TEMP, -1			# minus nr
#endif

	dsll	K, TEMP, 2 + BASE_SHIFT		# byte offset: TEMP * 4(mr) elements
	dsll	TEMP, TEMP, 0 + BASE_SHIFT	# byte offset: TEMP * 1(nr) elements

	daddu	A, A,K
	daddu	B, B, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 4
#endif
	bnez	M,.L70
	nop
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 2x1 tile (mr=2, nr=1): entry test and setup.
 *
 * .L11_M2 checks bit 1 of MCO; when clear control goes to the 1-row case
 * at .L11_M1.  .L80 positions B (and A for some TRMM variants), preloads
 * b0,a0,a1, zeroes t11 and t21, and sets K to the number of 4-step loop
 * passes.  If that is zero the main loop .L81 is skipped in favour of
 * the remainder code at .L85.
 */
	.align	3
.L11_M2:
	andi	M,MCO,2				# mr = 2
	beqz	M,.L11_M1
	nop

.L80:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	B, BO
#else
	dsll	K, KK, 1 + BASE_SHIFT		# byte offset: KK * 2(mr) elements
	dsll	TEMP, KK, 0 + BASE_SHIFT	# byte offset: KK * 1(nr) elements

	daddu	A, A, K
	daddu	B, BO, TEMP
#endif
	LD	b0, 0*SIZE(B)

	MTC	$0,t11
	MOV	t21,t11
	LD	a0,0*SIZE(A)
	LD	a1,1*SIZE(A)

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, KCO, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 2			# KK + mr
#else
	daddiu	TEMP, KK, 1			# KK + nr
#endif
	dsra	K,TEMP,2			# K = TEMP/4 (4-step passes)
	beqz	K,.L85
	nop
#else
	move	B, BO
	dsra	K,KCO,2				# K = KCO/4 (4-step passes)
	LD	b0, 0*SIZE(B)

	MTC	$0,t11
	MOV	t21,t11
	LD	a0,0*SIZE(A)

	beqz	K,.L85
	LD	a1,1*SIZE(A)			# delay slot

#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 2x1 tile main loop: four k steps per pass (nr=1, mr=2, kr=4).
 *
 * Each pass reads 8 elements of A and 4 of B, accumulating into t11 and
 * t21.  a0,a1,b0 are reloaded from the advanced pointers before the loop
 * branch; the last MADD sits in the branch delay slot.
 */
.L81:						# nr=1,mr=2,kr=4
	LD	b4, 1*SIZE(B)
	LD	a4,2*SIZE(A)
	MADD	t11,t11,a0,b0
	LD	a5,3*SIZE(A)
	MADD	t21,t21,a1,b0

	LD	b2, 2*SIZE(B)
	LD	a2,4*SIZE(A)
	MADD	t11,t11,a4,b4
	LD	a3,5*SIZE(A)
	MADD	t21,t21,a5,b4

	LD	b6, 3*SIZE(B)
	LD	a6,6*SIZE(A)
	MADD	t11,t11,a2,b2
	LD	a7,7*SIZE(A)
	MADD	t21,t21,a3,b2

	daddu	A,A,8*SIZE			# A+=2(mr)*4(kr)*8Byte=8*SIZE
	daddu	B,B,4*SIZE			# B+=1(nr)*4(kr)*8Byte=32

	LD	b0, 0*SIZE(B)
	daddiu	K,K,-1

	LD	a0,0*SIZE(A)
	MADD	t11,t11,a6,b6

	LD	a1,1*SIZE(A)
	bnez	K,.L81
	MADD	t21,t21,a7,b6			# delay slot
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 2x1 tile, k remainder of two (kr=2).
 *
 * Skips to .L88 when bit 1 of the k count is clear.  Otherwise .L86 runs
 * two k steps and reloads a0,a1,b0 from the advanced pointers for the
 * code that follows.
 */
.L85:						# kr=2
#ifndef TRMMKERNEL
	andi	K,KCO,2
#else
	andi	K,TEMP,2
#endif
	beqz	K,.L88
	nop

.L86:
	LD	b4, 1*SIZE(B)
	LD	a4,2*SIZE(A)
	MADD	t11,t11,a0,b0
	LD	a5,3*SIZE(A)
	MADD	t21,t21,a1,b0

	daddu	A,A,4*SIZE			# A+=2(mr)*2(kr)*8Byte=32
	daddu	B,B,2*SIZE			# B+=1(nr)*2(kr)*8Byte=16

	LD	b0,0(B)
	LD	a0,0*SIZE(A)
	MADD	t11,t11,a4,b4
	LD	a1,1*SIZE(A)
	MADD	t21,t21,a5,b4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 2x1 tile, final single k step (kr=1).
 *
 * The ALPHA load follows the branch directly and therefore runs whether
 * or not the branch is taken (delay slot, assuming noreorder mode).
 */
.L88:						# kr=1
#ifndef TRMMKERNEL
	andi	K,KCO,1
#else
	andi	K,TEMP,1
#endif
	beqz	K,.L89
	LD	ALPHA,152($sp)			# Get ALPHA (delay slot)

	MADD	t11,t11,a0,b0
	MADD	t21,t21,a1,b0
	daddu	A,A,2*SIZE			# A+=2(mr)*1(kr)*8Byte=16
	daddu	B,B,1*SIZE			# B+=1(nr)*1(kr) elements
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 2x1 tile write-back.
 *
 * GEMM build: reads two consecutive C values through CO1, combines each
 * with its accumulator and ALPHA via MADD (presumably c + t*ALPHA),
 * stores them and advances CO1 by two elements.
 * TRMM build: advances CO1 first (hence the negative store offsets),
 * stores ALPHA times each accumulator without reading C, skips A and B
 * past the unused part of the k range and, for LEFT, advances KK by two.
 * FETCH loads into $0 (value discarded); presumably a cache touch of the
 * next C location.
 */
.L89:						# Write Back
#ifndef TRMMKERNEL
	LD	c11,0(CO1)			# fetch the 2 C values of this tile
	LD	c21,1*SIZE(CO1)

	MADD	t11,c11,t11,ALPHA
	MADD	t21,c21,t21,ALPHA

	ST	t11,0(CO1)
	ST	t21,1*SIZE(CO1)

	FETCH	$0,2*SIZE(CO1)

	daddu	CO1,CO1,2*SIZE			# COx += 2*8Byte

#else
	daddu	CO1,CO1,2*SIZE			# COx += 2*8Byte
	MUL	t11, ALPHA, t11
	MUL	t21, ALPHA, t21

	FETCH	$0,0(CO1)
	ST	t11, -2 * SIZE(CO1)
	ST	t21, -1 * SIZE(CO1)
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, KCO, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -2			# minus mr
#else
	daddiu	TEMP, TEMP, -1			# minus nr
#endif

	dsll	K, TEMP, 1 + BASE_SHIFT		# byte offset: TEMP * 2(mr) elements
	dsll	TEMP, TEMP, 0 + BASE_SHIFT	# byte offset: TEMP * 1(nr) elements

	daddu	A, A, K
	daddu	B, B, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 2
#endif
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x1 tile (mr=1, nr=1): entry test and setup.
 *
 * .L11_M1 leaves through .L999 when bit 0 of MCO is clear.  .L90
 * positions B (and A for some TRMM variants), preloads a0 and b0, zeroes
 * t11 (MTC moves integer register $0 into it) and sets K to the number
 * of 4-step loop passes.  If that is zero the main loop .L91 is skipped
 * in favour of the remainder code at .L95.
 */
	.align	3
.L11_M1:
	andi	M,MCO,1				# mr = 1
	beqz	M,.L999
	nop

.L90:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	B, BO
#else
	dsll	K, KK, 0 + BASE_SHIFT		# byte offset: KK * 1(mr) elements
	dsll	TEMP, KK, 0 + BASE_SHIFT	# byte offset: KK * 1(nr) elements

	daddu	A, A, K
	daddu	B, BO, TEMP
#endif
	LD	a0, 0*SIZE(A)
	LD	b0, 0*SIZE(B)
	MTC	$0,t11

	/* mr and nr are both 1 here, so the last two arms are identical. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, KCO, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	K, TEMP, 2			# number of 4-step passes
	beqz	K,.L95
	nop

#else
	move	B, BO
	LD	a0, 0*SIZE(A)
	LD	b0, 0*SIZE(B)
	dsra	K,KCO,2				# number of 4-step passes
	beqz	K,.L95
	MTC	$0,t11				# delay slot
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x1 tile main loop: four k steps per pass (nr=mr=1, kr=4).
 *
 * Accumulates four A*B products into t11 per pass, advances A and B by
 * four elements each and reloads a0,b0 for the next pass.
 */
.L91:						# nr=mr=1,kr=4
	LD	a4, 1*SIZE(A)
	LD	b4, 1*SIZE(B)
	MADD	t11,t11,a0,b0

	LD	a2, 2*SIZE(A)
	LD	b2, 2*SIZE(B)
	MADD	t11,t11,a4,b4

	LD	a6, 3*SIZE(A)
	LD	b6, 3*SIZE(B)
	MADD	t11,t11,a2,b2

	daddu	A,A,4*SIZE			# A+=1(mr)*4(kr)*8Byte=32
	daddu	B,B,4*SIZE			# B+=1(nr)*4(kr)*8Byte=32

	LD	a0, 0*SIZE(A)
	LD	b0, 0*SIZE(B)
	MADD	t11,t11,a6,b6

	daddiu	K,K,-1
	bnez	K,.L91
	nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x1 tile, k remainder of two (kr=2).
 *
 * Skips to .L98 when bit 1 of the k count is clear.  Otherwise .L96 runs
 * two k steps and reloads a0,b0 from the advanced pointers.
 */
.L95:						# kr=2
#ifndef TRMMKERNEL
	andi	K,KCO,2
#else
	andi	K,TEMP,2
#endif
	beqz	K,.L98
	nop

.L96:
	LD	a4, 1*SIZE(A)
	LD	b4, 1*SIZE(B)
	MADD	t11,t11,a0,b0
	daddu	B,B,2*SIZE			# B+=1(nr)*2(kr)*8Byte=16
	daddu	A,A,2*SIZE			# A+=1(mr)*2(kr)*8Byte=16

	LD	b0,0(B)
	LD	a0,0(A)
	MADD	t11,t11,a4,b4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x1 tile, final single k step (kr=1).
 *
 * The ALPHA load follows the branch directly and therefore runs whether
 * or not the branch is taken (delay slot, assuming noreorder mode).
 * A and B are not advanced here; no later code in this routine reads
 * through them.
 */
.L98:						# kr=1
#ifndef TRMMKERNEL
	andi	K,KCO,1
#else
	andi	K,TEMP,1
#endif
	beqz	K,.L99
	LD	ALPHA,152($sp)			# Get ALPHA (delay slot)

	MADD	t11,t11,a0,b0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * 1x1 tile write-back.
 *
 * GEMM build: reads the single C value at CO1, combines it with t11 and
 * ALPHA via MADD (presumably c + t*ALPHA) and stores it back.
 * TRMM build: stores ALPHA times t11 without reading C.
 * Control then falls through to the exit code at .L999.
 */
.L99:						# Write Back
#ifndef TRMMKERNEL
	LD	c11,0(CO1)			# fetch the single C value
	MADD	t11,c11,t11,ALPHA
	ST	t11,0(CO1)

#else
	MUL	t11, ALPHA, t11

	ST	t11, 0 * SIZE(CO1)
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Common exit: restore saved registers and return.
 *
 * Reloads integer registers $16-$25 and floating-point registers
 * $f20-$f28 from the stack frame; presumably this mirrors the saves done
 * in the prologue, which is outside this chunk -- confirm the offsets
 * against it.  The 160-byte frame is released by the instruction after
 * the jump, i.e. in its delay slot, so it executes before control
 * reaches the caller.
 */
.L999:						# End
	ld	$16, 0($sp)
	ld	$17, 8($sp)
	ld	$18, 16($sp)
	ld	$19, 24($sp)
	ld	$20, 32($sp)
	ld	$21, 40($sp)
	ld	$22, 48($sp)
	LD	$f24, 56($sp)
	LD	$f25, 64($sp)
	LD	$f26, 72($sp)
	LD	$f27, 80($sp)
	LD	$f28, 88($sp)
	ld	$23, 96($sp)
	ld	$24, 104($sp)
	ld	$25, 112($sp)
	LD	$f20,120($sp)
	LD	$f21,128($sp)
	LD	$f22,136($sp)
	LD	$f23,144($sp)

	j	$31				# return to caller
	daddiu	$sp, $sp, 160			# pop frame (delay slot)

	EPILOGUE
|