/*
 * Preprocessor setup for this kernel: exported symbol name, stack frame
 * size, raw encoders for the quad-word FP load/store, and the register
 * aliases used by the code that follows.
 */
#define REALNAME	ASMNAME
#define ASSEMBLER
#include "common.h"

/*
 * FETCH is an ordinary doubleword load.  The kernel only ever issues it with
 * $0 as the destination, so the loaded value is discarded (presumably used
 * as a cache prefetch -- confirm against the target core documentation).
 */
#define FETCH		ld

/* Bytes the prologue subtracts from $sp. */
#define STACKSIZE	160

/*
 * gsLQC1 / gsSQC1 emit the instruction as a raw .word, so every operand must
 * be a plain number (the F<n> / R<n> constants below), never a $-name.
 *
 *   base   - number of the integer register holding the address
 *   fq, ft - numbers of the two FP registers transferred; call sites pass
 *            the higher-numbered one first, e.g. gsLQC1(R13, F9, F8, 0)
 *            is annotated "B1 B2"
 *   offset - displacement index, placed in bits 14..6.  The unrolled loop
 *            uses indices 0..7 and then advances AO by 32 * SIZE, so one
 *            index step is presumably 16 bytes -- TODO confirm against the
 *            Loongson manual.
 *
 * Word layout: opcode[31:26] base[25:21] ft[20:16] 1[15] offset[14:6] 1[5] fq[4:0]
 *
 * Every argument is parenthesized so that a compound expression cannot be
 * regrouped by operator precedence, and offset is masked to its 9-bit field
 * so a negative or oversized value cannot overwrite the ft/base/opcode bits.
 * For the operands used in this file the emitted words are unchanged.
 */
#define gsLQC1(base,fq,ft,offset) \
	.word ((0x32 << 26) | ((base) << 21) | ((ft) << 16) | (0x1 << 15) | (((offset) & 0x1ff) << 6) | (0x1 << 5) | (fq))
#define gsSQC1(base,fq,ft,offset) \
	.word ((0x3A << 26) | ((base) << 21) | ((ft) << 16) | (0x1 << 15) | (((offset) & 0x1ff) << 6) | (0x1 << 5) | (fq))

##### Parameter registers ####
#define M	$4
#define N	$5
#define K	$6
#define A	$8
#define B	$9
#define C	$10
#define LDC	$11

#### Pointer A, B, C ####
/* Working pointers.  AO..CO4 are $12..$17 and must stay in step with the
 * R12..R17 constants further down. */
#define AO	$12
#define BO	$13

#define CO1	$14
#define CO2	$15
#define CO3	$16
#define CO4	$17

/* Addresses handed to FETCH inside the K loop. */
#define PREA	$18
#define PREB	$19

#### Used registers ####
/* Operands loaded from AO ($f0..$f7). */
#define A1	$f0
#define A2	$f1
#define A3	$f2
#define A4	$f3
#define A5	$f4
#define A6	$f5
#define A7	$f6
#define A8	$f7

/* Operands loaded from BO, plus their PLU-shuffled copies ($f8..$f15). */
#define B1	$f8
#define B2	$f9
#define B3	$f10
#define B4	$f11
#define B5	$f12
#define B6	$f13
#define B7	$f14
#define B8	$f15

/* Accumulators ($f16..$f31), cleared before each K loop. */
#define C11	$f16
#define C12	$f17
#define C21	$f18
#define C22	$f19
#define C31	$f20
#define C32	$f21
#define C41	$f22
#define C42	$f23
#define C13	$f24
#define C14	$f25
#define C23	$f26
#define C24	$f27
#define C33	$f28
#define C34	$f29
#define C43	$f30
#define C44	$f31

/* Loop counters: I is derived from M, J from N, L from K. */
#define I	$2
#define J	$3
#define L	$7

#### Alpha register ####
/* ALPHA shares $f15 with B8.  The code stores it to 152($sp) before the
 * K loop starts overwriting B8. */
#define ALPHA	$f15

/* Numeric FP register ids for the raw-encoded gsLQC1 / gsSQC1 operands. */
#define F31	31
#define F30	30
#define F29	29
#define F28	28
#define F27	27
#define F26	26
#define F25	25
#define F24	24
#define F23	23
#define F22	22
#define F21	21
#define F20	20
#define F19	19
#define F18	18
#define F17	17
#define F16	16
#define F15	15
#define F14	14
#define F13	13
#define F12	12
#define F11	11
#define F10	10
#define F9	9
#define F8	8
#define F7	7
#define F6	6
#define F5	5
#define F4	4
#define F3	3
#define F2	2
#define F1	1
#define F0	0

/* Numeric integer register ids: R12 = AO, R13 = BO, R14..R17 = CO1..CO4. */
#define R12	12
#define R13	13

#define R14	14
#define R15	15
#define R16	16
#define R17	17

/* Extra integer registers used only by the TRMM variant of the kernel. */
#if defined(TRMMKERNEL)
#define OFFSET	$23
#define KK	$24
#define TEMP	$25
#endif

#	.text
#	.align	2
##	.globl	gemm
#	.set	nomips16
#	.ent	gemm
#	.type	gemm, @function
#gemm:
#	.frame	$sp,STACKSIZE,$31	# vars= 48, regs= 1/0, args= 0, gp= 0
#	.mask	
0x40000000,-8 # .fmask 0x00000000,0 # .set noreorder # .set nomacro PROLOGUE daddiu $sp,$sp,-STACKSIZE sd $16, 0($sp) sd $17, 8($sp) sd $18, 16($sp) sd $19, 24($sp) sd $20, 32($sp) sd $21, 40($sp) sd $22, 48($sp) ST $f24, 56($sp) ST $f25, 64($sp) ST $f26, 72($sp) ST $f27, 80($sp) ST $f28, 88($sp) #if defined(TRMMKERNEL) sd $23, 96($sp) sd $24, 104($sp) sd $25, 112($sp) LDARG OFFSET, 160($sp) #endif #ifndef __64BIT__ ST $f20,120($sp) ST $f21,128($sp) ST $f22,136($sp) ST $f23,144($sp) #endif .align 4 .L4: dsra J, N, 2 # NR=4 dsll LDC, LDC, BASE_SHIFT# LDC*SIZE #if defined(TRMMKERNEL) && !defined(LEFT) neg KK, OFFSET #endif blez J, .L2 ST ALPHA, 152($sp) .L48: dsra I, M, 3 # MR=8 dsll PREA, K, BASE_SHIFT move AO, A # Reset A move CO1, C daddu CO2, C, LDC daddu CO3, CO2, LDC daddu CO4, CO3, LDC daddu PREA, A, PREA #if defined(TRMMKERNEL) && defined(LEFT) move KK, OFFSET #endif blez I, .L44 daddu C, CO4, LDC .align 4 .L481: #if defined(TRMMKERNEL) #if (defined(LEFT) && defined(TRANSA)) ||\ (!defined(LEFT) && !defined(TRANSA)) move BO, B #else dsll L, KK, 3 + BASE_SHIFT # kk*8mr*datasize dsll TEMP, KK, 2 + BASE_SHIFT daddu AO, AO, L # AO point to the data addr daddu BO, B, TEMP #endif MTC $0, C11 # CLEAR REAULTS REGISTERS MOV C12, C11 dsll PREB, K, BASE_SHIFT MOV C21, C11 MOV C22, C11 MOV C31, C11 MOV C32, C11 gsLQC1(R13, F9, F8, 0) # B1 B2 MOV C41, C11 MOV C42, C11 gsLQC1(R12, F1, F0, 0) # A1 A2 MOV C13, C11 MOV C14, C11 gsLQC1(R12, F3, F2, 1) # A3 A4 MOV C23, C11 FETCH $0, 0 * SIZE(CO1) MOV C24, C11 FETCH $0, 4 * SIZE(CO1) MOV C33, C11 FETCH $0, 0 * SIZE(CO2) MOV C34, C11 FETCH $0, 4 * SIZE(CO2) daddu PREB, B, PREB MOV C43, C11 FETCH $0, 0 * SIZE(CO3) MOV C44, C11 FETCH $0, 4 * SIZE(CO3) PLU B3, B1, B1 FETCH $0, 0 * SIZE(CO4) PLU B4, B2, B2 FETCH $0, 4 * SIZE(CO4) #if (defined(LEFT) && !defined(TRANSA)) ||\ (!defined(LEFT) && defined(TRANSA)) dsubu TEMP, K, KK # TEMP is the length of the data part #elif defined(LEFT) daddiu TEMP, KK, 8 #else daddiu TEMP, KK, 4 #endif 
dsra L, TEMP, 6 blez L, .L482 NOP #else # GEMM PART move BO, B # Reset B dsra L, K, 6 # UnRoll K=64 MTC $0, C11 # CLEAR REAULTS REGISTERS MOV C12, C11 dsll PREB, K, BASE_SHIFT MOV C21, C11 MOV C22, C11 MOV C31, C11 MOV C32, C11 gsLQC1(R13, F9, F8, 0) # B1 B2 MOV C41, C11 MOV C42, C11 gsLQC1(R12, F1, F0, 0) # A1 A2 MOV C13, C11 MOV C14, C11 gsLQC1(R12, F3, F2, 1) # A3 A4 MOV C23, C11 FETCH $0, 0 * SIZE(CO1) MOV C24, C11 FETCH $0, 4 * SIZE(CO1) MOV C33, C11 FETCH $0, 0 * SIZE(CO2) MOV C34, C11 FETCH $0, 4 * SIZE(CO2) daddu PREB, B, PREB MOV C43, C11 FETCH $0, 0 * SIZE(CO3) MOV C44, C11 FETCH $0, 4 * SIZE(CO3) PLU B3, B1, B1 FETCH $0, 0 * SIZE(CO4) PLU B4, B2, B2 blez L, .L482 FETCH $0, 4 * SIZE(CO4) #endif .L4810: daddiu L, L, -1 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 
gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 
* SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * 
SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 
gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, 
A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 
FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 
MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, 
C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, 
B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS 
C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 
MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 
PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS 
C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, 
A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, 
A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS 
C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 
MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 
MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, 
F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	bgtz L, .L4810 # go round the .L4810 loop again while L is still positive
	MADPS C44, C44, A8, B8 # presumably executes in the branch delay slot - TODO confirm noreorder mode

## ------------------------------------------------------------------
## .L482 - remainder pass selected by bit 5 (value 32) of the trip
## count: K in the plain build, TEMP when TRMMKERNEL is defined.
## If the bit is clear, control goes straight to .L483.  Otherwise the
## code falls into eight copies ("groups") of one four-step sequence.
## Per group, BO advances by 16*SIZE and AO by 32*SIZE (annotated 4KR
## in the existing comments), PREB by 16*SIZE and PREA by 32*SIZE.
##
## Register usage visible in this section:
##  - R12 and R13 in the gsLQC1 calls are the register numbers of AO
##    and BO.  Each gsLQC1 fills two FP registers; the trailing comment
##    names them using the register map at the top of the file.
##  - The F12/F13 loads fill the registers named B5/B6.  B3/B4 and
##    B7/B8 are never loaded from memory here; they are written by PLU
##    from B1/B2 and B5/B6 respectively.
##  - The first step reads A1-A4 and B1-B4 before this section loads
##    them, so they must already be live on entry.
##  - FETCH is defined as ld; its target is $0, so only the memory
##    touch at PREA/PREB remains.
##  - gsLQC1 offsets look like 16-byte units (eight A slots per
##    32*SIZE advance) - TODO confirm against SIZE in common.h.
##  - MADPS and PLU are macros defined outside this view; presumably
##    paired-single multiply-add and a paired-single half shuffle.
## ------------------------------------------------------------------
	.align 4
.L482:
#ifndef TRMMKERNEL
	andi L, K, 32 # isolate bit 5 of the trip count
#else
	andi L, TEMP, 32 # TRMM build: same test on TEMP
#endif
	blez L, .L483 # bit clear: skip this pass
	NOP

## ---- .L482 group 1 of 8 ----
## step 1: multiply with A1-A4 and B1-B4 while loading A5-A8 and B5/B6
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

## step 2: multiply with A5-A8 and B5-B8 while reloading A1-A4 and B1/B2
	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## step 3: same shape as step 1; BO and AO are advanced after the last
## loads that use the old base
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

## step 4: same shape as step 2; loads use offsets 0/0/1 from the
## already advanced BO and AO; PREB and PREA are advanced here
	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 2 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 3 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 4 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 5 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 6 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 7 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ---- .L482 group 8 of 8 (same four steps) ----
	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 7) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 8 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	daddiu BO, BO, 16 * SIZE # 4KR*4NR
	MADPS C23, C23, A2, B3
	daddiu AO, AO, 32 * SIZE # 4KR*8MR
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 16 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 20 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 0) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 0) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 1) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 12 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	daddiu PREB, PREB, 16 * SIZE
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 24 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 28 * SIZE(PREA)
	daddiu PREA, PREA, 32 * SIZE
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

## ------------------------------------------------------------------
## .L483 - same kind of test for bit 4 (value 16) of the trip count
## (K, or TEMP when TRMMKERNEL is defined); control goes to .L484 when
## the bit is clear.  The instructions that follow start with the same
## step sequence as the groups above.
## ------------------------------------------------------------------
	.align 4
.L483:
#ifndef TRMMKERNEL
	andi L, K, 16 # isolate bit 4 of the trip count
#else
	andi L, TEMP, 16 # TRMM build: same test on TEMP
#endif
	blez L, .L484 # bit clear: skip this pass
	NOP

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 1) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 2) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12, F7, F6, 3) # A7 A8
	MADPS C32, C32, A3, B2
	MADPS C42, C42, A4, B2
	FETCH $0, 0 * SIZE(PREB)
	MADPS C13, C13, A1, B3
	MADPS C23, C23, A2, B3
	MADPS C33, C33, A3, B3
	MADPS C43, C43, A4, B3
	MADPS C14, C14, A1, B4
	PLU B7, B5, B5
	FETCH $0, 0 * SIZE(PREA)
	MADPS C24, C24, A2, B4
	PLU B8, B6, B6
	FETCH $0, 4 * SIZE(PREA)
	MADPS C34, C34, A3, B4
	MADPS C44, C44, A4, B4

	MADPS C11, C11, A5, B5
	MADPS C21, C21, A6, B5
	gsLQC1(R13, F9, F8, 2) # B1 B2
	MADPS C12, C12, A5, B6
	MADPS C22, C22, A6, B6
	gsLQC1(R12, F1, F0, 4) # A1 A2
	MADPS C31, C31, A7, B5
	MADPS C41, C41, A8, B5
	gsLQC1(R12, F3, F2, 5) # A3 A4
	MADPS C32, C32, A7, B6
	MADPS C42, C42, A8, B6
	FETCH $0, 4 * SIZE(PREB)
	MADPS C13, C13, A5, B7
	MADPS C23, C23, A6, B7
	MADPS C33, C33, A7, B7
	MADPS C43, C43, A8, B7
	MADPS C14, C14, A5, B8
	PLU B3, B1, B1
	FETCH $0, 8 * SIZE(PREA)
	MADPS C24, C24, A6, B8
	PLU B4, B2, B2
	FETCH $0, 12 * SIZE(PREA)
	MADPS C34, C34, A7, B8
	MADPS C44, C44, A8, B8

	MADPS C11, C11, A1, B1
	MADPS C21, C21, A2, B1
	gsLQC1(R13, F13, F12, 3) # B5 B6
	MADPS C12, C12, A1, B2
	MADPS C22, C22, A2, B2
	gsLQC1(R12, F5, F4, 6) # A5 A6
	MADPS C31, C31, A3, B1
	MADPS C41, C41, A4, B1
	gsLQC1(R12,
F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS 
C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, 
B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, 
F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, 
PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 .align 4 .L484: #ifndef TRMMKERNEL andi L, K, 8 #else andi L, TEMP, 8 #endif blez L, .L485 NOP MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 
FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 
MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 .align 4 .L485: #ifndef TRMMKERNEL andi L, K, 4 #else andi L, TEMP, 4 #endif blez L, .L486 NOP MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 4) # A1 A2 MADPS C31, C31, 
A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 5) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 6) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 7) # A7 A8 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 8 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 32 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 16 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 20 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 12 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 16 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 24 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 28 * SIZE(PREA) daddiu PREA, PREA, 32 * SIZE MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 .align 4 .L486: #ifndef TRMMKERNEL andi L, K, 2 #else andi L, TEMP, 2 #endif blez L, .L487 NOP MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS 
C32, C32, A3, B2 MADPS C42, C42, A4, B2 FETCH $0, 0 * SIZE(PREB) MADPS C13, C13, A1, B3 daddiu BO, BO, 8 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 16 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 PLU B7, B5, B5 FETCH $0, 0 * SIZE(PREA) MADPS C24, C24, A2, B4 PLU B8, B6, B6 FETCH $0, 4 * SIZE(PREA) MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 MADPS C11, C11, A5, B5 MADPS C21, C21, A6, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C12, C12, A5, B6 MADPS C22, C22, A6, B6 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C31, C31, A7, B5 MADPS C41, C41, A8, B5 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C32, C32, A7, B6 MADPS C42, C42, A8, B6 FETCH $0, 4 * SIZE(PREB) MADPS C13, C13, A5, B7 MADPS C23, C23, A6, B7 daddiu PREB, PREB, 8 * SIZE MADPS C33, C33, A7, B7 MADPS C43, C43, A8, B7 MADPS C14, C14, A5, B8 PLU B3, B1, B1 FETCH $0, 8 * SIZE(PREA) MADPS C24, C24, A6, B8 PLU B4, B2, B2 FETCH $0, 12 * SIZE(PREA) MADPS C34, C34, A7, B8 MADPS C44, C44, A8, B8 daddiu PREA, PREA, 16 * SIZE .align 4 .L487: #ifndef TRMMKERNEL andi L, K, 1 #else andi L, TEMP, 1 #endif blez L, .L480 LD ALPHA, 152($sp) MADPS C11, C11, A1, B1 MADPS C21, C21, A2, B1 MADPS C12, C12, A1, B2 MADPS C22, C22, A2, B2 MADPS C31, C31, A3, B1 MADPS C41, C41, A4, B1 MADPS C32, C32, A3, B2 MADPS C42, C42, A4, B2 MADPS C13, C13, A1, B3 daddiu BO, BO, 4 * SIZE # 4KR*4NR MADPS C23, C23, A2, B3 daddiu AO, AO, 8 * SIZE # 4KR*8MR MADPS C33, C33, A3, B3 MADPS C43, C43, A4, B3 MADPS C14, C14, A1, B4 MADPS C24, C24, A2, B4 MADPS C34, C34, A3, B4 MADPS C44, C44, A4, B4 .align 4 .L480: # Write Back #ifndef TRMMKERNEL daddiu I, I, -1 CVTU A1, C13 # A1=C13.upper=c12 CVTU A2, C11 # A2=C11.upper=c22 CVTU A3, C23 # A3=C23.upper=c14 LD B1, 1 * SIZE(CO1) CVTU A4, C21 # A4=C21.upper=c24 LD B2, 1 * SIZE(CO2) CVTU A5, C33 # A5=C33.upper=c16 LD B3, 3 * SIZE(CO1) CVTU A6, C31 # A6=C31.upper=c26 LD B4, 3 * SIZE(CO2) CVTU A7, C43 # A7=C43.upper=c18 LD B5, 5 * SIZE(CO1) CVTU A8, C41 # A8=C41.upper=c28 LD B6, 
5 * SIZE(CO2) MADD A1, B1, A1, ALPHA # c12 LD B7, 7 * SIZE(CO1) MADD A2, B2, A2, ALPHA # c22 LD B1, 7 * SIZE(CO2) MADD A3, B3, A3, ALPHA # c14 LD B2, 0 * SIZE(CO1) MADD A4, B4, A4, ALPHA # c24 LD B3, 0 * SIZE(CO2) MADD A5, B5, A5, ALPHA # c16 LD B4, 2 * SIZE(CO1) MADD A6, B6, A6, ALPHA # c26 LD B5, 2 * SIZE(CO2) MADD A7, B7, A7, ALPHA # c18 LD B6, 4 * SIZE(CO1) MADD A8, B1, A8, ALPHA # c28 ST A1, 1 * SIZE(CO1) MADD C11, B2, C11, ALPHA # c12 LD B7, 4 * SIZE(CO2) MADD C13, B3, C13, ALPHA # c22 ST A2, 1 * SIZE(CO2) MADD C21, B4, C21, ALPHA # c14 LD A1, 6 * SIZE(CO1) MADD C23, B5, C23, ALPHA # c24 ST A3, 3 * SIZE(CO1) MADD C31, B6, C31, ALPHA # c16 LD A2, 6 * SIZE(CO2) MADD C33, B7, C33, ALPHA # c26 ST A4, 3 * SIZE(CO2) ST A5, 5 * SIZE(CO1) ST A6, 5 * SIZE(CO2) ST A7, 7 * SIZE(CO1) ST A8, 7 * SIZE(CO2) MADD C41, A1, C41, ALPHA # c18 ST C11, 0 * SIZE(CO1) MADD C43, A2, C43, ALPHA # c28 ST C13, 0 * SIZE(CO2) ST C21, 2 * SIZE(CO1) ST C23, 2 * SIZE(CO2) ST C31, 4 * SIZE(CO1) ST C33, 4 * SIZE(CO2) ST C41, 6 * SIZE(CO1) CVTU A1, C14 # B1=C12.upper=c42 ST C43, 6 * SIZE(CO2) CVTU A2, C12 # B2=C14.upper=c32 LD B1, 1 * SIZE(CO3) CVTU A3, C24 # B3=C22.upper=c44 LD B2, 1 * SIZE(CO4) CVTU A4, C22 # B4=C24.upper=c34 LD B3, 3 * SIZE(CO3) CVTU A5, C34 # B5=C32.upper=c46 LD B4, 3 * SIZE(CO4) CVTU A6, C32 # B6=C24.upper=c36 LD B5, 5 * SIZE(CO3) CVTU A7, C44 # B7=C42.upper=c48 LD B6, 5 * SIZE(CO4) CVTU A8, C42 # A1=C44.upper=c38 LD B7, 7 * SIZE(CO3) MADD A1, B1, A1, ALPHA # c31 LD C11, 7 * SIZE(CO4) MADD A2, B2, A2, ALPHA LD C13, 0 * SIZE(CO3) MADD A3, B3, A3, ALPHA LD C21, 0 * SIZE(CO4) MADD A4, B4, A4, ALPHA LD C23, 2 * SIZE(CO3) MADD A5, B5, A5, ALPHA LD C31, 2 * SIZE(CO4) MADD A6, B6, A6, ALPHA LD C33, 4 * SIZE(CO3) MADD A7, B7, A7, ALPHA LD C41, 4 * SIZE(CO4) MADD A8, C11, A8, ALPHA ST A1, 1 * SIZE(CO3) MADD C12, C13, C12, ALPHA LD C43, 6 * SIZE(CO3) MADD C14, C21, C14, ALPHA ST A2, 1 * SIZE(CO4) MADD C22, C23, C22, ALPHA LD B1, 6 * SIZE(CO4) MADD C24, C31, C24, ALPHA ST A3, 3 * 
SIZE(CO3) MADD C32, C33, C32, ALPHA ST A4, 3 * SIZE(CO4) MADD C34, C41, C34, ALPHA ST A5, 5 * SIZE(CO3) MADD C42, C43, C42, ALPHA ST A6, 5 * SIZE(CO4) ST A7, 7 * SIZE(CO3) NOP MADD C44, B1, C44, ALPHA ST A8, 7 * SIZE(CO4) ST C12, 0 * SIZE(CO3) ST C14, 0 * SIZE(CO4) ST C22, 2 * SIZE(CO3) ST C24, 2 * SIZE(CO4) ST C32, 4 * SIZE(CO3) ST C34, 4 * SIZE(CO4) ST C42, 6 * SIZE(CO3) ST C44, 6 * SIZE(CO4) daddiu CO1, CO1, 8 * SIZE daddiu CO2, CO2, 8 * SIZE daddiu CO3, CO3, 8 * SIZE bgtz I, .L481 daddiu CO4, CO4, 8 * SIZE #else daddiu I, I, -1 CVTU A1, C13 # A1=C13.upper=c12 CVTU A2, C11 # A2=C11.upper=c22 CVTU A3, C23 # A3=C23.upper=c14 CVTU A4, C21 # A4=C21.upper=c24 CVTU A5, C33 # A5=C33.upper=c16 CVTU A6, C31 # A6=C31.upper=c26 CVTU A7, C43 # A7=C43.upper=c18 CVTU A8, C41 # A8=C41.upper=c28 MUL A1, A1, ALPHA # c12 MUL A2, A2, ALPHA # c22 MUL A3, A3, ALPHA # c14 MUL A4, A4, ALPHA # c24 MUL A5, A5, ALPHA # c16 MUL A6, A6, ALPHA # c26 MUL A7, A7, ALPHA # c18 MUL A8, A8, ALPHA # c28 MUL C11, C11, ALPHA # c12 ST A1, 1 * SIZE(CO1) MUL C13, C13, ALPHA # c22 ST A2, 1 * SIZE(CO2) MUL C21, C21, ALPHA # c14 ST A3, 3 * SIZE(CO1) MUL C23, C23, ALPHA # c24 ST A4, 3 * SIZE(CO2) MUL C31, C31, ALPHA # c16 ST A5, 5 * SIZE(CO1) MUL C33, C33, ALPHA # c26 ST A6, 5 * SIZE(CO2) MUL C41, C41, ALPHA # c18 ST A7, 7 * SIZE(CO1) MUL C43, C43, ALPHA # c28 ST A8, 7 * SIZE(CO2) CVTU A1, C14 # B1=C12.upper=c42 ST C11, 0 * SIZE(CO1) CVTU A2, C12 # B2=C14.upper=c32 ST C13, 0 * SIZE(CO2) CVTU A3, C24 # B3=C22.upper=c44 ST C21, 2 * SIZE(CO1) CVTU A4, C22 # B4=C24.upper=c34 ST C23, 2 * SIZE(CO2) CVTU A5, C34 # B5=C32.upper=c46 ST C31, 4 * SIZE(CO1) CVTU A6, C32 # B6=C24.upper=c36 ST C33, 4 * SIZE(CO2) CVTU A7, C44 # B7=C42.upper=c48 ST C41, 6 * SIZE(CO1) CVTU A8, C42 # A1=C44.upper=c38 ST C43, 6 * SIZE(CO2) MUL A1, A1, ALPHA # c31 MUL A2, A2, ALPHA MUL A3, A3, ALPHA MUL A4, A4, ALPHA MUL A5, A5, ALPHA MUL A6, A6, ALPHA MUL A7, A7, ALPHA MUL A8, A8, ALPHA MUL C12, C12, ALPHA ST A1, 1 * SIZE(CO3) MUL C14, C14, 
ALPHA
## The line above is the last operand of a MUL that opens on the preceding
## source line.  What follows is the tail of the TRMM write-back for the
## 8-row tile, then the set-up for the MR=4 tile (.L44 / .L441).
	ST	A2, 1 * SIZE(CO4)
	MUL	C22, C22, ALPHA
	ST	A3, 3 * SIZE(CO3)
	MUL	C24, C24, ALPHA
	ST	A4, 3 * SIZE(CO4)
	MUL	C32, C32, ALPHA
	ST	A5, 5 * SIZE(CO3)
	MUL	C34, C34, ALPHA
	ST	A6, 5 * SIZE(CO4)
	MUL	C42, C42, ALPHA
	ST	A7, 7 * SIZE(CO3)
	MUL	C44, C44, ALPHA
	ST	A8, 7 * SIZE(CO4)

	ST	C12, 0 * SIZE(CO3)
	ST	C14, 0 * SIZE(CO4)
	ST	C22, 2 * SIZE(CO3)
	ST	C24, 2 * SIZE(CO4)
	ST	C32, 4 * SIZE(CO3)
	ST	C34, 4 * SIZE(CO4)
	ST	C42, 6 * SIZE(CO3)
	ST	C44, 6 * SIZE(CO4)

	daddiu	CO1, CO1, 8 * SIZE		# step all four C pointers past this tile
	daddiu	CO2, CO2, 8 * SIZE
	daddiu	CO3, CO3, 8 * SIZE
	daddiu	CO4, CO4, 8 * SIZE

#if ( defined(LEFT) && defined(TRANSA)) ||\
    (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -8
#else
	daddiu	TEMP, TEMP, -4
#endif
	dsll	L, TEMP, 3 + BASE_SHIFT		# TEMP * 8 * element size
	dsll	TEMP, TEMP, 2 + BASE_SHIFT	# TEMP * 4 * element size
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 8
#endif
	bgtz	I, .L481
	NOP
#endif

	.align	4
.L44:
	andi	I, M, 4				# MR=4
	blez	I, .L42
	NOP

	.align	4
.L441:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) ||\
    (!defined(LEFT) && !defined(TRANSA))
	move	BO, B				# Reset B
#else
	dsll	L, KK, 2 + BASE_SHIFT		# KK * 4 * element size
	dsll	TEMP, KK, 2 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11				# CLEAR RESULTS REGISTERS
	MOV	C12, C11

	dsll	PREB, K, BASE_SHIFT
	MOV	C21, C11
	MOV	C22, C11

	MOV	C31, C11
	MOV	C32, C11
	gsLQC1(R13, F9, F8, 0)			# B1 B2

	MOV	C41, C11
	MOV	C42, C11
	gsLQC1(R12, F1, F0, 0)			# A1 A2

	MOV	C13, C11
	MOV	C14, C11

	MOV	C23, C11
	FETCH	$0, 0 * SIZE(CO1)
	MOV	C24, C11

	MOV	C33, C11
	FETCH	$0, 0 * SIZE(CO2)
	MOV	C34, C11

	daddu	PREB, B, PREB			# PREB = B + K * element size
	MOV	C43, C11
	FETCH	$0, 0 * SIZE(CO3)

	MOV	C44, C11
	PLU	B3, B1, B1
	FETCH	$0, 0 * SIZE(CO4)
	PLU	B4, B2, B2

## TEMP receives the k count used by this tile.  Both constant branches now
## use daddiu, the immediate form every other tile uses, instead of daddu
## with an immediate operand.
#if (defined(LEFT) && !defined(TRANSA)) ||\
    (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 4
#else
	daddiu	TEMP, KK, 4
#endif
	dsra	L, TEMP, 2			# L = TEMP / 4 (main loop trip count)
	blez	L, .L442
	NOP

#else
	move	BO, B				# Reset B
	dsra	L, K, 2				# UnRoll K=4
	MTC	$0, C11				# CLEAR RESULTS REGISTERS
	MOV	C12, C11

	dsll	PREB, K, BASE_SHIFT
	MOV	C21, C11
	MOV	C22,
C11 MOV C31, C11 MOV C32, C11 gsLQC1(R13, F9, F8, 0) # B1 B2 MOV C41, C11 MOV C42, C11 gsLQC1(R12, F1, F0, 0) # A1 A2 MOV C13, C11 MOV C14, C11 MOV C23, C11 FETCH $0, 0 * SIZE(CO1) MOV C24, C11 MOV C33, C11 FETCH $0, 0 * SIZE(CO2) MOV C34, C11 daddu PREB, B, PREB MOV C43, C11 FETCH $0, 0 * SIZE(CO3) MOV C44, C11 PLU B3, B1, B1 FETCH $0, 0 * SIZE(CO4) blez L, .L442 PLU B4, B2, B2 #endif .L4410: # daddiu L, L, -1 MADPS C11, C11, A1, B1 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C21, C21, A2, B1 gsLQC1(R12, F3, F2, 1) # A3 A4 MADPS C12, C12, A1, B2 FETCH $0, 0 * SIZE(PREB) MADPS C22, C22, A2, B2 FETCH $0, 0 * SIZE(PREA) MADPS C13, C13, A1, B3 MADPS C23, C23, A2, B3 MADPS C14, C14, A1, B4 MADPS C24, C24, A2, B4 PLU B7, B5, B5 PLU B8, B6, B6 MADPS C11, C11, A3, B5 gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C21, C21, A4, B5 gsLQC1(R12, F5, F4, 2) # A5 A6 MADPS C12, C12, A3, B6 FETCH $0, 4 * SIZE(PREB) MADPS C22, C22, A4, B6 FETCH $0, 4 * SIZE(PREA) MADPS C13, C13, A3, B7 MADPS C23, C23, A4, B7 MADPS C14, C14, A3, B8 MADPS C24, C24, A4, B8 PLU B3, B1, B1 PLU B4, B2, B2 MADPS C11, C11, A5, B1 gsLQC1(R13, F13, F12, 3) # B3 B4 MADPS C21, C21, A6, B1 gsLQC1(R12, F7, F6, 3) # A7 A8 MADPS C12, C12, A5, B2 FETCH $0, 8 * SIZE(PREB) daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C22, C22, A6, B2 FETCH $0, 8 * SIZE(PREA) daddiu AO, AO, 16 * SIZE # 4KR*4MR MADPS C13, C13, A5, B3 MADPS C23, C23, A6, B3 MADPS C14, C14, A5, B4 MADPS C24, C24, A6, B4 PLU B7, B5, B5 PLU B8, B6, B6 MADPS C11, C11, A7, B5 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C21, C21, A8, B5 gsLQC1(R12, F1, F0, 0) # A1 A2 MADPS C12, C12, A7, B6 FETCH $0, 12 * SIZE(PREB) MADPS C22, C22, A8, B6 FETCH $0, 12 * SIZE(PREA) MADPS C13, C13, A7, B7 daddiu PREA, PREA, 16 * SIZE MADPS C23, C23, A8, B7 daddiu PREB, PREB, 16 * SIZE MADPS C14, C14, A7, B8 MADPS C24, C24, A8, B8 PLU B3, B1, B1 bgtz L, .L4410 PLU B4, B2, B2 .align 4 .L442: #ifndef TRMMKERNEL andi L, K, 2 #else andi L, TEMP, 2 #endif blez L, .L443 NOP MADPS C11, C11, A1, B1 gsLQC1(R13, 
F13, F12, 1)					# B3 B4
## The line above is the operand tail of a gsLQC1 that opens on the preceding
## source line.  This span holds the rest of the K & 2 step for the
## MR=4 x NR=4 tile, the K & 1 step (.L443) and the start of the
## write-back (.L440).
	MADPS	C21, C21, A2, B1
	gsLQC1(R12, F3, F2, 1)			# A3 A4

	MADPS	C12, C12, A1, B2
	FETCH	$0, 0 * SIZE(PREB)
	daddiu	BO, BO, 8 * SIZE		# 2KR*4NR

	MADPS	C22, C22, A2, B2
	FETCH	$0, 0 * SIZE(PREA)
	daddiu	AO, AO, 8 * SIZE		# 2KR*4MR

	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

	MADPS	C14, C14, A1, B4
	MADPS	C24, C24, A2, B4

	PLU	B7, B5, B5
	PLU	B8, B6, B6

	MADPS	C11, C11, A3, B5
	gsLQC1(R13, F9, F8, 0)			# B1 B2

	MADPS	C21, C21, A4, B5
	gsLQC1(R12, F1, F0, 0)			# A1 A2

	MADPS	C12, C12, A3, B6
	FETCH	$0, 4 * SIZE(PREB)

	MADPS	C22, C22, A4, B6
	FETCH	$0, 4 * SIZE(PREA)

## The prefetch cursors advance by the same 8 elements as AO and BO in this
## step.  The stride was previously the bare constant 8, without the SIZE
## scaling that the 4-k loop (.L4410) applies to its 16-element stride.
	MADPS	C13, C13, A3, B7
	daddiu	PREB, PREB, 8 * SIZE
	MADPS	C23, C23, A4, B7
	daddiu	PREA, PREA, 8 * SIZE

	MADPS	C14, C14, A3, B8
	MADPS	C24, C24, A4, B8

	PLU	B3, B1, B1
	PLU	B4, B2, B2

	.align	4
.L443:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L440
	LD	ALPHA, 152($sp)			# ALPHA shares $f15 with B8, so reload the saved copy
						# (follows the branch: presumably its delay slot -- confirm noreorder)

	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1

	MADPS	C12, C12, A1, B2
	daddiu	BO, BO, 4 * SIZE		# 1KR*4NR
	MADPS	C22, C22, A2, B2
	daddiu	AO, AO, 4 * SIZE		# 1KR*4MR

	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

	MADPS	C14, C14, A1, B4
	MADPS	C24, C24, A2, B4

	.align	4
.L440:
#ifndef TRMMKERNEL
## Write-back for CO1 / CO2: CVTU results go to the odd element offsets,
## the accumulators themselves go to the even ones.
	CVTU	A1, C13				# A1=C13.upper
	LD	B1, 1 * SIZE(CO1)

	CVTU	A2, C11				# A2=C11.upper
	LD	B2, 1 * SIZE(CO2)

	CVTU	A3, C23				# A3=C23.upper
	LD	B3, 3 * SIZE(CO1)

	CVTU	A4, C21				# A4=C21.upper
	LD	B4, 3 * SIZE(CO2)

	MADD	A1, B1, A1, ALPHA		# combine with value loaded from 1(CO1)
	LD	B5, 0 * SIZE(CO1)

	MADD	A2, B2, A2, ALPHA		# combine with value loaded from 1(CO2)
	LD	B6, 0 * SIZE(CO2)

	MADD	A3, B3, A3, ALPHA		# combine with value loaded from 3(CO1)
	LD	B7, 2 * SIZE(CO1)

	MADD	A4, B4, A4, ALPHA		# combine with value loaded from 3(CO2)
	LD	B1, 2 * SIZE(CO2)

	MADD	C11, B5, C11, ALPHA
	ST	A1, 1 * SIZE(CO1)

	MADD	C13, B6, C13, ALPHA
	ST	A2, 1 * SIZE(CO2)

	MADD	C21, B7, C21, ALPHA
	ST	A3, 3 * SIZE(CO1)

	MADD	C23, B1, C23, ALPHA
	ST	A4, 3 * SIZE(CO2)

	ST	C11, 0 * SIZE(CO1)
	ST	C13, 0 * SIZE(CO2)
	ST	C21, 2 * SIZE(CO1)
	ST	C23, 2 * SIZE(CO2)

## Same pattern for CO3 / CO4.
	CVTU	A1, C14				# A1=C14.upper
	LD	B1, 1 * SIZE(CO3)

	CVTU	A2, C12				# A2=C12.upper
	LD	B2, 1 * SIZE(CO4)

	CVTU	A3, C24				# A3=C24.upper
	LD	B3, 3 * SIZE(CO3)

	CVTU
A4, C22 # B4=C24.upper=c34 LD B4, 3 * SIZE(CO4) MADD A1, B1, A1, ALPHA # c31 LD A5, 0 * SIZE(CO3) MADD A2, B2, A2, ALPHA LD A6, 0 * SIZE(CO4) MADD A3, B3, A3, ALPHA LD A7, 2 * SIZE(CO3) MADD A4, B4, A4, ALPHA LD A8, 2 * SIZE(CO4) MADD C12, A5, C12, ALPHA ST A1, 1 * SIZE(CO3) MADD C14, A6, C14, ALPHA ST A2, 1 * SIZE(CO4) MADD C22, A7, C22, ALPHA ST A3, 3 * SIZE(CO3) MADD C24, A8, C24, ALPHA ST A4, 3 * SIZE(CO4) ST C12, 0 * SIZE(CO3) ST C14, 0 * SIZE(CO4) ST C22, 2 * SIZE(CO3) ST C24, 2 * SIZE(CO4) daddiu CO1, CO1, 4 * SIZE daddiu CO2, CO2, 4 * SIZE daddiu CO3, CO3, 4 * SIZE daddiu CO4, CO4, 4 * SIZE #else CVTU A1, C13 # A1=C13.upper=c12 CVTU A2, C11 # A2=C11.upper=c22 CVTU A3, C23 # A3=C23.upper=c14 CVTU A4, C21 # A4=C21.upper=c24 MUL A1, A1, ALPHA # c12 MUL A2, A2, ALPHA # c22 MUL A3, A3, ALPHA # c14 MUL A4, A4, ALPHA # c24 MUL C11, C11, ALPHA # c12 ST A1, 1 * SIZE(CO1) MUL C13, C13, ALPHA # c22 ST A2, 1 * SIZE(CO2) MUL C21, C21, ALPHA # c14 ST A3, 3 * SIZE(CO1) MUL C23, C23, ALPHA # c24 ST A4, 3 * SIZE(CO2) CVTU A5, C14 # B1=C12.upper=c42 ST C11, 0 * SIZE(CO1) CVTU A6, C12 # B2=C14.upper=c32 ST C13, 0 * SIZE(CO2) CVTU A7, C24 # B3=C22.upper=c44 ST C21, 2 * SIZE(CO1) CVTU A8, C22 # B4=C24.upper=c34 ST C23, 2 * SIZE(CO2) MUL A5, A5, ALPHA # c31 MUL A6, A6, ALPHA MUL A7, A7, ALPHA MUL A8, A8, ALPHA MUL C12, C12, ALPHA ST A5, 1 * SIZE(CO3) MUL C14, C14, ALPHA ST A6, 1 * SIZE(CO4) MUL C22, C22, ALPHA ST A7, 3 * SIZE(CO3) MUL C24, C24, ALPHA ST A8, 3 * SIZE(CO4) ST C12, 0 * SIZE(CO3) ST C14, 0 * SIZE(CO4) ST C22, 2 * SIZE(CO3) ST C24, 2 * SIZE(CO4) daddiu CO1, CO1, 4 * SIZE daddiu CO2, CO2, 4 * SIZE daddiu CO3, CO3, 4 * SIZE daddiu CO4, CO4, 4 * SIZE #if ( defined(LEFT) && defined(TRANSA))||\ (!defined(LEFT) && !defined(TRANSA)) dsubu TEMP, K, KK #ifdef LEFT daddiu TEMP, TEMP, -4 #else daddiu TEMP, TEMP, -4 #endif dsll L, TEMP, 2 + BASE_SHIFT dsll TEMP, TEMP, 2 + BASE_SHIFT daddu AO, AO, L daddu BO, BO, TEMP #endif #ifdef LEFT daddiu KK, KK, 4 #endif #endif .align 4 
.L42: andi I, M, 2 blez I, .L41 NOP .align 4 .L421: #if defined(TRMMKERNEL) #if (defined(LEFT) && defined(TRANSA)) ||\ (!defined(LEFT) && !defined(TRANSA)) move BO, B #else dsll L, KK, 1 + BASE_SHIFT dsll TEMP, KK, 2 + BASE_SHIFT daddu AO, AO, L daddu BO, B, TEMP #endif MTC $0, C11 # CLEAR REAULTS REGISTERS MOV C12, C11 MOV C21, C11 MOV C22, C11 MOV C31, C11 MOV C32, C11 gsLQC1(R13, F9, F8, 0) # B1 B2 MOV C41, C11 MOV C42, C11 gsLQC1(R12, F1, F0, 0) # A1 A2 MOV C13, C11 MOV C14, C11 MOV C23, C11 FETCH $0, 0 * SIZE(CO1) MOV C24, C11 MOV C33, C11 FETCH $0, 0 * SIZE(CO2) MOV C34, C11 MOV C43, C11 FETCH $0, 0 * SIZE(CO3) MOV C44, C11 PLU B3, B1, B1 FETCH $0, 0 * SIZE(CO4) PLU B4, B2, B2 #if (defined(LEFT) && !defined(TRANSA)) ||\ (!defined(LEFT) && defined(TRANSA)) dsubu TEMP, K, KK #elif defined(LEFT) daddiu TEMP, KK, 2 #else daddiu TEMP, KK, 4 #endif dsra L, TEMP, 2 blez L, .L422 NOP #else move BO, B # Reset B dsra L, K, 2 # UnRoll K=4 MTC $0, C11 # CLEAR REAULTS REGISTERS MOV C12, C11 MOV C21, C11 MOV C22, C11 MOV C31, C11 MOV C32, C11 gsLQC1(R13, F9, F8, 0) # B1 B2 MOV C41, C11 MOV C42, C11 gsLQC1(R12, F1, F0, 0) # A1 A2 MOV C13, C11 MOV C14, C11 MOV C23, C11 FETCH $0, 0 * SIZE(CO1) MOV C24, C11 MOV C33, C11 FETCH $0, 0 * SIZE(CO2) MOV C34, C11 MOV C43, C11 FETCH $0, 0 * SIZE(CO3) MOV C44, C11 PLU B3, B1, B1 FETCH $0, 0 * SIZE(CO4) blez L, .L422 PLU B4, B2, B2 #endif .L4210: daddiu L, L, -1 MADPS C11, C11, A1, B1 MADPS C12, C12, A1, B2 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C13, C13, A1, B3 MADPS C14, C14, A1, B4 gsLQC1(R12, F3, F2, 1) # B1 B2 PLU B7, B5, B5 PLU B8, B6, B6 MADPS C11, C11, A2, B5 MADPS C12, C12, A2, B6 daddiu AO, AO, 8 * SIZE # 4KR*2MR gsLQC1(R13, F9, F8, 2) # B1 B2 MADPS C13, C13, A2, B7 MADPS C14, C14, A2, B8 PLU B3, B1, B1 PLU B4, B2, B2 MADPS C11, C11, A3, B1 gsLQC1(R12, F1, F0, 0) # B3 B4 MADPS C12, C12, A3, B2 gsLQC1(R13, F13, F12, 3) # B3 B4 daddiu BO, BO, 16 * SIZE # 4KR*4NR MADPS C13, C13, A3, B3 MADPS C14, C14, A3, B4 PLU B7, B5, B5 PLU 
B8, B6, B6 MADPS C11, C11, A4, B5 MADPS C12, C12, A4, B6 gsLQC1(R13, F9, F8, 0) # B3 B4 MADPS C13, C13, A4, B7 MADPS C14, C14, A4, B8 PLU B3, B1, B1 bgtz L, .L4210 PLU B4, B2, B2 .align 4 .L422: #ifndef TRMMKERNEL andi L, K, 2 #else andi L, TEMP, 2 #endif blez L, .L423 NOP daddiu AO, AO, 4 * SIZE # 2KR*2MR MADPS C11, C11, A1, B1 MADPS C12, C12, A1, B2 gsLQC1(R13, F13, F12, 1) # B3 B4 MADPS C13, C13, A1, B3 MADPS C14, C14, A1, B4 daddiu BO, BO, 8 * SIZE # 2KR*2MR PLU B7, B5, B5 PLU B8, B6, B6 MADPS C11, C11, A2, B5 MADPS C12, C12, A2, B6 gsLQC1(R13, F9, F8, 0) # B1 B2 MADPS C13, C13, A2, B7 MADPS C14, C14, A2, B8 gsLQC1(R12, F1, F0, 0) PLU B3, B1, B1 PLU B4, B2, B2 .L423: #ifndef TRMMKERNEL andi L, K, 1 #else andi L, TEMP, 1 #endif blez L, .L420 LD ALPHA, 152($sp) MADPS C11, C11, A1, B1 MADPS C12, C12, A1, B2 daddiu BO, BO, 4 * SIZE # 2KR*4NR daddiu AO, AO, 2 * SIZE # 2KR*4MR MADPS C13, C13, A1, B3 MADPS C14, C14, A1, B4 .align 4 .L420: #ifndef TRMMKERNEL CVTU A1, C13 # A1=C13.upper=c12 LD B1, 1 * SIZE(CO1) CVTU A2, C11 # A2=C11.upper=c22 LD B2, 1 * SIZE(CO2) MADD A1, B1, A1, ALPHA # c12 LD B5, 0 * SIZE(CO1) MADD A2, B2, A2, ALPHA # c22 LD B6, 0 * SIZE(CO2) MADD C11, B5, C11, ALPHA # c12 ST A1, 1 * SIZE(CO1) MADD C13, B6, C13, ALPHA # c22 ST A2, 1 * SIZE(CO2) ST C11, 0 * SIZE(CO1) ST C13, 0 * SIZE(CO2) CVTU A1, C14 # B1=C12.upper=c42 LD B1, 1 * SIZE(CO3) CVTU A2, C12 # B2=C14.upper=c32 LD B2, 1 * SIZE(CO4) MADD A1, B1, A1, ALPHA # c31 LD A5, 0 * SIZE(CO3) MADD A2, B2, A2, ALPHA LD A6, 0 * SIZE(CO4) MADD C12, A5, C12, ALPHA ST A1, 1 * SIZE(CO3) MADD C14, A6, C14, ALPHA ST A2, 1 * SIZE(CO4) ST C12, 0 * SIZE(CO3) ST C14, 0 * SIZE(CO4) daddiu CO1, CO1, 2 * SIZE daddiu CO2, CO2, 2 * SIZE daddiu CO3, CO3, 2 * SIZE daddiu CO4, CO4, 2 * SIZE #else CVTU A1, C13 # A1=C13.upper=c12 CVTU A2, C11 # A2=C11.upper=c22 MUL A1, A1, ALPHA # c12 MUL A2, A2, ALPHA # c22 MUL C11, C11, ALPHA # c12 MUL C13, C13, ALPHA # c22 CVTU A3, C14 # B1=C12.upper=c42 CVTU A4, C12 # B2=C14.upper=c32 
## Tail of the TRMM write-back for the 2-row tile (it opens on earlier source
## lines), followed by the set-up for the last single row (.L41 / .L411) and
## the first instructions of its 4-k loop (.L4110).
	MUL	A3, A3, ALPHA
	ST	A1, 1 * SIZE(CO1)

	MUL	A4, A4, ALPHA
	ST	A2, 1 * SIZE(CO2)

	MUL	C12, C12, ALPHA
	ST	C11, 0 * SIZE(CO1)

	MUL	C14, C14, ALPHA
	ST	C13, 0 * SIZE(CO2)

	ST	A3, 1 * SIZE(CO3)
	ST	A4, 1 * SIZE(CO4)

	ST	C12, 0 * SIZE(CO3)
	ST	C14, 0 * SIZE(CO4)

	daddiu	CO1, CO1, 2 * SIZE
	daddiu	CO2, CO2, 2 * SIZE
	daddiu	CO3, CO3, 2 * SIZE
	daddiu	CO4, CO4, 2 * SIZE

#if ( defined(LEFT) && defined(TRANSA))||\
    (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -2
#else
	daddiu	TEMP, TEMP, -4
#endif
	dsll	L, TEMP, 1 + BASE_SHIFT		# TEMP * 2 * element size
	dsll	TEMP, TEMP, 2 + BASE_SHIFT	# TEMP * 4 * element size
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 2
#endif
#endif

	.align	4
.L41:
	andi	I, M, 1
	blez	I, .L40
	NOP

	.align	4
.L411:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) ||\
    (!defined(LEFT) && !defined(TRANSA))
	move	BO, B
#else
	dsll	L, KK, BASE_SHIFT		# KK * element size
	dsll	TEMP, KK, 2 + BASE_SHIFT	# KK * 4 * element size
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11				# CLEAR RESULTS REGISTERS
	MOV	C12, C11
	LD	B1, 0 * SIZE(BO)

	MOV	C21, C11
	MOV	C22, C11
	LD	A1, 0 * SIZE(AO)

	MOV	C31, C11
	MOV	C32, C11
	LD	B2, 1 * SIZE(BO)

	MOV	C41, C11
	MOV	C42, C11
	LD	B3, 2 * SIZE(BO)

	MOV	C13, C11
	MOV	C14, C11
	LD	B4, 3 * SIZE(BO)

	MOV	C23, C11
	MOV	C24, C11

	MOV	C33, C11
	MOV	C34, C11

	MOV	C43, C11
	MOV	C44, C11
#if (defined(LEFT) && !defined(TRANSA))||\
    (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1
#else
	daddiu	TEMP, KK, 4
#endif
	dsra	L, TEMP, 2			# L = TEMP / 4 (main loop trip count)
	blez	L, .L412
	NOP					# explicit filler after the branch, as in the MR=4 and MR=2
						# TRMM paths; before, the daddiu at .L4110 came right after it

#else
	move	BO, B				# Reset B
	dsra	L, K, 2				# UnRoll K=4
	MTC	$0, C11				# CLEAR RESULTS REGISTERS
	MOV	C12, C11
	LD	B1, 0 * SIZE(BO)

	MOV	C21, C11
	MOV	C22, C11
	LD	A1, 0 * SIZE(AO)

	MOV	C31, C11
	MOV	C32, C11
	LD	B2, 1 * SIZE(BO)

	MOV	C41, C11
	MOV	C42, C11
	LD	B3, 2 * SIZE(BO)

	MOV	C13, C11
	MOV	C14, C11
	LD	B4, 3 * SIZE(BO)

	MOV	C23, C11
	MOV	C24, C11

	MOV	C33, C11
	MOV	C34, C11

	MOV	C43, C11
	blez	L, .L412
	MOV	C44, C11			# last accumulator clear follows the branch
#endif

.L4110:
	daddiu	L, L, -1
	LD	A2, 1 * SIZE(AO)

	MADD	C11, C11, A1, B1
	LD	B5, 4 * SIZE(BO)

	MADD	C12, C12, A1, B2
	LD	B6,
5 * SIZE(BO)	# operand tail of the instruction begun on the preceding source line
## Remainder of the loop that branches back to .L4110 (its head lies above this
## chunk).  Each visible group multiplies one A element (A1..A4 in turn) by four
## B values and accumulates into C11..C14 while the next operands are loaded.
## MADD d, a, b, c is read here as d = a + b * c (macro from common.h - confirm).
## NOTE(review): every branch in this file is followed by one instruction that
## is written as a branch delay slot (executed on both paths); this assumes
## PROLOGUE selects noreorder mode - confirm.
	MADD	C13, C13, A1, B3
	LD	B7, 6 * SIZE(BO)
	MADD	C14, C14, A1, B4
	LD	B8, 7 * SIZE(BO)
	LD	A3, 2 * SIZE(AO)
	NOP
	MADD	C11, C11, A2, B5
	LD	B1, 8 * SIZE(BO)
	MADD	C12, C12, A2, B6
	LD	B2, 9 * SIZE(BO)
	MADD	C13, C13, A2, B7
	LD	B3, 10 * SIZE(BO)
	MADD	C14, C14, A2, B8
	LD	B4, 11 * SIZE(BO)
	LD	A4, 3 * SIZE(AO)
	daddiu	AO, AO, 4 * SIZE
	MADD	C11, C11, A3, B1
	LD	B5, 12 * SIZE(BO)
	MADD	C12, C12, A3, B2
	LD	B6, 13 * SIZE(BO)
	MADD	C13, C13, A3, B3
	LD	B7, 14 * SIZE(BO)
	MADD	C14, C14, A3, B4
	LD	B8, 15 * SIZE(BO)
	LD	A1, 0 * SIZE(AO)
	daddiu	BO, BO, 16 * SIZE
	MADD	C11, C11, A4, B5
	LD	B1, 0 * SIZE(BO)
	MADD	C12, C12, A4, B6
	LD	B2, 1 * SIZE(BO)
	MADD	C13, C13, A4, B7
	LD	B3, 2 * SIZE(BO)
	MADD	C14, C14, A4, B8
	bgtz	L, .L4110
	LD	B4, 3 * SIZE(BO)	# delay slot

## .L412: when bit 1 of the k-count is set (K, or TEMP in the TRMM build),
## perform two more k-steps of the 1x4 tile: AO advances by 2 elements and BO
## by 8 elements.
.L412:
#ifndef TRMMKERNEL
	andi	L, K, 2
#else
	andi	L, TEMP, 2
#endif
	blez	L, .L413
	NOP
	LD	A2, 1 * SIZE(AO)
	daddiu	AO, AO, 2 * SIZE
	MADD	C11, C11, A1, B1
	LD	B5, 4 * SIZE(BO)
	MADD	C12, C12, A1, B2
	LD	B6, 5 * SIZE(BO)
	MADD	C13, C13, A1, B3
	LD	B7, 6 * SIZE(BO)
	MADD	C14, C14, A1, B4
	LD	B8, 7 * SIZE(BO)
	LD	A1, 0 * SIZE(AO)
	daddiu	BO, BO, 8 * SIZE
	MADD	C11, C11, A2, B5
	LD	B1, 0 * SIZE(BO)
	MADD	C12, C12, A2, B6
	LD	B2, 1 * SIZE(BO)
	MADD	C13, C13, A2, B7
	LD	B3, 2 * SIZE(BO)
	MADD	C14, C14, A2, B8
	LD	B4, 3 * SIZE(BO)

## .L413: when bit 0 of the k-count is set, perform the last single k-step.
## ALPHA and B8 are both $f15 (register defines at the top of the file) and B8
## is overwritten above, so ALPHA is reloaded from its stack slot 152($sp).
.L413:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L410
	LD	ALPHA, 152($sp)		# delay slot: reload ALPHA on both paths
	MADD	C11, C11, A1, B1
	MADD	C12, C12, A1, B2
	daddiu	AO, AO, 1 * SIZE
	MADD	C13, C13, A1, B3
	MADD	C14, C14, A1, B4
	daddiu	BO, BO, 4 * SIZE
	.align	4
## .L410: write back the 1x4 tile, one element to each of CO1..CO4, then step
## each column pointer by one element.
.L410:
#ifndef TRMMKERNEL
## GEMM build: load the current C values and add accumulator * ALPHA.
	LD	A5, 0 * SIZE(CO1)
	LD	A6, 0 * SIZE(CO2)
	LD	A7, 0 * SIZE(CO3)
	LD	A8, 0 * SIZE(CO4)
	MADD	A5, A5, C11, ALPHA
	MADD	A6, A6, C12, ALPHA
	MADD	A7, A7, C13, ALPHA
	MADD	A8, A8, C14, ALPHA
	ST	A5, 0 * SIZE(CO1)
	ST	A6, 0 * SIZE(CO2)
	ST	A7, 0 * SIZE(CO3)
	ST	A8, 0 * SIZE(CO4)
	daddiu	CO1, CO1, 1 * SIZE
	daddiu	CO2, CO2, 1 * SIZE
	daddiu	CO3, CO3, 1 * SIZE
	daddiu	CO4, CO4, 1 * SIZE
#else
## TRMM build: C is not read; store accumulator * ALPHA.
	MUL	A5, C11, ALPHA
	MUL	A6, C12, ALPHA
	MUL	A7, C13, ALPHA
	MUL	A8, C14, ALPHA
	ST	A5, 0 * SIZE(CO1)
	ST	A6, 0 * SIZE(CO2)
	ST	A7, 0 * SIZE(CO3)
	ST	A8, 0 * SIZE(CO4)
	daddiu	CO1, CO1, 1 * SIZE
	daddiu	CO2, CO2, 1 * SIZE
	daddiu	CO3, CO3, 1 * SIZE
	daddiu	CO4, CO4, 1 * SIZE
#if ( defined(LEFT) && defined(TRANSA))||\
	(!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (1 when LEFT, otherwise 4); then AO += TEMP << BASE_SHIFT
## and BO += TEMP << (2 + BASE_SHIFT) - presumably skipping the part of the
## packed panels this tile did not read (confirm against the packing code).
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -1
#else
	daddiu	TEMP, TEMP, -4
#endif
	dsll	L, TEMP, BASE_SHIFT
	dsll	TEMP, TEMP, 2 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 1
#endif
#endif
	.align	4
## .L40: end of one pass over the four-column panel: decrement the panel
## counter J, set B to the current BO, and loop back to .L48 while J > 0.
.L40:
#if defined(TRMMKERNEL) && !defined(LEFT)
	daddiu	KK, KK, 4
#endif
	daddiu	J, J, -1
	move	B, BO
	bgtz	J, .L48
	NOP
	.align	4
## .L2: two-column panel, entered only when bit 1 of N is set.
.L2:					# NR=2
	andi	J, N, 2
	blez	J, .L1
	NOP
.L28:
	dsra	I, M, 3			# I = M >> 3: count of 8-row tiles (MR=8)
	move	AO, A			# Reset A
	move	CO1, C
#if defined(TRMMKERNEL) && defined(LEFT)
	move	KK, OFFSET
#endif
	daddu	CO2, C, LDC		# second column = C + LDC
	blez	I, .L24
	daddu	C, CO2, LDC		# delay slot: C moves past both columns
	.align	4
## .L281: set up one 8x2 tile.  All sixteen accumulators are zeroed (MTC moves
## integer $0 into C11, the MOVs copy it) while the first eight A values and
## two B values are loaded.
.L281:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B
#else
## Offset both panels by KK k-steps: 8 elements per step in A, 2 in B.
	dsll	L, KK, 3 + BASE_SHIFT
	dsll	TEMP, KK, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	LD	A1, 0 * SIZE(AO)
	MOV	C12, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C21, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C22, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C31, C11
	LD	A5, 4 * SIZE(AO)
	MOV	C32, C11
	LD	A6, 5 * SIZE(AO)
	MOV	C41, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C42, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C13, C11
	LD	A7, 6 * SIZE(AO)
	MOV	C14, C11
	LD	A8, 7 * SIZE(AO)
	MOV	C23, C11
	MOV	C24, C11
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	MOV	C44, C11
## TEMP = k-count for this tile in the TRMM build; L = TEMP >> 1 loop passes.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 8
#else
	daddiu	TEMP, KK, 2
#endif
	dsra	L, TEMP, 1
	blez	L, .L282
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	LD	A1, 0 * SIZE(AO)
	MOV	C12, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C21, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C22, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C31, C11
	LD	A5, 4 * SIZE(AO)
	MOV	C32, C11
	LD	A6, 5 * SIZE(AO)
	MOV	C41, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C42, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C13, C11
	LD	A7, 6 * SIZE(AO)
	MOV	C14, C11
	LD	A8, 7 * SIZE(AO)
	MOV	C23, C11
	MOV	C24, C11
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	blez	L, .L282
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L2810: main 8x2 loop, two k-steps per pass (AO += 16 elements, BO += 4).
## Accumulator layout: C11,C21,C31,C41 and C13,C23,C33,C43 take rows 0-3 and
## rows 4-7 times the first B value; C12..C42 and C14..C44 take the same rows
## times the second B value.  For the second k-step rows 0-3 of A are held in
## B5..B8 and rows 4-7 in A1..A4.
.L2810:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	B5, 8 * SIZE(AO)
	MADD	C21, C21, A2, B1
	LD	B6, 9 * SIZE(AO)
	MADD	C31, C31, A3, B1
	LD	B7, 10 * SIZE(AO)
	MADD	C41, C41, A4, B1
	LD	B8, 11 * SIZE(AO)
	MADD	C12, C12, A1, B2
	MADD	C22, C22, A2, B2
	LD	B3, 2 * SIZE(BO)
	MADD	C32, C32, A3, B2
	MADD	C42, C42, A4, B2
	LD	B4, 3 * SIZE(BO)
	daddiu	BO, BO, 4 * SIZE
	MADD	C13, C13, A5, B1
	MADD	C23, C23, A6, B1
	LD	A1, 12 * SIZE(AO)
	MADD	C33, C33, A7, B1
	MADD	C43, C43, A8, B1
	LD	A2, 13 * SIZE(AO)
	MADD	C14, C14, A5, B2
	MADD	C24, C24, A6, B2
	LD	A3, 14 * SIZE(AO)
	MADD	C34, C34, A7, B2
	MADD	C44, C44, A8, B2
	LD	A4, 15 * SIZE(AO)
	daddiu	AO, AO, 16 * SIZE
	MADD	C11, C11, B5, B3
	LD	A5, 4 * SIZE(AO)
	MADD	C21, C21, B6, B3
	LD	A6, 5 * SIZE(AO)
	MADD	C13, C13, A1, B3
	MADD	C23, C23, A2, B3
	LD	A7, 6 * SIZE(AO)
	MADD	C33, C33, A3, B3
	MADD	C43, C43, A4, B3
	LD	A8, 7 * SIZE(AO)
	MADD	C14, C14, A1, B4
	MADD	C24, C24, A2, B4
	LD	B1, 0 * SIZE(BO)
	MADD	C34, C34, A3, B4
	MADD	C44, C44, A4, B4
	LD	B2, 1 * SIZE(BO)
	MADD	C31, C31, B7, B3
	MADD	C41, C41, B8, B3
	LD	A1, 0 * SIZE(AO)
	MADD	C12, C12, B5, B4
	LD	A2, 1 * SIZE(AO)
	MADD	C22, C22, B6, B4
	LD	A3, 2 * SIZE(AO)
	LD	A4, 3 * SIZE(AO)
	MADD	C32, C32, B7, B4
	bgtz	L, .L2810
	MADD	C42, C42, B8, B4	# delay slot
	.align	4
## .L282: odd k-count: one final k-step for the 8x2 tile.  ALPHA ($f15, shared
## with B8 which the loop overwrote) is reloaded in the delay slot.
.L282:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L280
	LD	ALPHA, 152($sp)
	MADD	C13, C13, A5, B1
	MADD	C23, C23, A6, B1
	MADD	C33, C33, A7, B1
	MADD	C43, C43, A8, B1
	MADD	C14, C14, A5, B2
	MADD	C24, C24, A6, B2
	MADD	C34, C34, A7, B2
	MADD	C44, C44, A8, B2
	daddiu	AO, AO, 8 * SIZE
	MADD	C11, C11, A1, B1
	MADD	C21, C21, A2, B1
	MADD	C31, C31, A3, B1
	MADD	C41, C41, A4, B1
	MADD	C12, C12, A1, B2
	MADD	C22, C22, A2, B2
	MADD	C32, C32, A3, B2
	MADD	C42, C42, A4, B2
	daddiu	BO, BO, 2 * SIZE
	.align	4
## .L280: write back the 8x2 tile (eight elements to CO1, eight to CO2) and
## loop to .L281 while tiles remain.  C11 doubles as the eighth temporary for
## the second column because B8 aliases ALPHA.
.L280:					# Write Back
#ifndef TRMMKERNEL
	daddiu	I, I, -1
	LD	A1, 0 * SIZE(CO1)
	LD	A2, 1 * SIZE(CO1)
	LD	A3, 2 * SIZE(CO1)
	LD	A4, 3 * SIZE(CO1)
	LD	A5, 4 * SIZE(CO1)
	LD	A6, 5 * SIZE(CO1)
	LD	A7, 6 * SIZE(CO1)
	LD	A8, 7 * SIZE(CO1)
	MADD	A1, A1, C11, ALPHA
	LD	B1, 0 * SIZE(CO2)
	MADD	A2, A2, C21, ALPHA
	LD	B2, 1 * SIZE(CO2)
	MADD	A3, A3, C31, ALPHA
	LD	B3, 2 * SIZE(CO2)
	MADD	A4, A4, C41, ALPHA
	LD	B4, 3 * SIZE(CO2)
	MADD	A5, A5, C13, ALPHA
	LD	B5, 4 * SIZE(CO2)
	MADD	A6, A6, C23, ALPHA
	LD	B6, 5 * SIZE(CO2)
	MADD	A7, A7, C33, ALPHA
	LD	B7, 6 * SIZE(CO2)
	MADD	A8, A8, C43, ALPHA
	LD	C11, 7 * SIZE(CO2)
	MADD	B1, B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	MADD	B2, B2, C22, ALPHA
	ST	A2, 1 * SIZE(CO1)
	MADD	B3, B3, C32, ALPHA
	ST	A3, 2 * SIZE(CO1)
	MADD	B4, B4, C42, ALPHA
	ST	A4, 3 * SIZE(CO1)
	MADD	B5, B5, C14, ALPHA
	ST	A5, 4 * SIZE(CO1)
	MADD	B6, B6, C24, ALPHA
	ST	A6, 5 * SIZE(CO1)
	MADD	B7, B7, C34, ALPHA
	ST	A7, 6 * SIZE(CO1)
	MADD	C11, C11, C44, ALPHA
	ST	A8, 7 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	ST	B2, 1 * SIZE(CO2)
	ST	B3, 2 * SIZE(CO2)
	ST	B4, 3 * SIZE(CO2)
	ST	B5, 4 * SIZE(CO2)
	ST	B6, 5 * SIZE(CO2)
	ST	B7, 6 * SIZE(CO2)
	ST	C11, 7 * SIZE(CO2)
	daddiu	CO1, CO1, 8 * SIZE
	bgtz	I, .L281
	daddiu	CO2, CO2, 8 * SIZE	# delay slot
#else
	daddiu	I, I, -1
	MUL	A1, C11, ALPHA
	MUL	A2, C21, ALPHA
	MUL	A3, C31, ALPHA
	MUL	A4, C41, ALPHA
	MUL	A5, C13, ALPHA
	MUL	A6, C23, ALPHA
	MUL	A7, C33, ALPHA
	MUL	A8, C43, ALPHA
	MUL	B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	MUL	B2, C22, ALPHA
	ST	A2, 1 * SIZE(CO1)
	MUL	B3, C32, ALPHA
	ST	A3, 2 * SIZE(CO1)
	MUL	B4, C42, ALPHA
	ST	A4, 3 * SIZE(CO1)
	MUL	B5, C14, ALPHA
	ST	A5, 4 * SIZE(CO1)
	MUL	B6, C24, ALPHA
	ST	A6, 5 * SIZE(CO1)
	MUL	B7, C34, ALPHA
	ST	A7, 6 * SIZE(CO1)
	MUL	C11, C44, ALPHA
	ST	A8, 7 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	ST	B2, 1 * SIZE(CO2)
	ST	B3, 2 * SIZE(CO2)
	ST	B4, 3 * SIZE(CO2)
	ST	B5, 4 * SIZE(CO2)
	ST	B6, 5 * SIZE(CO2)
	ST	B7, 6 * SIZE(CO2)
	ST	C11, 7 * SIZE(CO2)
#if ( defined(LEFT) && defined(TRANSA)) ||(!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (8 when LEFT, otherwise 2); AO += TEMP << (3 + BASE_SHIFT),
## BO += TEMP << (1 + BASE_SHIFT).
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -8
#else
	daddiu	TEMP, TEMP, -2
#endif
	dsll	L, TEMP, 3 + BASE_SHIFT
	dsll	TEMP, TEMP, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 8
#endif
	daddiu	CO1, CO1, 8 * SIZE
	bgtz	I, .L281
	daddiu	CO2, CO2, 8 * SIZE	# delay slot
#endif
	.align	4
## .L24: 4x2 tile, taken once when bit 2 of M is set.
.L24:
	andi	I, M, 4			# MR=4
	blez	I, .L22
	NOP
	.align	4
.L241:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B
#else
## Offset both panels by KK k-steps: 4 elements per step in A, 2 in B.
	dsll	L, KK, 2 + BASE_SHIFT
	dsll	TEMP, KK, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C41, C11
	MOV	C42, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C13, C11
	MOV	C14, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C23, C11
	MOV	C24, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	MOV	C44, C11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 4
#else
	daddiu	TEMP, KK, 2
#endif
	dsra	L, TEMP, 1
	blez	L, .L242
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C41, C11
	MOV	C42, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C13, C11
	MOV	C14, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C23, C11
	MOV	C24, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	blez	L, .L242
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L2410: main 4x2 loop, two k-steps per pass (AO += 8 elements, BO += 4).
## C11..C41 accumulate the four rows times the first B value, C12..C42 times
## the second.  The loop body continues past the end of this span.
.L2410:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	A5, 4 * SIZE(AO)
	MADD	C21, C21, A2, B1
	LD	B3, 2 * SIZE(BO)
	MADD	C31, C31, A3, B1
	LD	B4, 3 * SIZE(BO)
	MADD	C41, C41, A4, B1
	LD	A6, 5 * SIZE(AO)
	daddiu	BO, BO, 4 * SIZE
	MADD	C12, C12, A1, B2
	LD	A7, 6 * SIZE(AO)
	MADD	C22, C22, A2, B2
	LD	A8, 7 * SIZE(AO)
	daddiu	AO, AO, 8 * SIZE
	MADD	C32, C32, A3, B2
	MADD	C42, C42, A4, B2
	MADD	C11, C11, A5, B3
	LD	A1, 0 * SIZE(AO)
	MADD	C21, C21, A6, B3
	LD	B1, 0 * SIZE(BO)
	MADD	C31, C31, A7, B3
	LD	B2, 1 * SIZE(BO)
	MADD	C41, C41, A8, B3
	LD	A2, 1 * SIZE(AO)
	MADD	C12, C12, A5, B4
	LD	A3, 2 * SIZE(AO)
## Closing instructions of the loop that branches back to .L2410 (4x2 tile,
## two k-steps per pass); the loop head lies above this span.
## MADD d, a, b, c is read here as d = a + b * c (macro from common.h - confirm).
## NOTE(review): every branch below is followed by one instruction that is
## written as a branch delay slot (executed on both paths); this assumes
## PROLOGUE selects noreorder mode - confirm.
	MADD	C22, C22, A6, B4
	LD	A4, 3 * SIZE(AO)
	MADD	C32, C32, A7, B4
	bgtz	L, .L2410
	MADD	C42, C42, A8, B4	# delay slot
	.align	4
## .L242: odd k-count (K, or TEMP in the TRMM build): one final k-step for the
## 4x2 tile.  ALPHA is reloaded from its stack slot 152($sp) in the delay slot;
## it shares $f15 with B8 (register defines at the top of the file).
.L242:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L240
	LD	ALPHA, 152($sp)
	MADD	C11, C11, A1, B1
	MADD	C21, C21, A2, B1
	MADD	C31, C31, A3, B1
	MADD	C41, C41, A4, B1
	MADD	C12, C12, A1, B2
	MADD	C22, C22, A2, B2
	MADD	C32, C32, A3, B2
	MADD	C42, C42, A4, B2
	daddiu	AO, AO, 4 * SIZE
	daddiu	BO, BO, 2 * SIZE
	.align	4
## .L240: write back the 4x2 tile: four elements to CO1 and four to CO2.
## GEMM build adds accumulator * ALPHA to the loaded C values; TRMM build
## stores accumulator * ALPHA without reading C.
.L240:					# Write Back
#ifndef TRMMKERNEL
	LD	A1, 0 * SIZE(CO1)
	LD	A2, 1 * SIZE(CO1)
	LD	A3, 2 * SIZE(CO1)
	LD	A4, 3 * SIZE(CO1)
	MADD	A1, A1, C11, ALPHA
	LD	B1, 0 * SIZE(CO2)
	MADD	A2, A2, C21, ALPHA
	LD	B2, 1 * SIZE(CO2)
	MADD	A3, A3, C31, ALPHA
	LD	B3, 2 * SIZE(CO2)
	MADD	A4, A4, C41, ALPHA
	LD	B4, 3 * SIZE(CO2)
	MADD	B1, B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	MADD	B2, B2, C22, ALPHA
	ST	A2, 1 * SIZE(CO1)
	MADD	B3, B3, C32, ALPHA
	ST	A3, 2 * SIZE(CO1)
	MADD	B4, B4, C42, ALPHA
	ST	A4, 3 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	ST	B2, 1 * SIZE(CO2)
	ST	B3, 2 * SIZE(CO2)
	ST	B4, 3 * SIZE(CO2)
	daddiu	CO1, CO1, 4 * SIZE
	daddiu	CO2, CO2, 4 * SIZE
#else
	MUL	A1, C11, ALPHA
	MUL	A2, C21, ALPHA
	MUL	A3, C31, ALPHA
	MUL	A4, C41, ALPHA
	MUL	B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	MUL	B2, C22, ALPHA
	ST	A2, 1 * SIZE(CO1)
	MUL	B3, C32, ALPHA
	ST	A3, 2 * SIZE(CO1)
	MUL	B4, C42, ALPHA
	ST	A4, 3 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	ST	B2, 1 * SIZE(CO2)
	ST	B3, 2 * SIZE(CO2)
	ST	B4, 3 * SIZE(CO2)
	daddiu	CO1, CO1, 4 * SIZE
	daddiu	CO2, CO2, 4 * SIZE
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (4 when LEFT, otherwise 2); AO += TEMP << (2 + BASE_SHIFT),
## BO += TEMP << (1 + BASE_SHIFT) - presumably skipping the part of the packed
## panels this tile did not read (confirm against the packing code).
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -4
#else
	daddiu	TEMP, TEMP, -2
#endif
	dsll	L, TEMP, 2 + BASE_SHIFT
	dsll	TEMP, TEMP, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 4
#endif
#endif
	.align	4
## .L22: 2x2 tile, taken once when bit 1 of M is set.
.L22:
	andi	I, M, 2
	blez	I, .L21
	NOP
	.align	4
.L221:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B
#else
## Offset both panels by KK k-steps: 2 elements per step in A and in B.
	dsll	L, KK, 1 + BASE_SHIFT
	dsll	TEMP, KK, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C41, C11
	MOV	C42, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C43, C11
	MOV	C44, C11
## Both non-default branches add 2 here: the tile is 2 wide in either operand.
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 2
#else
	daddiu	TEMP, KK, 2
#endif
	dsra	L, TEMP, 1
	blez	L, .L222
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C41, C11
	MOV	C42, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C43, C11
	blez	L, .L222
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L2210: main 2x2 loop, two k-steps per pass (AO += 4 elements, BO += 4).
## C11,C21 take the two rows times the first B value; C12,C22 the second.
.L2210:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	A3, 2 * SIZE(AO)
	MADD	C21, C21, A2, B1
	LD	B3, 2 * SIZE(BO)
	MADD	C12, C12, A1, B2
	LD	A4, 3 * SIZE(AO)
	daddiu	AO, AO, 4 * SIZE
	MADD	C22, C22, A2, B2
	LD	B4, 3 * SIZE(BO)
	daddiu	BO, BO, 4 * SIZE
	MADD	C11, C11, A3, B3
	LD	A1, 0 * SIZE(AO)
	MADD	C21, C21, A4, B3
	LD	B1, 0 * SIZE(BO)
	MADD	C12, C12, A3, B4
	LD	B2, 1 * SIZE(BO)
	MADD	C22, C22, A4, B4
	bgtz	L, .L2210
	LD	A2, 1 * SIZE(AO)	# delay slot
	.align	4
## .L222: odd k-count: one final k-step for the 2x2 tile; ALPHA reloaded in the
## delay slot.
.L222:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L220
	LD	ALPHA, 152($sp)
	MADD	C11, C11, A1, B1
	MADD	C21, C21, A2, B1
	MADD	C12, C12, A1, B2
	MADD	C22, C22, A2, B2
	daddiu	AO, AO, 2 * SIZE
	daddiu	BO, BO, 2 * SIZE
	.align	4
## .L220: write back the 2x2 tile: two elements to CO1 and two to CO2.
.L220:					# Write Back
#ifndef TRMMKERNEL
	LD	A1, 0 * SIZE(CO1)
	LD	A2, 1 * SIZE(CO1)
	MADD	A1, A1, C11, ALPHA
	LD	B1, 0 * SIZE(CO2)
	MADD	A2, A2, C21, ALPHA
	LD	B2, 1 * SIZE(CO2)
	MADD	B1, B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	MADD	B2, B2, C22, ALPHA
	ST	A2, 1 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	ST	B2, 1 * SIZE(CO2)
	daddiu	CO1, CO1, 2 * SIZE
	daddiu	CO2, CO2, 2 * SIZE
#else
	MUL	A1, C11, ALPHA
	MUL	A2, C21, ALPHA
	MUL	B1, C12, ALPHA
	MUL	B2, C22, ALPHA
	ST	A1, 0 * SIZE(CO1)
	ST	A2, 1 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	ST	B2, 1 * SIZE(CO2)
	daddiu	CO1, CO1, 2 * SIZE
	daddiu	CO2, CO2, 2 * SIZE
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - 2 (same in both branches); AO and BO both advance by
## TEMP << (1 + BASE_SHIFT).
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -2
#else
	daddiu	TEMP, TEMP, -2
#endif
	dsll	L, TEMP, 1 + BASE_SHIFT
	dsll	TEMP, TEMP, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
## Immediate add written with daddiu, matching every other KK update in this
## kernel (the three-register daddu form only accepted a constant here through
## assembler macro expansion).
	daddiu	KK, KK, 2
#endif
#endif
	.align	4
## .L21: 1x2 tile, taken once when bit 0 of M is set.
.L21:
	andi	I, M, 1
	blez	I, .L20
	NOP
	.align	4
.L211:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B			# Reset B
#else
## Offset both panels by KK k-steps: 1 element per step in A, 2 in B.
	dsll	L, KK, BASE_SHIFT
	dsll	TEMP, KK, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	MOV	C31, C11
	MOV	C32, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C41, C11
	MOV	C42, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C43, C11
	MOV	C44, C11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1
#else
	daddiu	TEMP, KK, 2
#endif
	dsra	L, TEMP, 1
	blez	L, .L212
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	MOV	C31, C11
	MOV	C32, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C41, C11
	MOV	C42, C11
	LD	B2, 1 * SIZE(BO)
	MOV	C43, C11
	blez	L, .L212
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L2110: main 1x2 loop, two k-steps per pass (AO += 2 elements, BO += 4).
.L2110:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	A2, 1 * SIZE(AO)
	MADD	C12, C12, A1, B2
	LD	B3, 2 * SIZE(BO)
	LD	B4, 3 * SIZE(BO)
	daddiu	AO, AO, 2 * SIZE
	daddiu	BO, BO, 4 * SIZE
	MADD	C11, C11, A2, B3
	LD	A1, 0 * SIZE(AO)
	MADD	C12, C12, A2, B4
	LD	B1, 0 * SIZE(BO)
	bgtz	L, .L2110
	LD	B2, 1 * SIZE(BO)	# delay slot
	.align	4
## .L212: odd k-count: one final k-step for the 1x2 tile; ALPHA reloaded in the
## delay slot.
.L212:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L210
	LD	ALPHA, 152($sp)
	MADD	C11, C11, A1, B1
	MADD	C12, C12, A1, B2
	daddiu	AO, AO, 1 * SIZE
	daddiu	BO, BO, 2 * SIZE
	.align	4
## .L210: write back the 1x2 tile: one element to CO1 and one to CO2.
.L210:					# Write Back
#ifndef TRMMKERNEL
	LD	A1, 0 * SIZE(CO1)
	MADD	A1, A1, C11, ALPHA
	LD	B1, 0 * SIZE(CO2)
	MADD	B1, B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	daddiu	CO1, CO1, 1 * SIZE
	daddiu	CO2, CO2, 1 * SIZE
#else
	MUL	A1, C11, ALPHA
	MUL	B1, C12, ALPHA
	ST	A1, 0 * SIZE(CO1)
	ST	B1, 0 * SIZE(CO2)
	daddiu	CO1, CO1, 1 * SIZE
	daddiu	CO2, CO2, 1 * SIZE
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (1 when LEFT, otherwise 2); AO += TEMP << BASE_SHIFT,
## BO += TEMP << (1 + BASE_SHIFT).
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -1
#else
	daddiu	TEMP, TEMP, -2
#endif
	dsll	L, TEMP, BASE_SHIFT
	dsll	TEMP, TEMP, 1 + BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 1
#endif
#endif
	.align	4
## .L20: end of the two-column panel: B is set to the current BO.
.L20:
#if defined(TRMMKERNEL) && !defined(LEFT)
	daddiu	KK, KK, 2
#endif
	move	B, BO
	.align	4
## .L1: single-column panel, entered only when bit 0 of N is set.  The tiles
## below address the output through C directly instead of CO1.
.L1:
	andi	J, N, 1
	blez	J, .L999
	NOP
.L18:
	dsra	I, M, 3			# I = M >> 3: count of 8-row tiles (MR=8)
	move	AO, A			# Reset A
#if defined(TRMMKERNEL) && defined(LEFT)
	move	KK, OFFSET
#endif
	blez	I, .L14
	NOP
	.align	4
## .L181: set up one 8x1 tile: zero the accumulators, load eight A values and
## one B value.
.L181:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B			# Reset B
#else
## Offset both panels by KK k-steps: 8 elements per step in A, 1 in B.
	dsll	L, KK, 3 + BASE_SHIFT
	dsll	TEMP, KK, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	LD	A1, 0 * SIZE(AO)
	MOV	C12, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C21, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C22, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C31, C11
	LD	A5, 4 * SIZE(AO)
	MOV	C32, C11
	LD	A6, 5 * SIZE(AO)
	MOV	C41, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C42, C11
	LD	A7, 6 * SIZE(AO)
	MOV	C13, C11
	LD	A8, 7 * SIZE(AO)
	MOV	C14, C11
	MOV	C23, C11
	MOV	C24, C11
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	MOV	C44, C11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 8
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	L, TEMP, 1
	blez	L, .L182
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	LD	A1, 0 * SIZE(AO)
	MOV	C12, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C21, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C22, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C31, C11
	LD	A5, 4 * SIZE(AO)
	MOV	C32, C11
	LD	A6, 5 * SIZE(AO)
	MOV	C41, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C42, C11
	LD	A7, 6 * SIZE(AO)
	MOV	C13, C11
	LD	A8, 7 * SIZE(AO)
	MOV	C14, C11
	MOV	C23, C11
	MOV	C24, C11
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	blez	L, .L182
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L1810: main 8x1 loop, two k-steps per pass (AO += 16 elements, BO += 2).
## C11..C41 accumulate rows 0-3 and C13..C43 rows 4-7.  For the second k-step
## rows 0-3 of A are held in B5..B8 and rows 4-7 in A1..A4.
.L1810:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	B5, 8 * SIZE(AO)
	MADD	C21, C21, A2, B1
	LD	B6, 9 * SIZE(AO)
	MADD	C31, C31, A3, B1
	LD	B7, 10 * SIZE(AO)
	MADD	C41, C41, A4, B1
	LD	B8, 11 * SIZE(AO)
	MADD	C13, C13, A5, B1
	LD	B2, 1 * SIZE(BO)
	daddiu	BO, BO, 2 * SIZE
	MADD	C23, C23, A6, B1
	LD	A1, 12 * SIZE(AO)
	MADD	C33, C33, A7, B1
	LD	A2, 13 * SIZE(AO)
	MADD	C43, C43, A8, B1
	LD	A3, 14 * SIZE(AO)
	LD	A4, 15 * SIZE(AO)
	daddiu	AO, AO, 16 * SIZE
	MADD	C11, C11, B5, B2
	LD	A5, 4 * SIZE(AO)
	MADD	C21, C21, B6, B2
	LD	A6, 5 * SIZE(AO)
	MADD	C13, C13, A1, B2
	LD	A7, 6 * SIZE(AO)
	MADD	C23, C23, A2, B2
	LD	A8, 7 * SIZE(AO)
	MADD	C33, C33, A3, B2
	LD	B1, 0 * SIZE(BO)
	MADD	C43, C43, A4, B2
	LD	A1, 0 * SIZE(AO)
	MADD	C31, C31, B7, B2
	LD	A2, 1 * SIZE(AO)
	MADD	C41, C41, B8, B2
	LD	A3, 2 * SIZE(AO)
	bgtz	L, .L1810
	LD	A4, 3 * SIZE(AO)	# delay slot
	.align	4
## .L182: odd k-count: one final k-step for the 8x1 tile.  ALPHA ($f15, shared
## with B8 which the loop overwrote) is reloaded in the delay slot.
.L182:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L180
	LD	ALPHA, 152($sp)
	MADD	C13, C13, A5, B1
	MADD	C23, C23, A6, B1
	MADD	C33, C33, A7, B1
	MADD	C43, C43, A8, B1
	daddiu	AO, AO, 8 * SIZE
	MADD	C11, C11, A1, B1
	MADD	C21, C21, A2, B1
	MADD	C31, C31, A3, B1
	MADD	C41, C41, A4, B1
	daddiu	BO, BO, 1 * SIZE
	.align	4
## .L180: write back the 8x1 tile (eight elements at C), advance C by eight
## elements, and loop to .L181 while tiles remain.
.L180:					# Write Back
#ifndef TRMMKERNEL
	daddiu	I, I, -1
	LD	A1, 0 * SIZE(C)
	LD	A2, 1 * SIZE(C)
	LD	A3, 2 * SIZE(C)
	LD	A4, 3 * SIZE(C)
	LD	A5, 4 * SIZE(C)
	LD	A6, 5 * SIZE(C)
	LD	A7, 6 * SIZE(C)
	LD	A8, 7 * SIZE(C)
	MADD	A1, A1, C11, ALPHA
	MADD	A2, A2, C21, ALPHA
	MADD	A3, A3, C31, ALPHA
	MADD	A4, A4, C41, ALPHA
	MADD	A5, A5, C13, ALPHA
	MADD	A6, A6, C23, ALPHA
	MADD	A7, A7, C33, ALPHA
	MADD	A8, A8, C43, ALPHA
	ST	A1, 0 * SIZE(C)
	ST	A2, 1 * SIZE(C)
	ST	A3, 2 * SIZE(C)
	ST	A4, 3 * SIZE(C)
	ST	A5, 4 * SIZE(C)
	ST	A6, 5 * SIZE(C)
	ST	A7, 6 * SIZE(C)
	ST	A8, 7 * SIZE(C)
	daddiu	C, C, 8 * SIZE
	bgtz	I, .L181
	NOP
#else
	daddiu	I, I, -1
	MUL	A1, C11, ALPHA
	MUL	A2, C21, ALPHA
	MUL	A3, C31, ALPHA
	MUL	A4, C41, ALPHA
	MUL	A5, C13, ALPHA
	MUL	A6, C23, ALPHA
	MUL	A7, C33, ALPHA
	MUL	A8, C43, ALPHA
	ST	A1, 0 * SIZE(C)
	ST	A2, 1 * SIZE(C)
	ST	A3, 2 * SIZE(C)
	ST	A4, 3 * SIZE(C)
	ST	A5, 4 * SIZE(C)
	ST	A6, 5 * SIZE(C)
	ST	A7, 6 * SIZE(C)
	ST	A8, 7 * SIZE(C)
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (8 when LEFT, otherwise 1); AO += TEMP << (3 + BASE_SHIFT),
## BO += TEMP << BASE_SHIFT.
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -8
#else
	daddiu	TEMP, TEMP, -1
#endif
	dsll	L, TEMP, 3 + BASE_SHIFT
	dsll	TEMP, TEMP, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 8
#endif
	daddiu	C, C, 8 * SIZE
	bgtz	I, .L181
	NOP
#endif
	.align	4
## .L14: 4x1 tile, taken once when bit 2 of M is set.
.L14:
	andi	I, M, 4			# MR=4
	blez	I, .L12
	NOP
	.align	4
.L141:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO, B
#else
## Offset both panels by KK k-steps: 4 elements per step in A, 1 in B.
	dsll	L, KK, 2 + BASE_SHIFT
	dsll	TEMP, KK, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C41, C11
	MOV	C42, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C13, C11
	MOV	C14, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C23, C11
	MOV	C24, C11
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	MOV	C44, C11
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 4
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	L, TEMP, 1
	blez	L, .L142
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	A3, 2 * SIZE(AO)
	MOV	C41, C11
	MOV	C42, C11
	LD	A4, 3 * SIZE(AO)
	MOV	C13, C11
	MOV	C14, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C23, C11
	MOV	C24, C11
	MOV	C33, C11
	MOV	C34, C11
	MOV	C43, C11
	blez	L, .L142
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L1410: main 4x1 loop, two k-steps per pass; the loop body continues past
## the end of this span.
.L1410:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	A5, 4 * SIZE(AO)
	MADD	C21, C21, A2, B1
	LD	B3, 1 * SIZE(BO)
	MADD	C31, C31, A3, B1
	LD	A6, 5 * SIZE(AO)
	daddiu	BO, BO, 2 * SIZE
	MADD	C41, C41, A4, B1
## Second half of the loop that branches back to .L1410 (4x1 tile, two k-steps
## per pass, AO += 8 elements); the loop head lies above this span.
## MADD d, a, b, c is read here as d = a + b * c (macro from common.h - confirm).
## NOTE(review): every branch below is followed by one instruction that is
## written as a branch delay slot (executed on both paths); this assumes
## PROLOGUE selects noreorder mode - confirm.
	LD	A7, 6 * SIZE(AO)
	LD	A8, 7 * SIZE(AO)
	daddiu	AO, AO, 8 * SIZE
	MADD	C11, C11, A5, B3
	LD	A1, 0 * SIZE(AO)
	MADD	C21, C21, A6, B3
	LD	B1, 0 * SIZE(BO)
	MADD	C31, C31, A7, B3
	LD	A2, 1 * SIZE(AO)
	MADD	C41, C41, A8, B3
	LD	A3, 2 * SIZE(AO)
	bgtz	L, .L1410
	LD	A4, 3 * SIZE(AO)	# delay slot
	.align	4
## .L142: odd k-count (K, or TEMP in the TRMM build): one final k-step for the
## 4x1 tile.  ALPHA is reloaded from its stack slot 152($sp) in the delay slot.
.L142:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L140
	LD	ALPHA, 152($sp)
	MADD	C11, C11, A1, B1
	MADD	C21, C21, A2, B1
	MADD	C31, C31, A3, B1
	MADD	C41, C41, A4, B1
	daddiu	AO, AO, 4 * SIZE
	daddiu	BO, BO, 1 * SIZE
	.align	4
## .L140: write back the 4x1 tile (four elements at C) and advance C by four
## elements.  GEMM build adds accumulator * ALPHA to the loaded C values; TRMM
## build stores accumulator * ALPHA without reading C.
.L140:					# Write Back
#ifndef TRMMKERNEL
	LD	A1, 0 * SIZE(C)
	LD	A2, 1 * SIZE(C)
	LD	A3, 2 * SIZE(C)
	LD	A4, 3 * SIZE(C)
	MADD	A1, A1, C11, ALPHA
	MADD	A2, A2, C21, ALPHA
	MADD	A3, A3, C31, ALPHA
	MADD	A4, A4, C41, ALPHA
	ST	A1, 0 * SIZE(C)
	ST	A2, 1 * SIZE(C)
	ST	A3, 2 * SIZE(C)
	ST	A4, 3 * SIZE(C)
	daddiu	C, C, 4 * SIZE
#else
	MUL	A1, C11, ALPHA
	MUL	A2, C21, ALPHA
	MUL	A3, C31, ALPHA
	MUL	A4, C41, ALPHA
	ST	A1, 0 * SIZE(C)
	ST	A2, 1 * SIZE(C)
	ST	A3, 2 * SIZE(C)
	ST	A4, 3 * SIZE(C)
	daddiu	C, C, 4 * SIZE
#if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (4 when LEFT, otherwise 1); AO += TEMP << (2 + BASE_SHIFT),
## BO += TEMP << BASE_SHIFT - presumably skipping the part of the packed
## panels this tile did not read (confirm against the packing code).
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -4
#else
	daddiu	TEMP, TEMP, -1
#endif
	dsll	L, TEMP, 2 + BASE_SHIFT
	dsll	TEMP, TEMP, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 4
#endif
#endif
	.align	4
## .L12: 2x1 tile, taken once when bit 1 of M is set.
.L12:
	andi	I, M, 2
	blez	I, .L11
	NOP
	.align	4
.L121:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) ||\
	(!defined(LEFT) && !defined(TRANSA))
	move	BO, B			# Reset B
#else
## Offset both panels by KK k-steps: 2 elements per step in A, 1 in B.
	dsll	L, KK, 1 + BASE_SHIFT
	dsll	TEMP, KK, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, TEMP
#endif
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C41, C11
	MOV	C42, C11
	MOV	C43, C11
	MOV	C44, C11
#if (defined(LEFT) && !defined(TRANSA)) ||\
	(!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 2
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	L, TEMP, 1
	blez	L, .L122
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	A2, 1 * SIZE(AO)
	MOV	C31, C11
	MOV	C32, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C41, C11
	MOV	C42, C11
	MOV	C43, C11
	blez	L, .L122
	MOV	C44, C11		# delay slot
#endif
	.align	4
## .L1210: main 2x1 loop, two k-steps per pass (AO += 4 elements, BO += 2).
.L1210:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	B3, 1 * SIZE(BO)
	MADD	C21, C21, A2, B1
	daddiu	BO, BO, 2 * SIZE
	LD	A3, 2 * SIZE(AO)
	LD	A4, 3 * SIZE(AO)
	daddiu	AO, AO, 4 * SIZE
	MADD	C11, C11, A3, B3
	LD	B1, 0 * SIZE(BO)
	MADD	C21, C21, A4, B3
	LD	A1, 0 * SIZE(AO)
	bgtz	L, .L1210
	LD	A2, 1 * SIZE(AO)	# delay slot
	.align	4
## .L122: odd k-count: one final k-step for the 2x1 tile; ALPHA reloaded in the
## delay slot.
.L122:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L120
	LD	ALPHA, 152($sp)
	MADD	C11, C11, A1, B1
	MADD	C21, C21, A2, B1
	daddiu	AO, AO, 2 * SIZE
	daddiu	BO, BO, 1 * SIZE
	.align	4
## .L120: write back the 2x1 tile (two elements at C) and advance C by two
## elements.
.L120:					# Write Back
#ifndef TRMMKERNEL
	LD	A1, 0 * SIZE(C)
	LD	A2, 1 * SIZE(C)
	MADD	A1, A1, C11, ALPHA
	MADD	A2, A2, C21, ALPHA
	ST	A1, 0 * SIZE(C)
	ST	A2, 1 * SIZE(C)
	daddiu	C, C, 2 * SIZE
#else
	MUL	A1, C11, ALPHA
	MUL	A2, C21, ALPHA
	ST	A1, 0 * SIZE(C)
	ST	A2, 1 * SIZE(C)
	daddiu	C, C, 2 * SIZE
#if ( defined(LEFT) && defined(TRANSA))||\
	(!defined(LEFT) && !defined(TRANSA))
## TEMP = K - KK - (2 when LEFT, otherwise 1); AO += TEMP << (1 + BASE_SHIFT),
## BO += TEMP << BASE_SHIFT.
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -2
#else
	daddiu	TEMP, TEMP, -1
#endif
	dsll	L, TEMP, 1 + BASE_SHIFT
	dsll	TEMP, TEMP, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif
#ifdef LEFT
	daddiu	KK, KK, 2
#endif
#endif
	.align	4
## .L11: 1x1 tile, taken once when bit 0 of M is set.
.L11:
	andi	I, M, 1
	blez	I, .L10
	NOP
	.align	4
.L111:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA))||\
	(!defined(LEFT) && !defined(TRANSA))
	move	BO, B
#else
## Offset both panels by KK k-steps of one element each (same byte offset L).
	dsll	L, KK, BASE_SHIFT
	daddu	AO, AO, L
	daddu	BO, B, L
#endif
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C31, C11
	MOV	C32, C11
## Both non-default branches add 1 here: the tile is 1 wide in either operand.
#if (defined(LEFT) && !defined(TRANSA))||\
	(!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	L, TEMP, 1		# L = TEMP >> 1 loop passes; the branch on L follows this span
## Conclusion of the 1x1 tile setup (TRMM build): skip the main loop when the
## loop count L is not positive.
## MADD d, a, b, c is read here as d = a + b * c (macro from common.h - confirm).
## NOTE(review): every branch below is followed by one instruction that is
## written as a branch delay slot (executed on both paths); this assumes
## PROLOGUE selects noreorder mode - confirm.
	blez	L, .L112
	NOP
#else
	move	BO, B			# Reset B
	dsra	L, K, 1			# L = K >> 1: the loop does two k-steps per pass
	MTC	$0, C11			# clear result registers
	MOV	C12, C11
	LD	A1, 0 * SIZE(AO)
	MOV	C21, C11
	MOV	C22, C11
	LD	B1, 0 * SIZE(BO)
	MOV	C31, C11
	blez	L, .L112
	MOV	C32, C11		# delay slot
#endif
	.align	4
## .L1110: main 1x1 loop, two k-steps per pass (AO += 2 elements, BO += 2),
## accumulating into C11.
.L1110:
	daddiu	L, L, -1
	MADD	C11, C11, A1, B1
	LD	A2, 1 * SIZE(AO)
	LD	B2, 1 * SIZE(BO)
	daddiu	AO, AO, 2 * SIZE
	daddiu	BO, BO, 2 * SIZE
	MADD	C11, C11, A2, B2
	LD	A1, 0 * SIZE(AO)
	LD	B1, 0 * SIZE(BO)
	bgtz	L, .L1110
	NOP
	.align	4
## .L112: odd k-count (K, or TEMP in the TRMM build): one final k-step.  ALPHA
## is reloaded from its stack slot 152($sp) in the delay slot.
.L112:
#ifndef TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L110
	LD	ALPHA, 152($sp)
	MADD	C11, C11, A1, B1
	daddiu	AO, AO, 1 * SIZE
	daddiu	BO, BO, 1 * SIZE
	.align	4
## .L110: write back the single element at C and advance C by one element.
## GEMM build adds C11 * ALPHA to the loaded value; TRMM build stores
## C11 * ALPHA without reading C.
.L110:					# Write Back
#ifndef TRMMKERNEL
	LD	A1, 0 * SIZE(C)
	MADD	A1, A1, C11, ALPHA
	ST	A1, 0 * SIZE(C)
	daddiu	C, C, 1 * SIZE
#else
	MUL	A1, C11, ALPHA
	ST	A1, 0 * SIZE(C)
	daddiu	C, C, 1 * SIZE
#endif
	.align	4
## .L10: end of the single-column panel: B is set to the current BO.
.L10:
	move	B, BO
	NOP
## .L999: restore the registers saved on entry, release the STACKSIZE-byte
## frame and return through $31.  The slots mirror the stores in the prologue:
## $16-$22 at 0..48, $f24-$f28 at 56..88, $23-$25 at 96..112 (TRMM build only),
## and $f20-$f23 at 120..144 (only when __64BIT__ is not defined).
## NOTE(review): C34, C43 and C44 ($f29, $f30, $f31 per the register defines)
## are written by this kernel but are neither saved in the prologue nor
## restored here; confirm against the callee-saved set of the target ABI.
.L999:
	ld	$16, 0($sp)
	ld	$17, 8($sp)
	ld	$18, 16($sp)
	ld	$19, 24($sp)
	ld	$20, 32($sp)
	ld	$21, 40($sp)
	ld	$22, 48($sp)
	LD	$f24, 56($sp)
	LD	$f25, 64($sp)
	LD	$f26, 72($sp)
	LD	$f27, 80($sp)
	LD	$f28, 88($sp)
#if defined(TRMMKERNEL)
	ld	$23, 96($sp)
	ld	$24, 104($sp)
	ld	$25, 112($sp)
#endif
#ifndef __64BIT__
	LD	$f20,120($sp)
	LD	$f21,128($sp)
	LD	$f22,136($sp)
	LD	$f23,144($sp)
#endif
	daddiu	$sp,$sp,STACKSIZE
	j	$31
	nop				# delay slot of the return jump
	EPILOGUE
# .set macro
# .set reorder
# .end gemm
# .size gemm, .-gemm
# .ident "GCC: (Debian 4.4.6-6) 4.4.6"