/*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* 1. Redistributions of source code must retain the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer. */ /* */ /* 2. Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials */ /* provided with the distribution. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* */ /* The views and conclusions contained in the software and */ /* documentation are those of the authors and should not be */ /* interpreted as representing official policies, either expressed */ /* or implied, of The University of Texas at Austin. 
*/ /*********************************************************************/

/*
 * MIPS assembly kernel, built in one of four variants selected by the
 * preprocessor symbols LN, LT, RN and RT (each symbol enables different
 * pointer set-up and solve code throughout the file).
 * NOTE(review): the variant names and the SUB / MUL / NMSUB sequences later
 * in the file look like a triangular-solve (TRSM-style) kernel -- confirm
 * against the build files.
 *
 * PROLOGUE, SDARG, LDARG, MTC, MOV and BASE_SHIFT are not defined in this
 * file; presumably they are supplied by common.h.
 */
#define ASSEMBLER
#include "common.h"

/* ---- Integer register aliases -------------------------------------- */
/* M, N, K are used as counts: they are multiplied together, shifted by
   BASE_SHIFT, and drive the loop counters I, J and L. */
#define M $4
#define N $5
#define K $6
/* A, B, C are used as base addresses; LDC is the stride added between
   CO1..CO8 (it is scaled by BASE_SHIFT once, right after the prologue). */
#define A $8
#define B $9
#define C $10
#define LDC $11
/* AO / BO: working pointers derived from A (or AORIG) and B. */
#define AO $12
#define BO $13
/* Loop counters. */
#define I $2
#define J $3
#define L $7
/* CO1..CO8: eight output pointers, each LDC past the previous one. */
#define CO1 $14
#define CO2 $15
#define CO3 $16
#define CO4 $17
#define CO5 $18
#define CO6 $19
#define CO7 $20
#define CO8 $21
/* OFFSET is loaded from the stack; KK is derived from it per variant.
   TEMP is scratch. AORIG holds a copy of A in the LN / RT variants. */
#define OFFSET $22
#define KK $23
#define TEMP $24
#define AORIG $25

/* ---- Floating-point register aliases ------------------------------- */
/* a1..a4: values loaded through AO. */
#define a1 $f0
#define a2 $f1
#define a3 $f27
#define a4 $f28
/* b1..b8: values loaded through BO (and reused as temporaries in the
   solve code). */
#define b1 $f2
#define b2 $f3
#define b3 $f4
#define b4 $f5
#define b5 $f6
#define b6 $f7
#define b7 $f8
#define b8 $f9
/* a5 is the same register as b8 ($f9). */
#define a5 b8
/* cXY: accumulators. Note the numbering skips $f15, which is ALPHA. */
#define c11 $f10
#define c12 $f11
#define c21 $f12
#define c22 $f13
#define c31 $f14
#define c32 $f16
#define c41 $f17
#define c42 $f18
#define c51 $f19
#define c52 $f20
#define c61 $f21
#define c62 $f22
#define c71 $f23
#define c72 $f24
#define c81 $f25
#define c82 $f26
#define ALPHA $f15

	PROLOGUE

	/* Allocate a 144-byte frame and save the registers this routine
	   overwrites: $16-$21 at 0..40, $f24-$f28 at 48..80, $22-$25 at
	   88..112. */
	daddiu $sp, $sp, -144
	SDARG $16, 0($sp)
	SDARG $17, 8($sp)
	SDARG $18, 16($sp)
	SDARG $19, 24($sp)
	SDARG $20, 32($sp)
	SDARG $21, 40($sp)
	sdc1 $f24, 48($sp)
	sdc1 $f25, 56($sp)
	sdc1 $f26, 64($sp)
	sdc1 $f27, 72($sp)
	sdc1 $f28, 80($sp)
	SDARG $22, 88($sp)
	SDARG $23, 96($sp)
	SDARG $24, 104($sp)
	SDARG $25, 112($sp)
	/* Builds without __64BIT__ additionally save $f20-$f23.
	   NOTE(review): 112($sp) is also where $25 was stored just above, so
	   in this configuration the $f20 store overwrites the saved $25.
	   Check the matching restore sequence at the end of the routine
	   before changing either offset. */
#ifndef __64BIT__
	sdc1 $f20,112($sp)
	sdc1 $f21,120($sp)
	sdc1 $f22,128($sp)
	sdc1 $f23,136($sp)
#endif

	/* OFFSET is read from 144($sp), i.e. the first slot above the
	   144-byte frame allocated above. */
	LDARG OFFSET, 144($sp)
	/* LDC <<= BASE_SHIFT (presumably elements -> bytes; BASE_SHIFT is
	   defined outside this file). */
	dsll LDC, LDC, BASE_SHIFT

	/* LN: A += (M * K) << BASE_SHIFT;  C += M << BASE_SHIFT. */
#ifdef LN
	mult M, K
	mflo TEMP
	dsll TEMP, TEMP, BASE_SHIFT
	daddu A, A, TEMP
	dsll TEMP, M, BASE_SHIFT
	daddu C, C, TEMP
#endif

	/* RN: KK = -OFFSET. */
#ifdef RN
	neg KK, OFFSET
#endif

	/* RT: B += (N * K) << BASE_SHIFT;  C += N * LDC (LDC is already
	   scaled);  KK = N - OFFSET. */
#ifdef RT
	mult N, K
	mflo TEMP
	dsll TEMP, TEMP, BASE_SHIFT
	daddu B, B, TEMP
	mult N, LDC
	mflo TEMP
	daddu C, C, TEMP
	dsubu KK, N, OFFSET
#endif

	/* J = N >> 3: number of 8-wide passes. Skip to .L30 when there are
	   none; the nop fills the slot after the branch. */
	dsra J, N, 3
	blez J, .L30
	nop

	/* Top of the 8-wide pass; J is decremented below and tested by the
	   `bgtz J, .L10` further down in the file. */
.L10:
	/* RT: step B back by K << (3 + BASE_SHIFT) and C back by 8 * LDC. */
#ifdef RT
	dsll TEMP, K, 3 + BASE_SHIFT
	dsubu B, B, TEMP
	dsll TEMP, LDC, 3
	dsubu C, C, TEMP
#endif

	/* CO1 = C and CO(n+1) = CO(n) + LDC, interleaved with clearing the
	   accumulators: c11 is loaded from integer register $0 and then
	   copied into the others. */
	move CO1, C
	MTC $0, c11
	daddu CO2, C, LDC
	daddu CO3, CO2, LDC
	daddiu J, J, -1
	daddu CO4, CO3, LDC
	MOV c21, c11
	daddu CO5, CO4, LDC
	/* NOTE: the source text wraps in the middle of the next instruction;
	   its second operand follows on the next line of the file. */
	MOV c31,
c11 daddu CO6, CO5, LDC MOV c41, c11 daddu CO7, CO6, LDC MOV c51, c11 daddu CO8, CO7, LDC #ifdef LN daddu KK, M, OFFSET #endif #ifdef LT move KK, OFFSET #endif #if defined(LN) || defined(RT) move AORIG, A #else move AO, A #endif #ifndef RT daddu C, CO8, LDC #endif andi I, M, 1 MOV c61, c11 blez I, .L20 MOV c71, c11 #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) LD a2, 1 * SIZE(AO) LD a3, 2 * SIZE(AO) LD a4, 3 * SIZE(AO) LD b1, 0 * SIZE(B) LD b2, 1 * SIZE(B) LD b3, 2 * SIZE(B) LD b4, 3 * SIZE(B) LD b5, 4 * SIZE(B) LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) dsra L, KK, 2 MOV c81, c11 blez L, .L25 move BO, B #else #ifdef LN dsll TEMP, K, 0 + BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 0 + BASE_SHIFT dsll TEMP, KK, 3 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) LD a2, 1 * SIZE(AO) LD a3, 2 * SIZE(AO) LD a4, 3 * SIZE(AO) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) dsra L, TEMP, 2 MOV c81, c11 blez L, .L25 NOP #endif .align 3 .L22: MADD c11, c11, a1, b1 LD b1, 16 * SIZE(BO) MADD c21, c21, a1, b2 LD b2, 5 * SIZE(BO) MADD c31, c31, a1, b3 LD b3, 6 * SIZE(BO) MADD c41, c41, a1, b4 LD b4, 7 * SIZE(BO) MADD c51, c51, a1, b5 LD b5, 20 * SIZE(BO) MADD c61, c61, a1, b2 LD b2, 9 * SIZE(BO) MADD c71, c71, a1, b3 LD b3, 10 * SIZE(BO) MADD c81, c81, a1, b4 LD b4, 11 * SIZE(BO) LD a1, 4 * SIZE(AO) daddiu L, L, -1 MADD c11, c11, a2, b6 LD b6, 24 * SIZE(BO) MADD c21, c21, a2, b2 LD b2, 13 * SIZE(BO) MADD c31, c31, a2, b3 LD b3, 14 * SIZE(BO) MADD c41, c41, a2, b4 LD b4, 15 * SIZE(BO) MADD c51, c51, a2, b7 LD b7, 28 * SIZE(BO) MADD c61, c61, a2, b2 LD b2, 17 * SIZE(BO) MADD c71, c71, a2, b3 LD b3, 18 * SIZE(BO) MADD c81, c81, a2, b4 LD b4, 19 * SIZE(BO) LD a2, 5 * SIZE(AO) daddiu AO, AO, 4 * SIZE MADD c11, c11, a3, b1 LD b1, 32 * SIZE(BO) MADD c21, c21, a3, b2 LD b2, 21 * SIZE(BO) MADD c31, c31, a3, b3 LD b3, 22 * SIZE(BO) MADD c41, c41, 
a3, b4 LD b4, 23 * SIZE(BO) MADD c51, c51, a3, b5 LD b5, 36 * SIZE(BO) MADD c61, c61, a3, b2 LD b2, 25 * SIZE(BO) MADD c71, c71, a3, b3 LD b3, 26 * SIZE(BO) MADD c81, c81, a3, b4 LD b4, 27 * SIZE(BO) LD a3, 2 * SIZE(AO) daddiu BO, BO, 32 * SIZE MADD c11, c11, a4, b6 LD b6, 8 * SIZE(BO) MADD c21, c21, a4, b2 LD b2, -3 * SIZE(BO) MADD c31, c31, a4, b3 LD b3, -2 * SIZE(BO) MADD c41, c41, a4, b4 LD b4, -1 * SIZE(BO) MADD c51, c51, a4, b7 LD b7, 12 * SIZE(BO) MADD c61, c61, a4, b2 LD b2, 1 * SIZE(BO) MADD c71, c71, a4, b3 LD b3, 2 * SIZE(BO) MADD c81, c81, a4, b4 LD b4, 3 * SIZE(BO) bgtz L, .L22 LD a4, 3 * SIZE(AO) .align 3 .L25: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L28 NOP .align 3 .L26: MADD c11, c11, a1, b1 LD b1, 8 * SIZE(BO) MADD c21, c21, a1, b2 LD b2, 5 * SIZE(BO) MADD c31, c31, a1, b3 LD b3, 6 * SIZE(BO) MADD c41, c41, a1, b4 LD b4, 7 * SIZE(BO) daddiu L, L, -1 MOV a2, a2 daddiu AO, AO, 1 * SIZE daddiu BO, BO, 8 * SIZE MADD c51, c51, a1, b5 LD b5, 4 * SIZE(BO) MADD c61, c61, a1, b2 LD b2, 1 * SIZE(BO) MADD c71, c71, a1, b3 LD b3, 2 * SIZE(BO) MADD c81, c81, a1, b4 LD a1, 0 * SIZE(AO) bgtz L, .L26 LD b4, 3 * SIZE(BO) .L28: #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -1 #else daddiu TEMP, KK, -8 #endif dsll L, TEMP, 0 + BASE_SHIFT dsll TEMP, TEMP, 3 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 5 * SIZE(BO) LD b7, 6 * SIZE(BO) LD b8, 7 * SIZE(BO) SUB c11, b1, c11 SUB c21, b2, c21 SUB c31, b3, c31 SUB c41, b4, c41 SUB c51, b5, c51 SUB c61, b6, c61 SUB c71, b7, c71 SUB c81, b8, c81 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 2 * SIZE(AO) LD b4, 3 * SIZE(AO) LD b5, 4 * SIZE(AO) LD b6, 5 * SIZE(AO) LD b7, 6 * SIZE(AO) LD b8, 7 * SIZE(AO) SUB c11, b1, c11 SUB c21, b2, c21 SUB c31, b3, c31 SUB c41, b4, c41 SUB c51, b5, c51 SUB c61, b6, c61 SUB c71, b7, 
c71 SUB c81, b8, c81 #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(AO) MUL c11, b1, c11 MUL c21, b1, c21 MUL c31, b1, c31 MUL c41, b1, c41 MUL c51, b1, c51 MUL c61, b1, c61 MUL c71, b1, c71 MUL c81, b1, c81 #endif #ifdef RN LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 5 * SIZE(BO) LD b7, 6 * SIZE(BO) LD b8, 7 * SIZE(BO) MUL c11, b1, c11 NMSUB c21, c21, b2, c11 NMSUB c31, c31, b3, c11 NMSUB c41, c41, b4, c11 NMSUB c51, c51, b5, c11 NMSUB c61, c61, b6, c11 NMSUB c71, c71, b7, c11 NMSUB c81, c81, b8, c11 LD b2, 9 * SIZE(BO) LD b3, 10 * SIZE(BO) LD b4, 11 * SIZE(BO) LD b5, 12 * SIZE(BO) LD b6, 13 * SIZE(BO) LD b7, 14 * SIZE(BO) LD b8, 15 * SIZE(BO) MUL c21, b2, c21 NMSUB c31, c31, b3, c21 NMSUB c41, c41, b4, c21 NMSUB c51, c51, b5, c21 NMSUB c61, c61, b6, c21 NMSUB c71, c71, b7, c21 NMSUB c81, c81, b8, c21 LD b3, 18 * SIZE(BO) LD b4, 19 * SIZE(BO) LD b5, 20 * SIZE(BO) LD b6, 21 * SIZE(BO) LD b7, 22 * SIZE(BO) LD b8, 23 * SIZE(BO) MUL c31, b3, c31 NMSUB c41, c41, b4, c31 NMSUB c51, c51, b5, c31 NMSUB c61, c61, b6, c31 NMSUB c71, c71, b7, c31 NMSUB c81, c81, b8, c31 LD b4, 27 * SIZE(BO) LD b5, 28 * SIZE(BO) LD b6, 29 * SIZE(BO) LD b7, 30 * SIZE(BO) LD b8, 31 * SIZE(BO) MUL c41, b4, c41 NMSUB c51, c51, b5, c41 NMSUB c61, c61, b6, c41 NMSUB c71, c71, b7, c41 NMSUB c81, c81, b8, c41 LD b5, 36 * SIZE(BO) LD b6, 37 * SIZE(BO) LD b7, 38 * SIZE(BO) LD b8, 39 * SIZE(BO) MUL c51, b5, c51 NMSUB c61, c61, b6, c51 NMSUB c71, c71, b7, c51 NMSUB c81, c81, b8, c51 LD b6, 45 * SIZE(BO) LD b7, 46 * SIZE(BO) LD b8, 47 * SIZE(BO) MUL c61, b6, c61 NMSUB c71, c71, b7, c61 NMSUB c81, c81, b8, c61 LD b7, 54 * SIZE(BO) LD b8, 55 * SIZE(BO) MUL c71, b7, c71 NMSUB c81, c81, b8, c71 LD b8, 63 * SIZE(BO) MUL c81, b8, c81 #endif #ifdef RT LD b1, 63 * SIZE(BO) LD b2, 62 * SIZE(BO) LD b3, 61 * SIZE(BO) LD b4, 60 * SIZE(BO) LD b5, 59 * SIZE(BO) LD b6, 58 * SIZE(BO) LD b7, 57 * SIZE(BO) LD b8, 56 * SIZE(BO) MUL c81, b1, c81 NMSUB c71, 
c71, b2, c81 NMSUB c61, c61, b3, c81 NMSUB c51, c51, b4, c81 NMSUB c41, c41, b5, c81 NMSUB c31, c31, b6, c81 NMSUB c21, c21, b7, c81 NMSUB c11, c11, b8, c81 LD b2, 54 * SIZE(BO) LD b3, 53 * SIZE(BO) LD b4, 52 * SIZE(BO) LD b5, 51 * SIZE(BO) LD b6, 50 * SIZE(BO) LD b7, 49 * SIZE(BO) LD b8, 48 * SIZE(BO) MUL c71, b2, c71 NMSUB c61, c61, b3, c71 NMSUB c51, c51, b4, c71 NMSUB c41, c41, b5, c71 NMSUB c31, c31, b6, c71 NMSUB c21, c21, b7, c71 NMSUB c11, c11, b8, c71 LD b3, 45 * SIZE(BO) LD b4, 44 * SIZE(BO) LD b5, 43 * SIZE(BO) LD b6, 42 * SIZE(BO) LD b7, 41 * SIZE(BO) LD b8, 40 * SIZE(BO) MUL c61, b3, c61 NMSUB c51, c51, b4, c61 NMSUB c41, c41, b5, c61 NMSUB c31, c31, b6, c61 NMSUB c21, c21, b7, c61 NMSUB c11, c11, b8, c61 LD b4, 36 * SIZE(BO) LD b5, 35 * SIZE(BO) LD b6, 34 * SIZE(BO) LD b7, 33 * SIZE(BO) LD b8, 32 * SIZE(BO) MUL c51, b4, c51 NMSUB c41, c41, b5, c51 NMSUB c31, c31, b6, c51 NMSUB c21, c21, b7, c51 NMSUB c11, c11, b8, c51 LD b5, 27 * SIZE(BO) LD b6, 26 * SIZE(BO) LD b7, 25 * SIZE(BO) LD b8, 24 * SIZE(BO) MUL c41, b5, c41 NMSUB c31, c31, b6, c41 NMSUB c21, c21, b7, c41 NMSUB c11, c11, b8, c41 LD b6, 18 * SIZE(BO) LD b7, 17 * SIZE(BO) LD b8, 16 * SIZE(BO) MUL c31, b6, c31 NMSUB c21, c21, b7, c31 NMSUB c11, c11, b8, c31 LD b7, 9 * SIZE(BO) LD b8, 8 * SIZE(BO) MUL c21, b7, c21 NMSUB c11, c11, b8, c21 LD b8, 0 * SIZE(BO) MUL c11, b8, c11 #endif #ifdef LN daddiu CO1, CO1, -1 * SIZE daddiu CO2, CO2, -1 * SIZE daddiu CO3, CO3, -1 * SIZE daddiu CO4, CO4, -1 * SIZE daddiu CO5, CO5, -1 * SIZE daddiu CO6, CO6, -1 * SIZE daddiu CO7, CO7, -1 * SIZE daddiu CO8, CO8, -1 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c21, 1 * SIZE(BO) ST c31, 2 * SIZE(BO) ST c41, 3 * SIZE(BO) ST c51, 4 * SIZE(BO) ST c61, 5 * SIZE(BO) ST c71, 6 * SIZE(BO) ST c81, 7 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c21, 1 * SIZE(AO) ST c31, 2 * SIZE(AO) ST c41, 3 * SIZE(AO) ST c51, 4 * SIZE(AO) ST c61, 5 * SIZE(AO) ST c71, 6 * SIZE(AO) ST c81, 7 * SIZE(AO) #endif ST c11, 0 * 
SIZE(CO1) ST c21, 0 * SIZE(CO2) ST c31, 0 * SIZE(CO3) ST c41, 0 * SIZE(CO4) ST c51, 0 * SIZE(CO5) ST c61, 0 * SIZE(CO6) ST c71, 0 * SIZE(CO7) ST c81, 0 * SIZE(CO8) MTC $0, c11 #ifndef LN daddiu CO1, CO1, 1 * SIZE daddiu CO2, CO2, 1 * SIZE daddiu CO3, CO3, 1 * SIZE daddiu CO4, CO4, 1 * SIZE daddiu CO5, CO5, 1 * SIZE daddiu CO6, CO6, 1 * SIZE daddiu CO7, CO7, 1 * SIZE daddiu CO8, CO8, 1 * SIZE #endif MOV c21, c11 #ifdef RT dsll TEMP, K, BASE_SHIFT daddu AORIG, AORIG, TEMP #endif MOV c31, c11 #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll L, TEMP, 0 + BASE_SHIFT dsll TEMP, TEMP, 3 + BASE_SHIFT daddu AO, AO, L daddu BO, BO, TEMP #endif MOV c41, c11 #ifdef LT daddiu KK, KK, 1 #endif #ifdef LN daddiu KK, KK, -1 #endif .align 3 .L20: dsra I, M, 1 MOV c51, c11 blez I, .L29 MOV c61, c11 .L11: #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) MOV c71, c11 LD b1, 0 * SIZE(B) MOV c81, c11 LD a3, 4 * SIZE(AO) MOV c12, c11 LD b2, 1 * SIZE(B) MOV c22, c11 dsra L, KK, 2 MOV c32, c11 LD b3, 2 * SIZE(B) MOV c42, c11 LD b4, 3 * SIZE(B) MOV c52, c11 LD b5, 4 * SIZE(B) MOV c62, c11 LD b6, 8 * SIZE(B) MOV c72, c11 LD b7, 12 * SIZE(B) MOV c82, c11 blez L, .L15 move BO, B #else #ifdef LN dsll TEMP, K, 1 + BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 1 + BASE_SHIFT dsll TEMP, KK, 3 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) MOV c71, c11 LD b1, 0 * SIZE(BO) MOV c81, c11 LD a3, 4 * SIZE(AO) MOV c12, c11 LD b2, 1 * SIZE(BO) MOV c22, c11 MOV c32, c11 LD b3, 2 * SIZE(BO) MOV c42, c11 LD b4, 3 * SIZE(BO) MOV c52, c11 LD b5, 4 * SIZE(BO) MOV c62, c11 LD b6, 8 * SIZE(BO) MOV c72, c11 LD b7, 12 * SIZE(BO) MOV c82, c11 dsra L, TEMP, 2 blez L, .L15 NOP #endif MADD c11, c11, a1, b1 LD a2, 1 * SIZE(AO) MADD c21, c21, a1, b2 daddiu L, L, -1 MADD c31, c31, a1, b3 blez L, .L13 MADD c41, c41, a1, b4 NOP .align 3 .L12: MADD c12, c12, a2, b1 LD b1, 16 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 5 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 6 * SIZE(BO) 
MADD c42, c42, a2, b4 LD b4, 7 * SIZE(BO) MADD c51, c51, a1, b5 NOP MADD c61, c61, a1, b2 LD a4, 2 * SIZE(AO) MADD c71, c71, a1, b3 NOP MADD c81, c81, a1, b4 LD a1, 8 * SIZE(AO) MADD c52, c52, a2, b5 LD b5, 20 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 9 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 10 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 11 * SIZE(BO) MADD c11, c11, a4, b6 LD a2, 3 * SIZE(AO) MADD c21, c21, a4, b2 NOP MADD c31, c31, a4, b3 NOP MADD c41, c41, a4, b4 NOP MADD c12, c12, a2, b6 LD b6, 24 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 13 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 14 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 15 * SIZE(BO) MADD c51, c51, a4, b7 NOP MADD c61, c61, a4, b2 NOP MADD c71, c71, a4, b3 NOP MADD c81, c81, a4, b4 NOP MADD c52, c52, a2, b7 LD b7, 28 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 17 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 18 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 19 * SIZE(BO) MADD c11, c11, a3, b1 LD a2, 5 * SIZE(AO) MADD c21, c21, a3, b2 NOP MADD c31, c31, a3, b3 NOP MADD c41, c41, a3, b4 NOP MADD c12, c12, a2, b1 LD b1, 32 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 21 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 22 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 23 * SIZE(BO) MADD c51, c51, a3, b5 NOP MADD c61, c61, a3, b2 LD a4, 6 * SIZE(AO) MADD c71, c71, a3, b3 NOP MADD c81, c81, a3, b4 LD a3, 12 * SIZE(AO) MADD c52, c52, a2, b5 LD b5, 36 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 25 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 26 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 27 * SIZE(BO) MADD c11, c11, a4, b6 LD a2, 7 * SIZE(AO) MADD c21, c21, a4, b2 NOP MADD c31, c31, a4, b3 NOP MADD c41, c41, a4, b4 daddiu L, L, -1 MADD c12, c12, a2, b6 LD b6, 40 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 29 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 30 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 31 * SIZE(BO) MADD c51, c51, a4, b7 daddiu BO, BO, 32 * SIZE MADD c61, c61, a4, b2 daddiu AO, AO, 8 * SIZE MADD c71, c71, a4, b3 NOP MADD c81, c81, a4, b4 NOP MADD c52, c52, a2, b7 LD b7, 12 
* SIZE(BO) MADD c62, c62, a2, b2 LD b2, 1 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 2 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 3 * SIZE(BO) MADD c11, c11, a1, b1 LD a2, 1 * SIZE(AO) MADD c21, c21, a1, b2 NOP MADD c31, c31, a1, b3 bgtz L, .L12 MADD c41, c41, a1, b4 NOP .align 3 .L13: MADD c12, c12, a2, b1 LD b1, 16 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 5 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 6 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 7 * SIZE(BO) MADD c51, c51, a1, b5 NOP MADD c61, c61, a1, b2 LD a4, 2 * SIZE(AO) MADD c71, c71, a1, b3 NOP MADD c81, c81, a1, b4 LD a1, 8 * SIZE(AO) MADD c52, c52, a2, b5 LD b5, 20 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 9 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 10 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 11 * SIZE(BO) MADD c11, c11, a4, b6 LD a2, 3 * SIZE(AO) MADD c21, c21, a4, b2 NOP MADD c31, c31, a4, b3 NOP MADD c41, c41, a4, b4 NOP MADD c12, c12, a2, b6 LD b6, 24 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 13 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 14 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 15 * SIZE(BO) MADD c51, c51, a4, b7 NOP MADD c61, c61, a4, b2 NOP MADD c71, c71, a4, b3 NOP MADD c81, c81, a4, b4 NOP MADD c52, c52, a2, b7 LD b7, 28 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 17 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 18 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 19 * SIZE(BO) MADD c11, c11, a3, b1 LD a2, 5 * SIZE(AO) MADD c21, c21, a3, b2 NOP MADD c31, c31, a3, b3 NOP MADD c41, c41, a3, b4 NOP MADD c12, c12, a2, b1 LD b1, 32 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 21 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 22 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 23 * SIZE(BO) MADD c51, c51, a3, b5 NOP MADD c61, c61, a3, b2 LD a4, 6 * SIZE(AO) MADD c71, c71, a3, b3 NOP MADD c81, c81, a3, b4 LD a3, 12 * SIZE(AO) MADD c52, c52, a2, b5 LD b5, 36 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 25 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 26 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 27 * SIZE(BO) MADD c11, c11, a4, b6 LD a2, 7 * SIZE(AO) MADD c21, c21, a4, b2 NOP MADD 
c31, c31, a4, b3 NOP MADD c41, c41, a4, b4 NOP MADD c12, c12, a2, b6 LD b6, 40 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 29 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 30 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 31 * SIZE(BO) MADD c51, c51, a4, b7 daddiu BO, BO, 32 * SIZE MADD c61, c61, a4, b2 daddiu AO, AO, 8 * SIZE MADD c71, c71, a4, b3 NOP MADD c81, c81, a4, b4 NOP MADD c52, c52, a2, b7 LD b7, 12 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 1 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 2 * SIZE(BO) MADD c82, c82, a2, b4 LD b4, 3 * SIZE(BO) .align 3 .L15: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif blez L, .L18 NOP .align 3 .L16: MADD c11, c11, a1, b1 LD a2, 1 * SIZE(AO) MADD c21, c21, a1, b2 NOP MADD c31, c31, a1, b3 NOP MADD c41, c41, a1, b4 NOP MADD c12, c12, a2, b1 LD b1, 8 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 5 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 6 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 7 * SIZE(BO) MADD c51, c51, a1, b5 daddiu L, L, -1 MADD c61, c61, a1, b2 daddiu AO, AO, 2 * SIZE MADD c71, c71, a1, b3 daddiu BO, BO, 8 * SIZE MADD c81, c81, a1, b4 LD a1, 0 * SIZE(AO) MADD c52, c52, a2, b5 LD b5, 4 * SIZE(BO) MADD c62, c62, a2, b2 LD b2, 1 * SIZE(BO) MADD c72, c72, a2, b3 LD b3, 2 * SIZE(BO) MADD c82, c82, a2, b4 bgtz L, .L16 LD b4, 3 * SIZE(BO) .L18: #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -2 #else daddiu TEMP, KK, -8 #endif dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 3 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) SUB c11, b1, c11 LD b5, 4 * SIZE(BO) SUB c21, b2, c21 LD b6, 5 * SIZE(BO) SUB c31, b3, c31 LD b7, 6 * SIZE(BO) SUB c41, b4, c41 LD b8, 7 * SIZE(BO) SUB c51, b5, c51 LD b1, 8 * SIZE(BO) SUB c61, b6, c61 LD b2, 9 * SIZE(BO) SUB c71, b7, c71 LD b3, 10 * SIZE(BO) SUB c81, b8, c81 LD b4, 11 * SIZE(BO) SUB c12, b1, c12 LD b5, 12 * SIZE(BO) SUB c22, b2, c22 LD b6, 13 * SIZE(BO) SUB c32, b3, 
c32 LD b7, 14 * SIZE(BO) SUB c42, b4, c42 LD b8, 15 * SIZE(BO) SUB c52, b5, c52 #ifdef LN LD b1, 3 * SIZE(AO) #else LD b1, 0 * SIZE(AO) #endif SUB c62, b6, c62 SUB c72, b7, c72 SUB c82, b8, c82 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 2 * SIZE(AO) LD b4, 3 * SIZE(AO) SUB c11, b1, c11 LD b5, 4 * SIZE(AO) SUB c12, b2, c12 LD b6, 5 * SIZE(AO) SUB c21, b3, c21 LD b7, 6 * SIZE(AO) SUB c22, b4, c22 LD b8, 7 * SIZE(AO) SUB c31, b5, c31 LD b1, 8 * SIZE(AO) SUB c32, b6, c32 LD b2, 9 * SIZE(AO) SUB c41, b7, c41 LD b3, 10 * SIZE(AO) SUB c42, b8, c42 LD b4, 11 * SIZE(AO) LD b5, 12 * SIZE(AO) SUB c51, b1, c51 LD b6, 13 * SIZE(AO) SUB c52, b2, c52 LD b7, 14 * SIZE(AO) SUB c61, b3, c61 LD b8, 15 * SIZE(AO) SUB c62, b4, c62 SUB c71, b5, c71 SUB c72, b6, c72 SUB c81, b7, c81 SUB c82, b8, c82 #endif #ifdef LN MUL c12, b1, c12 LD b2, 2 * SIZE(AO) MUL c22, b1, c22 MUL c32, b1, c32 MUL c42, b1, c42 MUL c52, b1, c52 MUL c62, b1, c62 MUL c72, b1, c72 MUL c82, b1, c82 NMSUB c11, c11, b2, c12 LD b3, 0 * SIZE(AO) NMSUB c21, c21, b2, c22 NMSUB c31, c31, b2, c32 NMSUB c41, c41, b2, c42 NMSUB c51, c51, b2, c52 NMSUB c61, c61, b2, c62 NMSUB c71, c71, b2, c72 NMSUB c81, c81, b2, c82 MUL c11, b3, c11 daddiu CO1, CO1, -2 * SIZE MUL c21, b3, c21 daddiu CO2, CO2, -2 * SIZE MUL c31, b3, c31 daddiu CO3, CO3, -2 * SIZE MUL c41, b3, c41 daddiu CO4, CO4, -2 * SIZE MUL c51, b3, c51 daddiu CO5, CO5, -2 * SIZE MUL c61, b3, c61 daddiu CO6, CO6, -2 * SIZE MUL c71, b3, c71 daddiu CO7, CO7, -2 * SIZE MUL c81, b3, c81 daddiu CO8, CO8, -2 * SIZE #endif #ifdef LT MUL c11, b1, c11 LD b2, 1 * SIZE(AO) MUL c21, b1, c21 MUL c31, b1, c31 MUL c41, b1, c41 MUL c51, b1, c51 MUL c61, b1, c61 MUL c71, b1, c71 MUL c81, b1, c81 NMSUB c12, c12, b2, c11 LD b3, 3 * SIZE(AO) NMSUB c22, c22, b2, c21 NMSUB c32, c32, b2, c31 NMSUB c42, c42, b2, c41 NMSUB c52, c52, b2, c51 NMSUB c62, c62, b2, c61 NMSUB c72, c72, b2, c71 NMSUB c82, c82, b2, c81 MUL c12, b3, c12 MUL c22, b3, c22 MUL c32, b3, c32 MUL c42, b3, c42 MUL c52, 
b3, c52 MUL c62, b3, c62 MUL c72, b3, c72 MUL c82, b3, c82 #endif #ifdef RN LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) MUL c11, b1, c11 MUL c12, b1, c12 LD b5, 4 * SIZE(BO) NMSUB c21, c21, b2, c11 NMSUB c22, c22, b2, c12 LD b6, 5 * SIZE(BO) NMSUB c31, c31, b3, c11 NMSUB c32, c32, b3, c12 LD b7, 6 * SIZE(BO) NMSUB c41, c41, b4, c11 NMSUB c42, c42, b4, c12 LD b8, 7 * SIZE(BO) NMSUB c51, c51, b5, c11 NMSUB c52, c52, b5, c12 LD b2, 9 * SIZE(BO) NMSUB c61, c61, b6, c11 NMSUB c62, c62, b6, c12 LD b3, 10 * SIZE(BO) NMSUB c71, c71, b7, c11 NMSUB c72, c72, b7, c12 LD b4, 11 * SIZE(BO) NMSUB c81, c81, b8, c11 NMSUB c82, c82, b8, c12 LD b5, 12 * SIZE(BO) MUL c21, b2, c21 MUL c22, b2, c22 LD b6, 13 * SIZE(BO) NMSUB c31, c31, b3, c21 NMSUB c32, c32, b3, c22 LD b7, 14 * SIZE(BO) NMSUB c41, c41, b4, c21 NMSUB c42, c42, b4, c22 LD b8, 15 * SIZE(BO) NMSUB c51, c51, b5, c21 NMSUB c52, c52, b5, c22 LD b3, 18 * SIZE(BO) NMSUB c61, c61, b6, c21 NMSUB c62, c62, b6, c22 LD b4, 19 * SIZE(BO) NMSUB c71, c71, b7, c21 NMSUB c72, c72, b7, c22 LD b5, 20 * SIZE(BO) NMSUB c81, c81, b8, c21 NMSUB c82, c82, b8, c22 LD b6, 21 * SIZE(BO) MUL c31, b3, c31 MUL c32, b3, c32 LD b7, 22 * SIZE(BO) NMSUB c41, c41, b4, c31 NMSUB c42, c42, b4, c32 LD b8, 23 * SIZE(BO) NMSUB c51, c51, b5, c31 NMSUB c52, c52, b5, c32 LD b4, 27 * SIZE(BO) NMSUB c61, c61, b6, c31 NMSUB c62, c62, b6, c32 LD b5, 28 * SIZE(BO) NMSUB c71, c71, b7, c31 NMSUB c72, c72, b7, c32 LD b6, 29 * SIZE(BO) NMSUB c81, c81, b8, c31 NMSUB c82, c82, b8, c32 LD b7, 30 * SIZE(BO) MUL c41, b4, c41 MUL c42, b4, c42 LD b8, 31 * SIZE(BO) NMSUB c51, c51, b5, c41 NMSUB c52, c52, b5, c42 LD b5, 36 * SIZE(BO) NMSUB c61, c61, b6, c41 NMSUB c62, c62, b6, c42 LD b6, 37 * SIZE(BO) NMSUB c71, c71, b7, c41 NMSUB c72, c72, b7, c42 LD b7, 38 * SIZE(BO) NMSUB c81, c81, b8, c41 NMSUB c82, c82, b8, c42 LD b8, 39 * SIZE(BO) MUL c51, b5, c51 MUL c52, b5, c52 NMSUB c61, c61, b6, c51 NMSUB c62, c62, b6, c52 LD b6, 45 * SIZE(BO) NMSUB 
c71, c71, b7, c51 NMSUB c72, c72, b7, c52 LD b7, 46 * SIZE(BO) NMSUB c81, c81, b8, c51 NMSUB c82, c82, b8, c52 LD b8, 47 * SIZE(BO) MUL c61, b6, c61 MUL c62, b6, c62 NMSUB c71, c71, b7, c61 NMSUB c72, c72, b7, c62 LD b7, 54 * SIZE(BO) NMSUB c81, c81, b8, c61 NMSUB c82, c82, b8, c62 LD b8, 55 * SIZE(BO) MUL c71, b7, c71 MUL c72, b7, c72 NMSUB c81, c81, b8, c71 NMSUB c82, c82, b8, c72 LD b8, 63 * SIZE(BO) MUL c81, b8, c81 MUL c82, b8, c82 #endif #ifdef RT LD b1, 63 * SIZE(BO) LD b2, 62 * SIZE(BO) LD b3, 61 * SIZE(BO) LD b4, 60 * SIZE(BO) MUL c81, b1, c81 MUL c82, b1, c82 LD b5, 59 * SIZE(BO) NMSUB c71, c71, b2, c81 NMSUB c72, c72, b2, c82 LD b6, 58 * SIZE(BO) NMSUB c61, c61, b3, c81 NMSUB c62, c62, b3, c82 LD b7, 57 * SIZE(BO) NMSUB c51, c51, b4, c81 NMSUB c52, c52, b4, c82 LD b8, 56 * SIZE(BO) NMSUB c41, c41, b5, c81 NMSUB c42, c42, b5, c82 LD b2, 54 * SIZE(BO) NMSUB c31, c31, b6, c81 NMSUB c32, c32, b6, c82 LD b3, 53 * SIZE(BO) NMSUB c21, c21, b7, c81 NMSUB c22, c22, b7, c82 LD b4, 52 * SIZE(BO) NMSUB c11, c11, b8, c81 NMSUB c12, c12, b8, c82 LD b5, 51 * SIZE(BO) MUL c71, b2, c71 MUL c72, b2, c72 LD b6, 50 * SIZE(BO) NMSUB c61, c61, b3, c71 NMSUB c62, c62, b3, c72 LD b7, 49 * SIZE(BO) NMSUB c51, c51, b4, c71 NMSUB c52, c52, b4, c72 LD b8, 48 * SIZE(BO) NMSUB c41, c41, b5, c71 NMSUB c42, c42, b5, c72 LD b3, 45 * SIZE(BO) NMSUB c31, c31, b6, c71 NMSUB c32, c32, b6, c72 LD b4, 44 * SIZE(BO) NMSUB c21, c21, b7, c71 NMSUB c22, c22, b7, c72 LD b5, 43 * SIZE(BO) NMSUB c11, c11, b8, c71 NMSUB c12, c12, b8, c72 LD b6, 42 * SIZE(BO) MUL c61, b3, c61 MUL c62, b3, c62 LD b7, 41 * SIZE(BO) NMSUB c51, c51, b4, c61 NMSUB c52, c52, b4, c62 LD b8, 40 * SIZE(BO) NMSUB c41, c41, b5, c61 NMSUB c42, c42, b5, c62 LD b4, 36 * SIZE(BO) NMSUB c31, c31, b6, c61 NMSUB c32, c32, b6, c62 LD b5, 35 * SIZE(BO) NMSUB c21, c21, b7, c61 NMSUB c22, c22, b7, c62 LD b6, 34 * SIZE(BO) NMSUB c11, c11, b8, c61 NMSUB c12, c12, b8, c62 LD b7, 33 * SIZE(BO) MUL c51, b4, c51 MUL c52, b4, c52 LD b8, 32 * 
SIZE(BO) NMSUB c41, c41, b5, c51 NMSUB c42, c42, b5, c52 LD b5, 27 * SIZE(BO) NMSUB c31, c31, b6, c51 NMSUB c32, c32, b6, c52 LD b6, 26 * SIZE(BO) NMSUB c21, c21, b7, c51 NMSUB c22, c22, b7, c52 LD b7, 25 * SIZE(BO) NMSUB c11, c11, b8, c51 NMSUB c12, c12, b8, c52 LD b8, 24 * SIZE(BO) MUL c41, b5, c41 MUL c42, b5, c42 NMSUB c31, c31, b6, c41 NMSUB c32, c32, b6, c42 LD b6, 18 * SIZE(BO) NMSUB c21, c21, b7, c41 NMSUB c22, c22, b7, c42 LD b7, 17 * SIZE(BO) NMSUB c11, c11, b8, c41 NMSUB c12, c12, b8, c42 LD b8, 16 * SIZE(BO) MUL c31, b6, c31 MUL c32, b6, c32 NMSUB c21, c21, b7, c31 NMSUB c22, c22, b7, c32 LD b7, 9 * SIZE(BO) NMSUB c11, c11, b8, c31 NMSUB c12, c12, b8, c32 LD b8, 8 * SIZE(BO) MUL c21, b7, c21 MUL c22, b7, c22 NMSUB c11, c11, b8, c21 NMSUB c12, c12, b8, c22 LD b8, 0 * SIZE(BO) MUL c11, b8, c11 MUL c12, b8, c12 #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c21, 1 * SIZE(BO) ST c31, 2 * SIZE(BO) ST c41, 3 * SIZE(BO) ST c51, 4 * SIZE(BO) ST c61, 5 * SIZE(BO) ST c71, 6 * SIZE(BO) ST c81, 7 * SIZE(BO) ST c12, 8 * SIZE(BO) ST c22, 9 * SIZE(BO) ST c32, 10 * SIZE(BO) ST c42, 11 * SIZE(BO) ST c52, 12 * SIZE(BO) ST c62, 13 * SIZE(BO) ST c72, 14 * SIZE(BO) ST c82, 15 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c12, 1 * SIZE(AO) ST c21, 2 * SIZE(AO) ST c22, 3 * SIZE(AO) ST c31, 4 * SIZE(AO) ST c32, 5 * SIZE(AO) ST c41, 6 * SIZE(AO) ST c42, 7 * SIZE(AO) ST c51, 8 * SIZE(AO) ST c52, 9 * SIZE(AO) ST c61, 10 * SIZE(AO) ST c62, 11 * SIZE(AO) ST c71, 12 * SIZE(AO) ST c72, 13 * SIZE(AO) ST c81, 14 * SIZE(AO) ST c82, 15 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) ST c12, 1 * SIZE(CO1) ST c21, 0 * SIZE(CO2) ST c22, 1 * SIZE(CO2) ST c31, 0 * SIZE(CO3) ST c32, 1 * SIZE(CO3) ST c41, 0 * SIZE(CO4) ST c42, 1 * SIZE(CO4) ST c51, 0 * SIZE(CO5) ST c52, 1 * SIZE(CO5) ST c61, 0 * SIZE(CO6) ST c62, 1 * SIZE(CO6) ST c71, 0 * SIZE(CO7) ST c72, 1 * SIZE(CO7) ST c81, 0 * SIZE(CO8) ST c82, 1 * SIZE(CO8) MTC $0, a1 #ifndef LN daddiu CO1, CO1, 2 * SIZE daddiu CO2, CO2, 2 * SIZE daddiu 
CO3, CO3, 2 * SIZE daddiu CO4, CO4, 2 * SIZE daddiu CO5, CO5, 2 * SIZE daddiu CO6, CO6, 2 * SIZE daddiu CO7, CO7, 2 * SIZE daddiu CO8, CO8, 2 * SIZE #endif MOV c11, a1 MOV c21, a1 #ifdef RT dsll TEMP, K, 1 + BASE_SHIFT daddu AORIG, AORIG, TEMP #endif MOV c31, a1 MOV c41, a1 #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 3 + BASE_SHIFT daddu AO, AO, L daddu BO, BO, TEMP #endif #ifdef LT daddiu KK, KK, 2 #endif #ifdef LN daddiu KK, KK, -2 #endif daddiu I, I, -1 MOV c51, a1 bgtz I, .L11 MOV c61, a1 .align 3 .L29: #ifdef LN dsll TEMP, K, 3 + BASE_SHIFT daddu B, B, TEMP #endif #if defined(LT) || defined(RN) move B, BO #endif #ifdef RN daddiu KK, KK, 8 #endif #ifdef RT daddiu KK, KK, -8 #endif bgtz J, .L10 NOP .align 3 .L30: andi J, N, 4 blez J, .L50 move AO, A #ifdef RT dsll TEMP, K, 2 + BASE_SHIFT dsubu B, B, TEMP dsll TEMP, LDC, 2 dsubu C, C, TEMP #endif move CO1, C MTC $0, c11 daddu CO2, C, LDC daddu CO3, CO2, LDC MOV c21, c11 daddu CO4, CO3, LDC MOV c31, c11 #ifdef LN daddu KK, M, OFFSET #endif #ifdef LT move KK, OFFSET #endif #if defined(LN) || defined(RT) move AORIG, A #else move AO, A #endif #ifndef RT daddu C, CO4, LDC #endif andi I, M, 1 blez I, .L40 MOV c41, c11 #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) MOV c71, c11 LD a2, 1 * SIZE(AO) MOV c81, c11 LD b1, 0 * SIZE(B) LD b2, 1 * SIZE(B) LD b3, 2 * SIZE(B) LD b4, 3 * SIZE(B) LD b5, 4 * SIZE(B) LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) dsra L, KK, 2 blez L, .L45 move BO, B #else #ifdef LN dsll TEMP, K, BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 0 + BASE_SHIFT dsll TEMP, KK, 2 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) MOV c71, c11 LD a2, 1 * SIZE(AO) MOV c81, c11 LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) dsra L, TEMP, 2 blez L, .L45 NOP #endif .align 3 .L42: MADD c11, c11, a1, b1 LD b1, 16 * SIZE(BO) MADD 
c21, c21, a1, b2 LD b2, 5 * SIZE(BO) MADD c31, c31, a1, b3 LD b3, 6 * SIZE(BO) MADD c41, c41, a1, b4 LD b4, 7 * SIZE(BO) LD a1, 4 * SIZE(AO) daddiu L, L, -1 MADD c11, c11, a2, b5 LD b5, 20 * SIZE(BO) MADD c21, c21, a2, b2 LD b2, 9 * SIZE(BO) MADD c31, c31, a2, b3 LD b3, 10 * SIZE(BO) MADD c41, c41, a2, b4 LD b4, 11 * SIZE(BO) LD a2, 2 * SIZE(AO) daddiu AO, AO, 4 * SIZE MADD c11, c11, a2, b6 LD b6, 24 * SIZE(BO) MADD c21, c21, a2, b2 LD b2, 13 * SIZE(BO) MADD c31, c31, a2, b3 LD b3, 14 * SIZE(BO) MADD c41, c41, a2, b4 LD b4, 15 * SIZE(BO) LD a2, -1 * SIZE(AO) daddiu BO, BO, 16 * SIZE MADD c11, c11, a2, b7 LD b7, 12 * SIZE(BO) MADD c21, c21, a2, b2 LD b2, 1 * SIZE(BO) MADD c31, c31, a2, b3 LD b3, 2 * SIZE(BO) MADD c41, c41, a2, b4 LD b4, 3 * SIZE(BO) bgtz L, .L42 LD a2, 1 * SIZE(AO) .align 3 .L45: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L48 NOP .align 3 .L46: MADD c11, c11, a1, b1 LD b1, 4 * SIZE(BO) MADD c21, c21, a1, b2 LD b2, 5 * SIZE(BO) MADD c31, c31, a1, b3 LD b3, 6 * SIZE(BO) MADD c41, c41, a1, b4 LD a1, 1 * SIZE(AO) LD b4, 7 * SIZE(BO) daddiu L, L, -1 daddiu AO, AO, 1 * SIZE MOV a2, a2 bgtz L, .L46 daddiu BO, BO, 4 * SIZE .L48: #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -1 #else daddiu TEMP, KK, -4 #endif dsll L, TEMP, 0 + BASE_SHIFT dsll TEMP, TEMP, 2 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) SUB c11, b1, c11 SUB c21, b2, c21 SUB c31, b3, c31 SUB c41, b4, c41 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 2 * SIZE(AO) LD b4, 3 * SIZE(AO) SUB c11, b1, c11 SUB c21, b2, c21 SUB c31, b3, c31 SUB c41, b4, c41 #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(AO) MUL c11, b1, c11 MUL c21, b1, c21 MUL c31, b1, c31 MUL c41, b1, c41 #endif #ifdef RN LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) MUL c11, b1, c11 NMSUB c21, c21, b2, c11 
NMSUB c31, c31, b3, c11 NMSUB c41, c41, b4, c11 LD b2, 5 * SIZE(BO) LD b3, 6 * SIZE(BO) LD b4, 7 * SIZE(BO) MUL c21, b2, c21 NMSUB c31, c31, b3, c21 NMSUB c41, c41, b4, c21 LD b3, 10 * SIZE(BO) LD b4, 11 * SIZE(BO) MUL c31, b3, c31 NMSUB c41, c41, b4, c31 LD b4, 15 * SIZE(BO) MUL c41, b4, c41 #endif #ifdef RT LD b5, 15 * SIZE(BO) LD b6, 14 * SIZE(BO) LD b7, 13 * SIZE(BO) LD b8, 12 * SIZE(BO) MUL c41, b5, c41 NMSUB c31, c31, b6, c41 NMSUB c21, c21, b7, c41 NMSUB c11, c11, b8, c41 LD b6, 10 * SIZE(BO) LD b7, 9 * SIZE(BO) LD b8, 8 * SIZE(BO) MUL c31, b6, c31 NMSUB c21, c21, b7, c31 NMSUB c11, c11, b8, c31 LD b7, 5 * SIZE(BO) LD b8, 4 * SIZE(BO) MUL c21, b7, c21 NMSUB c11, c11, b8, c21 LD b8, 0 * SIZE(BO) MUL c11, b8, c11 #endif #ifdef LN daddiu CO1, CO1, -1 * SIZE daddiu CO2, CO2, -1 * SIZE daddiu CO3, CO3, -1 * SIZE daddiu CO4, CO4, -1 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c21, 1 * SIZE(BO) ST c31, 2 * SIZE(BO) ST c41, 3 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c21, 1 * SIZE(AO) ST c31, 2 * SIZE(AO) ST c41, 3 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) ST c21, 0 * SIZE(CO2) ST c31, 0 * SIZE(CO3) ST c41, 0 * SIZE(CO4) MTC $0, c11 #ifndef LN daddiu CO1, CO1, 1 * SIZE daddiu CO2, CO2, 1 * SIZE daddiu CO3, CO3, 1 * SIZE daddiu CO4, CO4, 1 * SIZE #endif MOV c21, c11 #ifdef RT dsll TEMP, K, BASE_SHIFT daddu AORIG, AORIG, TEMP #endif #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll L, TEMP, 0 + BASE_SHIFT dsll TEMP, TEMP, 2 + BASE_SHIFT daddu AO, AO, L daddu BO, BO, TEMP #endif MOV c31, c11 #ifdef LT daddiu KK, KK, 1 #endif #ifdef LN daddiu KK, KK, -1 #endif .align 3 .L40: dsra I, M, 1 MOV c61, c11 blez I, .L49 MOV c41, c11 .L31: #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) LD a3, 4 * SIZE(AO) LD b1, 0 * SIZE(B) MOV c12, c11 LD b2, 1 * SIZE(B) MOV c22, c11 LD b3, 2 * SIZE(B) MOV c32, c11 LD b4, 3 * SIZE(B) MOV c42, c11 LD b5, 4 * SIZE(B) dsra L, KK, 2 LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) blez L, .L35 move BO, B #else #ifdef LN dsll 
TEMP, K, 1 + BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 1 + BASE_SHIFT dsll TEMP, KK, 2 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) LD a3, 4 * SIZE(AO) LD b1, 0 * SIZE(BO) MOV c12, c11 LD b2, 1 * SIZE(BO) MOV c22, c11 LD b3, 2 * SIZE(BO) MOV c32, c11 LD b4, 3 * SIZE(BO) MOV c42, c11 LD b5, 4 * SIZE(BO) dsra L, TEMP, 2 LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) blez L, .L35 NOP #endif .align 3 .L32: MADD c11, c11, a1, b1 LD a2, 1 * SIZE(AO) MADD c21, c21, a1, b2 daddiu L, L, -1 MADD c31, c31, a1, b3 NOP MADD c41, c41, a1, b4 LD a1, 2 * SIZE(AO) MADD c12, c12, a2, b1 LD b1, 16 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 5 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 6 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 7 * SIZE(BO) MADD c11, c11, a1, b5 LD a2, 3 * SIZE(AO) MADD c21, c21, a1, b2 NOP MADD c31, c31, a1, b3 NOP MADD c41, c41, a1, b4 LD a1, 8 * SIZE(AO) MADD c12, c12, a2, b5 LD b5, 20 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 9 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 10 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 11 * SIZE(BO) MADD c11, c11, a3, b6 LD a2, 5 * SIZE(AO) MADD c21, c21, a3, b2 NOP MADD c31, c31, a3, b3 NOP MADD c41, c41, a3, b4 LD a3, 6 * SIZE(AO) MADD c12, c12, a2, b6 LD b6, 24 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 13 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 14 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 15 * SIZE(BO) MADD c11, c11, a3, b7 LD a2, 7 * SIZE(AO) MADD c21, c21, a3, b2 daddiu AO, AO, 8 * SIZE MADD c31, c31, a3, b3 daddiu BO, BO, 16 * SIZE MADD c41, c41, a3, b4 LD a3, 4 * SIZE(AO) MADD c12, c12, a2, b7 LD b7, 12 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 1 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 2 * SIZE(BO) MADD c42, c42, a2, b4 NOP bgtz L, .L32 LD b4, 3 * SIZE(BO) .align 3 .L35: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L38 NOP .align 3 .L36: MADD c11, c11, a1, b1 LD a2, 1 * SIZE(AO) MADD c21, c21, a1, b2 daddiu L, L, -1 MADD c31, c31, a1, b3 daddiu AO, AO, 2 * 
SIZE MADD c41, c41, a1, b4 LD a1, 0 * SIZE(AO) MADD c12, c12, a2, b1 LD b1, 4 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 5 * SIZE(BO) MADD c32, c32, a2, b3 LD b3, 6 * SIZE(BO) MADD c42, c42, a2, b4 LD b4, 7 * SIZE(BO) bgtz L, .L36 daddiu BO, BO, 4 * SIZE .L38: #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -2 #else daddiu TEMP, KK, -4 #endif dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 2 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 5 * SIZE(BO) LD b7, 6 * SIZE(BO) LD b8, 7 * SIZE(BO) SUB c11, b1, c11 SUB c21, b2, c21 SUB c31, b3, c31 SUB c41, b4, c41 SUB c12, b5, c12 SUB c22, b6, c22 SUB c32, b7, c32 SUB c42, b8, c42 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 2 * SIZE(AO) LD b4, 3 * SIZE(AO) LD b5, 4 * SIZE(AO) LD b6, 5 * SIZE(AO) LD b7, 6 * SIZE(AO) LD b8, 7 * SIZE(AO) SUB c11, b1, c11 SUB c12, b2, c12 SUB c21, b3, c21 SUB c22, b4, c22 SUB c31, b5, c31 SUB c32, b6, c32 SUB c41, b7, c41 SUB c42, b8, c42 #endif #ifdef LN LD b1, 3 * SIZE(AO) LD b2, 2 * SIZE(AO) LD b3, 0 * SIZE(AO) MUL c12, b1, c12 MUL c22, b1, c22 MUL c32, b1, c32 MUL c42, b1, c42 NMSUB c11, c11, b2, c12 NMSUB c21, c21, b2, c22 NMSUB c31, c31, b2, c32 NMSUB c41, c41, b2, c42 MUL c11, b3, c11 MUL c21, b3, c21 MUL c31, b3, c31 MUL c41, b3, c41 #endif #ifdef LT LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 3 * SIZE(AO) MUL c11, b1, c11 MUL c21, b1, c21 MUL c31, b1, c31 MUL c41, b1, c41 NMSUB c12, c12, b2, c11 NMSUB c22, c22, b2, c21 NMSUB c32, c32, b2, c31 NMSUB c42, c42, b2, c41 MUL c12, b3, c12 MUL c22, b3, c22 MUL c32, b3, c32 MUL c42, b3, c42 #endif #ifdef RN LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) MUL c11, b1, c11 MUL c12, b1, c12 NMSUB c21, c21, b2, c11 NMSUB c22, c22, b2, c12 NMSUB c31, c31, b3, c11 NMSUB c32, c32, b3, c12 NMSUB c41, c41, b4, c11 NMSUB c42, c42, b4, c12 LD b2, 5 * SIZE(BO) 
LD b3, 6 * SIZE(BO) LD b4, 7 * SIZE(BO) MUL c21, b2, c21 MUL c22, b2, c22 NMSUB c31, c31, b3, c21 NMSUB c32, c32, b3, c22 NMSUB c41, c41, b4, c21 NMSUB c42, c42, b4, c22 LD b3, 10 * SIZE(BO) LD b4, 11 * SIZE(BO) MUL c31, b3, c31 MUL c32, b3, c32 NMSUB c41, c41, b4, c31 NMSUB c42, c42, b4, c32 LD b4, 15 * SIZE(BO) MUL c41, b4, c41 MUL c42, b4, c42 #endif #ifdef RT LD b5, 15 * SIZE(BO) LD b6, 14 * SIZE(BO) LD b7, 13 * SIZE(BO) LD b8, 12 * SIZE(BO) MUL c41, b5, c41 MUL c42, b5, c42 NMSUB c31, c31, b6, c41 NMSUB c32, c32, b6, c42 NMSUB c21, c21, b7, c41 NMSUB c22, c22, b7, c42 NMSUB c11, c11, b8, c41 NMSUB c12, c12, b8, c42 LD b6, 10 * SIZE(BO) LD b7, 9 * SIZE(BO) LD b8, 8 * SIZE(BO) MUL c31, b6, c31 MUL c32, b6, c32 NMSUB c21, c21, b7, c31 NMSUB c22, c22, b7, c32 NMSUB c11, c11, b8, c31 NMSUB c12, c12, b8, c32 LD b7, 5 * SIZE(BO) LD b8, 4 * SIZE(BO) MUL c21, b7, c21 MUL c22, b7, c22 NMSUB c11, c11, b8, c21 NMSUB c12, c12, b8, c22 LD b8, 0 * SIZE(BO) MUL c11, b8, c11 MUL c12, b8, c12 #endif #ifdef LN daddiu CO1, CO1, -2 * SIZE daddiu CO2, CO2, -2 * SIZE daddiu CO3, CO3, -2 * SIZE daddiu CO4, CO4, -2 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c21, 1 * SIZE(BO) ST c31, 2 * SIZE(BO) ST c41, 3 * SIZE(BO) ST c12, 4 * SIZE(BO) ST c22, 5 * SIZE(BO) ST c32, 6 * SIZE(BO) ST c42, 7 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c12, 1 * SIZE(AO) ST c21, 2 * SIZE(AO) ST c22, 3 * SIZE(AO) ST c31, 4 * SIZE(AO) ST c32, 5 * SIZE(AO) ST c41, 6 * SIZE(AO) ST c42, 7 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) ST c12, 1 * SIZE(CO1) ST c21, 0 * SIZE(CO2) ST c22, 1 * SIZE(CO2) ST c31, 0 * SIZE(CO3) ST c32, 1 * SIZE(CO3) ST c41, 0 * SIZE(CO4) ST c42, 1 * SIZE(CO4) #ifndef LN daddiu CO1, CO1, 2 * SIZE daddiu CO2, CO2, 2 * SIZE daddiu CO3, CO3, 2 * SIZE daddiu CO4, CO4, 2 * SIZE #endif #ifdef RT dsll TEMP, K, 1 + BASE_SHIFT daddu AORIG, AORIG, TEMP #endif #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 2 + BASE_SHIFT daddu AO, AO, 
L daddu BO, BO, TEMP #endif #ifdef LT daddiu KK, KK, 2 #endif #ifdef LN daddiu KK, KK, -2 #endif MTC $0, a1 MOV c11, a1 MOV c21, a1 MOV c31, a1 daddiu I, I, -1 bgtz I, .L31 MOV c41, c11 .align 3 .L49: #ifdef LN dsll TEMP, K, 2 + BASE_SHIFT daddu B, B, TEMP #endif #if defined(LT) || defined(RN) move B, BO #endif #ifdef RN daddiu KK, KK, 4 #endif #ifdef RT daddiu KK, KK, -4 #endif .align 3 .L50: andi J, N, 2 blez J, .L70 #ifdef RT dsll TEMP, K, 1 + BASE_SHIFT dsubu B, B, TEMP dsll TEMP, LDC, 1 dsubu C, C, TEMP #endif move AO, A move CO1, C daddu CO2, C, LDC #ifdef LN daddu KK, M, OFFSET #endif #ifdef LT move KK, OFFSET #endif #if defined(LN) || defined(RT) move AORIG, A #else move AO, A #endif #ifndef RT daddu C, CO2, LDC #endif andi I, M, 1 blez I, .L60 NOP #if defined(LT) || defined(RN) dsra L, KK, 2 LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a3, 2 * SIZE(AO) MOV c31, c11 LD a4, 3 * SIZE(AO) MOV c41, c11 LD b1, 0 * SIZE(B) LD b2, 1 * SIZE(B) LD b3, 2 * SIZE(B) LD b4, 3 * SIZE(B) LD b5, 4 * SIZE(B) LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) blez L, .L65 move BO, B #else #ifdef LN dsll TEMP, K, BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 0 + BASE_SHIFT dsll TEMP, KK, 1 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK dsra L, TEMP, 2 LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a3, 2 * SIZE(AO) MOV c31, c11 LD a4, 3 * SIZE(AO) MOV c41, c11 LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) blez L, .L65 NOP #endif .align 3 .L62: MADD c11, c11, a1, b1 LD b1, 4 * SIZE(BO) MADD c21, c21, a1, b2 LD b2, 5 * SIZE(BO) MADD c31, c31, a2, b3 LD b3, 6 * SIZE(BO) MADD c41, c41, a2, b4 LD b4, 7 * SIZE(BO) LD a1, 4 * SIZE(AO) LD a2, 5 * SIZE(AO) MADD c11, c11, a3, b1 LD b1, 8 * SIZE(BO) MADD c21, c21, a3, b2 LD b2, 9 * SIZE(BO) MADD c31, c31, a4, b3 LD b3, 10 * SIZE(BO) MADD c41, c41, a4, b4 LD b4, 11 * SIZE(BO) LD a3, 6 
* SIZE(AO) LD a4, 7 * SIZE(AO) daddiu L, L, -1 daddiu AO, AO, 4 * SIZE bgtz L, .L62 daddiu BO, BO, 8 * SIZE .align 3 .L65: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L68 NOP .align 3 .L66: MADD c11, c11, a1, b1 LD b1, 2 * SIZE(BO) MADD c21, c21, a1, b2 LD b2, 3 * SIZE(BO) LD a1, 1 * SIZE(AO) daddiu L, L, -1 daddiu AO, AO, 1 * SIZE bgtz L, .L66 daddiu BO, BO, 2 * SIZE .L68: ADD c11, c11, c31 ADD c21, c21, c41 #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -1 #else daddiu TEMP, KK, -2 #endif dsll L, TEMP, 0 + BASE_SHIFT dsll TEMP, TEMP, 1 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) SUB c11, b1, c11 SUB c21, b2, c21 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) SUB c11, b1, c11 SUB c21, b2, c21 #endif #if defined(LN) || defined(LT) LD b3, 0 * SIZE(AO) MUL c11, b3, c11 MUL c21, b3, c21 #endif #ifdef RN LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 3 * SIZE(BO) MUL c11, b1, c11 NMSUB c21, c21, b2, c11 MUL c21, b3, c21 #endif #ifdef RT LD b1, 3 * SIZE(BO) LD b2, 2 * SIZE(BO) LD b3, 0 * SIZE(BO) MUL c21, b1, c21 NMSUB c11, c11, b2, c21 MUL c11, b3, c11 #endif #ifdef LN daddiu CO1, CO1, -1 * SIZE daddiu CO2, CO2, -1 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c21, 1 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c21, 1 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) ST c21, 0 * SIZE(CO2) #ifndef LN daddiu CO1, CO1, 1 * SIZE daddiu CO2, CO2, 1 * SIZE #endif #ifdef RT dsll TEMP, K, 0 + BASE_SHIFT daddu AORIG, AORIG, TEMP #endif #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll L, TEMP, 0 + BASE_SHIFT dsll TEMP, TEMP, 1 + BASE_SHIFT daddu AO, AO, L daddu BO, BO, TEMP #endif #ifdef LT daddiu KK, KK, 1 #endif #ifdef LN daddiu KK, KK, -1 #endif .align 3 .L60: dsra I, M, 1 blez I, .L69 NOP .L51: #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a5, 4 * SIZE(AO) LD b1, 0 * SIZE(B) 
MOV c12, c11 LD b2, 1 * SIZE(B) MOV c22, c11 LD b3, 2 * SIZE(B) LD b5, 4 * SIZE(B) dsra L, KK, 2 LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) blez L, .L55 move BO, B #else #ifdef LN dsll TEMP, K, 1 + BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 1 + BASE_SHIFT dsll TEMP, KK, 1 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a5, 4 * SIZE(AO) LD b1, 0 * SIZE(BO) MOV c12, c11 LD b2, 1 * SIZE(BO) MOV c22, c11 LD b3, 2 * SIZE(BO) LD b5, 4 * SIZE(BO) dsra L, TEMP, 2 LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) blez L, .L55 NOP #endif .align 3 .L52: MADD c11, c11, a1, b1 LD a3, 2 * SIZE(AO) MADD c21, c21, a1, b2 LD b4, 3 * SIZE(BO) MADD c12, c12, a2, b1 LD a4, 3 * SIZE(AO) MADD c22, c22, a2, b2 LD b1, 8 * SIZE(BO) MADD c11, c11, a3, b3 LD a1, 8 * SIZE(AO) MADD c21, c21, a3, b4 LD b2, 5 * SIZE(BO) MADD c12, c12, a4, b3 LD a2, 5 * SIZE(AO) MADD c22, c22, a4, b4 LD b3, 6 * SIZE(BO) MADD c11, c11, a5, b5 LD a3, 6 * SIZE(AO) MADD c21, c21, a5, b2 LD b4, 7 * SIZE(BO) MADD c12, c12, a2, b5 LD a4, 7 * SIZE(AO) MADD c22, c22, a2, b2 LD b5, 12 * SIZE(BO) MADD c11, c11, a3, b3 LD a5, 12 * SIZE(AO) MADD c21, c21, a3, b4 LD b2, 9 * SIZE(BO) MADD c12, c12, a4, b3 LD a2, 9 * SIZE(AO) MADD c22, c22, a4, b4 LD b3, 10 * SIZE(BO) daddiu AO, AO, 8 * SIZE daddiu L, L, -1 bgtz L, .L52 daddiu BO, BO, 8 * SIZE .align 3 .L55: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L58 NOP .align 3 .L56: MADD c11, c11, a1, b1 LD a2, 1 * SIZE(AO) MADD c21, c21, a1, b2 LD a1, 2 * SIZE(AO) MADD c12, c12, a2, b1 LD b1, 2 * SIZE(BO) MADD c22, c22, a2, b2 LD b2, 3 * SIZE(BO) daddiu L, L, -1 daddiu AO, AO, 2 * SIZE bgtz L, .L56 daddiu BO, BO, 2 * SIZE .L58: #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -2 #else daddiu TEMP, KK, -2 #endif dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 1 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD 
b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) SUB c11, b1, c11 SUB c21, b2, c21 SUB c12, b3, c12 SUB c22, b4, c22 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 2 * SIZE(AO) LD b4, 3 * SIZE(AO) SUB c11, b1, c11 SUB c12, b2, c12 SUB c21, b3, c21 SUB c22, b4, c22 #endif #ifdef LN LD b1, 3 * SIZE(AO) LD b2, 2 * SIZE(AO) LD b3, 0 * SIZE(AO) MUL c12, b1, c12 MUL c22, b1, c22 NMSUB c11, c11, b2, c12 NMSUB c21, c21, b2, c22 MUL c11, b3, c11 MUL c21, b3, c21 #endif #ifdef LT LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 3 * SIZE(AO) MUL c11, b1, c11 MUL c21, b1, c21 NMSUB c12, c12, b2, c11 NMSUB c22, c22, b2, c21 MUL c12, b3, c12 MUL c22, b3, c22 #endif #ifdef RN LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 3 * SIZE(BO) MUL c11, b1, c11 MUL c12, b1, c12 NMSUB c21, c21, b2, c11 NMSUB c22, c22, b2, c12 MUL c21, b3, c21 MUL c22, b3, c22 #endif #ifdef RT LD b1, 3 * SIZE(BO) LD b2, 2 * SIZE(BO) LD b3, 0 * SIZE(BO) MUL c21, b1, c21 MUL c22, b1, c22 NMSUB c11, c11, b2, c21 NMSUB c12, c12, b2, c22 MUL c11, b3, c11 MUL c12, b3, c12 #endif #ifdef LN daddiu CO1, CO1, -2 * SIZE daddiu CO2, CO2, -2 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c21, 1 * SIZE(BO) ST c12, 2 * SIZE(BO) ST c22, 3 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c12, 1 * SIZE(AO) ST c21, 2 * SIZE(AO) ST c22, 3 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) ST c12, 1 * SIZE(CO1) ST c21, 0 * SIZE(CO2) ST c22, 1 * SIZE(CO2) #ifndef LN daddiu CO1, CO1, 2 * SIZE daddiu CO2, CO2, 2 * SIZE #endif #ifdef RT dsll TEMP, K, 1 + BASE_SHIFT daddu AORIG, AORIG, TEMP #endif #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll TEMP, TEMP, 1 + BASE_SHIFT daddu AO, AO, TEMP daddu BO, BO, TEMP #endif #ifdef LT daddiu KK, KK, 2 #endif #ifdef LN daddiu KK, KK, -2 #endif MTC $0, a1 MOV c11, a1 MOV c21, a1 MOV c31, a1 daddiu I, I, -1 bgtz I, .L51 MOV c41, c11 .align 3 .L69: #ifdef LN dsll TEMP, K, 1 + BASE_SHIFT daddu B, B, TEMP #endif #if defined(LT) || defined(RN) move B, BO 
#endif #ifdef RN daddiu KK, KK, 2 #endif #ifdef RT daddiu KK, KK, -2 #endif .align 3 .L70: andi J, N, 1 blez J, .L999 NOP #ifdef RT dsll TEMP, K, BASE_SHIFT dsubu B, B, TEMP dsubu C, C, LDC #endif move AO, A move CO1, C #ifdef LN daddu KK, M, OFFSET #endif #ifdef LT move KK, OFFSET #endif #if defined(LN) || defined(RT) move AORIG, A #else move AO, A #endif #ifndef RT daddu C, CO1, LDC #endif andi I, M, 1 blez I, .L80 NOP #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a3, 2 * SIZE(AO) LD a4, 3 * SIZE(AO) LD b1, 0 * SIZE(B) LD b2, 1 * SIZE(B) LD b3, 2 * SIZE(B) LD b4, 3 * SIZE(B) LD b5, 4 * SIZE(B) LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) dsra L, KK, 2 blez L, .L85 move BO, B #else #ifdef LN dsll TEMP, K, BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll TEMP, KK, BASE_SHIFT daddu AO, AORIG, TEMP daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a3, 2 * SIZE(AO) LD a4, 3 * SIZE(AO) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) LD b3, 2 * SIZE(BO) LD b4, 3 * SIZE(BO) LD b5, 4 * SIZE(BO) LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) dsra L, TEMP, 2 blez L, .L85 NOP #endif .align 3 .L82: LD a1, 0 * SIZE(AO) LD b1, 0 * SIZE(BO) MADD c11, c11, a1, b1 LD a1, 1 * SIZE(AO) LD b1, 1 * SIZE(BO) MADD c21, c21, a1, b1 LD a1, 2 * SIZE(AO) LD b1, 2 * SIZE(BO) MADD c11, c11, a1, b1 LD a1, 3 * SIZE(AO) LD b1, 3 * SIZE(BO) MADD c21, c21, a1, b1 daddiu L, L, -1 daddiu AO, AO, 4 * SIZE bgtz L, .L82 daddiu BO, BO, 4 * SIZE .align 3 .L85: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L88 NOP .align 3 .L86: LD a1, 0 * SIZE(AO) LD b1, 0 * SIZE(BO) MADD c11, c11, a1, b1 daddiu L, L, -1 daddiu AO, AO, 1 * SIZE bgtz L, .L86 daddiu BO, BO, 1 * SIZE .L88: ADD c11, c11, c21 #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -1 #else daddiu TEMP, KK, -1 #endif dsll TEMP, TEMP, 0 + BASE_SHIFT daddu AO, AORIG, TEMP daddu BO, B, TEMP #endif #if defined(LN) || 
defined(LT) LD b1, 0 * SIZE(BO) SUB c11, b1, c11 #else LD b1, 0 * SIZE(AO) SUB c11, b1, c11 #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(AO) MUL c11, b1, c11 #endif #if defined(RN) || defined(RT) LD b1, 0 * SIZE(BO) MUL c11, b1, c11 #endif #ifdef LN daddiu CO1, CO1, -1 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) #else ST c11, 0 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) #ifndef LN daddiu CO1, CO1, 1 * SIZE #endif #ifdef RT dsll TEMP, K, BASE_SHIFT daddu AORIG, AORIG, TEMP #endif #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll TEMP, TEMP, 0 + BASE_SHIFT daddu AO, AO, TEMP daddu BO, BO, TEMP #endif #ifdef LT daddiu KK, KK, 1 #endif #ifdef LN daddiu KK, KK, -1 #endif .align 3 .L80: dsra I, M, 1 blez I, .L89 NOP .L71: #if defined(LT) || defined(RN) LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a5, 4 * SIZE(AO) LD b1, 0 * SIZE(B) MOV c12, c11 LD b2, 1 * SIZE(B) MOV c22, c11 LD b3, 2 * SIZE(B) LD b5, 4 * SIZE(B) dsra L, KK, 2 LD b6, 8 * SIZE(B) LD b7, 12 * SIZE(B) blez L, .L75 move BO, B #else #ifdef LN dsll TEMP, K, 1 + BASE_SHIFT dsubu AORIG, AORIG, TEMP #endif dsll L, KK, 1 + BASE_SHIFT dsll TEMP, KK, 0 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP dsubu TEMP, K, KK LD a1, 0 * SIZE(AO) MTC $0, c11 LD a2, 1 * SIZE(AO) MOV c21, c11 LD a5, 4 * SIZE(AO) LD b1, 0 * SIZE(BO) MOV c12, c11 LD b2, 1 * SIZE(BO) MOV c22, c11 LD b3, 2 * SIZE(BO) LD b5, 4 * SIZE(BO) dsra L, TEMP, 2 LD b6, 8 * SIZE(BO) LD b7, 12 * SIZE(BO) blez L, .L75 NOP #endif .align 3 .L72: LD a1, 0 * SIZE(AO) LD a2, 1 * SIZE(AO) LD b1, 0 * SIZE(BO) MADD c11, c11, a1, b1 MADD c12, c12, a2, b1 LD a1, 2 * SIZE(AO) LD a2, 3 * SIZE(AO) LD b1, 1 * SIZE(BO) MADD c11, c11, a1, b1 MADD c12, c12, a2, b1 LD a1, 4 * SIZE(AO) LD a2, 5 * SIZE(AO) LD b1, 2 * SIZE(BO) MADD c11, c11, a1, b1 MADD c12, c12, a2, b1 LD a1, 6 * SIZE(AO) LD a2, 7 * SIZE(AO) LD b1, 3 * SIZE(BO) MADD c11, c11, a1, b1 MADD c12, c12, a2, b1 daddiu L, L, -1 daddiu AO, AO, 8 * SIZE bgtz L, .L72 
daddiu BO, BO, 4 * SIZE .align 3 .L75: #if defined(LT) || defined(RN) andi L, KK, 3 #else andi L, TEMP, 3 #endif NOP blez L, .L78 NOP .align 3 .L76: LD a1, 0 * SIZE(AO) LD a2, 1 * SIZE(AO) LD b1, 0 * SIZE(BO) MADD c11, c11, a1, b1 MADD c12, c12, a2, b1 daddiu L, L, -1 daddiu AO, AO, 2 * SIZE bgtz L, .L76 daddiu BO, BO, 1 * SIZE .L78: ADD c11, c11, c21 ADD c12, c12, c22 #if defined(LN) || defined(RT) #ifdef LN daddiu TEMP, KK, -2 #else daddiu TEMP, KK, -1 #endif dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 0 + BASE_SHIFT daddu AO, AORIG, L daddu BO, B, TEMP #endif #if defined(LN) || defined(LT) LD b1, 0 * SIZE(BO) LD b2, 1 * SIZE(BO) SUB c11, b1, c11 SUB c12, b2, c12 #else LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) SUB c11, b1, c11 SUB c12, b2, c12 #endif #ifdef LN LD b1, 3 * SIZE(AO) LD b2, 2 * SIZE(AO) LD b3, 0 * SIZE(AO) MUL c12, b1, c12 NMSUB c11, c11, b2, c12 MUL c11, b3, c11 #endif #ifdef LT LD b1, 0 * SIZE(AO) LD b2, 1 * SIZE(AO) LD b3, 3 * SIZE(AO) MUL c11, b1, c11 NMSUB c12, c12, b2, c11 MUL c12, b3, c12 #endif #if defined(RN) || defined(RT) LD b1, 0 * SIZE(BO) MUL c11, b1, c11 MUL c12, b1, c12 #endif #ifdef LN daddiu CO1, CO1, -2 * SIZE #endif #if defined(LN) || defined(LT) ST c11, 0 * SIZE(BO) ST c12, 1 * SIZE(BO) #else ST c11, 0 * SIZE(AO) ST c12, 1 * SIZE(AO) #endif ST c11, 0 * SIZE(CO1) ST c12, 1 * SIZE(CO1) #ifndef LN daddiu CO1, CO1, 2 * SIZE #endif #ifdef RT dsll TEMP, K, 1 + BASE_SHIFT daddu AORIG, AORIG, TEMP #endif #if defined(LT) || defined(RN) dsubu TEMP, K, KK dsll L, TEMP, 1 + BASE_SHIFT dsll TEMP, TEMP, 0 + BASE_SHIFT daddu AO, AO, L daddu BO, BO, TEMP #endif #ifdef LT daddiu KK, KK, 2 #endif #ifdef LN daddiu KK, KK, -2 #endif daddiu I, I, -1 bgtz I, .L71 NOP .align 3 .L89: #ifdef LN dsll TEMP, K, BASE_SHIFT daddu B, B, TEMP #endif #if defined(LT) || defined(RN) move B, BO #endif #ifdef RN daddiu KK, KK, 1 #endif #ifdef RT daddiu KK, KK, -1 #endif .align 3 .L999: LDARG $16, 0($sp) LDARG $17, 8($sp) LDARG $18, 16($sp) LDARG $19, 24($sp) LDARG 
$20, 32($sp) LDARG $21, 40($sp) ldc1 $f24, 48($sp) ldc1 $f25, 56($sp) ldc1 $f26, 64($sp) ldc1 $f27, 72($sp) ldc1 $f28, 80($sp) LDARG $22, 88($sp) LDARG $23, 96($sp) LDARG $24, 104($sp) LDARG $25, 112($sp) #ifndef __64BIT__ ldc1 $f20,112($sp) ldc1 $f21,120($sp) ldc1 $f22,128($sp) ldc1 $f23,136($sp) #endif j $31 daddiu $sp, $sp, 144 EPILOGUE