/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT           */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,           */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT          */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,        */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES          */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE         */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR              */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF        */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT         */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT        */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE               */
/* POSSIBILITY OF SUCH DAMAGE.                                       */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/
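/* Overview (apparently a ZGEMM/ZTRMM micro-kernel for PowerPC      */
/* cores with a paired "double hummer" FPU, e.g. the PPC440 FP2 of  */
/* Blue Gene machines): LFPDUX loads one double-complex value per   */
/* instruction, and the fused cross/paired multiply-adds selected   */
/* below (fxcpmadd/fxcxnpma or fxcpnsma/fxcxma, depending on the    */
/* conjugation case) accumulate the real/imaginary partial products.*/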
#define ASSEMBLER
#include "common.h"

#undef ZERO

#define ALPHA	0
#define FZERO	16

#define M	r3
#define N	r4
#define K	r5

#ifdef linux
#define A	r6
#define B	r7
#define C	r8
#define LDC	r9
#define OFFSET	r10
#endif

#define TEMP	r11
#define KK	r14
#define INCM1	r15
#define INCM3	r16
#define INCM5	r17
#define INCM7	r18
#define INC2	r19
#define INC	r20
#define INC4	r21

#define I	r22
#define J	r23
#define AO	r24
#define BO	r25
#define AO2	r26
#define BO2	r27
#define CO1	r28
#define CO2	r29
#define ZERO	r31

#ifndef NEEDPARAM

#define A1	f16
#define A2	f17
#define A3	f18
#define A4	f19
#define A5	f20
#define A6	f21
#define A7	f22
#define A8	f23
#define A9	f24
#define A10	f25
#define B1	f26
#define B2	f27
#define B3	f28
#define B4	f29
#define B5	f30
#define B6	f31

#define AP	B6

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(NR) || defined(NC) || defined(TR) || defined(TC)
#define FXCPMADD	fxcpmadd
#define FXCSMADD	fxcxnpma
#else
#define FXCPMADD	fxcpnsma
#define FXCSMADD	fxcxma
#endif

	PROLOGUE
	PROFCODE

	li r0, -16

	stfpdux f14, SP, r0
	stfpdux f15, SP, r0
	stfpdux f16, SP, r0
	stfpdux f17, SP, r0
	stfpdux f18, SP, r0
	stfpdux f19, SP, r0
	stfpdux f20, SP, r0
	stfpdux f21, SP, r0
	stfpdux f22, SP, r0
	stfpdux f23, SP, r0
	stfpdux f24, SP, r0
	stfpdux f25, SP, r0
	stfpdux f26, SP, r0
	stfpdux f27, SP, r0
	stfpdux f28, SP, r0
	stfpdux f29, SP, r0
	stfpdux f30, SP, r0
	stfpdux f31, SP, r0

	stwu r31, -4(SP)
	stwu r30, -4(SP)
	stwu r29, -4(SP)
	stwu r28, -4(SP)
	stwu r27, -4(SP)
	stwu r26, -4(SP)
	stwu r25, -4(SP)
	stwu r24, -4(SP)
	stwu r23, -4(SP)
	stwu r22, -4(SP)
	stwu r21, -4(SP)
	stwu r20, -4(SP)
	stwu r19, -4(SP)
	stwu r18, -4(SP)
	stwu r17, -4(SP)
	stwu r16, -4(SP)
	stwu r15, -4(SP)
	stwu r14, -4(SP)

	li r0, 0
	stwu r0, -4(SP)
	stwu r0, -4(SP)
	stfdu f2, -8(SP)
	stfdu f1, -8(SP)

	slwi LDC, LDC, ZBASE_SHIFT

	cmpwi cr0, M, 0
	ble .L999
	cmpwi cr0, N, 0
	ble .L999
	cmpwi cr0, K, 0
	ble .L999

#if defined(TRMMKERNEL) && !defined(LEFT)
	neg KK, OFFSET
#endif

	andi. r0, C, 2 * SIZE - 1
	bne .L1000

	li INC,   1 * SIZE
	li INC2,  2 * SIZE
	li INC4,  4 * SIZE
	li INCM1, -1 * SIZE
	li INCM3, -2 * SIZE
	li INCM5, -4 * SIZE
	li INCM7, -6 * SIZE

	addi C, C, - 2 * SIZE

	srawi. J, N, 1
	ble .L50
	.align 4

.L10:
	mr CO1, C
	add CO2, C, LDC
	add C, CO2, LDC

#if defined(TRMMKERNEL) && defined(LEFT)
	mr KK, OFFSET
#endif

	addi AO, A, -4 * SIZE

	li r0, FZERO
	lfpsx f0, SP, r0

	srawi. I, M, 2
	ble .L20
	.align 4

.L11:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0
#else
	slwi TEMP, KK, 2 + ZBASE_SHIFT
	slwi r0, KK, 1 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, BO, - 4 * SIZE
	fpmr f8, f0
	addi BO2, BO, 2 * SIZE
	fpmr f12, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 4
#else
	addi TEMP, KK, 2
#endif

	srawi. r0, TEMP, 2
	fpmr f1, f0
	mtspr CTR, r0
	ble .L14
#else
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0

	srawi. r0, K, 2
	fpmr f1, f0
	mtspr CTR, r0
	ble .L14
#endif
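/* Preload the first A/B panels and clear the accumulators.  The    */
/* .L12 loop below is the software-pipelined 4x2 inner kernel: each */
/* unrolled step issues the LFPDUX loads for the next step between  */
/* the multiply-adds of the current one, and the nops appear to     */
/* preserve the load/FPU issue pairing.                             */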
	LFPDUX A1, AO, INC4
	fpmr f5, f0
	LFPDUX A3, AO, INC4
	fpmr f9, f0
	LFPDUX B1, BO, INC4
	fpmr f13, f0
	LFPDUX A5, AO, INC4
	fpmr f2, f0
	LFPDUX A6, AO, INC4
	fpmr f6, f0
	LFPDUX B3, BO, INC4
	fpmr f10, f0
	LFPDUX A7, AO, INC4
	fpmr f14, f0
	LFPDUX A8, AO, INC4
	fpmr f3, f0
	LFPDUX B5, BO, INC4
	fpmr f7, f0
	LFPDUX A9, AO, INC4
	fpmr f11, f0
	LFPDUX A2, AO2, INC4
	fpmr f15, f0
	LFPDUX B2, BO2, INC4
	bdz- .L13
	.align 4

.L12:
## 1 ##
	FXCPMADD f0, B1, A1, f0
	nop
	FXCSMADD f4, B1, A1, f4
	nop
	FXCPMADD f8, B2, A1, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A1, f12
	LFPDUX B6, BO, INC4
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
	LFPDUX A10, AO, INC4
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B1, A3, f2
	nop
	FXCSMADD f6, B1, A3, f6
	nop
	FXCPMADD f10, B2, A3, f10
	nop
	FXCSMADD f14, B2, A3, f14
	nop
	FXCPMADD f3, B1, A4, f3
	nop
	FXCSMADD f7, B1, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
	LFPDUX A1, AO, INC4
	FXCSMADD f15, B2, A4, f15
	nop

## 2 ##
	FXCPMADD f0, B3, A5, f0
	nop
	FXCSMADD f4, B3, A5, f4
	nop
	FXCPMADD f8, B4, A5, f8
	LFPDUX B2, BO2, INC4
	FXCSMADD f12, B4, A5, f12
	LFPDUX B1, BO, INC4
	FXCPMADD f1, B3, A2, f1
	nop
	FXCSMADD f5, B3, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
	LFPDUX A3, AO, INC4
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B3, A6, f2
	nop
	FXCSMADD f6, B3, A6, f6
	nop
	FXCPMADD f10, B4, A6, f10
	nop
	FXCSMADD f14, B4, A6, f14
	nop
	FXCPMADD f3, B3, A4, f3
	nop
	FXCSMADD f7, B3, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B4, A4, f11
	LFPDUX A5, AO, INC4
	FXCSMADD f15, B4, A4, f15
	nop

## 3 ##
	FXCPMADD f0, B5, A7, f0
	nop
	FXCSMADD f4, B5, A7, f4
	nop
	FXCPMADD f8, B2, A7, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A7, f12
	LFPDUX B3, BO, INC4
	FXCPMADD f1, B5, A2, f1
	nop
	FXCSMADD f5, B5, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
	LFPDUX A6, AO, INC4
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B5, A8, f2
	nop
	FXCSMADD f6, B5, A8, f6
	nop
	FXCPMADD f10, B2, A8, f10
	nop
	FXCSMADD f14, B2, A8, f14
	nop
	FXCPMADD f3, B5, A4, f3
	nop
	FXCSMADD f7, B5, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
	LFPDUX A7, AO, INC4
	FXCSMADD f15, B2, A4, f15
	nop

## 4 ##
	FXCPMADD f0, B6, A9, f0
	nop
	FXCSMADD f4, B6, A9, f4
	nop
	FXCPMADD f8, B4, A9, f8
	LFPDUX B2, BO2, INC4
	FXCSMADD f12, B4, A9, f12
	LFPDUX B5, BO, INC4
	FXCPMADD f1, B6, A2, f1
	nop
	FXCSMADD f5, B6, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
	LFPDUX A8, AO, INC4
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B6, A10, f2
	nop
	FXCSMADD f6, B6, A10, f6
	nop
	FXCPMADD f10, B4, A10, f10
	nop
	FXCSMADD f14, B4, A10, f14
	nop
	FXCPMADD f3, B6, A4, f3
	LFPDUX A2, AO2, INC4
	FXCSMADD f7, B6, A4, f7
	LFPDUX A9, AO, INC4
	FXCPMADD f11, B4, A4, f11
	nop
	FXCSMADD f15, B4, A4, f15
	bdnz+ .L12
	.align 4

.L13:
## 1 ##
	FXCPMADD f0, B1, A1, f0
	nop
	FXCSMADD f4, B1, A1, f4
	nop
	FXCPMADD f8, B2, A1, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A1, f12
	LFPDUX B6, BO, INC4
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
	LFPDUX A10, AO, INC4
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B1, A3, f2
	nop
	FXCSMADD f6, B1, A3, f6
	nop
	FXCPMADD f10, B2, A3, f10
	nop
	FXCSMADD f14, B2, A3, f14
	nop
	FXCPMADD f3, B1, A4, f3
	nop
	FXCSMADD f7, B1, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFPDUX A1, CO1, INC2
#else
	nop
#endif
	FXCSMADD f15, B2, A4, f15
	nop

## 2 ##
	FXCPMADD f0, B3, A5, f0
	nop
	FXCSMADD f4, B3, A5, f4
	nop
	FXCPMADD f8, B4, A5, f8
	LFPDUX B2, BO2, INC4
	FXCSMADD f12, B4, A5, f12
#ifndef TRMMKERNEL
	LFPDUX B1, CO1, INC2
#else
	nop
#endif
	FXCPMADD f1, B3, A2, f1
	nop
	FXCSMADD f5, B3, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
#ifndef TRMMKERNEL
	LFPDUX A3, CO1, INC2
#else
	nop
#endif
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B3, A6, f2
	nop
	FXCSMADD f6, B3, A6, f6
	nop
	FXCPMADD f10, B4, A6, f10
	nop
	FXCSMADD f14, B4, A6, f14
	nop
	FXCPMADD f3, B3, A4, f3
	nop
	FXCSMADD f7, B3, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B4, A4, f11
#ifndef TRMMKERNEL
	LFPDUX A5, CO1, INC2
#else
	nop
#endif
	FXCSMADD f15, B4, A4, f15
	nop

## 3 ##
	FXCPMADD f0, B5, A7, f0
	nop
	FXCSMADD f4, B5, A7, f4
	nop
	FXCPMADD f8, B2, A7, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A7, f12
#ifndef TRMMKERNEL
	LFPDUX B3, CO2, INC2
#else
	nop
#endif
	FXCPMADD f1, B5, A2, f1
	nop
	FXCSMADD f5, B5, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
#ifndef TRMMKERNEL
	LFPDUX A6, CO2, INC2
#else
	nop
#endif
	FXCSMADD f13, B2, A2, f13
	FXCPMADD f2, B5, A8, f2
	nop
	FXCSMADD f6, B5, A8, f6
	nop
	FXCPMADD f10, B2, A8, f10
	nop
	FXCSMADD f14, B2, A8, f14
	nop
	FXCPMADD f3, B5, A4, f3
	nop
	FXCSMADD f7, B5, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFPDUX A7, CO2, INC2
#else
	nop
#endif
	FXCSMADD f15, B2, A4, f15
	nop

## 4 ##
	FXCPMADD f0, B6, A9, f0
	nop
	FXCSMADD f4, B6, A9, f4
	nop
	FXCPMADD f8, B4, A9, f8
#ifndef TRMMKERNEL
	LFPDUX B2, CO2, INC2
#else
	nop
#endif
	FXCSMADD f12, B4, A9, f12
	FXCPMADD f1, B6, A2, f1
	nop
	FXCSMADD f5, B6, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
	nop
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B6, A10, f2
	FXCSMADD f6, B6, A10, f6
	FXCPMADD f10, B4, A10, f10
	FXCSMADD f14, B4, A10, f14
	FXCPMADD f3, B6, A4, f3
	FXCSMADD f7, B6, A4, f7
	FXCPMADD f11, B4, A4, f11
	FXCSMADD f15, B4, A4, f15
	.align 4

.L14:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 4
#else
	addi TEMP, KK, 2
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
	ble+ .L18
	cmpwi cr0, TEMP, 3
	bgt+ .L15
#else
	andi. r0, K, 3
	mtspr CTR, r0
	ble+ .L18
	cmpwi cr0, K, 3
	bgt+ .L15
#endif
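/* K-remainder (K mod 4).  If at most three iterations remain, the  */
/* pipelined loop above never ran, so the C tile is preloaded here  */
/* instead of inside the .L13 tail.                                 */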
#ifndef TRMMKERNEL
	LFPDUX A1, CO1, INC2
	fpmr f5, f0
	LFPDUX B1, CO1, INC2
	fpmr f9, f0
	LFPDUX A3, CO1, INC2
	fpmr f13, f0
	LFPDUX A5, CO1, INC2
	fpmr f2, f0
	LFPDUX B3, CO2, INC2
	fpmr f6, f0
	LFPDUX A6, CO2, INC2
	fpmr f10, f0
	LFPDUX A7, CO2, INC2
	fpmr f14, f0
	LFPDUX B2, CO2, INC2
	fpmr f3, f0
#else
	fpmr f5, f0
	fpmr f9, f0
	fpmr f13, f0
	fpmr f2, f0
	fpmr f6, f0
	fpmr f10, f0
	fpmr f14, f0
	fpmr f3, f0
#endif
	fpmr f7, f0
	fpmr f11, f0
	fpmr f15, f0
	.align 4

.L15:
	LFPDUX A2, AO, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX A10, BO, INC4
	LFPDUX B4, BO2, INC4
	bdz- .L17
	.align 4

.L16:
	FXCPMADD f0, A10, A2, f0
	FXCSMADD f4, A10, A2, f4
	FXCPMADD f8, B4, A2, f8
	FXCSMADD f12, B4, A2, f12
	LFPDUX A2, AO, INC4
	FXCPMADD f1, A10, A4, f1
	FXCSMADD f5, A10, A4, f5
	FXCPMADD f9, B4, A4, f9
	FXCSMADD f13, B4, A4, f13
	LFPDUX A4, AO2, INC4
	FXCPMADD f2, A10, A2, f2
	FXCSMADD f6, A10, A2, f6
	FXCPMADD f10, B4, A2, f10
	FXCSMADD f14, B4, A2, f14
	LFPDUX A2, AO, INC4
	FXCPMADD f3, A10, A4, f3
	FXCSMADD f7, A10, A4, f7
	LFPDUX A10, BO, INC4
	FXCPMADD f11, B4, A4, f11
	FXCSMADD f15, B4, A4, f15
	LFPDUX A4, AO2, INC4
	LFPDUX B4, BO2, INC4
	bdnz+ .L16
	.align 4

.L17:
	FXCPMADD f0, A10, A2, f0
	FXCSMADD f4, A10, A2, f4
	FXCPMADD f8, B4, A2, f8
	FXCSMADD f12, B4, A2, f12
	LFPDUX A2, AO, INC4
	FXCPMADD f1, A10, A4, f1
	FXCSMADD f5, A10, A4, f5
	FXCPMADD f9, B4, A4, f9
	FXCSMADD f13, B4, A4, f13
	LFPDUX A4, AO2, INC4
	FXCPMADD f2, A10, A2, f2
	FXCSMADD f6, A10, A2, f6
	FXCPMADD f10, B4, A2, f10
	FXCSMADD f14, B4, A2, f14
	FXCPMADD f3, A10, A4, f3
	FXCSMADD f7, A10, A4, f7
	FXCPMADD f11, B4, A4, f11
	FXCSMADD f15, B4, A4, f15
	.align 4

.L18:
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f4
	fpadd f8, f8, f12
	fpadd f1, f1, f5
	fpadd f9, f9, f13
	fpadd f2, f2, f6
	fpadd f10, f10, f14
	fpadd f3, f3, f7
	fpadd f11, f11, f15
#else
	fpsub f0, f0, f4
	fpsub f8, f8, f12
	fpsub f1, f1, f5
	fpsub f9, f9, f13
	fpsub f2, f2, f6
	fpsub f10, f10, f14
	fpsub f3, f3, f7
	fpsub f11, f11, f15
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd B1, f1, AP, B1
	fxcpmadd A3, f2, AP, A3
	fxcpmadd A5, f3, AP, A5
	fxcxnpma f0, f0, AP, A1
	fxcpmadd B3, f8, AP, B3
	fxcxnpma f1, f1, AP, B1
	fxcpmadd A6, f9, AP, A6
	fxcxnpma f2, f2, AP, A3
	fxcpmadd A7, f10, AP, A7
	fxcxnpma f3, f3, AP, A5
	fxcpmadd B2, f11, AP, B2
	fxcxnpma f8, f8, AP, B3
	STFPDUX f0, CO1, INCM7
	fxcxnpma f9, f9, AP, A6
	STFPDUX f1, CO1, INC2
	fxcxnpma f10, f10, AP, A7
	STFPDUX f2, CO1, INC2
	fxcxnpma f11, f11, AP, B2
	STFPDUX f3, CO1, INC2
	STFPDUX f8, CO2, INCM7
	STFPDUX f9, CO2, INC2
	STFPDUX f10, CO2, INC2
	STFPDUX f11, CO2, INC2
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f1, AP, f30
	fxcpmadd f14, f2, AP, f30
	fxcpmadd f15, f3, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f1, f1, AP, f13
	fxcxnpma f2, f2, AP, f14
	fxcxnpma f3, f3, AP, f15
	fxcpmadd f16, f8, AP, f30
	fxcpmadd f17, f9, AP, f30
	fxcpmadd f18, f10, AP, f30
	fxcpmadd f19, f11, AP, f30
	fxcxnpma f8, f8, AP, f16
	fxcxnpma f9, f9, AP, f17
	fxcxnpma f10, f10, AP, f18
	fxcxnpma f11, f11, AP, f19
	STFPDUX f0, CO1, INC2
	STFPDUX f1, CO1, INC2
	STFPDUX f2, CO1, INC2
	STFPDUX f3, CO1, INC2
	STFPDUX f8, CO2, INC2
	STFPDUX f9, CO2, INC2
	STFPDUX f10, CO2, INC2
	STFPDUX f11, CO2, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -4
#else
	addi TEMP, TEMP, -2
#endif
	slwi r0, TEMP, 2 + ZBASE_SHIFT
	slwi TEMP, TEMP, 1 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
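/* TRMM bookkeeping: AO/BO were just stepped past the part of the   */
/* panels this tile does not touch; KK now advances by the four     */
/* rows produced (LEFT case) so the next tile starts at the proper  */
/* diagonal position.                                               */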
#ifdef LEFT
	addi KK, KK, 4
#endif
#endif

	addic. I, I, -1
	li r0, FZERO
	lfpsx f0, SP, r0
	bgt+ .L11
	.align 4

.L20:
	andi. I, M, 2
	beq .L30

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0
#else
	slwi TEMP, KK, 1 + ZBASE_SHIFT
	slwi r0, KK, 1 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, BO, - 4 * SIZE
	fpmr f8, f0
	addi BO2, BO, 2 * SIZE
	fpmr f12, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 2
#else
	addi TEMP, KK, 2
#endif

	srawi. r0, TEMP, 2
	fpmr f1, f0
	fpmr f5, f0
	fpmr f9, f0
	mtspr CTR, r0
	fpmr f13, f0
	ble .L24
#else
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0

	srawi. r0, K, 2
	fpmr f1, f0
	fpmr f5, f0
	fpmr f9, f0
	mtspr CTR, r0
	fpmr f13, f0
	ble .L24
#endif

	LFPDUX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B2, BO2, INC4
	LFPDUX A3, AO, INC4
	LFPDUX B3, BO, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX B4, BO2, INC4
	LFPDUX A5, AO, INC4
	LFPDUX B5, BO, INC4
	LFPDUX A6, AO2, INC4
	LFPDUX B6, BO2, INC4
	LFPDUX A7, AO, INC4
	LFPDUX A9, BO, INC4
	LFPDUX A10, BO2, INC4
	bdz- .L23
	.align 4

.L22:
	FXCPMADD f0, B1, A1, f0
	nop
	FXCSMADD f4, B1, A1, f4
	LFPDUX A8, AO2, INC4
	FXCPMADD f8, B2, A1, f8
	nop
	FXCSMADD f12, B2, A1, f12
	LFPDUX A1, AO, INC4
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX B1, BO, INC4
	FXCPMADD f9, B2, A2, f9
	nop
	FXCSMADD f13, B2, A2, f13
	LFPDUX B2, BO2, INC4
	FXCPMADD f0, B3, A3, f0
	nop
	FXCSMADD f4, B3, A3, f4
	LFPDUX A2, AO2, INC4
	FXCPMADD f8, B4, A3, f8
	nop
	FXCSMADD f12, B4, A3, f12
	LFPDUX A3, AO, INC4
	FXCPMADD f1, B3, A4, f1
	nop
	FXCSMADD f5, B3, A4, f5
	LFPDUX B3, BO, INC4
	FXCPMADD f9, B4, A4, f9
	nop
	FXCSMADD f13, B4, A4, f13
	LFPDUX B4, BO2, INC4
	FXCPMADD f0, B5, A5, f0
	nop
	FXCSMADD f4, B5, A5, f4
	LFPDUX A4, AO2, INC4
	FXCPMADD f8, B6, A5, f8
	nop
	FXCSMADD f12, B6, A5, f12
	LFPDUX A5, AO, INC4
	FXCPMADD f1, B5, A6, f1
	nop
	FXCSMADD f5, B5, A6, f5
	LFPDUX B5, BO, INC4
	FXCPMADD f9, B6, A6, f9
	nop
	FXCSMADD f13, B6, A6, f13
	LFPDUX B6, BO2, INC4
	FXCPMADD f0, A9, A7, f0
	nop
	FXCSMADD f4, A9, A7, f4
	LFPDUX A6, AO2, INC4
	FXCPMADD f8, A10, A7, f8
	nop
	FXCSMADD f12, A10, A7, f12
	LFPDUX A7, AO, INC4
	FXCPMADD f1, A9, A8, f1
	nop
	FXCSMADD f5, A9, A8, f5
	LFPDUX A9, BO, INC4
	FXCPMADD f9, A10, A8, f9
	nop
	FXCSMADD f13, A10, A8, f13
	LFPDUX A10, BO2, INC4
	bdnz+ .L22
	.align 4

.L23:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	LFPDUX A8, AO2, INC4
	FXCPMADD f8, B2, A1, f8
	FXCSMADD f12, B2, A1, f12
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	FXCPMADD f9, B2, A2, f9
	FXCSMADD f13, B2, A2, f13
	FXCPMADD f0, B3, A3, f0
	FXCSMADD f4, B3, A3, f4
	FXCPMADD f8, B4, A3, f8
	FXCSMADD f12, B4, A3, f12
	FXCPMADD f1, B3, A4, f1
	FXCSMADD f5, B3, A4, f5
	FXCPMADD f9, B4, A4, f9
	FXCSMADD f13, B4, A4, f13
	FXCPMADD f0, B5, A5, f0
	FXCSMADD f4, B5, A5, f4
	FXCPMADD f8, B6, A5, f8
	FXCSMADD f12, B6, A5, f12
	FXCPMADD f1, B5, A6, f1
	FXCSMADD f5, B5, A6, f5
	FXCPMADD f9, B6, A6, f9
	FXCSMADD f13, B6, A6, f13
	FXCPMADD f0, A9, A7, f0
	FXCSMADD f4, A9, A7, f4
	FXCPMADD f8, A10, A7, f8
	FXCSMADD f12, A10, A7, f12
	FXCPMADD f1, A9, A8, f1
	FXCSMADD f5, A9, A8, f5
	FXCPMADD f9, A10, A8, f9
	FXCSMADD f13, A10, A8, f13
	.align 4

.L24:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 2
#else
	addi TEMP, KK, 2
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
#else
	andi. r0, K, 3
	mtspr CTR, r0
#endif
	ble+ .L28

	LFPDUX A1, AO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	bdz- .L27
	.align 4

.L26:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	FXCPMADD f8, B2, A1, f8
	FXCSMADD f12, B2, A1, f12
	LFPDUX A1, AO, INC4
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	LFPDUX B1, BO, INC4
	FXCPMADD f9, B2, A2, f9
	FXCSMADD f13, B2, A2, f13
	LFPDUX A2, AO2, INC4
	LFPDUX B2, BO2, INC4
	bdnz+ .L26
	.align 4

.L27:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	FXCPMADD f8, B2, A1, f8
	FXCSMADD f12, B2, A1, f12
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	FXCPMADD f9, B2, A2, f9
	FXCSMADD f13, B2, A2, f13
	.align 4

.L28:
#ifndef TRMMKERNEL
	LFPDUX A1, CO1, INC2
	LFPDUX A2, CO1, INC2
	LFPDUX A3, CO2, INC2
	LFPDUX A4, CO2, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f4
	fpadd f8, f8, f12
	fpadd f1, f1, f5
	fpadd f9, f9, f13
#else
	fpsub f0, f0, f4
	fpsub f8, f8, f12
	fpsub f1, f1, f5
	fpsub f9, f9, f13
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd A2, f1, AP, A2
	fxcpmadd A3, f8, AP, A3
	fxcpmadd A4, f9, AP, A4
	fxcxnpma f0, f0, AP, A1
	fxcxnpma f1, f1, AP, A2
	fxcxnpma f8, f8, AP, A3
	fxcxnpma f9, f9, AP, A4
	STFPDUX f0, CO1, INCM3
	STFPDUX f1, CO1, INC2
	STFPDUX f8, CO2, INCM3
	STFPDUX f9, CO2, INC2
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f1, AP, f30
	fxcpmadd f14, f8, AP, f30
	fxcpmadd f15, f9, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f1, f1, AP, f13
	fxcxnpma f8, f8, AP, f14
	fxcxnpma f9, f9, AP, f15
	STFPDUX f0, CO1, INC2
	STFPDUX f1, CO1, INC2
	STFPDUX f8, CO2, INC2
	STFPDUX f9, CO2, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -2
#else
	addi TEMP, TEMP, -2
#endif
	slwi r0, TEMP, 1 + ZBASE_SHIFT
	slwi TEMP, TEMP, 1 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 2
#endif
#endif

	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

.L30:
	andi. I, M, 1
	beq .L49

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi AO2, AO, 2 * SIZE
	fpmr f1, f0
	addi BO, B, - 4 * SIZE
	fpmr f2, f0
	addi BO2, B, - 2 * SIZE
	fpmr f3, f0
#else
	slwi TEMP, KK, 0 + ZBASE_SHIFT
	slwi r0, KK, 1 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	fpmr f1, f0
	addi BO, BO, - 4 * SIZE
	fpmr f2, f0
	addi BO2, BO, 2 * SIZE
	fpmr f3, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 1
#else
	addi TEMP, KK, 2
#endif

	srawi. r0, TEMP, 2
	mtspr CTR, r0
	ble .L34
#else
	addi AO2, AO, 2 * SIZE
	fpmr f1, f0
	addi BO, B, - 4 * SIZE
	fpmr f2, f0
	addi BO2, B, - 2 * SIZE
	fpmr f3, f0

	srawi. r0, K, 2
	mtspr CTR, r0
	ble .L34
#endif
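/* M-remainder: a single row of A (one double-complex element per K */
/* step) against the 2-wide B panel; only f0-f3 accumulate here.    */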
	LFPDUX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B3, BO, INC4
	LFPDUX B4, BO2, INC4
	LFPDUX A3, AO, INC4
	LFPDUX A5, BO, INC4
	LFPDUX A6, BO2, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX A7, BO, INC4
	LFPDUX A8, BO2, INC4
	bdz- .L33
	.align 4

.L32:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	LFPDUX B1, BO, INC4
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	LFPDUX B2, BO2, INC4
	LFPDUX A1, AO, INC4
	FXCPMADD f0, B3, A2, f0
	FXCSMADD f1, B3, A2, f1
	LFPDUX B3, BO, INC4
	FXCPMADD f2, B4, A2, f2
	FXCSMADD f3, B4, A2, f3
	LFPDUX B4, BO2, INC4
	LFPDUX A2, AO2, INC4
	FXCPMADD f0, A5, A3, f0
	FXCSMADD f1, A5, A3, f1
	LFPDUX A5, BO, INC4
	FXCPMADD f2, A6, A3, f2
	FXCSMADD f3, A6, A3, f3
	LFPDUX A6, BO2, INC4
	LFPDUX A3, AO, INC4
	FXCPMADD f0, A7, A4, f0
	FXCSMADD f1, A7, A4, f1
	LFPDUX A7, BO, INC4
	FXCPMADD f2, A8, A4, f2
	FXCSMADD f3, A8, A4, f3
	LFPDUX A8, BO2, INC4
	LFPDUX A4, AO2, INC4
	bdnz+ .L32
	.align 4

.L33:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	FXCPMADD f0, B3, A2, f0
	FXCSMADD f1, B3, A2, f1
	FXCPMADD f2, B4, A2, f2
	FXCSMADD f3, B4, A2, f3
	FXCPMADD f0, A5, A3, f0
	FXCSMADD f1, A5, A3, f1
	FXCPMADD f2, A6, A3, f2
	FXCSMADD f3, A6, A3, f3
	FXCPMADD f0, A7, A4, f0
	FXCSMADD f1, A7, A4, f1
	FXCPMADD f2, A8, A4, f2
	FXCSMADD f3, A8, A4, f3
	.align 4

.L34:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 1
#else
	addi TEMP, KK, 2
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
#else
	andi. r0, K, 3
	mtspr CTR, r0
#endif
	ble+ .L38

	LFPDX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	add AO, AO, INC2
	bdz- .L37
	.align 4

.L36:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	LFPDUX B1, BO, INC4
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	LFPDX A1, AO, INC4
	LFPDUX B2, BO2, INC4
	add AO, AO, INC2
	bdnz+ .L36
	.align 4

.L37:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	.align 4

.L38:
#ifndef TRMMKERNEL
	LFPDX A1, CO1, INC2
	LFPDX A2, CO2, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f1
	fpadd f2, f2, f3
#else
	fpsub f0, f0, f1
	fpsub f2, f2, f3
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd A2, f2, AP, A2
	fxcxnpma f0, f0, AP, A1
	fxcxnpma f2, f2, AP, A2
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f2, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f2, f2, AP, f13
#endif
	STFPDUX f0, CO1, INC2
	STFPDUX f2, CO2, INC2

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -1
#else
	addi TEMP, TEMP, -2
#endif
	slwi r0, TEMP, 0 + ZBASE_SHIFT
	slwi TEMP, TEMP, 1 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 1
#endif
#endif

	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

.L49:
#if defined(TRMMKERNEL) && !defined(LEFT)
	addi KK, KK, 2
#endif
	addi B, BO, 4 * SIZE
	addic. J, J, -1
	bgt+ .L10
	.align 4

.L50:
	andi. J, N, 1
	beq .L999

	mr CO1, C
#if defined(TRMMKERNEL) && defined(LEFT)
	mr KK, OFFSET
#endif
	addi AO, A, -2 * SIZE

	li r0, FZERO
	lfpsx f0, SP, r0

	srawi. I, M, 2
	ble .L60
	.align 4
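/* N-remainder: a single column of B.  Only CO1 is written; A is    */
/* swept in 4-row tiles (.L51), then 2 (.L60), then 1 (.L70).       */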
.L51:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	fpmr f4, f0
	addi BO, B, - 2 * SIZE
	fpmr f1, f0
	fpmr f5, f0
	fpmr f2, f0
	fpmr f6, f0
#else
	slwi TEMP, KK, 2 + ZBASE_SHIFT
	slwi r0, KK, 0 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	fpmr f4, f0
	addi BO, BO, - 2 * SIZE
	fpmr f1, f0
	fpmr f5, f0
	fpmr f2, f0
	fpmr f6, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 4
#else
	addi TEMP, KK, 1
#endif

	srawi. r0, TEMP, 2
	fpmr f3, f0
	mtspr CTR, r0
	fpmr f7, f0
	ble .L54
#else
	srawi. r0, K, 2
	fpmr f4, f0
	addi BO, B, - 2 * SIZE
	fpmr f1, f0
	fpmr f5, f0
	fpmr f2, f0
	fpmr f6, f0
	fpmr f3, f0
	mtspr CTR, r0
	fpmr f7, f0
	ble .L54
#endif

	LFPDUX B1, BO, INC2
	LFPDUX A1, AO, INC2
	LFPDUX A2, AO, INC2
	LFPDUX B2, BO, INC2
	LFPDUX A3, AO, INC2
	LFPDUX A4, AO, INC2
	LFPDUX B3, BO, INC2
	LFPDUX A5, AO, INC2
	LFPDUX A6, AO, INC2
	LFPDUX A7, AO, INC2
	LFPDUX A8, AO, INC2
	bdz- .L53
	.align 4

.L52:
	FXCPMADD f0, B1, A1, f0
	LFPDUX B4, BO, INC2
	FXCSMADD f4, B1, A1, f4
	LFPDUX A1, AO, INC2
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX A2, AO, INC2
	FXCPMADD f2, B1, A3, f2
	nop
	FXCSMADD f6, B1, A3, f6
	LFPDUX A3, AO, INC2
	FXCPMADD f3, B1, A4, f3
	nop
	FXCSMADD f7, B1, A4, f7
	LFPDUX A4, AO, INC2
	FXCPMADD f0, B2, A5, f0
	LFPDUX B1, BO, INC2
	FXCSMADD f4, B2, A5, f4
	LFPDUX A5, AO, INC2
	FXCPMADD f1, B2, A6, f1
	nop
	FXCSMADD f5, B2, A6, f5
	LFPDUX A6, AO, INC2
	FXCPMADD f2, B2, A7, f2
	nop
	FXCSMADD f6, B2, A7, f6
	LFPDUX A7, AO, INC2
	FXCPMADD f3, B2, A8, f3
	nop
	FXCSMADD f7, B2, A8, f7
	LFPDUX A8, AO, INC2
	FXCPMADD f0, B3, A1, f0
	LFPDUX B2, BO, INC2
	FXCSMADD f4, B3, A1, f4
	LFPDUX A1, AO, INC2
	FXCPMADD f1, B3, A2, f1
	nop
	FXCSMADD f5, B3, A2, f5
	LFPDUX A2, AO, INC2
	FXCPMADD f2, B3, A3, f2
	nop
	FXCSMADD f6, B3, A3, f6
	LFPDUX A3, AO, INC2
	FXCPMADD f3, B3, A4, f3
	nop
	FXCSMADD f7, B3, A4, f7
	LFPDUX A4, AO, INC2
	FXCPMADD f0, B4, A5, f0
	LFPDUX B3, BO, INC2
	FXCSMADD f4, B4, A5, f4
	LFPDUX A5, AO, INC2
	FXCPMADD f1, B4, A6, f1
	nop
	FXCSMADD f5, B4, A6, f5
	LFPDUX A6, AO, INC2
	FXCPMADD f2, B4, A7, f2
	nop
	FXCSMADD f6, B4, A7, f6
	LFPDUX A7, AO, INC2
	FXCPMADD f3, B4, A8, f3
	nop
	FXCSMADD f7, B4, A8, f7
	LFPDUX A8, AO, INC2
	bdnz+ .L52
	.align 4

.L53:
	FXCPMADD f0, B1, A1, f0
	LFPDUX B4, BO, INC2
	FXCSMADD f4, B1, A1, f4
	LFPDUX A1, AO, INC2
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX A2, AO, INC2
	FXCPMADD f2, B1, A3, f2
	nop
	FXCSMADD f6, B1, A3, f6
	LFPDUX A3, AO, INC2
	FXCPMADD f3, B1, A4, f3
	nop
	FXCSMADD f7, B1, A4, f7
	LFPDUX A4, AO, INC2
	FXCPMADD f0, B2, A5, f0
	nop
	FXCSMADD f4, B2, A5, f4
	LFPDUX A5, AO, INC2
	FXCPMADD f1, B2, A6, f1
	nop
	FXCSMADD f5, B2, A6, f5
	LFPDUX A6, AO, INC2
	FXCPMADD f2, B2, A7, f2
	nop
	FXCSMADD f6, B2, A7, f6
	LFPDUX A7, AO, INC2
	FXCPMADD f3, B2, A8, f3
	nop
	FXCSMADD f7, B2, A8, f7
	LFPDUX A8, AO, INC2
	FXCPMADD f0, B3, A1, f0
	FXCSMADD f4, B3, A1, f4
	FXCPMADD f1, B3, A2, f1
	FXCSMADD f5, B3, A2, f5
	FXCPMADD f2, B3, A3, f2
	FXCSMADD f6, B3, A3, f6
	FXCPMADD f3, B3, A4, f3
	FXCSMADD f7, B3, A4, f7
	FXCPMADD f0, B4, A5, f0
	FXCSMADD f4, B4, A5, f4
	FXCPMADD f1, B4, A6, f1
	FXCSMADD f5, B4, A6, f5
	FXCPMADD f2, B4, A7, f2
	FXCSMADD f6, B4, A7, f6
	FXCPMADD f3, B4, A8, f3
	FXCSMADD f7, B4, A8, f7
	.align 4

.L54:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 4
#else
	addi TEMP, KK, 1
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
#else
	andi. r0, K, 3
	mtspr CTR, r0
#endif
	ble+ .L58

	LFPDUX A1, AO, INC2
	LFPDUX B1, BO, INC2
	LFPDUX A2, AO, INC2
	LFPDUX A3, AO, INC2
	LFPDUX A4, AO, INC2
	bdz- .L57
	.align 4

.L56:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	LFPDUX A1, AO, INC2
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	LFPDUX A2, AO, INC2
	FXCPMADD f2, B1, A3, f2
	FXCSMADD f6, B1, A3, f6
	LFPDUX A3, AO, INC2
	FXCPMADD f3, B1, A4, f3
	FXCSMADD f7, B1, A4, f7
	LFPDUX A4, AO, INC2
	LFPDUX B1, BO, INC2
	bdnz+ .L56
	.align 4

.L57:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	FXCPMADD f2, B1, A3, f2
	FXCSMADD f6, B1, A3, f6
	FXCPMADD f3, B1, A4, f3
	FXCSMADD f7, B1, A4, f7
	.align 4

.L58:
#ifndef TRMMKERNEL
	LFPDUX A1, CO1, INC2
	LFPDUX A2, CO1, INC2
	LFPDUX A3, CO1, INC2
	LFPDUX A4, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f4
	fpadd f1, f1, f5
	fpadd f2, f2, f6
	fpadd f3, f3, f7
#else
	fpsub f0, f0, f4
	fpsub f1, f1, f5
	fpsub f2, f2, f6
	fpsub f3, f3, f7
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd A2, f1, AP, A2
	fxcpmadd A3, f2, AP, A3
	fxcpmadd A4, f3, AP, A4
	fxcxnpma f0, f0, AP, A1
	fxcxnpma f1, f1, AP, A2
	fxcxnpma f2, f2, AP, A3
	fxcxnpma f3, f3, AP, A4
	STFPDUX f0, CO1, INCM7
	STFPDUX f1, CO1, INC2
	STFPDUX f2, CO1, INC2
	STFPDUX f3, CO1, INC2
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f1, AP, f30
	fxcpmadd f14, f2, AP, f30
	fxcpmadd f15, f3, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f1, f1, AP, f13
	fxcxnpma f2, f2, AP, f14
	fxcxnpma f3, f3, AP, f15
	STFPDUX f0, CO1, INC2
	STFPDUX f1, CO1, INC2
	STFPDUX f2, CO1, INC2
	STFPDUX f3, CO1, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -4
#else
	addi TEMP, TEMP, -1
#endif
	slwi r0, TEMP, 2 + ZBASE_SHIFT
	slwi TEMP, TEMP, 0 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 4
#endif
#endif

	addic. I, I, -1
	li r0, FZERO
	lfpsx f0, SP, r0
	bgt+ .L51
	.align 4

.L60:
	andi. I, M, 2
	beq .L70

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi BO, B, - 2 * SIZE
	fpmr f1, f0
#else
	slwi TEMP, KK, 1 + ZBASE_SHIFT
	slwi r0, KK, 0 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi BO, BO, - 2 * SIZE
	fpmr f1, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 2
#else
	addi TEMP, KK, 1
#endif

	srawi. r0, TEMP, 2
	fpmr f2, f0
	mtspr CTR, r0
	fpmr f3, f0
	ble .L64
#else
	srawi. r0, K, 2
	fpmr f1, f0
	addi BO, B, - 2 * SIZE
	fpmr f2, f0
	mtspr CTR, r0
	fpmr f3, f0
	ble .L64
#endif
	LFPDUX B1, BO, INC2
	LFPDUX A1, AO, INC2
	LFPDUX A2, AO, INC2
	LFPDUX B2, BO, INC2
	LFPDUX A3, AO, INC2
	LFPDUX A4, AO, INC2
	LFPDUX B3, BO, INC2
	LFPDUX A5, AO, INC2
	LFPDUX A6, AO, INC2
	LFPDUX B4, BO, INC2
	LFPDUX A7, AO, INC2
	LFPDUX A8, AO, INC2
	bdz- .L63
	.align 4

.L62:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f2, B1, A1, f2
	LFPDUX A1, AO, INC2
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f3, B1, A2, f3
	LFPDUX A2, AO, INC2
	LFPDUX B1, BO, INC2
	FXCPMADD f0, B2, A3, f0
	FXCSMADD f2, B2, A3, f2
	LFPDUX A3, AO, INC2
	FXCPMADD f1, B2, A4, f1
	FXCSMADD f3, B2, A4, f3
	LFPDUX A4, AO, INC2
	LFPDUX B2, BO, INC2
	FXCPMADD f0, B3, A5, f0
	FXCSMADD f2, B3, A5, f2
	LFPDUX A5, AO, INC2
	FXCPMADD f1, B3, A6, f1
	FXCSMADD f3, B3, A6, f3
	LFPDUX A6, AO, INC2
	LFPDUX B3, BO, INC2
	FXCPMADD f0, B4, A7, f0
	FXCSMADD f2, B4, A7, f2
	LFPDUX A7, AO, INC2
	FXCPMADD f1, B4, A8, f1
	FXCSMADD f3, B4, A8, f3
	LFPDUX A8, AO, INC2
	LFPDUX B4, BO, INC2
	bdnz+ .L62
	.align 4

.L63:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f2, B1, A1, f2
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f3, B1, A2, f3
	FXCPMADD f0, B2, A3, f0
	FXCSMADD f2, B2, A3, f2
	FXCPMADD f1, B2, A4, f1
	FXCSMADD f3, B2, A4, f3
	FXCPMADD f0, B3, A5, f0
	FXCSMADD f2, B3, A5, f2
	FXCPMADD f1, B3, A6, f1
	FXCSMADD f3, B3, A6, f3
	FXCPMADD f0, B4, A7, f0
	FXCSMADD f2, B4, A7, f2
	FXCPMADD f1, B4, A8, f1
	FXCSMADD f3, B4, A8, f3
	.align 4

.L64:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 2
#else
	addi TEMP, KK, 1
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
#else
	andi. r0, K, 3
	mtspr CTR, r0
#endif
	ble+ .L68

	LFPDUX A1, AO, INC2
	LFPDUX B1, BO, INC2
	LFPDUX A2, AO, INC2
	bdz- .L67
	.align 4

.L66:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f2, B1, A1, f2
	LFPDUX A1, AO, INC2
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f3, B1, A2, f3
	LFPDUX B1, BO, INC2
	LFPDUX A2, AO, INC2
	bdnz+ .L66
	.align 4

.L67:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f2, B1, A1, f2
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f3, B1, A2, f3
	.align 4

.L68:
#ifndef TRMMKERNEL
	LFPDUX A1, CO1, INC2
	LFPDUX A2, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f2
	fpadd f1, f1, f3
#else
	fpsub f0, f0, f2
	fpsub f1, f1, f3
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd A2, f1, AP, A2
	fxcxnpma f0, f0, AP, A1
	fxcxnpma f1, f1, AP, A2
	STFPDUX f0, CO1, INCM3
	STFPDUX f1, CO1, INC2
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f1, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f1, f1, AP, f13
	STFPDUX f0, CO1, INC2
	STFPDUX f1, CO1, INC2
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -2
#else
	addi TEMP, TEMP, -1
#endif
	slwi r0, TEMP, 1 + ZBASE_SHIFT
	slwi TEMP, TEMP, 0 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 2
#endif
#endif

	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

.L70:
	andi. I, M, 1
	beq .L89
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi BO, B, - 2 * SIZE
	fpmr f1, f0
#else
	slwi TEMP, KK, 0 + ZBASE_SHIFT
	slwi r0, KK, 0 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi BO, BO, - 2 * SIZE
	fpmr f1, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 1
#else
	addi TEMP, KK, 1
#endif

	srawi. r0, TEMP, 3
	fpmr f2, f0
	mtspr CTR, r0
	fpmr f3, f0
	ble .L74
#else
	addi BO, B, - 2 * SIZE
	fpmr f1, f0

	srawi. r0, K, 3
	fpmr f2, f0
	mtspr CTR, r0
	fpmr f3, f0
	ble .L74
#endif

	LFPDUX A1, AO, INC2
	LFPDUX B1, BO, INC2
	LFPDUX A2, AO, INC2
	LFPDUX B2, BO, INC2
	LFPDUX A3, AO, INC2
	LFPDUX B3, BO, INC2
	LFPDUX A4, AO, INC2
	LFPDUX B4, BO, INC2
	LFPDUX A5, AO, INC2
	LFPDUX B5, BO, INC2
	LFPDUX A6, AO, INC2
	LFPDUX B6, BO, INC2
	LFPDUX A7, AO, INC2
	LFPDUX A9, BO, INC2
	LFPDUX A8, AO, INC2
	LFPDUX A10, BO, INC2
	bdz- .L73
	.align 4

.L72:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	LFPDUX A1, AO, INC2
	LFPDUX B1, BO, INC2
	FXCPMADD f2, B2, A2, f2
	FXCSMADD f3, B2, A2, f3
	LFPDUX A2, AO, INC2
	LFPDUX B2, BO, INC2
	FXCPMADD f0, B3, A3, f0
	FXCSMADD f1, B3, A3, f1
	LFPDUX A3, AO, INC2
	LFPDUX B3, BO, INC2
	FXCPMADD f2, B4, A4, f2
	FXCSMADD f3, B4, A4, f3
	LFPDUX A4, AO, INC2
	LFPDUX B4, BO, INC2
	FXCPMADD f0, B5, A5, f0
	FXCSMADD f1, B5, A5, f1
	LFPDUX A5, AO, INC2
	LFPDUX B5, BO, INC2
	FXCPMADD f2, B6, A6, f2
	FXCSMADD f3, B6, A6, f3
	LFPDUX A6, AO, INC2
	LFPDUX B6, BO, INC2
	FXCPMADD f0, A9, A7, f0
	FXCSMADD f1, A9, A7, f1
	LFPDUX A7, AO, INC2
	LFPDUX A9, BO, INC2
	FXCPMADD f2, A10, A8, f2
	FXCSMADD f3, A10, A8, f3
	LFPDUX A8, AO, INC2
	LFPDUX A10, BO, INC2
	bdnz+ .L72
	.align 4

.L73:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	FXCPMADD f2, B2, A2, f2
	FXCSMADD f3, B2, A2, f3
	FXCPMADD f0, B3, A3, f0
	FXCSMADD f1, B3, A3, f1
	FXCPMADD f2, B4, A4, f2
	FXCSMADD f3, B4, A4, f3
	FXCPMADD f0, B5, A5, f0
	FXCSMADD f1, B5, A5, f1
	FXCPMADD f2, B6, A6, f2
	FXCSMADD f3, B6, A6, f3
	FXCPMADD f0, A9, A7, f0
	FXCSMADD f1, A9, A7, f1
	FXCPMADD f2, A10, A8, f2
	FXCSMADD f3, A10, A8, f3
	.align 4

.L74:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 1
#else
	addi TEMP, KK, 1
#endif
	andi. r0, TEMP, 7
	mtspr CTR, r0
#else
	andi. r0, K, 7
	mtspr CTR, r0
#endif
	ble+ .L78

	LFPDUX A1, AO, INC2
	LFPDUX B1, BO, INC2
	bdz- .L77
	.align 4

.L76:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	LFPDUX A1, AO, INC2
	LFPDUX B1, BO, INC2
	bdnz+ .L76
	.align 4

.L77:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	.align 4

.L78:
#ifndef TRMMKERNEL
	LFPDX A1, CO1, INC2
#endif
	fpadd f0, f0, f2
	fpadd f1, f1, f3

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f1
#else
	fpsub f0, f0, f1
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcxnpma f0, f0, AP, A1
#else
	fxcpmadd f12, f0, AP, f30
	fxcxnpma f0, f0, AP, f12
#endif
	STFPDUX f0, CO1, INC2

	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

.L89:
	addi B, BO, 2 * SIZE
	.align 4

.L999:
	addi SP, SP, 20

	lwzu r14, 4(SP)
	lwzu r15, 4(SP)
	lwzu r16, 4(SP)
	lwzu r17, 4(SP)
	lwzu r18, 4(SP)
	lwzu r19, 4(SP)
	lwzu r20, 4(SP)
	lwzu r21, 4(SP)
	lwzu r22, 4(SP)
	lwzu r23, 4(SP)
	lwzu r24, 4(SP)
	lwzu r25, 4(SP)
	lwzu r26, 4(SP)
	lwzu r27, 4(SP)
	lwzu r28, 4(SP)
	lwzu r29, 4(SP)
	lwzu r30, 4(SP)
	lwzu r31, 4(SP)

	subi SP, SP, 12
	li r0, 16

	lfpdux f31, SP, r0
	lfpdux f30, SP, r0
	lfpdux f29, SP, r0
	lfpdux f28, SP, r0
	lfpdux f27, SP, r0
	lfpdux f26, SP, r0
	lfpdux f25, SP, r0
	lfpdux f24, SP, r0
	lfpdux f23, SP, r0
	lfpdux f22, SP, r0
	lfpdux f21, SP, r0
	lfpdux f20, SP, r0
	lfpdux f19, SP, r0
	lfpdux f18, SP, r0
	lfpdux f17, SP, r0
	lfpdux f16, SP, r0
	lfpdux f15, SP, r0
	lfpdux f14, SP, r0
	addi SP, SP, 16
	blr
	.align 4

.L1000:
	li INC,   1 * SIZE
	li INC2,  2 * SIZE
	li INC4,  4 * SIZE
	li INCM1, -1 * SIZE
	li INCM3, -3 * SIZE
	li INCM5, -5 * SIZE
	li INCM7, -7 * SIZE

	addi C, C, - 1 * SIZE

	srawi. J, N, 1
	ble .L1050
	.align 4

.L1010:
	mr CO1, C
	add CO2, C, LDC
	add C, CO2, LDC

#if defined(TRMMKERNEL) && defined(LEFT)
	mr KK, OFFSET
#endif

	addi AO, A, -4 * SIZE

	li r0, FZERO
	lfpsx f0, SP, r0

	srawi. I, M, 2
	ble .L1020
	.align 4

.L1011:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0
#else
	slwi TEMP, KK, 2 + ZBASE_SHIFT
	slwi r0, KK, 1 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, BO, - 4 * SIZE
	fpmr f8, f0
	addi BO2, BO, 2 * SIZE
	fpmr f12, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 4
#else
	addi TEMP, KK, 2
#endif

	srawi. r0, TEMP, 2
	fpmr f1, f0
	mtspr CTR, r0
	ble .L1014
#else
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0

	srawi. r0, K, 2
	fpmr f1, f0
	mtspr CTR, r0
	ble .L1014
#endif
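/* Unaligned-C variant of the 4x2 kernel (taken when C is not       */
/* 2*SIZE-aligned, see the test before .L1000).  The arithmetic     */
/* matches .L12; only the C accesses differ, using scalar halves    */
/* (LFDUX/LFSDUX, STFDUX/STFSDUX) instead of paired LFPDUX/STFPDUX. */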
	LFPDUX A1, AO, INC4
	fpmr f5, f0
	LFPDUX A3, AO, INC4
	fpmr f9, f0
	LFPDUX B1, BO, INC4
	fpmr f13, f0
	LFPDUX A5, AO, INC4
	fpmr f2, f0
	LFPDUX A6, AO, INC4
	fpmr f6, f0
	LFPDUX B3, BO, INC4
	fpmr f10, f0
	LFPDUX A7, AO, INC4
	fpmr f14, f0
	LFPDUX A8, AO, INC4
	fpmr f3, f0
	LFPDUX B5, BO, INC4
	fpmr f7, f0
	LFPDUX A9, AO, INC4
	fpmr f11, f0
	LFPDUX A2, AO2, INC4
	fpmr f15, f0
	LFPDUX B2, BO2, INC4
	bdz- .L1013
	.align 4

.L1012:
## 1 ##
	FXCPMADD f0, B1, A1, f0
	nop
	FXCSMADD f4, B1, A1, f4
	nop
	FXCPMADD f8, B2, A1, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A1, f12
	LFPDUX B6, BO, INC4
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
	LFPDUX A10, AO, INC4
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B1, A3, f2
	nop
	FXCSMADD f6, B1, A3, f6
	nop
	FXCPMADD f10, B2, A3, f10
	nop
	FXCSMADD f14, B2, A3, f14
	nop
	FXCPMADD f3, B1, A4, f3
	nop
	FXCSMADD f7, B1, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
	LFPDUX A1, AO, INC4
	FXCSMADD f15, B2, A4, f15
	nop

## 2 ##
	FXCPMADD f0, B3, A5, f0
	nop
	FXCSMADD f4, B3, A5, f4
	nop
	FXCPMADD f8, B4, A5, f8
	LFPDUX B2, BO2, INC4
	FXCSMADD f12, B4, A5, f12
	LFPDUX B1, BO, INC4
	FXCPMADD f1, B3, A2, f1
	nop
	FXCSMADD f5, B3, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
	LFPDUX A3, AO, INC4
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B3, A6, f2
	nop
	FXCSMADD f6, B3, A6, f6
	nop
	FXCPMADD f10, B4, A6, f10
	nop
	FXCSMADD f14, B4, A6, f14
	nop
	FXCPMADD f3, B3, A4, f3
	nop
	FXCSMADD f7, B3, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B4, A4, f11
	LFPDUX A5, AO, INC4
	FXCSMADD f15, B4, A4, f15
	nop

## 3 ##
	FXCPMADD f0, B5, A7, f0
	nop
	FXCSMADD f4, B5, A7, f4
	nop
	FXCPMADD f8, B2, A7, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A7, f12
	LFPDUX B3, BO, INC4
	FXCPMADD f1, B5, A2, f1
	nop
	FXCSMADD f5, B5, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
	LFPDUX A6, AO, INC4
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B5, A8, f2
	nop
	FXCSMADD f6, B5, A8, f6
	nop
	FXCPMADD f10, B2, A8, f10
	nop
	FXCSMADD f14, B2, A8, f14
	nop
	FXCPMADD f3, B5, A4, f3
	nop
	FXCSMADD f7, B5, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
	LFPDUX A7, AO, INC4
	FXCSMADD f15, B2, A4, f15
	nop

## 4 ##
	FXCPMADD f0, B6, A9, f0
	nop
	FXCSMADD f4, B6, A9, f4
	nop
	FXCPMADD f8, B4, A9, f8
	LFPDUX B2, BO2, INC4
	FXCSMADD f12, B4, A9, f12
	LFPDUX B5, BO, INC4
	FXCPMADD f1, B6, A2, f1
	nop
	FXCSMADD f5, B6, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
	LFPDUX A8, AO, INC4
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B6, A10, f2
	nop
	FXCSMADD f6, B6, A10, f6
	nop
	FXCPMADD f10, B4, A10, f10
	nop
	FXCSMADD f14, B4, A10, f14
	nop
	FXCPMADD f3, B6, A4, f3
	LFPDUX A2, AO2, INC4
	FXCSMADD f7, B6, A4, f7
	LFPDUX A9, AO, INC4
	FXCPMADD f11, B4, A4, f11
	nop
	FXCSMADD f15, B4, A4, f15
	bdnz+ .L1012
	.align 4

.L1013:
## 1 ##
	FXCPMADD f0, B1, A1, f0
	nop
	FXCSMADD f4, B1, A1, f4
	nop
	FXCPMADD f8, B2, A1, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A1, f12
	LFPDUX B6, BO, INC4
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
	LFPDUX A10, AO, INC4
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B1, A3, f2
	nop
	FXCSMADD f6, B1, A3, f6
	nop
	FXCPMADD f10, B2, A3, f10
	nop
	FXCSMADD f14, B2, A3, f14
	nop
	FXCPMADD f3, B1, A4, f3
	nop
	FXCSMADD f7, B1, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFDUX A1, CO1, INC
#else
	nop
#endif
	FXCSMADD f15, B2, A4, f15
	nop

## 2 ##
	FXCPMADD f0, B3, A5, f0
	nop
	FXCSMADD f4, B3, A5, f4
	nop
	FXCPMADD f8, B4, A5, f8
	LFPDUX B2, BO2, INC4
	FXCSMADD f12, B4, A5, f12
#ifndef TRMMKERNEL
	LFDUX B1, CO1, INC2
#else
	nop
#endif
	FXCPMADD f1, B3, A2, f1
	nop
	FXCSMADD f5, B3, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
#ifndef TRMMKERNEL
	LFDUX A3, CO1, INC2
#else
	nop
#endif
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B3, A6, f2
	nop
	FXCSMADD f6, B3, A6, f6
	nop
	FXCPMADD f10, B4, A6, f10
	nop
	FXCSMADD f14, B4, A6, f14
	nop
	FXCPMADD f3, B3, A4, f3
	nop
	FXCSMADD f7, B3, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B4, A4, f11
#ifndef TRMMKERNEL
	LFDUX A5, CO1, INC2
#else
	nop
#endif
	FXCSMADD f15, B4, A4, f15
	nop

## 3 ##
	FXCPMADD f0, B5, A7, f0
	nop
	FXCSMADD f4, B5, A7, f4
	nop
	FXCPMADD f8, B2, A7, f8
	LFPDUX B4, BO2, INC4
	FXCSMADD f12, B2, A7, f12
#ifndef TRMMKERNEL
	LFSDUX A1, CO1, INCM5
#else
	nop
#endif
	FXCPMADD f1, B5, A2, f1
	nop
	FXCSMADD f5, B5, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B2, A2, f9
#ifndef TRMMKERNEL
	LFSDUX B1, CO1, INC2
#else
	nop
#endif
	FXCSMADD f13, B2, A2, f13
	nop
	FXCPMADD f2, B5, A8, f2
	nop
	FXCSMADD f6, B5, A8, f6
	nop
	FXCPMADD f10, B2, A8, f10
	nop
	FXCSMADD f14, B2, A8, f14
	nop
	FXCPMADD f3, B5, A4, f3
	nop
	FXCSMADD f7, B5, A4, f7
	LFPDUX A2, AO2, INC4
	FXCPMADD f11, B2, A4, f11
#ifndef TRMMKERNEL
	LFSDUX A3, CO1, INC2
#else
	nop
#endif
	FXCSMADD f15, B2, A4, f15
	nop

## 4 ##
	FXCPMADD f0, B6, A9, f0
	nop
	FXCSMADD f4, B6, A9, f4
	nop
	FXCPMADD f8, B4, A9, f8
#ifndef TRMMKERNEL
	LFSDUX A5, CO1, INC2
#else
	nop
#endif
	FXCSMADD f12, B4, A9, f12
#ifndef TRMMKERNEL
	LFDUX B3, CO2, INC
#else
	nop
#endif
	FXCPMADD f1, B6, A2, f1
	nop
	FXCSMADD f5, B6, A2, f5
	LFPDUX A4, AO2, INC4
	FXCPMADD f9, B4, A2, f9
#ifndef TRMMKERNEL
	LFDUX A6, CO2, INC2
#else
	nop
#endif
	FXCSMADD f13, B4, A2, f13
	nop
	FXCPMADD f2, B6, A10, f2
	nop
	FXCSMADD f6, B6, A10, f6
	nop
	FXCPMADD f10, B4, A10, f10
	nop
	FXCSMADD f14, B4, A10, f14
#ifndef TRMMKERNEL
	LFDUX A7, CO2, INC2
#else
	nop
#endif
	FXCPMADD f3, B6, A4, f3
	nop
	FXCSMADD f7, B6, A4, f7
	nop
	FXCPMADD f11, B4, A4, f11
	nop
	FXCSMADD f15, B4, A4, f15
#ifndef TRMMKERNEL
	LFDUX B2, CO2, INC2
#else
	nop
#endif
	.align 4

.L1014:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 4
#else
	addi TEMP, KK, 2
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
	ble+ .L1018
	cmpwi cr0, TEMP, 3
	bgt+ .L1015
#else
	andi. r0, K, 3
	mtspr CTR, r0
	ble+ .L1018
	cmpwi cr0, K, 3
	bgt+ .L1015
#endif
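/* Short-K path for the unaligned case: preload the C tile in       */
/* halves, LFDUX for the primary double of each pair and LFSDUX for */
/* the secondary side.                                              */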
#ifndef TRMMKERNEL
	LFDUX A1, CO1, INC
	fpmr f5, f0
	LFDUX B1, CO1, INC2
	fpmr f9, f0
	LFDUX A3, CO1, INC2
	fpmr f13, f0
	LFDUX A5, CO1, INC2
	fpmr f2, f0
	LFSDUX A1, CO1, INCM5
	fpmr f6, f0
	LFSDUX B1, CO1, INC2
	fpmr f10, f0
	LFSDUX A3, CO1, INC2
	fpmr f14, f0
	LFSDUX A5, CO1, INC2
	fpmr f3, f0
	LFDUX B3, CO2, INC
	fpmr f7, f0
	LFDUX A6, CO2, INC2
	fpmr f11, f0
	LFDUX A7, CO2, INC2
	fpmr f15, f0
	LFDUX B2, CO2, INC2
#else
	fpmr f5, f0
	fpmr f9, f0
	fpmr f13, f0
	fpmr f2, f0
	fpmr f6, f0
	fpmr f10, f0
	fpmr f14, f0
	fpmr f3, f0
	fpmr f7, f0
	fpmr f11, f0
	fpmr f15, f0
#endif
	.align 4

.L1015:
	LFPDUX A2, AO, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX A10, BO, INC4
	LFPDUX B4, BO2, INC4
	bdz- .L1017
	.align 4

.L1016:
	FXCPMADD f0, A10, A2, f0
	FXCSMADD f4, A10, A2, f4
	FXCPMADD f8, B4, A2, f8
	FXCSMADD f12, B4, A2, f12
	LFPDUX A2, AO, INC4
	FXCPMADD f1, A10, A4, f1
	FXCSMADD f5, A10, A4, f5
	FXCPMADD f9, B4, A4, f9
	FXCSMADD f13, B4, A4, f13
	LFPDUX A4, AO2, INC4
	FXCPMADD f2, A10, A2, f2
	FXCSMADD f6, A10, A2, f6
	FXCPMADD f10, B4, A2, f10
	FXCSMADD f14, B4, A2, f14
	LFPDUX A2, AO, INC4
	FXCPMADD f3, A10, A4, f3
	FXCSMADD f7, A10, A4, f7
	LFPDUX A10, BO, INC4
	FXCPMADD f11, B4, A4, f11
	FXCSMADD f15, B4, A4, f15
	LFPDUX A4, AO2, INC4
	LFPDUX B4, BO2, INC4
	bdnz+ .L1016
	.align 4

.L1017:
	FXCPMADD f0, A10, A2, f0
	FXCSMADD f4, A10, A2, f4
	FXCPMADD f8, B4, A2, f8
	FXCSMADD f12, B4, A2, f12
	LFPDUX A2, AO, INC4
	FXCPMADD f1, A10, A4, f1
	FXCSMADD f5, A10, A4, f5
	FXCPMADD f9, B4, A4, f9
	FXCSMADD f13, B4, A4, f13
	LFPDUX A4, AO2, INC4
	FXCPMADD f2, A10, A2, f2
	FXCSMADD f6, A10, A2, f6
	FXCPMADD f10, B4, A2, f10
	FXCSMADD f14, B4, A2, f14
	FXCPMADD f3, A10, A4, f3
	FXCSMADD f7, A10, A4, f7
	FXCPMADD f11, B4, A4, f11
	FXCSMADD f15, B4, A4, f15
	.align 4

.L1018:
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f4
	fpadd f8, f8, f12
	fpadd f1, f1, f5
	fpadd f9, f9, f13
	fpadd f2, f2, f6
	fpadd f10, f10, f14
	fpadd f3, f3, f7
	fpadd f11, f11, f15
#else
	fpsub f0, f0, f4
	fpsub f8, f8, f12
	fpsub f1, f1, f5
	fpsub f9, f9, f13
	fpsub f2, f2, f6
	fpsub f10, f10, f14
	fpsub f3, f3, f7
	fpsub f11, f11, f15
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	LFSDUX B3, CO2, INCM5
	fxcpmadd B1, f1, AP, B1
	LFSDUX A6, CO2, INC2
	fxcpmadd A3, f2, AP, A3
	LFSDUX A7, CO2, INC2
	fxcpmadd A5, f3, AP, A5
	LFSDUX B2, CO2, INC2
	fxcxnpma f0, f0, AP, A1
	fxcpmadd B3, f8, AP, B3
	fxcxnpma f1, f1, AP, B1
	fxcpmadd A6, f9, AP, A6
	fxcxnpma f2, f2, AP, A3
	fxcpmadd A7, f10, AP, A7
	fxcxnpma f3, f3, AP, A5
	STFDUX f0, CO1, INCM7
	fxcpmadd B2, f11, AP, B2
	STFSDUX f0, CO1, INC
	fxcxnpma f8, f8, AP, B3
	STFDUX f1, CO1, INC
	STFSDUX f1, CO1, INC
	fxcxnpma f9, f9, AP, A6
	STFDUX f2, CO1, INC
	STFSDUX f2, CO1, INC
	fxcxnpma f10, f10, AP, A7
	STFDUX f3, CO1, INC
	STFSDUX f3, CO1, INC
	fxcxnpma f11, f11, AP, B2
	STFDUX f8, CO2, INCM7
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f1, AP, f30
	fxcpmadd f14, f2, AP, f30
	fxcpmadd f15, f3, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f1, f1, AP, f13
	fxcxnpma f2, f2, AP, f14
	fxcxnpma f3, f3, AP, f15
	fxcpmadd f16, f8, AP, f30
	fxcpmadd f17, f9, AP, f30
	fxcpmadd f18, f10, AP, f30
	fxcpmadd f19, f11, AP, f30
	fxcxnpma f8, f8, AP, f16
	fxcxnpma f9, f9, AP, f17
	fxcxnpma f10, f10, AP, f18
	fxcxnpma f11, f11, AP, f19
	STFDUX f0, CO1, INC
	STFSDUX f0, CO1, INC
	STFDUX f1, CO1, INC
	STFSDUX f1, CO1, INC
	STFDUX f2, CO1, INC
	STFSDUX f2, CO1, INC
	STFDUX f3, CO1, INC
	STFSDUX f3, CO1, INC
	STFDUX f8, CO2, INC
#endif
	STFSDUX f8, CO2, INC
	STFDUX f9, CO2, INC
	STFSDUX f9, CO2, INC
	STFDUX f10, CO2, INC
	STFSDUX f10, CO2, INC
	STFDUX f11, CO2, INC
	STFSDUX f11, CO2, INC

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -4
#else
	addi TEMP, TEMP, -2
#endif
	slwi r0, TEMP, 2 + ZBASE_SHIFT
	slwi TEMP, TEMP, 1 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 4
#endif
#endif

	addic. I, I, -1
	li r0, FZERO
	lfpsx f0, SP, r0
	bgt+ .L1011
	.align 4

.L1020:
	andi. I, M, 2
	beq .L1030

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0
#else
	slwi TEMP, KK, 1 + ZBASE_SHIFT
	slwi r0, KK, 1 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, BO, - 4 * SIZE
	fpmr f8, f0
	addi BO2, BO, 2 * SIZE
	fpmr f12, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 2
#else
	addi TEMP, KK, 2
#endif

	srawi. r0, TEMP, 2
	fpmr f1, f0
	fpmr f5, f0
	fpmr f9, f0
	mtspr CTR, r0
	fpmr f13, f0
	ble .L1024
#else
	addi AO2, AO, 2 * SIZE
	fpmr f4, f0
	addi BO, B, - 4 * SIZE
	fpmr f8, f0
	addi BO2, B, - 2 * SIZE
	fpmr f12, f0

	srawi. r0, K, 2
	fpmr f1, f0
	fpmr f5, f0
	fpmr f9, f0
	mtspr CTR, r0
	fpmr f13, f0
	ble .L1024
#endif

	LFPDUX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B2, BO2, INC4
	LFPDUX A3, AO, INC4
	LFPDUX B3, BO, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX B4, BO2, INC4
	LFPDUX A5, AO, INC4
	LFPDUX B5, BO, INC4
	LFPDUX A6, AO2, INC4
	LFPDUX B6, BO2, INC4
	LFPDUX A7, AO, INC4
	LFPDUX A9, BO, INC4
	LFPDUX A10, BO2, INC4
	bdz- .L1023
	.align 4

.L1022:
	FXCPMADD f0, B1, A1, f0
	nop
	FXCSMADD f4, B1, A1, f4
	LFPDUX A8, AO2, INC4
	FXCPMADD f8, B2, A1, f8
	nop
	FXCSMADD f12, B2, A1, f12
	LFPDUX A1, AO, INC4
	FXCPMADD f1, B1, A2, f1
	nop
	FXCSMADD f5, B1, A2, f5
	LFPDUX B1, BO, INC4
	FXCPMADD f9, B2, A2, f9
	nop
	FXCSMADD f13, B2, A2, f13
	LFPDUX B2, BO2, INC4
	FXCPMADD f0, B3, A3, f0
	nop
	FXCSMADD f4, B3, A3, f4
	LFPDUX A2, AO2, INC4
	FXCPMADD f8, B4, A3, f8
	nop
	FXCSMADD f12, B4, A3, f12
	LFPDUX A3, AO, INC4
	FXCPMADD f1, B3, A4, f1
	nop
	FXCSMADD f5, B3, A4, f5
	LFPDUX B3, BO, INC4
	FXCPMADD f9, B4, A4, f9
	nop
	FXCSMADD f13, B4, A4, f13
	LFPDUX B4, BO2, INC4
	FXCPMADD f0, B5, A5, f0
	nop
	FXCSMADD f4, B5, A5, f4
	LFPDUX A4, AO2, INC4
	FXCPMADD f8, B6, A5, f8
	nop
	FXCSMADD f12, B6, A5, f12
	LFPDUX A5, AO, INC4
	FXCPMADD f1, B5, A6, f1
	nop
	FXCSMADD f5, B5, A6, f5
	LFPDUX B5, BO, INC4
	FXCPMADD f9, B6, A6, f9
	nop
	FXCSMADD f13, B6, A6, f13
	LFPDUX B6, BO2, INC4
	FXCPMADD f0, A9, A7, f0
	nop
	FXCSMADD f4, A9, A7, f4
	LFPDUX A6, AO2, INC4
	FXCPMADD f8, A10, A7, f8
	nop
	FXCSMADD f12, A10, A7, f12
	LFPDUX A7, AO, INC4
	FXCPMADD f1, A9, A8, f1
	nop
	FXCSMADD f5, A9, A8, f5
	LFPDUX A9, BO, INC4
	FXCPMADD f9, A10, A8, f9
	nop
	FXCSMADD f13, A10, A8, f13
	LFPDUX A10, BO2, INC4
	bdnz+ .L1022
	.align 4

.L1023:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	LFPDUX A8, AO2, INC4
	FXCPMADD f8, B2, A1, f8
	FXCSMADD f12, B2, A1, f12
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	FXCPMADD f9, B2, A2, f9
	FXCSMADD f13, B2, A2, f13
	FXCPMADD f0, B3, A3, f0
	FXCSMADD f4, B3, A3, f4
	FXCPMADD f8, B4, A3, f8
	FXCSMADD f12, B4, A3, f12
	FXCPMADD f1, B3, A4, f1
	FXCSMADD f5, B3, A4, f5
	FXCPMADD f9, B4, A4, f9
	FXCSMADD f13, B4, A4, f13
	FXCPMADD f0, B5, A5, f0
	FXCSMADD f4, B5, A5, f4
	FXCPMADD f8, B6, A5, f8
	FXCSMADD f12, B6, A5, f12
	FXCPMADD f1, B5, A6, f1
	FXCSMADD f5, B5, A6, f5
	FXCPMADD f9, B6, A6, f9
	FXCSMADD f13, B6, A6, f13
	FXCPMADD f0, A9, A7, f0
	FXCSMADD f4, A9, A7, f4
	FXCPMADD f8, A10, A7, f8
	FXCSMADD f12, A10, A7, f12
	FXCPMADD f1, A9, A8, f1
	FXCSMADD f5, A9, A8, f5
	FXCPMADD f9, A10, A8, f9
	FXCSMADD f13, A10, A8, f13
	.align 4

.L1024:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 2
#else
	addi TEMP, KK, 2
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
#else
	andi. r0, K, 3
	mtspr CTR, r0
#endif
	ble+ .L1028

	LFPDUX A1, AO, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	bdz- .L1027
	.align 4

.L1026:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	FXCPMADD f8, B2, A1, f8
	FXCSMADD f12, B2, A1, f12
	LFPDUX A1, AO, INC4
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	LFPDUX B1, BO, INC4
	FXCPMADD f9, B2, A2, f9
	FXCSMADD f13, B2, A2, f13
	LFPDUX A2, AO2, INC4
	LFPDUX B2, BO2, INC4
	bdnz+ .L1026
	.align 4

.L1027:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f4, B1, A1, f4
	FXCPMADD f8, B2, A1, f8
	FXCSMADD f12, B2, A1, f12
	FXCPMADD f1, B1, A2, f1
	FXCSMADD f5, B1, A2, f5
	FXCPMADD f9, B2, A2, f9
	FXCSMADD f13, B2, A2, f13
	.align 4

.L1028:
#ifndef TRMMKERNEL
	LFDUX A1, CO1, INC
	LFDUX A2, CO1, INC2
	LFDUX A3, CO2, INC
	LFDUX A4, CO2, INC2
	LFSDUX A1, CO1, INCM1
	LFSDUX A2, CO1, INC2
	LFSDUX A3, CO2, INCM1
	LFSDUX A4, CO2, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f4
	fpadd f8, f8, f12
	fpadd f1, f1, f5
	fpadd f9, f9, f13
#else
	fpsub f0, f0, f4
	fpsub f8, f8, f12
	fpsub f1, f1, f5
	fpsub f9, f9, f13
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd A2, f1, AP, A2
	fxcpmadd A3, f8, AP, A3
	fxcpmadd A4, f9, AP, A4
	fxcxnpma f0, f0, AP, A1
	fxcxnpma f1, f1, AP, A2
	fxcxnpma f8, f8, AP, A3
	fxcxnpma f9, f9, AP, A4
	STFDUX f0, CO1, INCM3
	STFSDUX f0, CO1, INC
	STFDUX f1, CO1, INC
	STFSDUX f1, CO1, INC
	STFDUX f8, CO2, INCM3
	STFSDUX f8, CO2, INC
	STFDUX f9, CO2, INC
	STFSDUX f9, CO2, INC
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f1, AP, f30
	fxcpmadd f14, f8, AP, f30
	fxcpmadd f15, f9, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f1, f1, AP, f13
	fxcxnpma f8, f8, AP, f14
	fxcxnpma f9, f9, AP, f15
	STFDUX f0, CO1, INC
	STFSDUX f0, CO1, INC
	STFDUX f1, CO1, INC
	STFSDUX f1, CO1, INC
	STFDUX f8, CO2, INC
	STFSDUX f8, CO2, INC
	STFDUX f9, CO2, INC
	STFSDUX f9, CO2, INC
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -2
#else
	addi TEMP, TEMP, -2
#endif
	slwi r0, TEMP, 1 + ZBASE_SHIFT
	slwi TEMP, TEMP, 1 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 2
#endif
#endif

	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

.L1030:
	andi. I, M, 1
	beq .L1049

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	addi AO2, AO, 2 * SIZE
	fpmr f1, f0
	addi BO, B, - 4 * SIZE
	fpmr f2, f0
	addi BO2, B, - 2 * SIZE
	fpmr f3, f0
#else
	slwi TEMP, KK, 0 + ZBASE_SHIFT
	slwi r0, KK, 1 + ZBASE_SHIFT
	add AO, AO, TEMP
	add BO, B, r0
	addi AO2, AO, 2 * SIZE
	fpmr f1, f0
	addi BO, BO, - 4 * SIZE
	fpmr f2, f0
	addi BO2, BO, 2 * SIZE
	fpmr f3, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 1
#else
	addi TEMP, KK, 2
#endif

	srawi. r0, TEMP, 2
	mtspr CTR, r0
	ble .L1034
#else
	addi AO2, AO, 2 * SIZE
	fpmr f1, f0
	addi BO, B, - 4 * SIZE
	fpmr f2, f0
	addi BO2, B, - 2 * SIZE
	fpmr f3, f0

	srawi. r0, K, 2
	mtspr CTR, r0
	ble .L1034
#endif

	LFPDUX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	LFPDUX A2, AO2, INC4
	LFPDUX B3, BO, INC4
	LFPDUX B4, BO2, INC4
	LFPDUX A3, AO, INC4
	LFPDUX A5, BO, INC4
	LFPDUX A6, BO2, INC4
	LFPDUX A4, AO2, INC4
	LFPDUX A7, BO, INC4
	LFPDUX A8, BO2, INC4
	bdz- .L1033
	.align 4

.L1032:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	LFPDUX B1, BO, INC4
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	LFPDUX B2, BO2, INC4
	LFPDUX A1, AO, INC4
	FXCPMADD f0, B3, A2, f0
	FXCSMADD f1, B3, A2, f1
	LFPDUX B3, BO, INC4
	FXCPMADD f2, B4, A2, f2
	FXCSMADD f3, B4, A2, f3
	LFPDUX B4, BO2, INC4
	LFPDUX A2, AO2, INC4
	FXCPMADD f0, A5, A3, f0
	FXCSMADD f1, A5, A3, f1
	LFPDUX A5, BO, INC4
	FXCPMADD f2, A6, A3, f2
	FXCSMADD f3, A6, A3, f3
	LFPDUX A6, BO2, INC4
	LFPDUX A3, AO, INC4
	FXCPMADD f0, A7, A4, f0
	FXCSMADD f1, A7, A4, f1
	LFPDUX A7, BO, INC4
	FXCPMADD f2, A8, A4, f2
	FXCSMADD f3, A8, A4, f3
	LFPDUX A8, BO2, INC4
	LFPDUX A4, AO2, INC4
	bdnz+ .L1032
	.align 4

.L1033:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	FXCPMADD f0, B3, A2, f0
	FXCSMADD f1, B3, A2, f1
	FXCPMADD f2, B4, A2, f2
	FXCSMADD f3, B4, A2, f3
	FXCPMADD f0, A5, A3, f0
	FXCSMADD f1, A5, A3, f1
	FXCPMADD f2, A6, A3, f2
	FXCSMADD f3, A6, A3, f3
	FXCPMADD f0, A7, A4, f0
	FXCSMADD f1, A7, A4, f1
	FXCPMADD f2, A8, A4, f2
	FXCSMADD f3, A8, A4, f3
	.align 4

.L1034:
	li r0, ALPHA
	lfpdx AP, SP, r0
#ifdef TRMMKERNEL
	li r0, FZERO
	lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	sub TEMP, K, KK
#elif defined(LEFT)
	addi TEMP, KK, 1
#else
	addi TEMP, KK, 2
#endif
	andi. r0, TEMP, 3
	mtspr CTR, r0
#else
	andi. r0, K, 3
	mtspr CTR, r0
#endif
	ble+ .L1038

	LFPDX A1, AO, INC4
	LFPDUX B1, BO, INC4
	LFPDUX B2, BO2, INC4
	add AO, AO, INC2
	bdz- .L1037
	.align 4

.L1036:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	LFPDUX B1, BO, INC4
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	LFPDX A1, AO, INC4
	LFPDUX B2, BO2, INC4
	add AO, AO, INC2
	bdnz+ .L1036
	.align 4

.L1037:
	FXCPMADD f0, B1, A1, f0
	FXCSMADD f1, B1, A1, f1
	FXCPMADD f2, B2, A1, f2
	FXCSMADD f3, B2, A1, f3
	.align 4

.L1038:
#ifndef TRMMKERNEL
	LFDUX A1, CO1, INC
	LFDUX A2, CO2, INC
	LFSDUX A1, CO1, INC
	LFSDUX A2, CO2, INC
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
	fpadd f0, f0, f1
	fpadd f2, f2, f3
#else
	fpsub f0, f0, f1
	fpsub f2, f2, f3
#endif

#ifndef TRMMKERNEL
	fxcpmadd A1, f0, AP, A1
	fxcpmadd A2, f2, AP, A2
	fxcxnpma f0, f0, AP, A1
	fxcxnpma f2, f2, AP, A2
	STFDUX f0, CO1, INCM1
	STFSDUX f0, CO1, INC
	STFDUX f2, CO2, INCM1
	STFSDUX f2, CO2, INC
#else
	fxcpmadd f12, f0, AP, f30
	fxcpmadd f13, f2, AP, f30
	fxcxnpma f0, f0, AP, f12
	fxcxnpma f2, f2, AP, f13
	STFDUX f0, CO1, INC
	STFSDUX f0, CO1, INC
	STFDUX f2, CO2, INC
	STFSDUX f2, CO2, INC
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	sub TEMP, K, KK
#ifdef LEFT
	addi TEMP, TEMP, -1
#else
	addi TEMP, TEMP, -2
#endif
	slwi r0, TEMP, 0 + ZBASE_SHIFT
	slwi TEMP, TEMP, 1 + ZBASE_SHIFT
	add AO, AO, r0
	add BO, BO, TEMP
#endif
#ifdef LEFT
	addi KK, KK, 1
#endif
#endif

	li r0, FZERO
	lfpsx f0, SP, r0
	.align 4

.L1049:
#if defined(TRMMKERNEL) && !defined(LEFT)
	addi KK, KK, 2
#endif
	addi B, BO, 4 * SIZE
	addic. J, J, -1
	bgt+ .L1010
	.align 4

.L1050:
	andi. J, N, 1
	beq .L10999
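/* Unaligned-C, N=1 column sweep; mirrors .L50 but keeps the scalar */
/* LFDUX/STFDUX access pattern for C.                               */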
        .align 4

.L1050:
        andi. J, N, 1
        beq .L10999

        mr CO1, C
#if defined(TRMMKERNEL) && defined(LEFT)
        mr KK, OFFSET
#endif
        addi AO, A, -2 * SIZE

        li r0, FZERO
        lfpsx f0, SP, r0

        srawi. I, M, 2
        ble .L1060
        .align 4

.L1051:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
        fpmr f4, f0
        addi BO, B, - 2 * SIZE
        fpmr f1, f0
        fpmr f5, f0
        fpmr f2, f0
        fpmr f6, f0
#else
        slwi TEMP, KK, 2 + ZBASE_SHIFT
        slwi r0, KK, 0 + ZBASE_SHIFT
        add AO, AO, TEMP
        add BO, B, r0

        fpmr f4, f0
        addi BO, BO, - 2 * SIZE
        fpmr f1, f0
        fpmr f5, f0
        fpmr f2, f0
        fpmr f6, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
        sub TEMP, K, KK
#elif defined(LEFT)
        addi TEMP, KK, 4
#else
        addi TEMP, KK, 1
#endif
        srawi. r0, TEMP, 2
        fpmr f3, f0
        mtspr CTR, r0
        fpmr f7, f0
        ble .L1054
#else
        srawi. r0, K, 2
        fpmr f4, f0
        addi BO, B, - 2 * SIZE
        fpmr f1, f0
        fpmr f5, f0
        fpmr f2, f0
        fpmr f6, f0
        fpmr f3, f0
        mtspr CTR, r0
        fpmr f7, f0
        ble .L1054
#endif

        LFPDUX B1, BO, INC2
        LFPDUX A1, AO, INC2
        LFPDUX A2, AO, INC2
        LFPDUX B2, BO, INC2
        LFPDUX A3, AO, INC2
        LFPDUX A4, AO, INC2
        LFPDUX B3, BO, INC2
        LFPDUX A5, AO, INC2
        LFPDUX A6, AO, INC2
        LFPDUX A7, AO, INC2
        LFPDUX A8, AO, INC2
        bdz- .L1053
        .align 4

.L1052:
        FXCPMADD f0, B1, A1, f0
        LFPDUX B4, BO, INC2
        FXCSMADD f4, B1, A1, f4
        LFPDUX A1, AO, INC2
        FXCPMADD f1, B1, A2, f1
        nop
        FXCSMADD f5, B1, A2, f5
        LFPDUX A2, AO, INC2

        FXCPMADD f2, B1, A3, f2
        nop
        FXCSMADD f6, B1, A3, f6
        LFPDUX A3, AO, INC2
        FXCPMADD f3, B1, A4, f3
        nop
        FXCSMADD f7, B1, A4, f7
        LFPDUX A4, AO, INC2

        FXCPMADD f0, B2, A5, f0
        LFPDUX B1, BO, INC2
        FXCSMADD f4, B2, A5, f4
        LFPDUX A5, AO, INC2
        FXCPMADD f1, B2, A6, f1
        nop
        FXCSMADD f5, B2, A6, f5
        LFPDUX A6, AO, INC2

        FXCPMADD f2, B2, A7, f2
        nop
        FXCSMADD f6, B2, A7, f6
        LFPDUX A7, AO, INC2
        FXCPMADD f3, B2, A8, f3
        nop
        FXCSMADD f7, B2, A8, f7
        LFPDUX A8, AO, INC2

        FXCPMADD f0, B3, A1, f0
        LFPDUX B2, BO, INC2
        FXCSMADD f4, B3, A1, f4
        LFPDUX A1, AO, INC2
        FXCPMADD f1, B3, A2, f1
        nop
        FXCSMADD f5, B3, A2, f5
        LFPDUX A2, AO, INC2

        FXCPMADD f2, B3, A3, f2
        nop
        FXCSMADD f6, B3, A3, f6
        LFPDUX A3, AO, INC2
        FXCPMADD f3, B3, A4, f3
        nop
        FXCSMADD f7, B3, A4, f7
        LFPDUX A4, AO, INC2

        FXCPMADD f0, B4, A5, f0
        LFPDUX B3, BO, INC2
        FXCSMADD f4, B4, A5, f4
        LFPDUX A5, AO, INC2
        FXCPMADD f1, B4, A6, f1
        nop
        FXCSMADD f5, B4, A6, f5
        LFPDUX A6, AO, INC2

        FXCPMADD f2, B4, A7, f2
        nop
        FXCSMADD f6, B4, A7, f6
        LFPDUX A7, AO, INC2
        FXCPMADD f3, B4, A8, f3
        nop
        FXCSMADD f7, B4, A8, f7
        LFPDUX A8, AO, INC2
        bdnz+ .L1052
        .align 4

.L1053:
        FXCPMADD f0, B1, A1, f0
        LFPDUX B4, BO, INC2
        FXCSMADD f4, B1, A1, f4
        LFPDUX A1, AO, INC2
        FXCPMADD f1, B1, A2, f1
        nop
        FXCSMADD f5, B1, A2, f5
        LFPDUX A2, AO, INC2

        FXCPMADD f2, B1, A3, f2
        nop
        FXCSMADD f6, B1, A3, f6
        LFPDUX A3, AO, INC2
        FXCPMADD f3, B1, A4, f3
        nop
        FXCSMADD f7, B1, A4, f7
        LFPDUX A4, AO, INC2

        FXCPMADD f0, B2, A5, f0
        nop
        FXCSMADD f4, B2, A5, f4
        LFPDUX A5, AO, INC2
        FXCPMADD f1, B2, A6, f1
        nop
        FXCSMADD f5, B2, A6, f5
        LFPDUX A6, AO, INC2

        FXCPMADD f2, B2, A7, f2
        nop
        FXCSMADD f6, B2, A7, f6
        LFPDUX A7, AO, INC2
        FXCPMADD f3, B2, A8, f3
        nop
        FXCSMADD f7, B2, A8, f7
        LFPDUX A8, AO, INC2

        FXCPMADD f0, B3, A1, f0
        FXCSMADD f4, B3, A1, f4
        FXCPMADD f1, B3, A2, f1
        FXCSMADD f5, B3, A2, f5
        FXCPMADD f2, B3, A3, f2
        FXCSMADD f6, B3, A3, f6
        FXCPMADD f3, B3, A4, f3
        FXCSMADD f7, B3, A4, f7

        FXCPMADD f0, B4, A5, f0
        FXCSMADD f4, B4, A5, f4
        FXCPMADD f1, B4, A6, f1
        FXCSMADD f5, B4, A6, f5
        FXCPMADD f2, B4, A7, f2
        FXCSMADD f6, B4, A7, f6
        FXCPMADD f3, B4, A8, f3
        FXCSMADD f7, B4, A8, f7
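
/* K tail (K mod 4) and alpha write-back for the 4 x 1 block:
   four complex results are stored to CO1. */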
        .align 4

.L1054:
        li r0, ALPHA
        lfpdx AP, SP, r0

#ifdef TRMMKERNEL
        li r0, FZERO
        lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
        sub TEMP, K, KK
#elif defined(LEFT)
        addi TEMP, KK, 4
#else
        addi TEMP, KK, 1
#endif
        andi. r0, TEMP, 3
        mtspr CTR, r0
#else
        andi. r0, K, 3
        mtspr CTR, r0
#endif
        ble+ .L1058

        LFPDUX A1, AO, INC2
        LFPDUX B1, BO, INC2
        LFPDUX A2, AO, INC2
        LFPDUX A3, AO, INC2
        LFPDUX A4, AO, INC2
        bdz- .L1057
        .align 4

.L1056:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f4, B1, A1, f4
        LFPDUX A1, AO, INC2
        FXCPMADD f1, B1, A2, f1
        FXCSMADD f5, B1, A2, f5
        LFPDUX A2, AO, INC2

        FXCPMADD f2, B1, A3, f2
        FXCSMADD f6, B1, A3, f6
        LFPDUX A3, AO, INC2
        FXCPMADD f3, B1, A4, f3
        FXCSMADD f7, B1, A4, f7
        LFPDUX A4, AO, INC2
        LFPDUX B1, BO, INC2
        bdnz+ .L1056
        .align 4

.L1057:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f4, B1, A1, f4
        FXCPMADD f1, B1, A2, f1
        FXCSMADD f5, B1, A2, f5
        FXCPMADD f2, B1, A3, f2
        FXCSMADD f6, B1, A3, f6
        FXCPMADD f3, B1, A4, f3
        FXCSMADD f7, B1, A4, f7
        .align 4

.L1058:
#ifndef TRMMKERNEL
        LFDUX A1, CO1, INC
        LFDUX A2, CO1, INC2
        LFDUX A3, CO1, INC2
        LFDUX A4, CO1, INC2

        LFSDUX A1, CO1, INCM5
        LFSDUX A2, CO1, INC2
        LFSDUX A3, CO1, INC2
        LFSDUX A4, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
        fpadd f0, f0, f4
        fpadd f1, f1, f5
        fpadd f2, f2, f6
        fpadd f3, f3, f7
#else
        fpsub f0, f0, f4
        fpsub f1, f1, f5
        fpsub f2, f2, f6
        fpsub f3, f3, f7
#endif

#ifndef TRMMKERNEL
        fxcpmadd A1, f0, AP, A1
        fxcpmadd A2, f1, AP, A2
        fxcpmadd A3, f2, AP, A3
        fxcpmadd A4, f3, AP, A4

        fxcxnpma f0, f0, AP, A1
        fxcxnpma f1, f1, AP, A2
        fxcxnpma f2, f2, AP, A3
        fxcxnpma f3, f3, AP, A4

        STFDUX f0, CO1, INCM7
        STFSDUX f0, CO1, INC
        STFDUX f1, CO1, INC
        STFSDUX f1, CO1, INC
        STFDUX f2, CO1, INC
        STFSDUX f2, CO1, INC
        STFDUX f3, CO1, INC
        STFSDUX f3, CO1, INC
#else
        fxcpmadd f12, f0, AP, f30
        fxcpmadd f13, f1, AP, f30
        fxcpmadd f14, f2, AP, f30
        fxcpmadd f15, f3, AP, f30

        fxcxnpma f0, f0, AP, f12
        fxcxnpma f1, f1, AP, f13
        fxcxnpma f2, f2, AP, f14
        fxcxnpma f3, f3, AP, f15

        STFDUX f0, CO1, INC
        STFSDUX f0, CO1, INC
        STFDUX f1, CO1, INC
        STFSDUX f1, CO1, INC
        STFDUX f2, CO1, INC
        STFSDUX f2, CO1, INC
        STFDUX f3, CO1, INC
        STFSDUX f3, CO1, INC
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
        sub TEMP, K, KK
#ifdef LEFT
        addi TEMP, TEMP, -4
#else
        addi TEMP, TEMP, -1
#endif
        slwi r0, TEMP, 2 + ZBASE_SHIFT
        slwi TEMP, TEMP, 0 + ZBASE_SHIFT
        add AO, AO, r0
        add BO, BO, TEMP
#endif
#ifdef LEFT
        addi KK, KK, 4
#endif
#endif

        addic. I, I, -1
        li r0, FZERO
        lfpsx f0, SP, r0
        bgt+ .L1051
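
/* M & 2 for the last column: two complex rows, K loop unrolled by four. */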
        .align 4

.L1060:
        andi. I, M, 2
        beq .L1070

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
        addi BO, B, - 2 * SIZE
        fpmr f1, f0
#else
        slwi TEMP, KK, 1 + ZBASE_SHIFT
        slwi r0, KK, 0 + ZBASE_SHIFT
        add AO, AO, TEMP
        add BO, B, r0

        addi BO, BO, - 2 * SIZE
        fpmr f1, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
        sub TEMP, K, KK
#elif defined(LEFT)
        addi TEMP, KK, 2
#else
        addi TEMP, KK, 1
#endif
        srawi. r0, TEMP, 2
        fpmr f2, f0
        mtspr CTR, r0
        fpmr f3, f0
        ble .L1064
#else
        srawi. r0, K, 2
        fpmr f1, f0
        addi BO, B, - 2 * SIZE
        fpmr f2, f0
        mtspr CTR, r0
        fpmr f3, f0
        ble .L1064
#endif

        LFPDUX B1, BO, INC2
        LFPDUX A1, AO, INC2
        LFPDUX A2, AO, INC2
        LFPDUX B2, BO, INC2
        LFPDUX A3, AO, INC2
        LFPDUX A4, AO, INC2

        LFPDUX B3, BO, INC2
        LFPDUX A5, AO, INC2
        LFPDUX A6, AO, INC2
        LFPDUX B4, BO, INC2
        LFPDUX A7, AO, INC2
        LFPDUX A8, AO, INC2
        bdz- .L1063
        .align 4

.L1062:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f2, B1, A1, f2
        LFPDUX A1, AO, INC2
        FXCPMADD f1, B1, A2, f1
        FXCSMADD f3, B1, A2, f3
        LFPDUX A2, AO, INC2
        LFPDUX B1, BO, INC2

        FXCPMADD f0, B2, A3, f0
        FXCSMADD f2, B2, A3, f2
        LFPDUX A3, AO, INC2
        FXCPMADD f1, B2, A4, f1
        FXCSMADD f3, B2, A4, f3
        LFPDUX A4, AO, INC2
        LFPDUX B2, BO, INC2

        FXCPMADD f0, B3, A5, f0
        FXCSMADD f2, B3, A5, f2
        LFPDUX A5, AO, INC2
        FXCPMADD f1, B3, A6, f1
        FXCSMADD f3, B3, A6, f3
        LFPDUX A6, AO, INC2
        LFPDUX B3, BO, INC2

        FXCPMADD f0, B4, A7, f0
        FXCSMADD f2, B4, A7, f2
        LFPDUX A7, AO, INC2
        FXCPMADD f1, B4, A8, f1
        FXCSMADD f3, B4, A8, f3
        LFPDUX A8, AO, INC2
        LFPDUX B4, BO, INC2
        bdnz+ .L1062
        .align 4

.L1063:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f2, B1, A1, f2
        FXCPMADD f1, B1, A2, f1
        FXCSMADD f3, B1, A2, f3

        FXCPMADD f0, B2, A3, f0
        FXCSMADD f2, B2, A3, f2
        FXCPMADD f1, B2, A4, f1
        FXCSMADD f3, B2, A4, f3

        FXCPMADD f0, B3, A5, f0
        FXCSMADD f2, B3, A5, f2
        FXCPMADD f1, B3, A6, f1
        FXCSMADD f3, B3, A6, f3

        FXCPMADD f0, B4, A7, f0
        FXCSMADD f2, B4, A7, f2
        FXCPMADD f1, B4, A8, f1
        FXCSMADD f3, B4, A8, f3
        .align 4

.L1064:
        li r0, ALPHA
        lfpdx AP, SP, r0

#ifdef TRMMKERNEL
        li r0, FZERO
        lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
        sub TEMP, K, KK
#elif defined(LEFT)
        addi TEMP, KK, 2
#else
        addi TEMP, KK, 1
#endif
        andi. r0, TEMP, 3
        mtspr CTR, r0
#else
        andi. r0, K, 3
        mtspr CTR, r0
#endif
        ble+ .L1068

        LFPDUX A1, AO, INC2
        LFPDUX B1, BO, INC2
        LFPDUX A2, AO, INC2
        bdz- .L1067
        .align 4

.L1066:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f2, B1, A1, f2
        LFPDUX A1, AO, INC2
        FXCPMADD f1, B1, A2, f1
        FXCSMADD f3, B1, A2, f3
        LFPDUX B1, BO, INC2
        LFPDUX A2, AO, INC2
        bdnz+ .L1066
        .align 4

.L1067:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f2, B1, A1, f2
        FXCPMADD f1, B1, A2, f1
        FXCSMADD f3, B1, A2, f3
        .align 4

.L1068:
#ifndef TRMMKERNEL
        LFDUX A1, CO1, INC
        LFDUX A2, CO1, INC2
        LFSDUX A1, CO1, INCM1
        LFSDUX A2, CO1, INC2
#endif

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
        fpadd f0, f0, f2
        fpadd f1, f1, f3
#else
        fpsub f0, f0, f2
        fpsub f1, f1, f3
#endif

#ifndef TRMMKERNEL
        fxcpmadd A1, f0, AP, A1
        fxcpmadd A2, f1, AP, A2
        fxcxnpma f0, f0, AP, A1
        fxcxnpma f1, f1, AP, A2

        STFDUX f0, CO1, INCM3
        STFSDUX f0, CO1, INC
        STFDUX f1, CO1, INC
        STFSDUX f1, CO1, INC
#else
        fxcpmadd f12, f0, AP, f30
        fxcpmadd f13, f1, AP, f30
        fxcxnpma f0, f0, AP, f12
        fxcxnpma f1, f1, AP, f13

        STFDUX f0, CO1, INC
        STFSDUX f0, CO1, INC
        STFDUX f1, CO1, INC
        STFSDUX f1, CO1, INC
#endif

#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
        sub TEMP, K, KK
#ifdef LEFT
        addi TEMP, TEMP, -2
#else
        addi TEMP, TEMP, -1
#endif
        slwi r0, TEMP, 1 + ZBASE_SHIFT
        slwi TEMP, TEMP, 0 + ZBASE_SHIFT
        add AO, AO, r0
        add BO, BO, TEMP
#endif
#ifdef LEFT
        addi KK, KK, 2
#endif
#endif

        li r0, FZERO
        lfpsx f0, SP, r0
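
/* M & 1 for the last column: a single complex element, with the
   K loop unrolled by eight. */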
        .align 4

.L1070:
        andi. I, M, 1
        beq .L1089

#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
        addi BO, B, - 2 * SIZE
        fpmr f1, f0
#else
        slwi TEMP, KK, 0 + ZBASE_SHIFT
        slwi r0, KK, 0 + ZBASE_SHIFT
        add AO, AO, TEMP
        add BO, B, r0

        addi BO, BO, - 2 * SIZE
        fpmr f1, f0
#endif

#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
        sub TEMP, K, KK
#elif defined(LEFT)
        addi TEMP, KK, 1
#else
        addi TEMP, KK, 1
#endif
        srawi. r0, TEMP, 3
        fpmr f2, f0
        mtspr CTR, r0
        fpmr f3, f0
        ble .L1074
#else
        addi BO, B, - 2 * SIZE
        fpmr f1, f0

        srawi. r0, K, 3
        fpmr f2, f0
        mtspr CTR, r0
        fpmr f3, f0
        ble .L1074
#endif

        LFPDUX A1, AO, INC2
        LFPDUX B1, BO, INC2
        LFPDUX A2, AO, INC2
        LFPDUX B2, BO, INC2
        LFPDUX A3, AO, INC2
        LFPDUX B3, BO, INC2
        LFPDUX A4, AO, INC2
        LFPDUX B4, BO, INC2

        LFPDUX A5, AO, INC2
        LFPDUX B5, BO, INC2
        LFPDUX A6, AO, INC2
        LFPDUX B6, BO, INC2
        LFPDUX A7, AO, INC2
        LFPDUX A9, BO, INC2
        LFPDUX A8, AO, INC2
        LFPDUX A10, BO, INC2
        bdz- .L1073
        .align 4

.L1072:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f1, B1, A1, f1
        LFPDUX A1, AO, INC2
        LFPDUX B1, BO, INC2
        FXCPMADD f2, B2, A2, f2
        FXCSMADD f3, B2, A2, f3
        LFPDUX A2, AO, INC2
        LFPDUX B2, BO, INC2

        FXCPMADD f0, B3, A3, f0
        FXCSMADD f1, B3, A3, f1
        LFPDUX A3, AO, INC2
        LFPDUX B3, BO, INC2
        FXCPMADD f2, B4, A4, f2
        FXCSMADD f3, B4, A4, f3
        LFPDUX A4, AO, INC2
        LFPDUX B4, BO, INC2

        FXCPMADD f0, B5, A5, f0
        FXCSMADD f1, B5, A5, f1
        LFPDUX A5, AO, INC2
        LFPDUX B5, BO, INC2
        FXCPMADD f2, B6, A6, f2
        FXCSMADD f3, B6, A6, f3
        LFPDUX A6, AO, INC2
        LFPDUX B6, BO, INC2

        FXCPMADD f0, A9, A7, f0
        FXCSMADD f1, A9, A7, f1
        LFPDUX A7, AO, INC2
        LFPDUX A9, BO, INC2
        FXCPMADD f2, A10, A8, f2
        FXCSMADD f3, A10, A8, f3
        LFPDUX A8, AO, INC2
        LFPDUX A10, BO, INC2
        bdnz+ .L1072
        .align 4

.L1073:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f1, B1, A1, f1
        FXCPMADD f2, B2, A2, f2
        FXCSMADD f3, B2, A2, f3

        FXCPMADD f0, B3, A3, f0
        FXCSMADD f1, B3, A3, f1
        FXCPMADD f2, B4, A4, f2
        FXCSMADD f3, B4, A4, f3

        FXCPMADD f0, B5, A5, f0
        FXCSMADD f1, B5, A5, f1
        FXCPMADD f2, B6, A6, f2
        FXCSMADD f3, B6, A6, f3

        FXCPMADD f0, A9, A7, f0
        FXCSMADD f1, A9, A7, f1
        FXCPMADD f2, A10, A8, f2
        FXCSMADD f3, A10, A8, f3
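
/* K tail (K mod 8) and write-back for the final 1 x 1 element. */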
        .align 4

.L1074:
        li r0, ALPHA
        lfpdx AP, SP, r0

#ifdef TRMMKERNEL
        li r0, FZERO
        lfpsx f30, SP, r0
#endif

#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
        sub TEMP, K, KK
#elif defined(LEFT)
        addi TEMP, KK, 1
#else
        addi TEMP, KK, 1
#endif
        andi. r0, TEMP, 7
        mtspr CTR, r0
#else
        andi. r0, K, 7
        mtspr CTR, r0
#endif
        ble+ .L1078

        LFPDUX A1, AO, INC2
        LFPDUX B1, BO, INC2
        bdz- .L1077
        .align 4

.L1076:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f1, B1, A1, f1
        LFPDUX A1, AO, INC2
        LFPDUX B1, BO, INC2
        bdnz+ .L1076
        .align 4

.L1077:
        FXCPMADD f0, B1, A1, f0
        FXCSMADD f1, B1, A1, f1
        .align 4

.L1078:
#ifndef TRMMKERNEL
        LFDUX A1, CO1, INC
        LFDUX A2, CO1, INC
#endif

        fpadd f0, f0, f2
        fpadd f1, f1, f3

/* Pack the two doubles loaded from C into one paired register
   (only meaningful on the non-TRMM path). */
        fsmfp A1, A2

#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
    defined(RN) || defined(RT) || defined(CN) || defined(CT)
        fpadd f0, f0, f1
#else
        fpsub f0, f0, f1
#endif

#ifndef TRMMKERNEL
        fxcpmadd A1, f0, AP, A1
        fxcxnpma f0, f0, AP, A1

        STFDUX f0, CO1, INCM1
        STFSDUX f0, CO1, INC
#else
        fxcpmadd f12, f0, AP, f30
        fxcxnpma f0, f0, AP, f12

        STFDUX f0, CO1, INC
        STFSDUX f0, CO1, INC
#endif

        li r0, FZERO
        lfpsx f0, SP, r0
        .align 4

.L1089:
        addi B, BO, 2 * SIZE
        .align 4

/* Restore the callee-saved GPRs and FPRs pushed in the prologue,
   release the stack frame, and return. */
.L10999:
        addi SP, SP, 20
        lwzu r14, 4(SP)
        lwzu r15, 4(SP)
        lwzu r16, 4(SP)
        lwzu r17, 4(SP)
        lwzu r18, 4(SP)
        lwzu r19, 4(SP)
        lwzu r20, 4(SP)
        lwzu r21, 4(SP)
        lwzu r22, 4(SP)
        lwzu r23, 4(SP)
        lwzu r24, 4(SP)
        lwzu r25, 4(SP)
        lwzu r26, 4(SP)
        lwzu r27, 4(SP)
        lwzu r28, 4(SP)
        lwzu r29, 4(SP)
        lwzu r30, 4(SP)
        lwzu r31, 4(SP)

        subi SP, SP, 12
        li r0, 16

        lfpdux f31, SP, r0
        lfpdux f30, SP, r0
        lfpdux f29, SP, r0
        lfpdux f28, SP, r0
        lfpdux f27, SP, r0
        lfpdux f26, SP, r0
        lfpdux f25, SP, r0
        lfpdux f24, SP, r0
        lfpdux f23, SP, r0
        lfpdux f22, SP, r0
        lfpdux f21, SP, r0
        lfpdux f20, SP, r0
        lfpdux f19, SP, r0
        lfpdux f18, SP, r0
        lfpdux f17, SP, r0
        lfpdux f16, SP, r0
        lfpdux f15, SP, r0
        lfpdux f14, SP, r0
        addi SP, SP, 16
        blr
        .align 4

        EPILOGUE
#endif