/*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* 1. Redistributions of source code must retain the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer. */ /* */ /* 2. Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials */ /* provided with the distribution. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* */ /* The views and conclusions contained in the software and */ /* documentation are those of the authors and should not be */ /* interpreted as representing official policies, either expressed */ /* or implied, of The University of Texas at Austin. 
*/
/*********************************************************************/

/*
 * Matrix-vector update kernel.
 *
 * Reading the data flow below: for each j in 0..N-1 the routine loads one
 * element of x, multiplies it by alpha, and adds (alpha * x[j]) times a
 * run of M elements of A into y[0..M-1].  Successive runs of A start LDA
 * elements apart and are walked contiguously, i.e. the layout is what a
 * column-major, non-transposed GEMV would use.
 *
 * The paired-FP mnemonics used here (stfpdux, fxcpmadd, fsmr, ...) and
 * the upper-case load/store macros (LFPDUX, STFXDUX, ...) are not defined
 * in this file.  NOTE(review): they look like the PowerPC 440 FP2
 * "double hummer" set, with the macros presumably supplied by common.h;
 * comments below that describe "primary"/"secondary" register halves
 * rely on that reading and should be confirmed against the ISA manual.
 *
 * Three code paths are selected on the layout of y:
 *   .L11 - .L38 : INCY == SIZE and Y is a multiple of 2 * SIZE
 *   .L40 - .L69 : INCY == SIZE and Y is not a multiple of 2 * SIZE
 *   .L70 - .L98 : INCY != SIZE
 * Each path handles N in groups of 4, then 2, then 1, and within a group
 * handles M in blocks of 8 (counted in CTR), then 4, 2 and 1.
 */

#define ASSEMBLER
#include "common.h"

/* Integer arguments.  INCY is not passed in a register; it is loaded
   from 8(SP) at entry.  I is defined but not referenced below. */
#define M r3
#define N r4
#define A r6
#define LDA r7
#define X r8
#define INCX r9
#define Y r10
#define INCY r5

/* Loop counters and derived pointers/strides. */
#define I r11
#define J r12
#define INCY2 r24
#define A1 r25
#define A2 r26
#define A3 r27
#define A4 r28
#define YL r29
#define YS r30
#define INC2 r31

/* y values as loaded (yl*) and as accumulated for storing (ys*). */
#define yl1 f0
#define yl2 f2
#define yl3 f3
#define yl4 f4
#define ys1 f5
#define ys2 f6
#define ys3 f7
#define ys4 f8
#define yl5 f27
#define ys5 f28

/* Scaled x values: alpha1 holds two consecutive x elements (one per
   half), alpha2 the next two. */
#define alpha1 f9
#define alpha2 f10

/* Elements of A.  In the 4-wide loops a1/a5/a9/a13 come from A1,
   a2/a6/a10/a14 from A2, a3/a7/a11/a15 from A3 and a4/a8/a12/a16 from
   A4; some narrower loops reuse these registers differently. */
#define a1 f11
#define a2 f12
#define a3 f13
#define a4 f14
#define a5 f15
#define a6 f16
#define a7 f17
#define a8 f18
#define a9 f19
#define a10 f20
#define a11 f21
#define a12 f22
#define a13 f23
#define a14 f24
#define a15 f25
#define a16 f26

/* The scalar multiplier arrives in f1. */
#define alpha f1

	PROLOGUE
	PROFCODE

	/* r0 = -16 is the step for the 16-byte FP register saves below.
	   INCY is fetched from the caller's frame before SP is moved. */
	li r0, -16
	lwz INCY, 8(SP)

	/* Save f14-f31, 16 bytes each, pushing SP down as we go. */
	stfpdux f14, SP, r0
	stfpdux f15, SP, r0
	stfpdux f16, SP, r0
	stfpdux f17, SP, r0
	stfpdux f18, SP, r0
	stfpdux f19, SP, r0
	stfpdux f20, SP, r0
	stfpdux f21, SP, r0
	stfpdux f22, SP, r0
	stfpdux f23, SP, r0
	stfpdux f24, SP, r0
	stfpdux f25, SP, r0
	stfpdux f26, SP, r0
	stfpdux f27, SP, r0
	stfpdux f28, SP, r0
	stfpdux f29, SP, r0
	stfpdux f30, SP, r0
	stfpdux f31, SP, r0

	/* Save r31 down to r16, 4 bytes each.  Only r24-r31 are used by
	   the register map above; r16-r23 are saved and restored anyway. */
	stwu r31, -4(SP)
	stwu r30, -4(SP)
	stwu r29, -4(SP)
	stwu r28, -4(SP)
	stwu r27, -4(SP)
	stwu r26, -4(SP)
	stwu r25, -4(SP)
	stwu r24, -4(SP)
	stwu r23, -4(SP)
	stwu r22, -4(SP)
	stwu r21, -4(SP)
	stwu r20, -4(SP)
	stwu r19, -4(SP)
	stwu r18, -4(SP)
	stwu r17, -4(SP)
	stwu r16, -4(SP)

	/* Scale the three strides by 2^BASE_SHIFT.  BASE_SHIFT and SIZE are
	   defined outside this file; presumably this converts element
	   counts into byte offsets (INCY is compared against SIZE below). */
	slwi LDA, LDA, BASE_SHIFT
	slwi INCX, INCX, BASE_SHIFT
	slwi INCY, INCY, BASE_SHIFT

	/* Presumably copies alpha's primary half into its secondary half so
	   the paired fpmul below scales two x values at once. */
	fsmfp alpha, alpha

	/* Nothing to do when M <= 0 or N <= 0; .L999 still restores the
	   registers saved above. */
	cmpwi cr0, M, 0
	ble- .L999
	cmpwi cr0, N, 0
	ble- .L999

	/* INCY2 = 2 * INCY, INC2 = 2 * SIZE.  X is moved back by one stride
	   because every load of x below uses an update form with INCX. */
	add INCY2, INCY, INCY
	li INC2, 2 * SIZE
	sub X, X, INCX

	/* NOTE(review): the result of this alignment test on A is never
	   used - the branch to .L100 is commented out and cr0 is
	   overwritten by the next compare - so the paired loads of A below
	   run whatever A's alignment is. */
	andi. r0, A, 2 * SIZE - 1
# bne .L100
# All cases for aligned A, even LDA

	/* Non-unit y stride goes to .L70; unit stride with Y not a multiple
	   of 2 * SIZE goes to .L40. */
	cmpwi cr0, INCY, SIZE
	bne .L70
	andi. r0, Y, 2 * SIZE - 1
	bne .L40
# A : aligned LDA : even Y : Unit Aligned

	/* Pre-bias A and Y by one pair so the first update-form load with
	   INC2 / INCY2 lands on the first pair.  J = N / 4. */
	sub A, A, INC2
	sub Y, Y, INCY2
	srawi. J, N, 2
	ble .L20
	.align 4

	/* ---- unit-stride, pair-aligned y: four runs of A per pass ---- */
.L11:
	/* Load four x values into the halves of alpha1/alpha2, set up
	   A1..A4 one LDA apart, advance A by 4 * LDA, reset the y read (YL)
	   and write (YS) pointers, and set CTR = M / 8. */
	LFDUX alpha1, X, INCX
	mr A1, A
	add A2, A, LDA
	add A3, A2, LDA
	LFSDUX alpha1, X, INCX
	LFDUX alpha2, X, INCX
	add A4, A3, LDA
	add A, A4, LDA
	mr YL, Y
	LFSDUX alpha2, X, INCX
	fpmul alpha1, alpha, alpha1
	mr YS, Y
	srawi. r0, M, 3
	mtspr CTR, r0
	fpmul alpha2, alpha, alpha2
	ble .L15

	/* Pipeline fill: load the first 8 y values and 8 elements from each
	   of A1..A4, and start the A1 contribution. */
	LFPDUX yl1, YL, INCY2
	LFPDUX yl2, YL, INCY2
	LFPDUX yl3, YL, INCY2
	LFPDUX yl4, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a10, A2, INC2
	LFPDUX a14, A2, INC2
	LFPDUX a3, A3, INC2
	LFPDUX a7, A3, INC2
	LFPDUX a11, A3, INC2
	LFPDUX a15, A3, INC2
	LFPDUX a4, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	LFPDUX a8, A4, INC2
	fxcpmadd ys2, alpha1, a5, yl2
	LFPDUX a12, A4, INC2
	fxcpmadd ys3, alpha1, a9, yl3
	LFPDUX a16, A4, INC2
	fxcpmadd ys4, alpha1, a13, yl4
	bdz .L13
	.align 4

	/* Steady state: finish the A2/A3/A4 contributions for the current
	   8 rows while loading the next 8, store, then start the next
	   block's A1 contribution. */
.L12:
	LFPDUX yl1, YL, INCY2
	fxcsmadd ys1, alpha1, a2, ys1
	LFPDUX a1, A1, INC2
	fxcsmadd ys2, alpha1, a6, ys2
	LFPDUX a5, A1, INC2
	fxcsmadd ys3, alpha1, a10, ys3
	LFPDUX a9, A1, INC2
	fxcsmadd ys4, alpha1, a14, ys4
	LFPDUX a13, A1, INC2
	LFPDUX yl2, YL, INCY2
	fxcpmadd ys1, alpha2, a3, ys1
	LFPDUX a2, A2, INC2
	fxcpmadd ys2, alpha2, a7, ys2
	LFPDUX a6, A2, INC2
	fxcpmadd ys3, alpha2, a11, ys3
	LFPDUX a10, A2, INC2
	fxcpmadd ys4, alpha2, a15, ys4
	LFPDUX a14, A2, INC2
	LFPDUX yl3, YL, INCY2
	fxcsmadd ys1, alpha2, a4, ys1
	LFPDUX a3, A3, INC2
	fxcsmadd ys2, alpha2, a8, ys2
	LFPDUX a7, A3, INC2
	fxcsmadd ys3, alpha2, a12, ys3
	LFPDUX a11, A3, INC2
	fxcsmadd ys4, alpha2, a16, ys4
	LFPDUX a15, A3, INC2
	LFPDUX yl4, YL, INCY2
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	STFPDUX ys3, YS, INCY2
	STFPDUX ys4, YS, INCY2
	LFPDUX a4, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	LFPDUX a8, A4, INC2
	fxcpmadd ys2, alpha1, a5, yl2
	LFPDUX a12, A4, INC2
	fxcpmadd ys3, alpha1, a9, yl3
	LFPDUX a16, A4, INC2
	fxcpmadd ys4, alpha1, a13, yl4
	bdnz .L12
	.align 4

	/* Pipeline drain: finish and store the last block of 8 rows. */
.L13:
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	fxcsmadd ys3, alpha1, a10, ys3
	fxcsmadd ys4, alpha1, a14, ys4
	fxcpmadd ys1, alpha2, a3, ys1
	fxcpmadd ys2, alpha2, a7, ys2
	fxcpmadd ys3, alpha2, a11, ys3
	fxcpmadd ys4, alpha2, a15, ys4
	fxcsmadd ys1, alpha2, a4, ys1
	fxcsmadd ys2, alpha2, a8, ys2
	fxcsmadd ys3, alpha2, a12, ys3
	fxcsmadd ys4, alpha2, a16, ys4
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	STFPDUX ys3, YS, INCY2
	STFPDUX ys4, YS, INCY2
	.align 4

	/* Row remainder (M mod 8): 4 rows, then 2, then 1. */
.L15:
	andi. r0, M, 7
	ble .L19
	andi. r0, M, 4
	ble .L17
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX yl2, YL, INCY2
	LFPDUX a5, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a3, A3, INC2
	LFPDUX a7, A3, INC2
	LFPDUX a4, A4, INC2
	LFPDUX a8, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	fxcpmadd ys1, alpha2, a3, ys1
	fxcpmadd ys2, alpha2, a7, ys2
	fxcsmadd ys1, alpha2, a4, ys1
	fxcsmadd ys2, alpha2, a8, ys2
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	.align 4

.L17:
	andi. r0, M, 2
	ble .L18
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a3, A3, INC2
	LFPDUX a4, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	fxcpmadd ys1, alpha2, a3, ys1
	fxcsmadd ys1, alpha2, a4, ys1
	STFPDUX ys1, YS, INCY2
	.align 4

	/* Last odd row: scalar loads/stores, still stepping by a pair
	   because the pointers trail the next element by one pair. */
.L18:
	andi. r0, M, 1
	ble .L19
	LFDUX yl1, YL, INCY2
	LFDUX a1, A1, INC2
	LFDUX a2, A2, INC2
	LFDUX a3, A3, INC2
	LFDUX a4, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	fxcpmadd ys1, alpha2, a3, ys1
	fxcsmadd ys1, alpha2, a4, ys1
	STFDUX ys1, YS, INCY2
	.align 4

	/* Next group of four. */
.L19:
	addi J, J, -1
	cmpi cr0, 0, J, 0
	bgt .L11
	.align 4

	/* ---- unit-stride, pair-aligned y: two remaining runs (N & 2) ---- */
.L20:
	andi. J, N, 2
	ble .L30
	LFDUX alpha1, X, INCX
	mr A1, A
	add A2, A, LDA
	add A, A2, LDA
	LFSDUX alpha1, X, INCX
	mr YL, Y
	mr YS, Y
	fpmul alpha1, alpha, alpha1
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L25
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX yl2, YL, INCY2
	LFPDUX a5, A1, INC2
	LFPDUX yl3, YL, INCY2
	LFPDUX a9, A1, INC2
	LFPDUX yl4, YL, INCY2
	LFPDUX a13, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a10, A2, INC2
	LFPDUX a14, A2, INC2
	bdz .L23
	.align 4

	/* 8 rows per iteration; loads for the next block are interleaved
	   with the multiply-adds for the current one. */
.L22:
	fxcpmadd ys1, alpha1, a1, yl1
	LFPDUX a1, A1, INC2
	LFPDUX yl1, YL, INCY2
	fxcpmadd ys2, alpha1, a5, yl2
	LFPDUX a5, A1, INC2
	LFPDUX yl2, YL, INCY2
	fxcpmadd ys3, alpha1, a9, yl3
	LFPDUX a9, A1, INC2
	LFPDUX yl3, YL, INCY2
	fxcpmadd ys4, alpha1, a13, yl4
	LFPDUX a13, A1, INC2
	LFPDUX yl4, YL, INCY2
	fxcsmadd ys1, alpha1, a2, ys1
	LFPDUX a2, A2, INC2
	fxcsmadd ys2, alpha1, a6, ys2
	LFPDUX a6, A2, INC2
	fxcsmadd ys3, alpha1, a10, ys3
	LFPDUX a10, A2, INC2
	fxcsmadd ys4, alpha1, a14, ys4
	LFPDUX a14, A2, INC2
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	STFPDUX ys3, YS, INCY2
	STFPDUX ys4, YS, INCY2
	bdnz .L22
	.align 4

	/* Drain the last block of 8 rows. */
.L23:
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcpmadd ys3, alpha1, a9, yl3
	fxcpmadd ys4, alpha1, a13, yl4
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	fxcsmadd ys3, alpha1, a10, ys3
	fxcsmadd ys4, alpha1, a14, ys4
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	STFPDUX ys3, YS, INCY2
	STFPDUX ys4, YS, INCY2
	.align 4

	/* Row remainder: 4, 2, 1. */
.L25:
	andi. r0, M, 7
	ble .L30
	andi. r0, M, 4
	ble .L27
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX yl2, YL, INCY2
	LFPDUX a5, A1, INC2
	LFPDUX a6, A2, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcsmadd ys2, alpha1, a6, ys2
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	.align 4

.L27:
	andi. r0, M, 2
	ble .L28
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	STFPDUX ys1, YS, INCY2
	.align 4

.L28:
	andi. r0, M, 1
	ble .L30
	LFDUX yl1, YL, INCY2
	LFDUX a1, A1, INC2
	LFDUX a2, A2, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	STFDUX ys1, YS, INCY2
	.align 4

	/* ---- unit-stride, pair-aligned y: last single run (N & 1) ----
	   Only one x value is needed, so a scalar fmul scales it. */
.L30:
	andi. J, N, 1
	ble .L999
	LFDUX alpha1, X, INCX
	mr A1, A
	mr YL, Y
	mr YS, Y
	fmul alpha1, alpha, alpha1
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L35
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX yl2, YL, INCY2
	LFPDUX a5, A1, INC2
	LFPDUX yl3, YL, INCY2
	LFPDUX a9, A1, INC2
	LFPDUX yl4, YL, INCY2
	LFPDUX a13, A1, INC2
	bdz .L33
	.align 4

.L32:
	fxcpmadd ys1, alpha1, a1, yl1
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	fxcpmadd ys2, alpha1, a5, yl2
	LFPDUX yl2, YL, INCY2
	LFPDUX a5, A1, INC2
	fxcpmadd ys3, alpha1, a9, yl3
	LFPDUX yl3, YL, INCY2
	LFPDUX a9, A1, INC2
	fxcpmadd ys4, alpha1, a13, yl4
	LFPDUX yl4, YL, INCY2
	LFPDUX a13, A1, INC2
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	STFPDUX ys3, YS, INCY2
	STFPDUX ys4, YS, INCY2
	bdnz .L32
	.align 4

.L33:
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcpmadd ys3, alpha1, a9, yl3
	fxcpmadd ys4, alpha1, a13, yl4
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	STFPDUX ys3, YS, INCY2
	STFPDUX ys4, YS, INCY2
	.align 4

.L35:
	andi. r0, M, 7
	ble .L999
	andi. r0, M, 4
	ble .L37
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX yl2, YL, INCY2
	LFPDUX a5, A1, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	STFPDUX ys1, YS, INCY2
	STFPDUX ys2, YS, INCY2
	.align 4

.L37:
	andi. r0, M, 2
	ble .L38
	LFPDUX yl1, YL, INCY2
	LFPDUX a1, A1, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	STFPDUX ys1, YS, INCY2
	.align 4

.L38:
	andi. r0, M, 1
	ble .L999
	LFDUX yl1, YL, INCY2
	LFDUX a1, A1, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	STFDUX ys1, YS, INCY2
	b .L999
	.align 4

.L40:
# A : aligned LDA : even Y : Unaligned

	/*
	 * Unit-stride y that does not start on a 2 * SIZE boundary.
	 * A is still read in pairs, so each pair of results straddles two
	 * of the pairs that y is accessed in.  The code therefore reads y
	 * with LFXDUX and splices neighbouring registers with fsmr/fmr
	 * before the multiply-adds, then shifts the results back the same
	 * way before writing them with STFXDUX.  ys1 carries one pending
	 * value between stores: it starts as the element just below y
	 * (reloaded by LFSDX and written back unchanged with y[0]) and is
	 * refilled from the newest result after each block.
	 * NOTE(review): the exact half-register semantics are inferred from
	 * the mnemonics; confirm against the ISA manual.
	 */
	sub A, A, INC2
	sub Y, Y, INCY
	srawi. J, N, 2
	ble .L50
	.align 4

	/* ---- unaligned y: four runs of A per pass ---- */
.L41:
	LFDUX alpha1, X, INCX
	LFSDUX alpha1, X, INCX
	LFDUX alpha2, X, INCX
	LFSDUX alpha2, X, INCX
	fpmul alpha1, alpha, alpha1
	fpmul alpha2, alpha, alpha2
	mr A1, A
	add A2, A, LDA
	add A3, A2, LDA
	add A4, A3, LDA
	add A, A4, LDA
	mr YL, Y
	sub YS, Y, INCY2
	/* Non-update loads: ys1 gets the value stored one element below
	   y[0] (address YS + INCY2), yl1 gets y[0] (address YL + INCY). */
	LFSDX ys1, YS, INCY2
	LFDX yl1, YL, INCY
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L45
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	LFXDUX yl2, YL, INCY2
	LFXDUX yl3, YL, INCY2
	LFXDUX yl4, YL, INCY2
	LFXDUX yl5, YL, INCY2
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a10, A2, INC2
	LFPDUX a14, A2, INC2
	LFPDUX a3, A3, INC2
	LFPDUX a7, A3, INC2
	LFPDUX a11, A3, INC2
	LFPDUX a15, A3, INC2
	LFPDUX a4, A4, INC2
	/* Splice: each yl register takes one half from its neighbour so
	   that yl1..yl4 line up with the pairs read from A. */
	fsmr yl1, yl2
	LFPDUX a8, A4, INC2
	fsmr yl2, yl3
	LFPDUX a12, A4, INC2
	fsmr yl3, yl4
	LFPDUX a16, A4, INC2
	fsmr yl4, yl5
	bdz .L43
	.align 4

.L42:
	fxcpmadd ys2, alpha1, a1, yl1
	LFPDUX a1, A1, INC2
	fxcpmadd ys3, alpha1, a5, yl2
	LFPDUX a5, A1, INC2
	fxcpmadd ys4, alpha1, a9, yl3
	LFPDUX a9, A1, INC2
	fxcpmadd ys5, alpha1, a13, yl4
	LFPDUX a13, A1, INC2
	fxcsmadd ys2, alpha1, a2, ys2
	LFPDUX a2, A2, INC2
	fxcsmadd ys3, alpha1, a6, ys3
	LFPDUX a6, A2, INC2
	fxcsmadd ys4, alpha1, a10, ys4
	LFPDUX a10, A2, INC2
	fxcsmadd ys5, alpha1, a14, ys5
	LFPDUX a14, A2, INC2
	fxcpmadd ys2, alpha2, a3, ys2
	LFPDUX a3, A3, INC2
	fxcpmadd ys3, alpha2, a7, ys3
	LFPDUX a7, A3, INC2
	fxcpmadd ys4, alpha2, a11, ys4
	LFPDUX a11, A3, INC2
	fxcpmadd ys5, alpha2, a15, ys5
	LFPDUX a15, A3, INC2
	fxcsmadd ys2, alpha2, a4, ys2
	LFPDUX a4, A4, INC2
	fxcsmadd ys3, alpha2, a8, ys3
	LFPDUX a8, A4, INC2
	fxcsmadd ys4, alpha2, a12, ys4
	LFPDUX a12, A4, INC2
	fxcsmadd ys5, alpha2, a16, ys5
	LFPDUX a16, A4, INC2
	/* Shift results down one register for storing, reload y for the
	   next block, and carry the newest value forward in ys1. */
	fmr yl1, yl5
	LFXDUX yl2, YL, INCY2
	fmr ys1, ys2
	LFXDUX yl3, YL, INCY2
	fmr ys2, ys3
	LFXDUX yl4, YL, INCY2
	fmr ys3, ys4
	LFXDUX yl5, YL, INCY2
	fmr ys4, ys5
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys5
	STFXDUX ys2, YS, INCY2
	fsmr yl1, yl2
	STFXDUX ys3, YS, INCY2
	fsmr yl2, yl3
	STFXDUX ys4, YS, INCY2
	fsmr yl3, yl4
	fsmr yl4, yl5
	bdnz .L42
	.align 4

.L43:
	fxcpmadd ys2, alpha1, a1, yl1
	fxcpmadd ys3, alpha1, a5, yl2
	fxcpmadd ys4, alpha1, a9, yl3
	fxcpmadd ys5, alpha1, a13, yl4
	fxcsmadd ys2, alpha1, a2, ys2
	fxcsmadd ys3, alpha1, a6, ys3
	fxcsmadd ys4, alpha1, a10, ys4
	fxcsmadd ys5, alpha1, a14, ys5
	fxcpmadd ys2, alpha2, a3, ys2
	fxcpmadd ys3, alpha2, a7, ys3
	fxcpmadd ys4, alpha2, a11, ys4
	fxcpmadd ys5, alpha2, a15, ys5
	fxcsmadd ys2, alpha2, a4, ys2
	fxcsmadd ys3, alpha2, a8, ys3
	fxcsmadd ys4, alpha2, a12, ys4
	fxcsmadd ys5, alpha2, a16, ys5
	fmr ys1, ys2
	fmr ys2, ys3
	fmr ys3, ys4
	fmr ys4, ys5
	fmr yl1, yl5
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys5
	STFXDUX ys2, YS, INCY2
	STFXDUX ys3, YS, INCY2
	STFXDUX ys4, YS, INCY2
	.align 4

	/* Row remainder.  When no rows remain, .L48 flushes the value still
	   pending in ys1. */
.L45:
	andi. r0, M, 7
	ble .L48
	andi. r0, M, 4
	ble .L46
	LFXDUX yl2, YL, INCY2
	LFXDUX yl3, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a3, A3, INC2
	LFPDUX a7, A3, INC2
	LFPDUX a4, A4, INC2
	fsmr yl1, yl2
	LFPDUX a8, A4, INC2
	fsmr yl2, yl3
	fxcpmadd ys2, alpha1, a1, yl1
	fxcpmadd ys3, alpha1, a5, yl2
	fxcsmadd ys2, alpha1, a2, ys2
	fxcsmadd ys3, alpha1, a6, ys3
	fxcpmadd ys2, alpha2, a3, ys2
	fxcpmadd ys3, alpha2, a7, ys3
	fxcsmadd ys2, alpha2, a4, ys2
	fxcsmadd ys3, alpha2, a8, ys3
	fmr yl1, yl3
	fmr ys1, ys2
	fmr ys2, ys3
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys3
	STFXDUX ys2, YS, INCY2
	.align 4

.L46:
	andi. r0, M, 2
	ble .L47
	LFXDUX yl2, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a3, A3, INC2
	LFPDUX a4, A4, INC2
	fsmr yl1, yl2
	fxcpmadd ys2, alpha1, a1, yl1
	fxcsmadd ys2, alpha1, a2, ys2
	fxcpmadd ys2, alpha2, a3, ys2
	fxcsmadd ys2, alpha2, a4, ys2
	fmr yl1, yl2
	fmr ys1, ys2
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys2
	.align 4

	/* Last odd row: the pending value and the final result are written
	   with two separate scalar stores. */
.L47:
	andi. r0, M, 1
	ble .L48
	LFDUX a1, A1, INC2
	LFDUX a2, A2, INC2
	LFDUX a3, A3, INC2
	LFDUX a4, A4, INC2
	fxcpmadd ys2, alpha1, a1, yl1
	fxcsmadd ys2, alpha1, a2, ys2
	fxcpmadd ys2, alpha2, a3, ys2
	fxcsmadd ys2, alpha2, a4, ys2
	STFSDX ys1, YS, INCY2
	add YS, YS, INCY
	STFDX ys2, YS, INCY2
	b .L49
	.align 4

	/* Even M: flush the pending value held in ys1. */
.L48:
	STFSDUX ys1, YS, INCY2
	.align 4

.L49:
	addi J, J, -1
	cmpi cr0, 0, J, 0
	bgt .L41
	.align 4

	/* ---- unaligned y: two remaining runs (N & 2) ---- */
.L50:
	andi. J, N, 2
	ble .L60
	LFDUX alpha1, X, INCX
	mr A1, A
	add A2, A, LDA
	add A, A2, LDA
	LFSDUX alpha1, X, INCX
	mr YL, Y
	sub YS, Y, INCY2
	fpmul alpha1, alpha, alpha1
	LFSDX ys1, YS, INCY2
	LFDX yl1, YL, INCY
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L55
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	LFXDUX yl2, YL, INCY2
	LFXDUX yl3, YL, INCY2
	LFXDUX yl4, YL, INCY2
	LFXDUX yl5, YL, INCY2
	LFPDUX a2, A2, INC2
	fsmr yl1, yl2
	LFPDUX a6, A2, INC2
	fsmr yl2, yl3
	LFPDUX a10, A2, INC2
	fsmr yl3, yl4
	LFPDUX a14, A2, INC2
	fsmr yl4, yl5
	bdz .L53
	.align 4

.L52:
	fxcpmadd ys2, alpha1, a1, yl1
	LFPDUX a1, A1, INC2
	fxcpmadd ys3, alpha1, a5, yl2
	LFPDUX a5, A1, INC2
	fxcpmadd ys4, alpha1, a9, yl3
	LFPDUX a9, A1, INC2
	fxcpmadd ys5, alpha1, a13, yl4
	LFPDUX a13, A1, INC2
	fxcsmadd ys2, alpha1, a2, ys2
	LFPDUX a2, A2, INC2
	fxcsmadd ys3, alpha1, a6, ys3
	LFPDUX a6, A2, INC2
	fxcsmadd ys4, alpha1, a10, ys4
	LFPDUX a10, A2, INC2
	fxcsmadd ys5, alpha1, a14, ys5
	LFPDUX a14, A2, INC2
	fmr yl1, yl5
	LFXDUX yl2, YL, INCY2
	fmr ys1, ys2
	LFXDUX yl3, YL, INCY2
	fmr ys2, ys3
	LFXDUX yl4, YL, INCY2
	fmr ys3, ys4
	LFXDUX yl5, YL, INCY2
	fmr ys4, ys5
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys5
	STFXDUX ys2, YS, INCY2
	fsmr yl1, yl2
	STFXDUX ys3, YS, INCY2
	fsmr yl2, yl3
	STFXDUX ys4, YS, INCY2
	fsmr yl3, yl4
	fsmr yl4, yl5
	bdnz .L52
	.align 4

.L53:
	fxcpmadd ys2, alpha1, a1, yl1
	fxcpmadd ys3, alpha1, a5, yl2
	fxcpmadd ys4, alpha1, a9, yl3
	fxcpmadd ys5, alpha1, a13, yl4
	fxcsmadd ys2, alpha1, a2, ys2
	fxcsmadd ys3, alpha1, a6, ys3
	fxcsmadd ys4, alpha1, a10, ys4
	fxcsmadd ys5, alpha1, a14, ys5
	fmr yl1, yl5
	fmr ys1, ys2
	fmr ys2, ys3
	fmr ys3, ys4
	fmr ys4, ys5
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys5
	STFXDUX ys2, YS, INCY2
	STFXDUX ys3, YS, INCY2
	STFXDUX ys4, YS, INCY2
	.align 4

.L55:
	andi. r0, M, 7
	ble .L59
	andi. r0, M, 4
	ble .L57
	LFXDUX yl2, YL, INCY2
	LFXDUX yl3, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a6, A2, INC2
	fsmr yl1, yl2
	fsmr yl2, yl3
	fxcpmadd ys2, alpha1, a1, yl1
	fxcsmadd ys2, alpha1, a2, ys2
	fxcpmadd ys3, alpha1, a5, yl2
	fxcsmadd ys3, alpha1, a6, ys3
	fmr yl1, yl3
	fmr ys1, ys2
	fmr ys2, ys3
	STFXDUX ys1, YS, INCY2
	STFXDUX ys2, YS, INCY2
	fsmr ys1, ys3
	.align 4

.L57:
	andi. r0, M, 2
	ble .L58
	LFXDUX yl2, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	fsmr yl1, yl2
	fxcpmadd ys2, alpha1, a1, yl1
	fxcsmadd ys2, alpha1, a2, ys2
	fmr yl1, yl2
	fmr ys1, ys2
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys2
	.align 4

	/* Last odd row, done with scalar fmadd.  fxmr presumably swaps
	   alpha1's halves into alpha2 so the second x value is available
	   to the scalar multiply.  The result and the pending value are
	   then written together by one STFXDUX. */
.L58:
	andi. r0, M, 1
	ble .L59
	LFDUX a1, A1, INC2
	LFDUX a2, A2, INC2
	fxmr alpha2, alpha1
	fmadd ys1, alpha1, a1, yl1
	fmadd ys1, alpha2, a2, ys1
	STFXDUX ys1, YS, INCY2
	b .L60
	.align 4

.L59:
	STFSDUX ys1, YS, INCY2
	.align 4

	/* ---- unaligned y: last single run (N & 1) ---- */
.L60:
	andi. J, N, 1
	ble .L999
	LFDUX alpha1, X, INCX
	mr A1, A
	mr YL, Y
	sub YS, Y, INCY2
	fmul alpha1, alpha, alpha1
	LFSDX ys1, YS, INCY2
	LFDX yl1, YL, INCY
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L65
	LFXDUX yl2, YL, INCY2
	LFXDUX yl3, YL, INCY2
	LFXDUX yl4, YL, INCY2
	LFXDUX yl5, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	fsmr yl1, yl2
	fsmr yl2, yl3
	fsmr yl3, yl4
	fsmr yl4, yl5
	bdz .L63
	.align 4

.L62:
	fxcpmadd ys2, alpha1, a1, yl1
	LFPDUX a1, A1, INC2
	fxcpmadd ys3, alpha1, a5, yl2
	LFXDUX yl2, YL, INCY2
	fxcpmadd ys4, alpha1, a9, yl3
	LFXDUX yl3, YL, INCY2
	fxcpmadd ys5, alpha1, a13, yl4
	LFXDUX yl4, YL, INCY2
	fmr yl1, yl5
	LFXDUX yl5, YL, INCY2
	fmr ys1, ys2
	LFPDUX a5, A1, INC2
	fmr ys2, ys3
	LFPDUX a9, A1, INC2
	fmr ys3, ys4
	LFPDUX a13, A1, INC2
	fmr ys4, ys5
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys5
	STFXDUX ys2, YS, INCY2
	fsmr yl1, yl2
	STFXDUX ys3, YS, INCY2
	fsmr yl2, yl3
	STFXDUX ys4, YS, INCY2
	fsmr yl3, yl4
	fsmr yl4, yl5
	bdnz .L62
	.align 4

.L63:
	fxcpmadd ys2, alpha1, a1, yl1
	fxcpmadd ys3, alpha1, a5, yl2
	fxcpmadd ys4, alpha1, a9, yl3
	fxcpmadd ys5, alpha1, a13, yl4
	fmr yl1, yl5
	fmr ys1, ys2
	fmr ys2, ys3
	fmr ys3, ys4
	fmr ys4, ys5
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys5
	STFXDUX ys2, YS, INCY2
	STFXDUX ys3, YS, INCY2
	STFXDUX ys4, YS, INCY2
	.align 4

.L65:
	andi. r0, M, 7
	ble .L69
	andi. r0, M, 4
	ble .L67
	LFXDUX yl2, YL, INCY2
	LFXDUX yl3, YL, INCY2
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	fsmr yl1, yl2
	fsmr yl2, yl3
	fxcpmadd ys2, alpha1, a1, yl1
	fxcpmadd ys3, alpha1, a5, yl2
	fmr yl1, yl3
	fmr ys1, ys2
	fmr ys2, ys3
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys3
	STFXDUX ys2, YS, INCY2
	.align 4

.L67:
	andi. r0, M, 2
	ble .L68
	LFPDUX a1, A1, INC2
	LFXDUX yl2, YL, INCY2
	fsmr yl1, yl2
	fxcpmadd ys2, alpha1, a1, yl1
	fmr yl1, yl2
	fmr ys1, ys2
	STFXDUX ys1, YS, INCY2
	fsmr ys1, ys2
	.align 4

.L68:
	andi. r0, M, 1
	ble .L69
	LFDUX a1, A1, INC2
	fmadd ys1, alpha1, a1, yl1
	STFXDUX ys1, YS, INCY2
	b .L999
	.align 4

.L69:
	STFSDUX ys1, YS, INCY2
	b .L999
	.align 4

	/*
	 * General y stride (INCY != SIZE).  y cannot be accessed in pairs
	 * here, so each pair is gathered with two single-element loads
	 * stepping by INCY and scattered back with two single-element
	 * stores.  A is still read in pairs.
	 */
.L70:
	sub A, A, INC2
	sub Y, Y, INCY
	srawi. J, N, 2
	ble .L80
	.align 4

	/* ---- strided y: four runs of A per pass ---- */
.L71:
	LFDUX alpha1, X, INCX
	mr A1, A
	add A2, A, LDA
	add A3, A2, LDA
	LFSDUX alpha1, X, INCX
	LFDUX alpha2, X, INCX
	add A4, A3, LDA
	add A, A4, LDA
	mr YL, Y
	LFSDUX alpha2, X, INCX
	fpmul alpha1, alpha, alpha1
	mr YS, Y
	srawi. r0, M, 3
	mtspr CTR, r0
	fpmul alpha2, alpha, alpha2
	ble .L75
	LFDUX yl1, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	LFSDUX yl1, YL, INCY
	LFDUX yl2, YL, INCY
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a10, A2, INC2
	LFPDUX a14, A2, INC2
	LFSDUX yl2, YL, INCY
	LFDUX yl3, YL, INCY
	LFPDUX a3, A3, INC2
	LFPDUX a7, A3, INC2
	LFPDUX a11, A3, INC2
	LFPDUX a15, A3, INC2
	LFSDUX yl3, YL, INCY
	LFDUX yl4, YL, INCY
	LFPDUX a4, A4, INC2
	LFPDUX a8, A4, INC2
	LFPDUX a12, A4, INC2
	LFPDUX a16, A4, INC2
	LFSDUX yl4, YL, INCY
	bdz .L73
	.align 4

.L72:
	fxcpmadd ys1, alpha1, a1, yl1
	LFPDUX a1, A1, INC2
	LFDUX yl1, YL, INCY
	fxcpmadd ys2, alpha1, a5, yl2
	LFPDUX a5, A1, INC2
	fxcpmadd ys3, alpha1, a9, yl3
	LFPDUX a9, A1, INC2
	fxcpmadd ys4, alpha1, a13, yl4
	LFPDUX a13, A1, INC2
	LFSDUX yl1, YL, INCY
	fxcsmadd ys1, alpha1, a2, ys1
	LFPDUX a2, A2, INC2
	LFDUX yl2, YL, INCY
	fxcsmadd ys2, alpha1, a6, ys2
	LFPDUX a6, A2, INC2
	fxcsmadd ys3, alpha1, a10, ys3
	LFPDUX a10, A2, INC2
	fxcsmadd ys4, alpha1, a14, ys4
	LFPDUX a14, A2, INC2
	LFSDUX yl2, YL, INCY
	fxcpmadd ys1, alpha2, a3, ys1
	LFPDUX a3, A3, INC2
	LFDUX yl3, YL, INCY
	fxcpmadd ys2, alpha2, a7, ys2
	LFPDUX a7, A3, INC2
	fxcpmadd ys3, alpha2, a11, ys3
	LFPDUX a11, A3, INC2
	fxcpmadd ys4, alpha2, a15, ys4
	LFPDUX a15, A3, INC2
	LFSDUX yl3, YL, INCY
	fxcsmadd ys1, alpha2, a4, ys1
	LFPDUX a4, A4, INC2
	LFDUX yl4, YL, INCY
	fxcsmadd ys2, alpha2, a8, ys2
	LFPDUX a8, A4, INC2
	fxcsmadd ys3, alpha2, a12, ys3
	LFPDUX a12, A4, INC2
	fxcsmadd ys4, alpha2, a16, ys4
	LFPDUX a16, A4, INC2
	LFSDUX yl4, YL, INCY
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFSDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	STFSDUX ys4, YS, INCY
	bdnz .L72
	.align 4

.L73:
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcpmadd ys3, alpha1, a9, yl3
	fxcpmadd ys4, alpha1, a13, yl4
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	fxcsmadd ys3, alpha1, a10, ys3
	fxcsmadd ys4, alpha1, a14, ys4
	fxcpmadd ys1, alpha2, a3, ys1
	fxcpmadd ys2, alpha2, a7, ys2
	fxcpmadd ys3, alpha2, a11, ys3
	fxcpmadd ys4, alpha2, a15, ys4
	fxcsmadd ys1, alpha2, a4, ys1
	fxcsmadd ys2, alpha2, a8, ys2
	fxcsmadd ys3, alpha2, a12, ys3
	fxcsmadd ys4, alpha2, a16, ys4
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFSDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	STFSDUX ys4, YS, INCY
	.align 4

.L75:
	andi. r0, M, 7
	ble .L79
	andi. r0, M, 4
	ble .L77
	LFDUX yl1, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFSDUX yl1, YL, INCY
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFDUX yl2, YL, INCY
	LFPDUX a3, A3, INC2
	LFPDUX a7, A3, INC2
	LFSDUX yl2, YL, INCY
	LFPDUX a4, A4, INC2
	LFPDUX a8, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	fxcpmadd ys1, alpha2, a3, ys1
	fxcpmadd ys2, alpha2, a7, ys2
	fxcsmadd ys1, alpha2, a4, ys1
	fxcsmadd ys2, alpha2, a8, ys2
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	.align 4

.L77:
	andi. r0, M, 2
	ble .L78
	LFDUX yl1, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFSDUX yl1, YL, INCY
	LFPDUX a3, A3, INC2
	LFPDUX a4, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	fxcpmadd ys1, alpha2, a3, ys1
	fxcsmadd ys1, alpha2, a4, ys1
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	.align 4

.L78:
	andi. r0, M, 1
	ble .L79
	LFDUX yl1, YL, INCY
	LFDUX a1, A1, INC2
	LFDUX a2, A2, INC2
	LFDUX a3, A3, INC2
	LFDUX a4, A4, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	fxcpmadd ys1, alpha2, a3, ys1
	fxcsmadd ys1, alpha2, a4, ys1
	STFDUX ys1, YS, INCY
	.align 4

.L79:
	addi J, J, -1
	cmpi cr0, 0, J, 0
	bgt .L71
	.align 4

	/* ---- strided y: two remaining runs (N & 2) ----
	   Register use differs from the 4-wide loop: a9..a12 temporarily
	   hold the odd-numbered y elements (merged into yl1..yl4 by fsmfp),
	   and rows 4-7 of A1/A2 live in a3,a7 / a4,a8. */
.L80:
	andi. J, N, 2
	ble .L90
	LFDUX alpha1, X, INCX
	mr A1, A
	add A2, A, LDA
	add A, A2, LDA
	LFSDUX alpha1, X, INCX
	mr YL, Y
	mr YS, Y
	fpmul alpha1, alpha, alpha1
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L85
	LFDUX yl1, YL, INCY
	LFDUX a9, YL, INCY
	LFDUX yl2, YL, INCY
	LFDUX a10, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a3, A1, INC2
	LFPDUX a7, A1, INC2
	LFDUX yl3, YL, INCY
	LFDUX a11, YL, INCY
	LFDUX yl4, YL, INCY
	LFDUX a12, YL, INCY
	LFPDUX a2, A2, INC2
	LFPDUX a6, A2, INC2
	LFPDUX a4, A2, INC2
	LFPDUX a8, A2, INC2
	bdz .L83
	.align 4

.L82:
	fsmfp yl1, a9
	fsmfp yl2, a10
	fsmfp yl3, a11
	fsmfp yl4, a12
	fxcpmadd ys1, alpha1, a1, yl1
	LFDUX yl1, YL, INCY
	LFDUX a9, YL, INCY
	LFPDUX a1, A1, INC2
	fxcpmadd ys2, alpha1, a5, yl2
	LFDUX yl2, YL, INCY
	LFDUX a10, YL, INCY
	LFPDUX a5, A1, INC2
	fxcpmadd ys3, alpha1, a3, yl3
	LFDUX yl3, YL, INCY
	LFDUX a11, YL, INCY
	LFPDUX a3, A1, INC2
	fxcpmadd ys4, alpha1, a7, yl4
	LFDUX yl4, YL, INCY
	LFDUX a12, YL, INCY
	LFPDUX a7, A1, INC2
	fxcsmadd ys1, alpha1, a2, ys1
	LFPDUX a2, A2, INC2
	fxcsmadd ys2, alpha1, a6, ys2
	LFPDUX a6, A2, INC2
	fxcsmadd ys3, alpha1, a4, ys3
	LFPDUX a4, A2, INC2
	fxcsmadd ys4, alpha1, a8, ys4
	LFPDUX a8, A2, INC2
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFSDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	STFSDUX ys4, YS, INCY
	bdnz .L82
	.align 4

.L83:
	fsmfp yl1, a9
	fsmfp yl2, a10
	fsmfp yl3, a11
	fsmfp yl4, a12
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcpmadd ys3, alpha1, a3, yl3
	fxcpmadd ys4, alpha1, a7, yl4
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	fxcsmadd ys3, alpha1, a4, ys3
	fxcsmadd ys4, alpha1, a8, ys4
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFSDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	STFSDUX ys4, YS, INCY
	.align 4

.L85:
	andi. r0, M, 7
	ble .L90
	andi. r0, M, 4
	ble .L87
	LFDUX yl1, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFSDUX yl1, YL, INCY
	LFDUX yl2, YL, INCY
	LFPDUX a5, A1, INC2
	LFPDUX a6, A2, INC2
	LFSDUX yl2, YL, INCY
	fxcpmadd ys1, alpha1, a1, yl1
	fxcpmadd ys2, alpha1, a5, yl2
	fxcsmadd ys1, alpha1, a2, ys1
	fxcsmadd ys2, alpha1, a6, ys2
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	.align 4

.L87:
	andi. r0, M, 2
	ble .L88
	LFDUX yl1, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a2, A2, INC2
	LFSDUX yl1, YL, INCY
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	.align 4

.L88:
	andi. r0, M, 1
	ble .L90
	LFDUX yl1, YL, INCY
	LFDUX a1, A1, INC2
	LFDUX a2, A2, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	fxcsmadd ys1, alpha1, a2, ys1
	STFDUX ys1, YS, INCY
	.align 4

	/* ---- strided y: last single run (N & 1) ----
	   Here a2/a4/a6/a8 are reused as y pair registers: their secondary
	   halves are loaded directly and fmr copies in the other half from
	   yl1..yl4 before the multiply-add. */
.L90:
	andi. J, N, 1
	ble .L999
	LFDUX alpha1, X, INCX
	mr A1, A
	mr YL, Y
	mr YS, Y
	fmul alpha1, alpha, alpha1
	srawi. r0, M, 3
	mtspr CTR, r0
	ble .L95
	LFDUX yl1, YL, INCY
	LFSDUX a2, YL, INCY
	LFDUX yl2, YL, INCY
	LFSDUX a4, YL, INCY
	LFDUX yl3, YL, INCY
	LFSDUX a6, YL, INCY
	LFDUX yl4, YL, INCY
	LFSDUX a8, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	bdz .L93
	.align 4

.L92:
	fmr a2, yl1
	fmr a4, yl2
	fmr a6, yl3
	fmr a8, yl4
	fxcpmadd ys1, alpha1, a1, a2
	LFDUX yl1, YL, INCY
	LFSDUX a2, YL, INCY
	fxcpmadd ys2, alpha1, a5, a4
	LFDUX yl2, YL, INCY
	LFSDUX a4, YL, INCY
	fxcpmadd ys3, alpha1, a9, a6
	LFDUX yl3, YL, INCY
	LFSDUX a6, YL, INCY
	fxcpmadd ys4, alpha1, a13, a8
	LFDUX yl4, YL, INCY
	LFSDUX a8, YL, INCY
	LFPDUX a1, A1, INC2
	LFPDUX a5, A1, INC2
	LFPDUX a9, A1, INC2
	LFPDUX a13, A1, INC2
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFSDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	STFSDUX ys4, YS, INCY
	bdnz .L92
	.align 4

.L93:
	fmr a2, yl1
	fmr a4, yl2
	fmr a6, yl3
	fmr a8, yl4
	fxcpmadd ys1, alpha1, a1, a2
	fxcpmadd ys2, alpha1, a5, a4
	fxcpmadd ys3, alpha1, a9, a6
	fxcpmadd ys4, alpha1, a13, a8
	STFDUX ys1, YS, INCY
	STFSDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFSDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFSDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	STFSDUX ys4, YS, INCY
	.align 4

	/* Row remainder.  Note the swapped operand roles in the 4- and
	   2-row tails: the pair from A selects the multiplier half
	   (fxcpmadd / fxcsmadd on a1, a2) and each y element is handled as
	   a scalar in its own register and stored with STFDUX. */
.L95:
	andi. r0, M, 7
	ble .L999
	andi. r0, M, 4
	ble .L97
	LFPDUX a1, A1, INC2
	LFDUX yl1, YL, INCY
	LFDUX yl2, YL, INCY
	LFPDUX a2, A1, INC2
	LFDUX yl3, YL, INCY
	LFDUX yl4, YL, INCY
	fxcpmadd ys1, a1, alpha1, yl1
	fxcsmadd ys2, a1, alpha1, yl2
	fxcpmadd ys3, a2, alpha1, yl3
	fxcsmadd ys4, a2, alpha1, yl4
	STFDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	STFDUX ys3, YS, INCY
	STFDUX ys4, YS, INCY
	.align 4

.L97:
	andi. r0, M, 2
	ble .L98
	LFPDUX a1, A1, INC2
	LFDUX yl1, YL, INCY
	LFDUX yl2, YL, INCY
	fxcpmadd ys1, a1, alpha1, yl1
	fxcsmadd ys2, a1, alpha1, yl2
	STFDUX ys1, YS, INCY
	STFDUX ys2, YS, INCY
	.align 4

.L98:
	andi. r0, M, 1
	ble .L999
	LFDUX yl1, YL, INCY
	LFDUX a1, A1, INC2
	fxcpmadd ys1, alpha1, a1, yl1
	STFDUX ys1, YS, INCY
	b .L999
	.align 4

	/*
	 * Common exit: undo the prologue in reverse.  SP is first backed up
	 * by 4 so the update-form lwzu sequence starts at the slot where
	 * r16 was stored; after r31 is reloaded SP is adjusted by -12 so
	 * that the first lfpdux (SP + 16) lands on the slot holding f31.
	 */
.L999:
	addi SP, SP, -4
	lwzu r16, 4(SP)
	lwzu r17, 4(SP)
	lwzu r18, 4(SP)
	lwzu r19, 4(SP)
	lwzu r20, 4(SP)
	lwzu r21, 4(SP)
	lwzu r22, 4(SP)
	lwzu r23, 4(SP)
	lwzu r24, 4(SP)
	lwzu r25, 4(SP)
	lwzu r26, 4(SP)
	lwzu r27, 4(SP)
	lwzu r28, 4(SP)
	lwzu r29, 4(SP)
	lwzu r30, 4(SP)
	lwzu r31, 4(SP)
	subi SP, SP, 12
	li r0, 16
	lfpdux f31, SP, r0
	lfpdux f30, SP, r0
	lfpdux f29, SP, r0
	lfpdux f28, SP, r0
	lfpdux f27, SP, r0
	lfpdux f26, SP, r0
	lfpdux f25, SP, r0
	lfpdux f24, SP, r0
	lfpdux f23, SP, r0
	lfpdux f22, SP, r0
	lfpdux f21, SP, r0
	lfpdux f20, SP, r0
	lfpdux f19, SP, r0
	lfpdux f18, SP, r0
	lfpdux f17, SP, r0
	lfpdux f16, SP, r0
	lfpdux f15, SP, r0
	lfpdux f14, SP, r0
	/* Step past the f14 slot to return SP to its value at entry. */
	addi SP, SP, 16
	blr

	EPILOGUE