/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
/* Integer registers. M, N, A, LDA, X, INCX and Y are used as incoming */
/* values (nothing in this file initialises them); INCY is loaded from */
/* 8(SP) at function entry. */
#define M r3
#define N r4
#define A r6
#define LDA r7
#define X r8
#define INCX r9
#define Y r10
#define INCY r5
/* I is not referenced by the code visible in this file. J is the outer */
/* loop counter (N / 4 groups) and scratch for the N & 2 and N & 1 tests. */
#define I r11
#define J r12
/* INCY2 is set to 2 * INCY (bytes) at entry. */
#define INCY2 r24
/* A1..A4: cursors into up to four consecutive A vectors, LDA bytes apart. */
#define A1 r25
#define A2 r26
#define A3 r27
#define A4 r28
/* YL: read cursor over Y. YS: write cursor over Y. INC2: constant 2 * SIZE. */
#define YL r29
#define YS r30
#define INC2 r31
/* Floating-point registers. yl* hold Y values as loaded; ys* hold the */
/* accumulated results on their way back to Y. yl5/ys5 are only used by */
/* the code for unit-stride Y that is not 2 * SIZE aligned. */
#define yl1 f0
#define yl2 f2
#define yl3 f3
#define yl4 f4
#define ys1 f5
#define ys2 f6
#define ys3 f7
#define ys4 f8
#define yl5 f27
#define ys5 f28
/* alpha1/alpha2: alpha times X elements, two per register (one per half). */
#define alpha1 f9
#define alpha2 f10
/* a1..a16: operands loaded from A. The strided-Y code also borrows some */
/* of them as staging registers for individual Y elements. */
#define a1 f11
#define a2 f12
#define a3 f13
#define a4 f14
#define a5 f15
#define a6 f16
#define a7 f17
#define a8 f18
#define a9 f19
#define a10 f20
#define a11 f21
#define a12 f22
#define a13 f23
#define a14 f24
#define a15 f25
#define a16 f26
/* alpha: incoming scalar; fsmfp copies it into both halves at entry. */
#define alpha f1
/* Kernel entry. As coded in this file, every Y element is updated as */
/*   y[i] += alpha * sum over j of a[i + j * lda] * x[j] */
/* for i in [0, M) and j in [0, N). The X/A vectors are consumed four at a */
/* time, then two, then one; rows are consumed eight at a time with */
/* remainders of four, two and one. */
/* NOTE(review): the upper-case mnemonics (LFPDUX, STFXDUX, ...) are */
/* presumably common.h wrappers for the FP2 paired load/store */
/* instructions - confirm there. */
PROLOGUE
PROFCODE
/* r0 = -16: step used by the update-form pair stores below. */
li r0, -16
/* Fetch INCY from the caller frame before SP is moved. */
lwz INCY, 8(SP)
/* Push f14-f31, 16 bytes each (SP is decremented by every store). */
stfpdux f14, SP, r0
stfpdux f15, SP, r0
stfpdux f16, SP, r0
stfpdux f17, SP, r0
stfpdux f18, SP, r0
stfpdux f19, SP, r0
stfpdux f20, SP, r0
stfpdux f21, SP, r0
stfpdux f22, SP, r0
stfpdux f23, SP, r0
stfpdux f24, SP, r0
stfpdux f25, SP, r0
stfpdux f26, SP, r0
stfpdux f27, SP, r0
stfpdux f28, SP, r0
stfpdux f29, SP, r0
stfpdux f30, SP, r0
stfpdux f31, SP, r0
/* Push r31 down to r16, one word each. Only r24-r31 are written by the */
/* code in this file; .L999 restores all sixteen in the reverse order. */
stwu r31, -4(SP)
stwu r30, -4(SP)
stwu r29, -4(SP)
stwu r28, -4(SP)
stwu r27, -4(SP)
stwu r26, -4(SP)
stwu r25, -4(SP)
stwu r24, -4(SP)
stwu r23, -4(SP)
stwu r22, -4(SP)
stwu r21, -4(SP)
stwu r20, -4(SP)
stwu r19, -4(SP)
stwu r18, -4(SP)
stwu r17, -4(SP)
stwu r16, -4(SP)
/* Scale LDA, INCX and INCY by 2^BASE_SHIFT (presumably elements to bytes; */
/* BASE_SHIFT comes from common.h - confirm). */
slwi LDA, LDA, BASE_SHIFT
slwi INCX, INCX, BASE_SHIFT
slwi INCY, INCY, BASE_SHIFT
/* Copy the primary half of alpha into its secondary half so that paired */
/* multiplies scale both halves. */
fsmfp alpha, alpha
/* Nothing to compute unless both M and N are positive. */
cmpwi cr0, M, 0
ble- .L999
cmpwi cr0, N, 0
ble- .L999
add INCY2, INCY, INCY
li INC2, 2 * SIZE
/* Pre-bias X by one stride: every X load is an update-form (pre-increment) */
/* load. */
sub X, X, INCX
/* Alignment test of A. Its only consumer, the branch to .L100, is */
/* commented out, and cr0 is overwritten by the cmpwi below, so this result */
/* is unused. NOTE(review): no path for a misaligned A exists in this file. */
andi. r0, A, 2 * SIZE - 1
# bne .L100
# All cases for aligned A, even LDA
/* Dispatch on Y: stride other than SIZE goes to .L70; unit stride with Y */
/* not 2 * SIZE aligned goes to .L40; otherwise fall through. */
cmpwi cr0, INCY, SIZE
bne .L70
andi. r0, Y, 2 * SIZE - 1
bne .L40
# A : aligned LDA : even Y : Unit Aligned
/* Unit-stride, 2 * SIZE aligned Y, four A vectors per outer iteration. */
/* A and Y are pre-biased by one pair step because all accesses below are */
/* update-form (pre-increment) loads and stores. */
sub A, A, INC2
sub Y, Y, INCY2
/* J = N / 4; skip to the two-vector code when there is no full group. */
srawi. J, N, 2
ble .L20
.align 4
.L11:
/* Load four X elements into alpha1/alpha2 (primary half first, then */
/* secondary), point A1..A4 at the four A vectors of this group, advance A */
/* past them, and restart both Y cursors. */
LFDUX alpha1, X, INCX
mr A1, A
add A2, A, LDA
add A3, A2, LDA
LFSDUX alpha1, X, INCX
LFDUX alpha2, X, INCX
add A4, A3, LDA
add A, A4, LDA
mr YL, Y
LFSDUX alpha2, X, INCX
/* alpha1 = alpha * (x[j], x[j+1]); alpha2 = alpha * (x[j+2], x[j+3]). */
fpmul alpha1, alpha, alpha1
mr YS, Y
/* CTR = M / 8: one pass of .L12 covers eight rows (four pairs). */
srawi. r0, M, 3
mtspr CTR, r0
fpmul alpha2, alpha, alpha2
ble .L15
/* Pipeline fill: load the first eight Y values and the matching eight */
/* rows of all four A vectors, and start the A1 term. */
LFPDUX yl1, YL, INCY2
LFPDUX yl2, YL, INCY2
LFPDUX yl3, YL, INCY2
LFPDUX yl4, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a10, A2, INC2
LFPDUX a14, A2, INC2
LFPDUX a3, A3, INC2
LFPDUX a7, A3, INC2
LFPDUX a11, A3, INC2
LFPDUX a15, A3, INC2
LFPDUX a4, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
LFPDUX a8, A4, INC2
fxcpmadd ys2, alpha1, a5, yl2
LFPDUX a12, A4, INC2
fxcpmadd ys3, alpha1, a9, yl3
LFPDUX a16, A4, INC2
fxcpmadd ys4, alpha1, a13, yl4
/* Only one block of eight rows: go straight to the drain code. */
bdz .L13
.align 4
.L12:
/* Steady state: add the A2, A3 and A4 terms for the current eight rows */
/* while loading the operands of the next eight, store the finished rows, */
/* then start the A1 term of the next block. Each register is reloaded */
/* only after its last use, so the instruction order matters. */
LFPDUX yl1, YL, INCY2
fxcsmadd ys1, alpha1, a2, ys1
LFPDUX a1, A1, INC2
fxcsmadd ys2, alpha1, a6, ys2
LFPDUX a5, A1, INC2
fxcsmadd ys3, alpha1, a10, ys3
LFPDUX a9, A1, INC2
fxcsmadd ys4, alpha1, a14, ys4
LFPDUX a13, A1, INC2
LFPDUX yl2, YL, INCY2
fxcpmadd ys1, alpha2, a3, ys1
LFPDUX a2, A2, INC2
fxcpmadd ys2, alpha2, a7, ys2
LFPDUX a6, A2, INC2
fxcpmadd ys3, alpha2, a11, ys3
LFPDUX a10, A2, INC2
fxcpmadd ys4, alpha2, a15, ys4
LFPDUX a14, A2, INC2
LFPDUX yl3, YL, INCY2
fxcsmadd ys1, alpha2, a4, ys1
LFPDUX a3, A3, INC2
fxcsmadd ys2, alpha2, a8, ys2
LFPDUX a7, A3, INC2
fxcsmadd ys3, alpha2, a12, ys3
LFPDUX a11, A3, INC2
fxcsmadd ys4, alpha2, a16, ys4
LFPDUX a15, A3, INC2
LFPDUX yl4, YL, INCY2
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
STFPDUX ys3, YS, INCY2
STFPDUX ys4, YS, INCY2
LFPDUX a4, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
LFPDUX a8, A4, INC2
fxcpmadd ys2, alpha1, a5, yl2
LFPDUX a12, A4, INC2
fxcpmadd ys3, alpha1, a9, yl3
LFPDUX a16, A4, INC2
fxcpmadd ys4, alpha1, a13, yl4
bdnz .L12
.align 4
.L13:
/* Drain: finish the last block of eight rows (A2..A4 terms) and store it. */
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
fxcsmadd ys3, alpha1, a10, ys3
fxcsmadd ys4, alpha1, a14, ys4
fxcpmadd ys1, alpha2, a3, ys1
fxcpmadd ys2, alpha2, a7, ys2
fxcpmadd ys3, alpha2, a11, ys3
fxcpmadd ys4, alpha2, a15, ys4
fxcsmadd ys1, alpha2, a4, ys1
fxcsmadd ys2, alpha2, a8, ys2
fxcsmadd ys3, alpha2, a12, ys3
fxcsmadd ys4, alpha2, a16, ys4
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
STFPDUX ys3, YS, INCY2
STFPDUX ys4, YS, INCY2
.align 4
.L15:
/* Row remainder (M mod 8): four rows, then two, then one. */
andi. r0, M, 7
ble .L19
andi. r0, M, 4
ble .L17
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX yl2, YL, INCY2
LFPDUX a5, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a3, A3, INC2
LFPDUX a7, A3, INC2
LFPDUX a4, A4, INC2
LFPDUX a8, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
fxcpmadd ys1, alpha2, a3, ys1
fxcpmadd ys2, alpha2, a7, ys2
fxcsmadd ys1, alpha2, a4, ys1
fxcsmadd ys2, alpha2, a8, ys2
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
.align 4
.L17:
andi. r0, M, 2
ble .L18
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a3, A3, INC2
LFPDUX a4, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
fxcpmadd ys1, alpha2, a3, ys1
fxcsmadd ys1, alpha2, a4, ys1
STFPDUX ys1, YS, INCY2
.align 4
.L18:
/* Last odd row: single-element loads and store. The cursors still */
/* advance by a full pair step from the previously accessed pair. */
andi. r0, M, 1
ble .L19
LFDUX yl1, YL, INCY2
LFDUX a1, A1, INC2
LFDUX a2, A2, INC2
LFDUX a3, A3, INC2
LFDUX a4, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
fxcpmadd ys1, alpha2, a3, ys1
fxcsmadd ys1, alpha2, a4, ys1
STFDUX ys1, YS, INCY2
.align 4
.L19:
/* Next group of four A vectors, if any remain. */
addi J, J, -1
cmpi cr0, 0, J, 0
bgt .L11
/* Aligned unit-stride Y, N & 2: two remaining A vectors. Same structure */
/* as the four-vector code, using only alpha1 (two X elements) and A1/A2. */
.align 4
.L20:
andi. J, N, 2
ble .L30
LFDUX alpha1, X, INCX
mr A1, A
add A2, A, LDA
add A, A2, LDA
LFSDUX alpha1, X, INCX
mr YL, Y
mr YS, Y
fpmul alpha1, alpha, alpha1
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
ble .L25
/* Pipeline fill: first eight Y values and eight rows of A1 and A2. */
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX yl2, YL, INCY2
LFPDUX a5, A1, INC2
LFPDUX yl3, YL, INCY2
LFPDUX a9, A1, INC2
LFPDUX yl4, YL, INCY2
LFPDUX a13, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a10, A2, INC2
LFPDUX a14, A2, INC2
bdz .L23
.align 4
.L22:
/* Steady state: compute the current eight rows while loading the next */
/* eight; each operand is reloaded right after its last use. */
fxcpmadd ys1, alpha1, a1, yl1
LFPDUX a1, A1, INC2
LFPDUX yl1, YL, INCY2
fxcpmadd ys2, alpha1, a5, yl2
LFPDUX a5, A1, INC2
LFPDUX yl2, YL, INCY2
fxcpmadd ys3, alpha1, a9, yl3
LFPDUX a9, A1, INC2
LFPDUX yl3, YL, INCY2
fxcpmadd ys4, alpha1, a13, yl4
LFPDUX a13, A1, INC2
LFPDUX yl4, YL, INCY2
fxcsmadd ys1, alpha1, a2, ys1
LFPDUX a2, A2, INC2
fxcsmadd ys2, alpha1, a6, ys2
LFPDUX a6, A2, INC2
fxcsmadd ys3, alpha1, a10, ys3
LFPDUX a10, A2, INC2
fxcsmadd ys4, alpha1, a14, ys4
LFPDUX a14, A2, INC2
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
STFPDUX ys3, YS, INCY2
STFPDUX ys4, YS, INCY2
bdnz .L22
.align 4
.L23:
/* Drain: compute and store the last block of eight rows. */
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcpmadd ys3, alpha1, a9, yl3
fxcpmadd ys4, alpha1, a13, yl4
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
fxcsmadd ys3, alpha1, a10, ys3
fxcsmadd ys4, alpha1, a14, ys4
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
STFPDUX ys3, YS, INCY2
STFPDUX ys4, YS, INCY2
.align 4
.L25:
/* Row remainder (M mod 8): four rows, then two, then one. */
andi. r0, M, 7
ble .L30
andi. r0, M, 4
ble .L27
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX yl2, YL, INCY2
LFPDUX a5, A1, INC2
LFPDUX a6, A2, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
fxcpmadd ys2, alpha1, a5, yl2
fxcsmadd ys2, alpha1, a6, ys2
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
.align 4
.L27:
andi. r0, M, 2
ble .L28
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
STFPDUX ys1, YS, INCY2
.align 4
.L28:
/* Last odd row, handled with single-element loads and store. */
andi. r0, M, 1
ble .L30
LFDUX yl1, YL, INCY2
LFDUX a1, A1, INC2
LFDUX a2, A2, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
STFDUX ys1, YS, INCY2
.L30:
andi. J, N, 1
ble .L999
LFDUX alpha1, X, INCX
mr A1, A
mr YL, Y
mr YS, Y
fmul alpha1, alpha, alpha1
srawi. r0, M, 3
mtspr CTR, r0
ble .L35
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX yl2, YL, INCY2
LFPDUX a5, A1, INC2
LFPDUX yl3, YL, INCY2
LFPDUX a9, A1, INC2
LFPDUX yl4, YL, INCY2
LFPDUX a13, A1, INC2
bdz .L33
.align 4
.L32:
fxcpmadd ys1, alpha1, a1, yl1
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
fxcpmadd ys2, alpha1, a5, yl2
LFPDUX yl2, YL, INCY2
LFPDUX a5, A1, INC2
fxcpmadd ys3, alpha1, a9, yl3
LFPDUX yl3, YL, INCY2
LFPDUX a9, A1, INC2
fxcpmadd ys4, alpha1, a13, yl4
LFPDUX yl4, YL, INCY2
LFPDUX a13, A1, INC2
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
STFPDUX ys3, YS, INCY2
STFPDUX ys4, YS, INCY2
bdnz .L32
.align 4
.L33:
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcpmadd ys3, alpha1, a9, yl3
fxcpmadd ys4, alpha1, a13, yl4
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
STFPDUX ys3, YS, INCY2
STFPDUX ys4, YS, INCY2
.align 4
.L35:
andi. r0, M, 7
ble .L999
andi. r0, M, 4
ble .L37
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX yl2, YL, INCY2
LFPDUX a5, A1, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
STFPDUX ys1, YS, INCY2
STFPDUX ys2, YS, INCY2
.align 4
.L37:
andi. r0, M, 2
ble .L38
LFPDUX yl1, YL, INCY2
LFPDUX a1, A1, INC2
fxcpmadd ys1, alpha1, a1, yl1
STFPDUX ys1, YS, INCY2
.align 4
.L38:
andi. r0, M, 1
ble .L999
LFDUX yl1, YL, INCY2
LFDUX a1, A1, INC2
fxcpmadd ys1, alpha1, a1, yl1
STFDUX ys1, YS, INCY2
b .L999
/* Unit-stride Y that is not 2 * SIZE aligned, four A vectors per outer */
/* iteration. Y is accessed through the pairs that straddle it, using the */
/* cross-form pair load/store (LFXDUX/STFXDUX; presumably they exchange */
/* the two halves - confirm in the FP2 ISA) plus half moves (fsmr copies a */
/* secondary half, fmr a primary half) to realign values against A. */
.align 4
.L40:
# A : aligned LDA : even Y : Unaligned
/* A is pre-biased by one pair and Y by one element for the update-form */
/* accesses below. */
sub A, A, INC2
sub Y, Y, INCY
srawi. J, N, 2
ble .L50
.align 4
.L41:
/* alpha1 = alpha * (x[j], x[j+1]); alpha2 = alpha * (x[j+2], x[j+3]). */
LFDUX alpha1, X, INCX
LFSDUX alpha1, X, INCX
LFDUX alpha2, X, INCX
LFSDUX alpha2, X, INCX
fpmul alpha1, alpha, alpha1
fpmul alpha2, alpha, alpha2
mr A1, A
add A2, A, LDA
add A3, A2, LDA
add A4, A3, LDA
add A, A4, LDA
mr YL, Y
sub YS, Y, INCY2
/* Seed the carried state: the secondary half of ys1 gets the element just */
/* below y[0] (it is stored back unchanged with the first pair), and the */
/* primary half of yl1 gets y[0]. */
LFSDX ys1, YS, INCY2
LFDX yl1, YL, INCY
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
ble .L45
/* Pipeline fill: eight rows of A1..A4 and four straddling Y pairs; the */
/* fsmr chain shifts Y by one half so yl1..yl4 line up with the A pairs. */
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
LFXDUX yl2, YL, INCY2
LFXDUX yl3, YL, INCY2
LFXDUX yl4, YL, INCY2
LFXDUX yl5, YL, INCY2
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a10, A2, INC2
LFPDUX a14, A2, INC2
LFPDUX a3, A3, INC2
LFPDUX a7, A3, INC2
LFPDUX a11, A3, INC2
LFPDUX a15, A3, INC2
LFPDUX a4, A4, INC2
fsmr yl1, yl2
LFPDUX a8, A4, INC2
fsmr yl2, yl3
LFPDUX a12, A4, INC2
fsmr yl3, yl4
LFPDUX a16, A4, INC2
fsmr yl4, yl5
bdz .L43
.align 4
.L42:
/* Steady state: results for eight rows accumulate in ys2..ys5. */
fxcpmadd ys2, alpha1, a1, yl1
LFPDUX a1, A1, INC2
fxcpmadd ys3, alpha1, a5, yl2
LFPDUX a5, A1, INC2
fxcpmadd ys4, alpha1, a9, yl3
LFPDUX a9, A1, INC2
fxcpmadd ys5, alpha1, a13, yl4
LFPDUX a13, A1, INC2
fxcsmadd ys2, alpha1, a2, ys2
LFPDUX a2, A2, INC2
fxcsmadd ys3, alpha1, a6, ys3
LFPDUX a6, A2, INC2
fxcsmadd ys4, alpha1, a10, ys4
LFPDUX a10, A2, INC2
fxcsmadd ys5, alpha1, a14, ys5
LFPDUX a14, A2, INC2
fxcpmadd ys2, alpha2, a3, ys2
LFPDUX a3, A3, INC2
fxcpmadd ys3, alpha2, a7, ys3
LFPDUX a7, A3, INC2
fxcpmadd ys4, alpha2, a11, ys4
LFPDUX a11, A3, INC2
fxcpmadd ys5, alpha2, a15, ys5
LFPDUX a15, A3, INC2
fxcsmadd ys2, alpha2, a4, ys2
LFPDUX a4, A4, INC2
fxcsmadd ys3, alpha2, a8, ys3
LFPDUX a8, A4, INC2
fxcsmadd ys4, alpha2, a12, ys4
LFPDUX a12, A4, INC2
fxcsmadd ys5, alpha2, a16, ys5
LFPDUX a16, A4, INC2
/* Shift the results down by one half (primary moves ys2..ys5 into */
/* ys1..ys4) so each stored pair matches an aligned pair of Y, carry the */
/* last half in ys1, and rebuild yl1..yl4 for the next block. */
fmr yl1, yl5
LFXDUX yl2, YL, INCY2
fmr ys1, ys2
LFXDUX yl3, YL, INCY2
fmr ys2, ys3
LFXDUX yl4, YL, INCY2
fmr ys3, ys4
LFXDUX yl5, YL, INCY2
fmr ys4, ys5
STFXDUX ys1, YS, INCY2
fsmr ys1, ys5
STFXDUX ys2, YS, INCY2
fsmr yl1, yl2
STFXDUX ys3, YS, INCY2
fsmr yl2, yl3
STFXDUX ys4, YS, INCY2
fsmr yl3, yl4
fsmr yl4, yl5
bdnz .L42
.align 4
.L43:
/* Drain: finish and store the last block of eight rows; ys1 keeps the */
/* pending half and yl1 the next unread Y element for the remainder code. */
fxcpmadd ys2, alpha1, a1, yl1
fxcpmadd ys3, alpha1, a5, yl2
fxcpmadd ys4, alpha1, a9, yl3
fxcpmadd ys5, alpha1, a13, yl4
fxcsmadd ys2, alpha1, a2, ys2
fxcsmadd ys3, alpha1, a6, ys3
fxcsmadd ys4, alpha1, a10, ys4
fxcsmadd ys5, alpha1, a14, ys5
fxcpmadd ys2, alpha2, a3, ys2
fxcpmadd ys3, alpha2, a7, ys3
fxcpmadd ys4, alpha2, a11, ys4
fxcpmadd ys5, alpha2, a15, ys5
fxcsmadd ys2, alpha2, a4, ys2
fxcsmadd ys3, alpha2, a8, ys3
fxcsmadd ys4, alpha2, a12, ys4
fxcsmadd ys5, alpha2, a16, ys5
fmr ys1, ys2
fmr ys2, ys3
fmr ys3, ys4
fmr ys4, ys5
fmr yl1, yl5
STFXDUX ys1, YS, INCY2
fsmr ys1, ys5
STFXDUX ys2, YS, INCY2
STFXDUX ys3, YS, INCY2
STFXDUX ys4, YS, INCY2
.align 4
.L45:
/* Row remainder (M mod 8): four rows, then two, then one. With no */
/* remainder the pending half in ys1 is flushed at .L48. */
andi. r0, M, 7
ble .L48
andi. r0, M, 4
ble .L46
LFXDUX yl2, YL, INCY2
LFXDUX yl3, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a3, A3, INC2
LFPDUX a7, A3, INC2
LFPDUX a4, A4, INC2
fsmr yl1, yl2
LFPDUX a8, A4, INC2
fsmr yl2, yl3
fxcpmadd ys2, alpha1, a1, yl1
fxcpmadd ys3, alpha1, a5, yl2
fxcsmadd ys2, alpha1, a2, ys2
fxcsmadd ys3, alpha1, a6, ys3
fxcpmadd ys2, alpha2, a3, ys2
fxcpmadd ys3, alpha2, a7, ys3
fxcsmadd ys2, alpha2, a4, ys2
fxcsmadd ys3, alpha2, a8, ys3
fmr yl1, yl3
fmr ys1, ys2
fmr ys2, ys3
STFXDUX ys1, YS, INCY2
fsmr ys1, ys3
STFXDUX ys2, YS, INCY2
.align 4
.L46:
andi. r0, M, 2
ble .L47
LFXDUX yl2, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a3, A3, INC2
LFPDUX a4, A4, INC2
fsmr yl1, yl2
fxcpmadd ys2, alpha1, a1, yl1
fxcsmadd ys2, alpha1, a2, ys2
fxcpmadd ys2, alpha2, a3, ys2
fxcsmadd ys2, alpha2, a4, ys2
fmr yl1, yl2
fmr ys1, ys2
STFXDUX ys1, YS, INCY2
fsmr ys1, ys2
.align 4
.L47:
/* Odd M: compute the last row into the primary half of ys2, store the */
/* pending half from ys1, then store the new value one element further on. */
andi. r0, M, 1
ble .L48
LFDUX a1, A1, INC2
LFDUX a2, A2, INC2
LFDUX a3, A3, INC2
LFDUX a4, A4, INC2
fxcpmadd ys2, alpha1, a1, yl1
fxcsmadd ys2, alpha1, a2, ys2
fxcpmadd ys2, alpha2, a3, ys2
fxcsmadd ys2, alpha2, a4, ys2
STFSDX ys1, YS, INCY2
add YS, YS, INCY
STFDX ys2, YS, INCY2
b .L49
.align 4
.L48:
/* Even M: only the pending half carried in ys1 is left to store. */
STFSDUX ys1, YS, INCY2
.align 4
.L49:
/* Next group of four A vectors, if any remain. */
addi J, J, -1
cmpi cr0, 0, J, 0
bgt .L41
/* Unit-stride Y that is not 2 * SIZE aligned, N & 2: two remaining A */
/* vectors. Same half-shifting scheme as the four-vector code: ys1 carries */
/* the pending secondary half, yl1 the next unread Y element. */
.align 4
.L50:
andi. J, N, 2
ble .L60
LFDUX alpha1, X, INCX
mr A1, A
add A2, A, LDA
add A, A2, LDA
LFSDUX alpha1, X, INCX
mr YL, Y
sub YS, Y, INCY2
fpmul alpha1, alpha, alpha1
/* Seed the carried halves: the element just below y[0] into ys1 */
/* (secondary), y[0] into yl1 (primary). */
LFSDX ys1, YS, INCY2
LFDX yl1, YL, INCY
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
ble .L55
/* Pipeline fill. */
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
LFXDUX yl2, YL, INCY2
LFXDUX yl3, YL, INCY2
LFXDUX yl4, YL, INCY2
LFXDUX yl5, YL, INCY2
LFPDUX a2, A2, INC2
fsmr yl1, yl2
LFPDUX a6, A2, INC2
fsmr yl2, yl3
LFPDUX a10, A2, INC2
fsmr yl3, yl4
LFPDUX a14, A2, INC2
fsmr yl4, yl5
bdz .L53
.align 4
.L52:
/* Steady state: eight rows into ys2..ys5, then shift by one half, store */
/* four pairs and rebuild yl1..yl4 from the freshly loaded Y pairs. */
fxcpmadd ys2, alpha1, a1, yl1
LFPDUX a1, A1, INC2
fxcpmadd ys3, alpha1, a5, yl2
LFPDUX a5, A1, INC2
fxcpmadd ys4, alpha1, a9, yl3
LFPDUX a9, A1, INC2
fxcpmadd ys5, alpha1, a13, yl4
LFPDUX a13, A1, INC2
fxcsmadd ys2, alpha1, a2, ys2
LFPDUX a2, A2, INC2
fxcsmadd ys3, alpha1, a6, ys3
LFPDUX a6, A2, INC2
fxcsmadd ys4, alpha1, a10, ys4
LFPDUX a10, A2, INC2
fxcsmadd ys5, alpha1, a14, ys5
LFPDUX a14, A2, INC2
fmr yl1, yl5
LFXDUX yl2, YL, INCY2
fmr ys1, ys2
LFXDUX yl3, YL, INCY2
fmr ys2, ys3
LFXDUX yl4, YL, INCY2
fmr ys3, ys4
LFXDUX yl5, YL, INCY2
fmr ys4, ys5
STFXDUX ys1, YS, INCY2
fsmr ys1, ys5
STFXDUX ys2, YS, INCY2
fsmr yl1, yl2
STFXDUX ys3, YS, INCY2
fsmr yl2, yl3
STFXDUX ys4, YS, INCY2
fsmr yl3, yl4
fsmr yl4, yl5
bdnz .L52
.align 4
.L53:
/* Drain: finish and store the last block of eight rows. */
fxcpmadd ys2, alpha1, a1, yl1
fxcpmadd ys3, alpha1, a5, yl2
fxcpmadd ys4, alpha1, a9, yl3
fxcpmadd ys5, alpha1, a13, yl4
fxcsmadd ys2, alpha1, a2, ys2
fxcsmadd ys3, alpha1, a6, ys3
fxcsmadd ys4, alpha1, a10, ys4
fxcsmadd ys5, alpha1, a14, ys5
fmr yl1, yl5
fmr ys1, ys2
fmr ys2, ys3
fmr ys3, ys4
fmr ys4, ys5
STFXDUX ys1, YS, INCY2
fsmr ys1, ys5
STFXDUX ys2, YS, INCY2
STFXDUX ys3, YS, INCY2
STFXDUX ys4, YS, INCY2
.align 4
.L55:
/* Row remainder (M mod 8): four rows, then two, then one. */
andi. r0, M, 7
ble .L59
andi. r0, M, 4
ble .L57
LFXDUX yl2, YL, INCY2
LFXDUX yl3, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFPDUX a5, A1, INC2
LFPDUX a6, A2, INC2
fsmr yl1, yl2
fsmr yl2, yl3
fxcpmadd ys2, alpha1, a1, yl1
fxcsmadd ys2, alpha1, a2, ys2
fxcpmadd ys3, alpha1, a5, yl2
fxcsmadd ys3, alpha1, a6, ys3
fmr yl1, yl3
fmr ys1, ys2
fmr ys2, ys3
STFXDUX ys1, YS, INCY2
STFXDUX ys2, YS, INCY2
fsmr ys1, ys3
.align 4
.L57:
andi. r0, M, 2
ble .L58
LFXDUX yl2, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
fsmr yl1, yl2
fxcpmadd ys2, alpha1, a1, yl1
fxcsmadd ys2, alpha1, a2, ys2
fmr yl1, yl2
fmr ys1, ys2
STFXDUX ys1, YS, INCY2
fsmr ys1, ys2
.align 4
.L58:
/* Odd M: scalar fmadd updates only the primary half of ys1, so the */
/* pending secondary half survives and both go out in one cross store. */
/* fxmr presumably swaps halves, making the second scaled X element */
/* available as a scalar in alpha2 - confirm in the FP2 ISA. */
andi. r0, M, 1
ble .L59
LFDUX a1, A1, INC2
LFDUX a2, A2, INC2
fxmr alpha2, alpha1
fmadd ys1, alpha1, a1, yl1
fmadd ys1, alpha2, a2, ys1
STFXDUX ys1, YS, INCY2
b .L60
.align 4
.L59:
/* Even M: only the pending half carried in ys1 is left to store. */
STFSDUX ys1, YS, INCY2
/* Unit-stride Y that is not 2 * SIZE aligned, N & 1: the final single A */
/* vector. Only the primary half of alpha1 is set (scalar fmul). */
.align 4
.L60:
andi. J, N, 1
ble .L999
LFDUX alpha1, X, INCX
mr A1, A
mr YL, Y
sub YS, Y, INCY2
fmul alpha1, alpha, alpha1
/* Seed the carried halves: the element just below y[0] into ys1 */
/* (secondary), y[0] into yl1 (primary). */
LFSDX ys1, YS, INCY2
LFDX yl1, YL, INCY
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
ble .L65
/* Pipeline fill. */
LFXDUX yl2, YL, INCY2
LFXDUX yl3, YL, INCY2
LFXDUX yl4, YL, INCY2
LFXDUX yl5, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
fsmr yl1, yl2
fsmr yl2, yl3
fsmr yl3, yl4
fsmr yl4, yl5
bdz .L63
.align 4
.L62:
/* Steady state: each yl register is reloaded only after the multiply-add */
/* that consumes it, and yl5 only after its primary half is saved in yl1. */
fxcpmadd ys2, alpha1, a1, yl1
LFPDUX a1, A1, INC2
fxcpmadd ys3, alpha1, a5, yl2
LFXDUX yl2, YL, INCY2
fxcpmadd ys4, alpha1, a9, yl3
LFXDUX yl3, YL, INCY2
fxcpmadd ys5, alpha1, a13, yl4
LFXDUX yl4, YL, INCY2
fmr yl1, yl5
LFXDUX yl5, YL, INCY2
fmr ys1, ys2
LFPDUX a5, A1, INC2
fmr ys2, ys3
LFPDUX a9, A1, INC2
fmr ys3, ys4
LFPDUX a13, A1, INC2
fmr ys4, ys5
STFXDUX ys1, YS, INCY2
fsmr ys1, ys5
STFXDUX ys2, YS, INCY2
fsmr yl1, yl2
STFXDUX ys3, YS, INCY2
fsmr yl2, yl3
STFXDUX ys4, YS, INCY2
fsmr yl3, yl4
fsmr yl4, yl5
bdnz .L62
.align 4
.L63:
/* Drain: finish and store the last block of eight rows. */
fxcpmadd ys2, alpha1, a1, yl1
fxcpmadd ys3, alpha1, a5, yl2
fxcpmadd ys4, alpha1, a9, yl3
fxcpmadd ys5, alpha1, a13, yl4
fmr yl1, yl5
fmr ys1, ys2
fmr ys2, ys3
fmr ys3, ys4
fmr ys4, ys5
STFXDUX ys1, YS, INCY2
fsmr ys1, ys5
STFXDUX ys2, YS, INCY2
STFXDUX ys3, YS, INCY2
STFXDUX ys4, YS, INCY2
.align 4
.L65:
/* Row remainder (M mod 8): four rows, then two, then one. */
andi. r0, M, 7
ble .L69
andi. r0, M, 4
ble .L67
LFXDUX yl2, YL, INCY2
LFXDUX yl3, YL, INCY2
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
fsmr yl1, yl2
fsmr yl2, yl3
fxcpmadd ys2, alpha1, a1, yl1
fxcpmadd ys3, alpha1, a5, yl2
fmr yl1, yl3
fmr ys1, ys2
fmr ys2, ys3
STFXDUX ys1, YS, INCY2
fsmr ys1, ys3
STFXDUX ys2, YS, INCY2
.align 4
.L67:
andi. r0, M, 2
ble .L68
LFPDUX a1, A1, INC2
LFXDUX yl2, YL, INCY2
fsmr yl1, yl2
fxcpmadd ys2, alpha1, a1, yl1
fmr yl1, yl2
fmr ys1, ys2
STFXDUX ys1, YS, INCY2
fsmr ys1, ys2
.align 4
.L68:
/* Odd M: scalar fmadd updates the primary half of ys1 and leaves the */
/* pending secondary half intact; one cross store writes both. */
andi. r0, M, 1
ble .L69
LFDUX a1, A1, INC2
fmadd ys1, alpha1, a1, yl1
STFXDUX ys1, YS, INCY2
b .L999
.align 4
.L69:
/* Even M: only the pending half carried in ys1 is left to store. */
STFSDUX ys1, YS, INCY2
b .L999
/* Y with a stride other than SIZE, four A vectors per outer iteration. */
/* Y is gathered one element at a time (LFDUX into the primary half, */
/* LFSDUX into the secondary half of a yl register) and scattered the same */
/* way with STFDUX/STFSDUX; A is still read in pairs. */
.align 4
.L70:
/* Pre-bias A by one pair and Y by one stride for the update-form accesses. */
sub A, A, INC2
sub Y, Y, INCY
/* J = N / 4 groups of four A vectors. */
srawi. J, N, 2
ble .L80
.align 4
.L71:
/* alpha1 = alpha * (x[j], x[j+1]); alpha2 = alpha * (x[j+2], x[j+3]). */
LFDUX alpha1, X, INCX
mr A1, A
add A2, A, LDA
add A3, A2, LDA
LFSDUX alpha1, X, INCX
LFDUX alpha2, X, INCX
add A4, A3, LDA
add A, A4, LDA
mr YL, Y
LFSDUX alpha2, X, INCX
fpmul alpha1, alpha, alpha1
mr YS, Y
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
fpmul alpha2, alpha, alpha2
ble .L75
/* Pipeline fill: gather eight Y elements into yl1..yl4 and load eight */
/* rows of each of the four A vectors. */
LFDUX yl1, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
LFSDUX yl1, YL, INCY
LFDUX yl2, YL, INCY
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a10, A2, INC2
LFPDUX a14, A2, INC2
LFSDUX yl2, YL, INCY
LFDUX yl3, YL, INCY
LFPDUX a3, A3, INC2
LFPDUX a7, A3, INC2
LFPDUX a11, A3, INC2
LFPDUX a15, A3, INC2
LFSDUX yl3, YL, INCY
LFDUX yl4, YL, INCY
LFPDUX a4, A4, INC2
LFPDUX a8, A4, INC2
LFPDUX a12, A4, INC2
LFPDUX a16, A4, INC2
LFSDUX yl4, YL, INCY
bdz .L73
.align 4
.L72:
/* Steady state: all four yl registers are consumed by the first four */
/* multiply-adds, so the gathers for the next block can be interleaved */
/* with the remaining A2..A4 terms. */
fxcpmadd ys1, alpha1, a1, yl1
LFPDUX a1, A1, INC2
LFDUX yl1, YL, INCY
fxcpmadd ys2, alpha1, a5, yl2
LFPDUX a5, A1, INC2
fxcpmadd ys3, alpha1, a9, yl3
LFPDUX a9, A1, INC2
fxcpmadd ys4, alpha1, a13, yl4
LFPDUX a13, A1, INC2
LFSDUX yl1, YL, INCY
fxcsmadd ys1, alpha1, a2, ys1
LFPDUX a2, A2, INC2
LFDUX yl2, YL, INCY
fxcsmadd ys2, alpha1, a6, ys2
LFPDUX a6, A2, INC2
fxcsmadd ys3, alpha1, a10, ys3
LFPDUX a10, A2, INC2
fxcsmadd ys4, alpha1, a14, ys4
LFPDUX a14, A2, INC2
LFSDUX yl2, YL, INCY
fxcpmadd ys1, alpha2, a3, ys1
LFPDUX a3, A3, INC2
LFDUX yl3, YL, INCY
fxcpmadd ys2, alpha2, a7, ys2
LFPDUX a7, A3, INC2
fxcpmadd ys3, alpha2, a11, ys3
LFPDUX a11, A3, INC2
fxcpmadd ys4, alpha2, a15, ys4
LFPDUX a15, A3, INC2
LFSDUX yl3, YL, INCY
fxcsmadd ys1, alpha2, a4, ys1
LFPDUX a4, A4, INC2
LFDUX yl4, YL, INCY
fxcsmadd ys2, alpha2, a8, ys2
LFPDUX a8, A4, INC2
fxcsmadd ys3, alpha2, a12, ys3
LFPDUX a12, A4, INC2
fxcsmadd ys4, alpha2, a16, ys4
LFPDUX a16, A4, INC2
LFSDUX yl4, YL, INCY
/* Scatter the eight results: primary half, then secondary half. */
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFSDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
STFSDUX ys4, YS, INCY
bdnz .L72
.align 4
.L73:
/* Drain: compute and scatter the last block of eight rows. */
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcpmadd ys3, alpha1, a9, yl3
fxcpmadd ys4, alpha1, a13, yl4
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
fxcsmadd ys3, alpha1, a10, ys3
fxcsmadd ys4, alpha1, a14, ys4
fxcpmadd ys1, alpha2, a3, ys1
fxcpmadd ys2, alpha2, a7, ys2
fxcpmadd ys3, alpha2, a11, ys3
fxcpmadd ys4, alpha2, a15, ys4
fxcsmadd ys1, alpha2, a4, ys1
fxcsmadd ys2, alpha2, a8, ys2
fxcsmadd ys3, alpha2, a12, ys3
fxcsmadd ys4, alpha2, a16, ys4
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFSDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
STFSDUX ys4, YS, INCY
.align 4
.L75:
/* Row remainder (M mod 8): four rows, then two, then one. */
andi. r0, M, 7
ble .L79
andi. r0, M, 4
ble .L77
LFDUX yl1, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFSDUX yl1, YL, INCY
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFDUX yl2, YL, INCY
LFPDUX a3, A3, INC2
LFPDUX a7, A3, INC2
LFSDUX yl2, YL, INCY
LFPDUX a4, A4, INC2
LFPDUX a8, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
fxcpmadd ys1, alpha2, a3, ys1
fxcpmadd ys2, alpha2, a7, ys2
fxcsmadd ys1, alpha2, a4, ys1
fxcsmadd ys2, alpha2, a8, ys2
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
.align 4
.L77:
andi. r0, M, 2
ble .L78
LFDUX yl1, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFSDUX yl1, YL, INCY
LFPDUX a3, A3, INC2
LFPDUX a4, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
fxcpmadd ys1, alpha2, a3, ys1
fxcsmadd ys1, alpha2, a4, ys1
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
.align 4
.L78:
/* Last odd row: only the primary half of the result is stored. */
andi. r0, M, 1
ble .L79
LFDUX yl1, YL, INCY
LFDUX a1, A1, INC2
LFDUX a2, A2, INC2
LFDUX a3, A3, INC2
LFDUX a4, A4, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
fxcpmadd ys1, alpha2, a3, ys1
fxcsmadd ys1, alpha2, a4, ys1
STFDUX ys1, YS, INCY
.align 4
.L79:
/* Next group of four A vectors, if any remain. */
addi J, J, -1
cmpi cr0, 0, J, 0
bgt .L71
/* Y with a stride other than SIZE, N & 2: two remaining A vectors. */
/* In the unrolled loop the A1 rows live in a1/a5/a3/a7 and the A2 rows in */
/* a2/a6/a4/a8; a9..a12 stage every second Y element, which fsmfp then */
/* merges into the secondary halves of yl1..yl4. */
.align 4
.L80:
andi. J, N, 2
ble .L90
LFDUX alpha1, X, INCX
mr A1, A
add A2, A, LDA
add A, A2, LDA
LFSDUX alpha1, X, INCX
mr YL, Y
mr YS, Y
fpmul alpha1, alpha, alpha1
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
ble .L85
/* Pipeline fill: eight Y elements (alternating yl / staging register) */
/* and eight rows of A1 and A2. */
LFDUX yl1, YL, INCY
LFDUX a9, YL, INCY
LFDUX yl2, YL, INCY
LFDUX a10, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a3, A1, INC2
LFPDUX a7, A1, INC2
LFDUX yl3, YL, INCY
LFDUX a11, YL, INCY
LFDUX yl4, YL, INCY
LFDUX a12, YL, INCY
LFPDUX a2, A2, INC2
LFPDUX a6, A2, INC2
LFPDUX a4, A2, INC2
LFPDUX a8, A2, INC2
bdz .L83
.align 4
.L82:
/* Merge the staged elements into the secondary halves before a9..a12 */
/* are overwritten by the gathers for the next block. */
fsmfp yl1, a9
fsmfp yl2, a10
fsmfp yl3, a11
fsmfp yl4, a12
fxcpmadd ys1, alpha1, a1, yl1
LFDUX yl1, YL, INCY
LFDUX a9, YL, INCY
LFPDUX a1, A1, INC2
fxcpmadd ys2, alpha1, a5, yl2
LFDUX yl2, YL, INCY
LFDUX a10, YL, INCY
LFPDUX a5, A1, INC2
fxcpmadd ys3, alpha1, a3, yl3
LFDUX yl3, YL, INCY
LFDUX a11, YL, INCY
LFPDUX a3, A1, INC2
fxcpmadd ys4, alpha1, a7, yl4
LFDUX yl4, YL, INCY
LFDUX a12, YL, INCY
LFPDUX a7, A1, INC2
fxcsmadd ys1, alpha1, a2, ys1
LFPDUX a2, A2, INC2
fxcsmadd ys2, alpha1, a6, ys2
LFPDUX a6, A2, INC2
fxcsmadd ys3, alpha1, a4, ys3
LFPDUX a4, A2, INC2
fxcsmadd ys4, alpha1, a8, ys4
LFPDUX a8, A2, INC2
/* Scatter the eight results: primary half, then secondary half. */
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFSDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
STFSDUX ys4, YS, INCY
bdnz .L82
.align 4
.L83:
/* Drain: merge, compute and scatter the last block of eight rows. */
fsmfp yl1, a9
fsmfp yl2, a10
fsmfp yl3, a11
fsmfp yl4, a12
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcpmadd ys3, alpha1, a3, yl3
fxcpmadd ys4, alpha1, a7, yl4
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
fxcsmadd ys3, alpha1, a4, ys3
fxcsmadd ys4, alpha1, a8, ys4
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFSDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
STFSDUX ys4, YS, INCY
.align 4
.L85:
/* Row remainder (M mod 8): four rows, then two, then one. Here Y is */
/* gathered directly with LFDUX/LFSDUX into both halves of yl1/yl2. */
andi. r0, M, 7
ble .L90
andi. r0, M, 4
ble .L87
LFDUX yl1, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFSDUX yl1, YL, INCY
LFDUX yl2, YL, INCY
LFPDUX a5, A1, INC2
LFPDUX a6, A2, INC2
LFSDUX yl2, YL, INCY
fxcpmadd ys1, alpha1, a1, yl1
fxcpmadd ys2, alpha1, a5, yl2
fxcsmadd ys1, alpha1, a2, ys1
fxcsmadd ys2, alpha1, a6, ys2
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
.align 4
.L87:
andi. r0, M, 2
ble .L88
LFDUX yl1, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a2, A2, INC2
LFSDUX yl1, YL, INCY
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
.align 4
.L88:
/* Last odd row: only the primary half of the result is stored. */
andi. r0, M, 1
ble .L90
LFDUX yl1, YL, INCY
LFDUX a1, A1, INC2
LFDUX a2, A2, INC2
fxcpmadd ys1, alpha1, a1, yl1
fxcsmadd ys1, alpha1, a2, ys1
STFDUX ys1, YS, INCY
/* Y with a stride other than SIZE, N & 1: the final single A vector. */
/* Only the primary half of alpha1 is set (scalar fmul). In the unrolled */
/* loop a2/a4/a6/a8 are Y staging registers, not A operands: LFSDUX puts */
/* every second Y element in their secondary halves and fmr later copies */
/* the matching primary halves from yl1..yl4. */
.align 4
.L90:
andi. J, N, 1
ble .L999
LFDUX alpha1, X, INCX
mr A1, A
mr YL, Y
mr YS, Y
fmul alpha1, alpha, alpha1
/* CTR = M / 8 blocks of eight rows. */
srawi. r0, M, 3
mtspr CTR, r0
ble .L95
/* Pipeline fill: gather eight Y elements and load eight rows of A1. */
LFDUX yl1, YL, INCY
LFSDUX a2, YL, INCY
LFDUX yl2, YL, INCY
LFSDUX a4, YL, INCY
LFDUX yl3, YL, INCY
LFSDUX a6, YL, INCY
LFDUX yl4, YL, INCY
LFSDUX a8, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
bdz .L93
.align 4
.L92:
/* Complete the Y pairs in a2/a4/a6/a8, then compute eight rows while */
/* gathering the next eight Y elements and loading the next A rows. */
fmr a2, yl1
fmr a4, yl2
fmr a6, yl3
fmr a8, yl4
fxcpmadd ys1, alpha1, a1, a2
LFDUX yl1, YL, INCY
LFSDUX a2, YL, INCY
fxcpmadd ys2, alpha1, a5, a4
LFDUX yl2, YL, INCY
LFSDUX a4, YL, INCY
fxcpmadd ys3, alpha1, a9, a6
LFDUX yl3, YL, INCY
LFSDUX a6, YL, INCY
fxcpmadd ys4, alpha1, a13, a8
LFDUX yl4, YL, INCY
LFSDUX a8, YL, INCY
LFPDUX a1, A1, INC2
LFPDUX a5, A1, INC2
LFPDUX a9, A1, INC2
LFPDUX a13, A1, INC2
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFSDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
STFSDUX ys4, YS, INCY
bdnz .L92
.align 4
.L93:
/* Drain: compute and scatter the last block of eight rows. */
fmr a2, yl1
fmr a4, yl2
fmr a6, yl3
fmr a8, yl4
fxcpmadd ys1, alpha1, a1, a2
fxcpmadd ys2, alpha1, a5, a4
fxcpmadd ys3, alpha1, a9, a6
fxcpmadd ys4, alpha1, a13, a8
STFDUX ys1, YS, INCY
STFSDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFSDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFSDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
STFSDUX ys4, YS, INCY
.align 4
.L95:
/* Row remainder (M mod 8). For four and two rows the multiply operands */
/* are swapped relative to the code above: the A pair (a1 or a2) is the */
/* first operand, with fxcpmadd picking its primary half and fxcsmadd its */
/* secondary half, so each Y element gets its own result register and */
/* only primary halves are stored. */
andi. r0, M, 7
ble .L999
andi. r0, M, 4
ble .L97
LFPDUX a1, A1, INC2
LFDUX yl1, YL, INCY
LFDUX yl2, YL, INCY
LFPDUX a2, A1, INC2
LFDUX yl3, YL, INCY
LFDUX yl4, YL, INCY
fxcpmadd ys1, a1, alpha1, yl1
fxcsmadd ys2, a1, alpha1, yl2
fxcpmadd ys3, a2, alpha1, yl3
fxcsmadd ys4, a2, alpha1, yl4
STFDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
STFDUX ys3, YS, INCY
STFDUX ys4, YS, INCY
.align 4
.L97:
andi. r0, M, 2
ble .L98
LFPDUX a1, A1, INC2
LFDUX yl1, YL, INCY
LFDUX yl2, YL, INCY
fxcpmadd ys1, a1, alpha1, yl1
fxcsmadd ys2, a1, alpha1, yl2
STFDUX ys1, YS, INCY
STFDUX ys2, YS, INCY
.align 4
.L98:
/* Last odd row. */
andi. r0, M, 1
ble .L999
LFDUX yl1, YL, INCY
LFDUX a1, A1, INC2
fxcpmadd ys1, alpha1, a1, yl1
STFDUX ys1, YS, INCY
b .L999
/* Common exit: undo the register saves made at entry, in reverse order, */
/* and return. Reached from every path, including the early exits taken */
/* when M or N is not positive. */
.align 4
.L999:
/* Back SP up one word so the first update-form load (4(SP)) reads the */
/* last word pushed at entry, which is r16. */
addi SP, SP, -4
lwzu r16, 4(SP)
lwzu r17, 4(SP)
lwzu r18, 4(SP)
lwzu r19, 4(SP)
lwzu r20, 4(SP)
lwzu r21, 4(SP)
lwzu r22, 4(SP)
lwzu r23, 4(SP)
lwzu r24, 4(SP)
lwzu r25, 4(SP)
lwzu r26, 4(SP)
lwzu r27, 4(SP)
lwzu r28, 4(SP)
lwzu r29, 4(SP)
lwzu r30, 4(SP)
lwzu r31, 4(SP)
/* SP now points at the r31 slot. Minus 12 plus the 16-byte step of the */
/* first pair load lands on the f31 save slot just above it. */
subi SP, SP, 12
li r0, 16
lfpdux f31, SP, r0
lfpdux f30, SP, r0
lfpdux f29, SP, r0
lfpdux f28, SP, r0
lfpdux f27, SP, r0
lfpdux f26, SP, r0
lfpdux f25, SP, r0
lfpdux f24, SP, r0
lfpdux f23, SP, r0
lfpdux f22, SP, r0
lfpdux f21, SP, r0
lfpdux f20, SP, r0
lfpdux f19, SP, r0
lfpdux f18, SP, r0
lfpdux f17, SP, r0
lfpdux f16, SP, r0
lfpdux f15, SP, r0
lfpdux f14, SP, r0
/* Step past the f14 slot so SP returns to its value at entry. */
addi SP, SP, 16
blr
EPILOGUE