/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
/* Stack-frame byte offsets (relative to SP after the prologue below). */
#define ALPHA 0
#define FZERO 8
/* Problem dimensions (PowerPC 32-bit argument registers r3..r5). */
#define M r3
#define N r4
#define K r5
#ifdef linux
/* Remaining arguments on Linux: matrix pointers, leading dimension, offset. */
#define A r6
#define B r7
#define C r8
#define LDC r9
#define OFFSET r10
#endif
/* Scratch and loop-state registers. */
#define TEMP r11
#define AORIG r12
#define KK r14
/* Pre-built address increments in elements (scaled by SIZE at load time). */
#define INCM1 r15
#define INCM4 r16
#define INCM2 r17
#define INC2 r19
#define INC r20
#define INC4 r21
/* Loop counters and per-panel working pointers (callee-saved r22..r31). */
#define I r22
#define J r23
#define AO r24
#define BO r25
#define AO2 r26
#define BO2 r27
#define CO1 r28
#define CO2 r29
#define CO3 r30
#define CO4 r31
#ifndef NEEDPARAM
/* FP register aliases used as paired-single load buffers (f16..f31). */
#define A1 f16
#define A2 f17
#define A3 f18
#define A4 f19
#define A5 f20
#define A6 f21
#define A7 f22
#define A8 f23
#define A9 f24
#define A10 f25
#define B1 f26
#define B2 f27
#define B3 f28
#define B4 f29
#define B5 f30
#define B6 f31
/* AP shares a register with B6 (only one is live at a time). */
#define AP B6
/* Entry: appears to be an OpenBLAS trsm-style kernel for PPC440 FP2
   (Blue Gene double FPU) — LN/LT/RN/RT conditionals select the variant.
   Spill callee-saved FP pairs f14..f31 (16 bytes each via stfpdux). */
PROLOGUE
PROFCODE
li r0, -16
stfpdux f14, SP, r0
stfpdux f15, SP, r0
stfpdux f16, SP, r0
stfpdux f17, SP, r0
stfpdux f18, SP, r0
stfpdux f19, SP, r0
stfpdux f20, SP, r0
stfpdux f21, SP, r0
stfpdux f22, SP, r0
stfpdux f23, SP, r0
stfpdux f24, SP, r0
stfpdux f25, SP, r0
stfpdux f26, SP, r0
stfpdux f27, SP, r0
stfpdux f28, SP, r0
stfpdux f29, SP, r0
stfpdux f30, SP, r0
stfpdux f31, SP, r0
/* Spill callee-saved integer registers r14..r31 (4 bytes each, 32-bit ABI). */
stwu r31, -4(SP)
stwu r30, -4(SP)
stwu r29, -4(SP)
stwu r28, -4(SP)
stwu r27, -4(SP)
stwu r26, -4(SP)
stwu r25, -4(SP)
stwu r24, -4(SP)
stwu r23, -4(SP)
stwu r22, -4(SP)
stwu r21, -4(SP)
stwu r20, -4(SP)
stwu r19, -4(SP)
stwu r18, -4(SP)
stwu r17, -4(SP)
stwu r16, -4(SP)
stwu r15, -4(SP)
stwu r14, -4(SP) # dummy
/* Push an 8-byte zero pair (read back later via FZERO to clear
   accumulators) and alpha (f1) at offset ALPHA. */
li r0, 0
stwu r0, -4(SP)
stwu r0, -4(SP)
stfdu f1, -8(SP)
/* Scale LDC to bytes; bail out if any dimension is <= 0. */
slwi LDC, LDC, BASE_SHIFT
cmpwi cr0, M, 0
ble .L999
cmpwi cr0, N, 0
ble .L999
cmpwi cr0, K, 0
ble .L999
/* Materialize the element-sized pointer increments used by LFPDUX/STFDUX. */
li INC, 1 * SIZE
li INC2, 2 * SIZE
li INC4, 4 * SIZE
li INCM1, -1 * SIZE
li INCM2, -2 * SIZE
li INCM4, -4 * SIZE
/* Bias C by one element so pre-increment stores land on element 0. */
addi C, C, - 1 * SIZE
#ifdef LN
/* LN: start from the far end of A and of each C row. */
mullw r0, M, K
slwi r0, r0, BASE_SHIFT
add A, A, r0
slwi r0, M, BASE_SHIFT
add C, C, r0
#endif
#ifdef RN
neg KK, OFFSET
#endif
#ifdef RT
/* RT: start from the far end of B and C; KK counts down from N-OFFSET. */
mullw r0, N, K
slwi r0, r0, BASE_SHIFT
add B, B, r0
mullw r0, N, LDC
add C, C, r0
sub KK, N, OFFSET
#endif
/* Outer loop over column panels of width 4: J = N >> 2. */
srawi. J, N, 2
ble .L50
.align 4
/* .L10: head of the J loop (one 4-column panel of C per iteration). */
.L10:
#ifdef RT
/* RT walks B and C backwards: step back 4 columns before processing. */
slwi r0, K, 2 + BASE_SHIFT
sub B, B, r0
slwi r0, LDC, 2
sub C, C, r0
#endif
/* Per-panel column pointers CO1..CO4 (consecutive columns of C). */
mr CO1, C
add CO2, C, LDC
add CO3, CO2, LDC
add CO4, CO3, LDC
#ifdef LN
add KK, M, OFFSET
#endif
#ifdef LT
mr KK, OFFSET
#endif
#if defined(LN) || defined(RT)
/* Backward variants keep a fixed AORIG and recompute AO per tile. */
addi AORIG, A, -4 * SIZE
#else
addi AO, A, -4 * SIZE
#endif
#ifndef RT
add C, CO4, LDC
#endif
/* Clear the accumulator pair f0 from the zero slot pushed in the prologue. */
li r0, FZERO
lfpsx f0, SP, r0
/* Remainder-first ordering: handle the M&1 row tile before wider tiles. */
andi. I, M, 1
beq .L20
#if defined(LT) || defined(RN)
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
/* CTR = KK/8: the M=1 inner loop is unrolled 8x in K. */
srawi. r0, KK, 3
mtspr CTR, r0
ble .L44
#else
#ifdef LN
slwi r0, K, 0 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
/* Recompute AO/BO for this tile from AORIG/B and the KK offset. */
slwi r0 , KK, 0 + BASE_SHIFT
slwi TEMP, KK, 2 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, BO, - 4 * SIZE
fpmr f2, f0
addi BO2, BO, 2 * SIZE
fpmr f3, f0
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L44
#endif
/* Preload the first 8 K-steps of A (1 row) and B (4 columns, split
   across BO/BO2 streams). */
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
LFPDUX A2, AO2, INC4
LFPDUX B3, BO, INC4
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A5, BO, INC4
LFPDUX A6, BO2, INC4
LFPDUX A4, AO2, INC4
LFPDUX A7, BO, INC4
LFPDUX A8, BO2, INC4
bdz- .L43
.align 4
/* .L42: M=1 main loop, 8 K-steps per trip. Paired FMAs (fxcpmadd uses
   the primary half of the B operand, fxcsmadd the secondary half)
   accumulate the 1x4 dot products into f0..f3; loads for the next trip
   are interleaved with the arithmetic. */
.L42:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A1, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A1, B4, f3
LFPDUX B4, BO2, INC4
LFPDUX A1, AO, INC4
fxcpmadd f0, A2, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A2, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A2, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A2, A8, f3
LFPDUX A8, BO2, INC4
LFPDUX A2, AO2, INC4
fxcpmadd f0, A3, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A3, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A3, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A3, B4, f3
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
fxcpmadd f0, A4, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A4, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A4, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A4, A8, f3
LFPDUX A8, BO2, INC4
LFPDUX A4, AO2, INC4
bdnz+ .L42
.align 4
/* .L43: pipeline drain — same arithmetic as .L42 but the final group
   issues no further loads. */
.L43:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A1, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A1, B4, f3
LFPDUX B4, BO2, INC4
fxcpmadd f0, A2, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A2, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A2, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A2, A8, f3
LFPDUX A8, BO2, INC4
fxcpmadd f0, A3, B1, f0
fxcpmadd f1, A3, B2, f1
fxcsmadd f2, A3, B3, f2
fxcsmadd f3, A3, B4, f3
fxcpmadd f0, A4, A5, f0
fxcpmadd f1, A4, A6, f1
fxcsmadd f2, A4, A7, f2
fxcsmadd f3, A4, A8, f3
.align 4
/* .L44: K-remainder for the M=1 tile (KK or TEMP modulo 8), one
   K-step per trip. */
.L44:
#if defined(LT) || defined(RN)
andi. r0, KK, 7
mtspr CTR, r0
ble+ .L48
#else
andi. r0, TEMP, 7
mtspr CTR, r0
ble+ .L48
#endif
/* Scalar A load (LFDX, one element) paired with full B loads;
   AO advances by one element per step. */
LFDX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC
bdz- .L47
.align 4
.L46:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFDX A1, AO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC
bdnz+ .L46
.align 4
/* .L47: last remainder step (no further loads). */
.L47:
fxcpmadd f0, A1, B1, f0
fxcpmadd f1, A1, B2, f1
addi AO2, AO, 2 * SIZE
.align 4
/* .L48: reduce the split accumulators, then perform the triangular
   substitution for this 1x4 tile and store results to B/A work areas
   and to C. */
.L48:
fpadd f0, f0, f2
fpadd f1, f1, f3
#if defined(LN) || defined(RT)
/* Recompute AO/BO to point at the current diagonal block. */
#ifdef LN
subi r0, KK, 1
#else
subi r0, KK, 4
#endif
slwi TEMP, r0, 0 + BASE_SHIFT
slwi r0, r0, 2 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
addi BO, BO, - 4 * SIZE
addi BO2, BO, 2 * SIZE
#endif
/* Load the packed right-hand side and form (rhs - accumulated sum). */
#if defined(LN) || defined(LT)
LFPDX f16, BO, INC4
LFPDX f17, BO2, INC4
fpsub f0, f16, f0
fpsub f1, f17, f1
#else
LFPDX f16, AO, INC4
LFPDX f17, AO2, INC4
fpsub f0, f16, f0
fpsub f1, f17, f1
#endif
#if defined(LN) || defined(LT)
/* 1x1 diagonal on the A side: a single paired multiply. */
LFPDX A1, AO, INC4
fxpmul f0, A1, f0
fxpmul f1, A1, f1
#endif
#ifdef RN
/* RN: forward substitution with the 4x4 upper-triangular block of B
   (scalar elements loaded individually; offsets skip the zero part). */
LFD A1, (4 + 0) * SIZE(BO)
LFD A2, (4 + 1) * SIZE(BO)
LFD A3, (4 + 2) * SIZE(BO)
LFD A4, (4 + 3) * SIZE(BO)
LFD A5, (4 + 5) * SIZE(BO)
LFD A6, (4 + 6) * SIZE(BO)
LFD A7, (4 + 7) * SIZE(BO)
LFD A8, (4 + 10) * SIZE(BO)
LFD A9, (4 + 11) * SIZE(BO)
LFD A10, (4 + 15) * SIZE(BO)
/* Split the pairs into scalars f0..f3, substitute, then re-pair. */
fsmtp f2, f0
fsmtp f3, f1
fmul f0, A1, f0
fnmsub f2, A2, f0, f2
fnmsub f1, A3, f0, f1
fnmsub f3, A4, f0, f3
fmul f2, A5, f2
fnmsub f1, A6, f2, f1
fnmsub f3, A7, f2, f3
fmul f1, A8, f1
fnmsub f3, A9, f1, f3
fmul f3, A10, f3
fsmfp f0, f2
fsmfp f1, f3
#endif
#ifdef RT
/* RT: backward substitution, mirroring the RN sequence from the
   bottom-right element of the 4x4 block. */
LFD A1, (4 + 15) * SIZE(BO)
LFD A2, (4 + 14) * SIZE(BO)
LFD A3, (4 + 13) * SIZE(BO)
LFD A4, (4 + 12) * SIZE(BO)
LFD A5, (4 + 10) * SIZE(BO)
LFD A6, (4 + 9) * SIZE(BO)
LFD A7, (4 + 8) * SIZE(BO)
LFD A8, (4 + 5) * SIZE(BO)
LFD A9, (4 + 4) * SIZE(BO)
LFD A10, (4 + 0) * SIZE(BO)
fsmtp f2, f0
fsmtp f3, f1
fmul f3, A1, f3
fnmsub f1, A2, f3, f1
fnmsub f2, A3, f3, f2
fnmsub f0, A4, f3, f0
fmul f1, A5, f1
fnmsub f2, A6, f1, f2
fnmsub f0, A7, f1, f0
fmul f2, A8, f2
fnmsub f0, A9, f2, f0
fmul f0, A10, f0
fsmfp f0, f2
fsmfp f1, f3
#endif
/* Write the solved tile back to the packed buffer it came from. */
#if defined(LN) || defined(LT)
STFPDX f0, BO, INC4
STFPDX f1, BO2, INC4
#else
STFPDX f0, AO, INC4
STFPDX f1, AO2, INC4
#endif
#ifdef LN
subi CO1, CO1, 1 * SIZE
subi CO2, CO2, 1 * SIZE
subi CO3, CO3, 1 * SIZE
subi CO4, CO4, 1 * SIZE
#endif
/* Scatter the four results into columns CO1..CO4 (STFDX = primary
   half, STFSDX = secondary half of the pair). */
STFDX f0, CO1, INC
STFSDX f0, CO2, INC
STFDX f1, CO3, INC
STFSDX f1, CO4, INC
#ifdef RT
slwi r0, K, 0 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
/* Advance AO/BO past the untouched K-KK tail of this tile. */
sub TEMP, K, KK
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 1
#endif
#ifdef LN
subi KK, KK, 1
#endif
/* Re-zero the accumulator for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/* .L20: M&2 row tile — same structure as the M=1 case but with 2 rows,
   accumulating the 2x4 tile into f0/f4/f8/f12 and unrolled 4x in K. */
.L20:
andi. I, M, 2
beq .L30
#if defined(LT) || defined(RN)
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
srawi. r0, KK, 2
mtspr CTR, r0
ble .L34
#else
#ifdef LN
slwi r0, K, 1 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
/* Recompute AO/BO for this tile from AORIG/B and the KK offset. */
slwi r0 , KK, 1 + BASE_SHIFT
slwi TEMP, KK, 2 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
srawi. r0, TEMP, 2
mtspr CTR, r0
ble .L34
#endif
/* Preload 4 K-steps of the 2-row A stream and the 4-column B streams. */
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
LFPDUX A2, AO2, INC4
LFPDUX B3, BO, INC4
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A5, BO, INC4
LFPDUX A6, BO2, INC4
LFPDUX A4, AO2, INC4
LFPDUX A7, BO, INC4
LFPDUX A8, BO2, INC4
bdz- .L33
.align 4
/* .L32: main 2x4 loop, 4 K-steps per trip, loads interleaved. */
.L32:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX B1, BO, INC4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
LFPDUX B2, BO2, INC4
LFPDUX A1, AO, INC4
fxcpmadd f0, B3, A2, f0
fxcsmadd f4, B3, A2, f4
LFPDUX B3, BO, INC4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX B4, BO2, INC4
LFPDUX A2, AO2, INC4
fxcpmadd f0, A5, A3, f0
fxcsmadd f4, A5, A3, f4
LFPDUX A5, BO, INC4
fxcpmadd f8, A6, A3, f8
fxcsmadd f12, A6, A3, f12
LFPDUX A6, BO2, INC4
LFPDUX A3, AO, INC4
fxcpmadd f0, A7, A4, f0
fxcsmadd f4, A7, A4, f4
LFPDUX A7, BO, INC4
fxcpmadd f8, A8, A4, f8
fxcsmadd f12, A8, A4, f12
LFPDUX A8, BO2, INC4
LFPDUX A4, AO2, INC4
bdnz+ .L32
.align 4
/* .L33: pipeline drain — final 4 K-steps without further loads. */
.L33:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f0, B3, A2, f0
fxcsmadd f4, B3, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
fxcpmadd f0, A5, A3, f0
fxcsmadd f4, A5, A3, f4
fxcpmadd f8, A6, A3, f8
fxcsmadd f12, A6, A3, f12
fxcpmadd f0, A7, A4, f0
fxcsmadd f4, A7, A4, f4
fxcpmadd f8, A8, A4, f8
fxcsmadd f12, A8, A4, f12
.align 4
/* .L34: K-remainder (mod 4) for the 2-row tile, one K-step per trip. */
.L34:
#if defined(LT) || defined(RN)
andi. r0, KK, 3
mtspr CTR, r0
ble+ .L38
#else
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L38
#endif
LFPDX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC2
bdz- .L37
.align 4
.L36:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX B1, BO, INC4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
LFPDX A1, AO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC2
bdnz+ .L36
.align 4
.L37:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
.align 4
/* .L38: triangular solve for the 2x4 tile. */
.L38:
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 2
#else
subi r0, KK, 4
#endif
slwi TEMP, r0, 1 + BASE_SHIFT
slwi r0, r0, 2 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
addi BO, BO, - 4 * SIZE
addi BO2, BO, 2 * SIZE
#endif
#if defined(LN) || defined(LT)
/* Transpose the paired accumulators (swap primary/secondary halves)
   so rows line up with the packed-B layout, then rhs - sum. */
fpmr f24, f0
fpmr f28, f8
fsmfp f0, f4
fsmfp f8, f12
fsmtp f4, f24
fsmtp f12, f28
LFPDUX f16, BO, INC4
LFPDUX f17, BO2, INC4
LFPDUX f18, BO, INC4
LFPDUX f19, BO2, INC4
subi BO, BO, 8 * SIZE
subi BO2, BO2, 8 * SIZE
fpsub f0, f16, f0
fpsub f8, f17, f8
fpsub f4, f18, f4
fpsub f12, f19, f12
#else
LFPDUX f16, AO, INC4
LFPDUX f17, AO2, INC4
LFPDUX f18, AO, INC4
LFPDUX f19, AO2, INC4
subi AO, AO, 8 * SIZE
subi AO2, AO2, 8 * SIZE
fpsub f0, f16, f0
fpsub f4, f17, f4
fpsub f8, f18, f8
fpsub f12, f19, f12
#endif
#ifdef LN
/* LN: backward substitution with the 2x2 block of A (read backwards). */
addi AO, AO, 8 * SIZE
addi AO2, AO2, 8 * SIZE
LFPDUX A1, AO2, INCM4
LFPDUX A2, AO, INCM4
addi AO, AO, -4 * SIZE
addi AO2, AO2, -4 * SIZE
fxsmul f4, A1, f4
fxsmul f12, A1, f12
fxcpnmsub f0, A1, f4, f0
fxcpnmsub f8, A1, f12, f8
fxpmul f0, A2, f0
fxpmul f8, A2, f8
#endif
#ifdef LT
/* LT: forward substitution with the 2x2 block of A. */
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
subi AO, AO, 4 * SIZE
subi AO2, AO2, 4 * SIZE
fxpmul f0, A1, f0
fxpmul f8, A1, f8
fxcsnmsub f4, A1, f0, f4
fxcsnmsub f12, A1, f8, f12
fxsmul f4, A2, f4
fxsmul f12, A2, f12
#endif
#ifdef RN
/* RN: forward substitution with the 4x4 block of B (paired loads). */
LFPDUX A1, BO, INC4
LFPDUX A2, BO2, INC4
LFPDUX A3, BO, INC4
LFPDUX A4, BO2, INC4
add BO, BO, INC4
LFPDUX A5, BO2, INC4
add BO, BO, INC4
LFPDUX A6, BO2, INC4
subi BO, BO, 16 * SIZE
subi BO2, BO2, 16 * SIZE
fxpmul f0, A1, f0
fxcsnmsub f4, A1, f0, f4
fxcpnmsub f8, A2, f0, f8
fxcsnmsub f12, A2, f0, f12
fxsmul f4, A3, f4
fxcpnmsub f8, A4, f4, f8
fxcsnmsub f12, A4, f4, f12
fxpmul f8, A5, f8
fxcsnmsub f12, A5, f8, f12
fxsmul f12, A6, f12
#endif
#ifdef RT
/* RT: backward substitution with the 4x4 block of B (read backwards). */
addi BO, BO, 20 * SIZE
addi BO2, BO2, 20 * SIZE
LFPDUX A1, BO2, INCM4
LFPDUX A2, BO, INCM4
LFPDUX A3, BO2, INCM4
LFPDUX A4, BO, INCM4
add BO2, BO2, INCM4
LFPDUX A5, BO, INCM4
add BO2, BO2, INCM4
LFPDUX A6, BO, INCM4
subi BO, BO, 4 * SIZE
subi BO2, BO2, 4 * SIZE
fxsmul f12, A1, f12
fxcpnmsub f8, A1, f12, f8
fxcsnmsub f4, A2, f12, f4
fxcpnmsub f0, A2, f12, f0
fxpmul f8, A3, f8
fxcsnmsub f4, A4, f8, f4
fxcpnmsub f0, A4, f8, f0
fxsmul f4, A5, f4
fxcpnmsub f0, A5, f4, f0
fxpmul f0, A6, f0
#endif
#ifdef LN
/* LN stores backwards: step CO pointers back before the stores. */
subi CO1, CO1, 2 * SIZE
subi CO2, CO2, 2 * SIZE
subi CO3, CO3, 2 * SIZE
subi CO4, CO4, 2 * SIZE
#endif
/* Write the solved tile back to the packed buffer and to C. */
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC4
STFPDUX f8, BO2, INC4
STFPDUX f4, BO, INC4
STFPDUX f12, BO2, INC4
subi BO, BO, 8 * SIZE
subi BO2, BO2, 8 * SIZE
STFDUX f0, CO1, INC
STFDUX f4, CO1, INC
STFSDUX f0, CO2, INC
STFSDUX f4, CO2, INC
STFDUX f8, CO3, INC
STFDUX f12, CO3, INC
STFSDUX f8, CO4, INC
STFSDUX f12, CO4, INC
#else
STFPDUX f0, AO, INC4
STFPDUX f4, AO2, INC4
STFPDUX f8, AO, INC4
STFPDUX f12, AO2, INC4
subi AO, AO, 8 * SIZE
subi AO2, AO2, 8 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
STFDUX f8, CO3, INC
STFSDUX f8, CO3, INC
STFDUX f12, CO4, INC
STFSDUX f12, CO4, INC
#endif
#ifdef LN
/* Undo the post-increment of the stores (LN keeps CO at tile start). */
subi CO1, CO1, 2 * SIZE
subi CO2, CO2, 2 * SIZE
subi CO3, CO3, 2 * SIZE
subi CO4, CO4, 2 * SIZE
#endif
#ifdef RT
slwi r0, K, 1 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 2
#endif
#ifdef LN
subi KK, KK, 2
#endif
/* Re-zero the accumulator for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/* .L30: M&4 row tile — 4x4 accumulator in f0/f1/f4/f5/f8/f9/f12/f13,
   unrolled 4x in K; structure parallels the M&2 case above. */
.L30:
andi. I, M, 4
beq .L40
#if defined(LT) || defined(RN)
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
srawi. r0, KK, 2
fpmr f1, f0
fpmr f5, f0
fpmr f9, f0
mtspr CTR, r0
fpmr f13, f0
ble .L24
#else
#ifdef LN
slwi r0, K, 2 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
/* Recompute AO/BO for this tile from AORIG/B and the KK offset. */
slwi r0 , KK, 2 + BASE_SHIFT
slwi TEMP, KK, 2 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
srawi. r0, TEMP, 2
fpmr f1, f0
fpmr f5, f0
fpmr f9, f0
mtspr CTR, r0
fpmr f13, f0
ble .L24
#endif
/* Preload ~4 K-steps of the 4-row A stream and the B streams. */
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX A2, AO2, INC4
LFPDUX B2, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX B3, BO, INC4
LFPDUX A4, AO2, INC4
LFPDUX B4, BO2, INC4
LFPDUX A5, AO, INC4
LFPDUX B5, BO, INC4
LFPDUX A6, AO2, INC4
LFPDUX B6, BO2, INC4
LFPDUX A7, AO, INC4
LFPDUX A9, BO, INC4
LFPDUX A10, BO2, INC4
bdz- .L23
.align 4
/* .L22: main 4x4 loop, 4 K-steps per trip; nops pad the issue slots. */
.L22:
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
LFPDUX A8, AO2, INC4
fxcpmadd f8, B2, A1, f8
nop
fxcsmadd f12, B2, A1, f12
LFPDUX A1, AO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX B1, BO, INC4
fxcpmadd f9, B2, A2, f9
nop
fxcsmadd f13, B2, A2, f13
LFPDUX B2, BO2, INC4
fxcpmadd f0, B3, A3, f0
nop
fxcsmadd f4, B3, A3, f4
LFPDUX A2, AO2, INC4
fxcpmadd f8, B4, A3, f8
nop
fxcsmadd f12, B4, A3, f12
LFPDUX A3, AO, INC4
fxcpmadd f1, B3, A4, f1
nop
fxcsmadd f5, B3, A4, f5
LFPDUX B3, BO, INC4
fxcpmadd f9, B4, A4, f9
nop
fxcsmadd f13, B4, A4, f13
LFPDUX B4, BO2, INC4
fxcpmadd f0, B5, A5, f0
nop
fxcsmadd f4, B5, A5, f4
LFPDUX A4, AO2, INC4
fxcpmadd f8, B6, A5, f8
nop
fxcsmadd f12, B6, A5, f12
LFPDUX A5, AO, INC4
fxcpmadd f1, B5, A6, f1
nop
fxcsmadd f5, B5, A6, f5
LFPDUX B5, BO, INC4
fxcpmadd f9, B6, A6, f9
nop
fxcsmadd f13, B6, A6, f13
LFPDUX B6, BO2, INC4
fxcpmadd f0, A9, A7, f0
nop
fxcsmadd f4, A9, A7, f4
LFPDUX A6, AO2, INC4
fxcpmadd f8, A10, A7, f8
nop
fxcsmadd f12, A10, A7, f12
LFPDUX A7, AO, INC4
fxcpmadd f1, A9, A8, f1
nop
fxcsmadd f5, A9, A8, f5
LFPDUX A9, BO, INC4
fxcpmadd f9, A10, A8, f9
nop
fxcsmadd f13, A10, A8, f13
LFPDUX A10, BO2, INC4
bdnz+ .L22
.align 4
/* .L23: pipeline drain — final 4 K-steps, only the one pending load. */
.L23:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX A8, AO2, INC4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
fxcpmadd f0, B3, A3, f0
fxcsmadd f4, B3, A3, f4
fxcpmadd f8, B4, A3, f8
fxcsmadd f12, B4, A3, f12
fxcpmadd f1, B3, A4, f1
fxcsmadd f5, B3, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
fxcpmadd f0, B5, A5, f0
fxcsmadd f4, B5, A5, f4
fxcpmadd f8, B6, A5, f8
fxcsmadd f12, B6, A5, f12
fxcpmadd f1, B5, A6, f1
fxcsmadd f5, B5, A6, f5
fxcpmadd f9, B6, A6, f9
fxcsmadd f13, B6, A6, f13
fxcpmadd f0, A9, A7, f0
fxcsmadd f4, A9, A7, f4
fxcpmadd f8, A10, A7, f8
fxcsmadd f12, A10, A7, f12
fxcpmadd f1, A9, A8, f1
fxcsmadd f5, A9, A8, f5
fxcpmadd f9, A10, A8, f9
fxcsmadd f13, A10, A8, f13
.align 4
/* .L24: K-remainder (mod 4) for the 4-row tile. */
.L24:
#if defined(LT) || defined(RN)
andi. r0, KK, 3
mtspr CTR, r0
ble+ .L28
#else
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L28
#endif
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
bdz- .L27
.align 4
.L26:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
LFPDUX A1, AO, INC4
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
LFPDUX B1, BO, INC4
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
LFPDUX A2, AO2, INC4
LFPDUX B2, BO2, INC4
bdnz+ .L26
.align 4
.L27:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
.align 4
/* .L28: triangular solve for the 4x4 tile. */
.L28:
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 4
#else
subi r0, KK, 4
#endif
slwi TEMP, r0, 2 + BASE_SHIFT
slwi r0, r0, 2 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
addi BO, BO, - 4 * SIZE
addi BO2, BO, 2 * SIZE
#endif
#if defined(LN) || defined(LT)
/* Transpose the paired accumulators to match packed-B layout,
   then rhs - sum. */
fpmr f24, f0
fpmr f25, f1
fpmr f28, f8
fpmr f29, f9
fsmfp f0, f4
fsmfp f1, f5
fsmfp f8, f12
fsmfp f9, f13
fsmtp f4, f24
fsmtp f5, f25
fsmtp f12, f28
fsmtp f13, f29
LFPDUX f16, BO, INC4
LFPDUX f17, BO2, INC4
LFPDUX f18, BO, INC4
LFPDUX f19, BO2, INC4
LFPDUX f20, BO, INC4
LFPDUX f21, BO2, INC4
LFPDUX f22, BO, INC4
LFPDUX f23, BO2, INC4
subi BO, BO, 16 * SIZE
subi BO2, BO2, 16 * SIZE
fpsub f0, f16, f0
fpsub f8, f17, f8
fpsub f4, f18, f4
fpsub f12, f19, f12
fpsub f1, f20, f1
fpsub f9, f21, f9
fpsub f5, f22, f5
fpsub f13, f23, f13
#else
LFPDUX f16, AO, INC4
LFPDUX f17, AO2, INC4
LFPDUX f18, AO, INC4
LFPDUX f19, AO2, INC4
LFPDUX f20, AO, INC4
LFPDUX f21, AO2, INC4
LFPDUX f22, AO, INC4
LFPDUX f23, AO2, INC4
subi AO, AO, 16 * SIZE
subi AO2, AO2, 16 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
fpsub f4, f18, f4
fpsub f5, f19, f5
fpsub f8, f20, f8
fpsub f9, f21, f9
fpsub f12, f22, f12
fpsub f13, f23, f13
#endif
#ifdef LN
/* LN: backward substitution with the 4x4 block of A (read backwards). */
addi AO, AO, 20 * SIZE
addi AO2, AO2, 20 * SIZE
LFPDUX A1, AO2, INCM4
LFPDUX A2, AO, INCM4
LFPDUX A3, AO2, INCM4
LFPDUX A4, AO, INCM4
add AO2, AO2, INCM4
LFPDUX A5, AO, INCM4
add AO2, AO2, INCM4
LFPDUX A6, AO, INCM4
addi AO, AO, -4 * SIZE
addi AO2, AO2, -4 * SIZE
fxsmul f5, A1, f5
fxsmul f13, A1, f13
fxcpnmsub f1, A1, f5, f1
fxcpnmsub f9, A1, f13, f9
fxcsnmsub f4, A2, f5, f4
fxcsnmsub f12, A2, f13, f12
fxcpnmsub f0, A2, f5, f0
fxcpnmsub f8, A2, f13, f8
fxpmul f1, A3, f1
fxpmul f9, A3, f9
fxcsnmsub f4, A4, f1, f4
fxcsnmsub f12, A4, f9, f12
fxcpnmsub f0, A4, f1, f0
fxcpnmsub f8, A4, f9, f8
fxsmul f4, A5, f4
fxsmul f12, A5, f12
fxcpnmsub f0, A5, f4, f0
fxcpnmsub f8, A5, f12, f8
fxpmul f0, A6, f0
fxpmul f8, A6, f8
#endif
#ifdef LT
/* LT: forward substitution with the 4x4 block of A. */
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A4, AO2, INC4
add AO, AO, INC4
LFPDUX A5, AO2, INC4
add AO, AO, INC4
LFPDUX A6, AO2, INC4
subi AO, AO, 16 * SIZE
subi AO2, AO2, 16 * SIZE
fxpmul f0, A1, f0
fxpmul f8, A1, f8
fxcsnmsub f4, A1, f0, f4
fxcsnmsub f12, A1, f8, f12
fxcpnmsub f1, A2, f0, f1
fxcpnmsub f9, A2, f8, f9
fxcsnmsub f5, A2, f0, f5
fxcsnmsub f13, A2, f8, f13
fxsmul f4, A3, f4
fxsmul f12, A3, f12
fxcpnmsub f1, A4, f4, f1
fxcpnmsub f9, A4, f12, f9
fxcsnmsub f5, A4, f4, f5
fxcsnmsub f13, A4, f12, f13
fxpmul f1, A5, f1
fxpmul f9, A5, f9
fxcsnmsub f5, A5, f1, f5
fxcsnmsub f13, A5, f9, f13
fxsmul f5, A6, f5
fxsmul f13, A6, f13
#endif
#ifdef RN
/* RN: forward substitution with the 4x4 block of B. */
LFPDUX A1, BO, INC4
LFPDUX A2, BO2, INC4
LFPDUX A3, BO, INC4
LFPDUX A4, BO2, INC4
add BO, BO, INC4
LFPDUX A5, BO2, INC4
add BO, BO, INC4
LFPDUX A6, BO2, INC4
subi BO, BO, 16 * SIZE
subi BO2, BO2, 16 * SIZE
fxpmul f0, A1, f0
fxpmul f1, A1, f1
fxcsnmsub f4, A1, f0, f4
fxcsnmsub f5, A1, f1, f5
fxcpnmsub f8, A2, f0, f8
fxcpnmsub f9, A2, f1, f9
fxcsnmsub f12, A2, f0, f12
fxcsnmsub f13, A2, f1, f13
fxsmul f4, A3, f4
fxsmul f5, A3, f5
fxcpnmsub f8, A4, f4, f8
fxcpnmsub f9, A4, f5, f9
fxcsnmsub f12, A4, f4, f12
fxcsnmsub f13, A4, f5, f13
fxpmul f8, A5, f8
fxpmul f9, A5, f9
fxcsnmsub f12, A5, f8, f12
fxcsnmsub f13, A5, f9, f13
fxsmul f12, A6, f12
fxsmul f13, A6, f13
#endif
#ifdef RT
/* RT: backward substitution with the 4x4 block of B (read backwards). */
addi BO, BO, 20 * SIZE
addi BO2, BO2, 20 * SIZE
LFPDUX A1, BO2, INCM4
LFPDUX A2, BO, INCM4
LFPDUX A3, BO2, INCM4
LFPDUX A4, BO, INCM4
add BO2, BO2, INCM4
LFPDUX A5, BO, INCM4
add BO2, BO2, INCM4
LFPDUX A6, BO, INCM4
subi BO, BO, 4 * SIZE
subi BO2, BO2, 4 * SIZE
fxsmul f12, A1, f12
fxsmul f13, A1, f13
fxcpnmsub f8, A1, f12, f8
fxcpnmsub f9, A1, f13, f9
fxcsnmsub f4, A2, f12, f4
fxcsnmsub f5, A2, f13, f5
fxcpnmsub f0, A2, f12, f0
fxcpnmsub f1, A2, f13, f1
fxpmul f8, A3, f8
fxpmul f9, A3, f9
fxcsnmsub f4, A4, f8, f4
fxcsnmsub f5, A4, f9, f5
fxcpnmsub f0, A4, f8, f0
fxcpnmsub f1, A4, f9, f1
fxsmul f4, A5, f4
fxsmul f5, A5, f5
fxcpnmsub f0, A5, f4, f0
fxcpnmsub f1, A5, f5, f1
fxpmul f0, A6, f0
fxpmul f1, A6, f1
#endif
#ifdef LN
/* LN stores backwards: step CO pointers back before the stores. */
subi CO1, CO1, 4 * SIZE
subi CO2, CO2, 4 * SIZE
subi CO3, CO3, 4 * SIZE
subi CO4, CO4, 4 * SIZE
#endif
/* Write the solved tile back to the packed buffer and to C. */
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC4
STFPDUX f8, BO2, INC4
STFPDUX f4, BO, INC4
STFPDUX f12, BO2, INC4
STFPDUX f1, BO, INC4
STFPDUX f9, BO2, INC4
STFPDUX f5, BO, INC4
STFPDUX f13, BO2, INC4
subi BO, BO, 16 * SIZE
subi BO2, BO2, 16 * SIZE
STFDUX f0, CO1, INC
STFDUX f4, CO1, INC
STFDUX f1, CO1, INC
STFDUX f5, CO1, INC
STFSDUX f0, CO2, INC
STFSDUX f4, CO2, INC
STFSDUX f1, CO2, INC
STFSDUX f5, CO2, INC
STFDUX f8, CO3, INC
STFDUX f12, CO3, INC
STFDUX f9, CO3, INC
STFDUX f13, CO3, INC
STFSDUX f8, CO4, INC
STFSDUX f12, CO4, INC
STFSDUX f9, CO4, INC
STFSDUX f13, CO4, INC
#else
STFPDUX f0, AO, INC4
STFPDUX f1, AO2, INC4
STFPDUX f4, AO, INC4
STFPDUX f5, AO2, INC4
STFPDUX f8, AO, INC4
STFPDUX f9, AO2, INC4
STFPDUX f12, AO, INC4
STFPDUX f13, AO2, INC4
subi AO, AO, 16 * SIZE
subi AO2, AO2, 16 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
STFDUX f8, CO3, INC
STFSDUX f8, CO3, INC
STFDUX f9, CO3, INC
STFSDUX f9, CO3, INC
STFDUX f12, CO4, INC
STFSDUX f12, CO4, INC
STFDUX f13, CO4, INC
STFSDUX f13, CO4, INC
#endif
#ifdef LN
/* Undo the post-increment of the stores (LN keeps CO at tile start). */
subi CO1, CO1, 4 * SIZE
subi CO2, CO2, 4 * SIZE
subi CO3, CO3, 4 * SIZE
subi CO4, CO4, 4 * SIZE
#endif
#ifdef RT
slwi r0, K, 2 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 4
#endif
#ifdef LN
subi KK, KK, 4
#endif
/* Re-zero the accumulator for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/* .L40: main M/8 loop — 8x4 tiles, the widest and hottest path. */
.L40:
srawi. I, M, 3
ble .L49
.align 4
/* .L11: per-8-row-tile setup. Zero all 16 accumulator pairs f0..f15. */
.L11:
#if defined(LT) || defined(RN)
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
fpmr f5, f0
fpmr f9, f0
fpmr f13, f0
fpmr f2, f0
fpmr f6, f0
fpmr f10, f0
fpmr f14, f0
fpmr f3, f0
fpmr f7, f0
fpmr f11, f0
fpmr f15, f0
nop
srawi. r0, KK, 2
fpmr f1, f0
mtspr CTR, r0
ble .L14
#else
#ifdef LN
slwi r0, K, 3 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
/* Recompute AO/BO for this tile from AORIG/B and the KK offset. */
slwi r0 , KK, 3 + BASE_SHIFT
slwi TEMP, KK, 2 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
fpmr f5, f0
fpmr f9, f0
fpmr f13, f0
fpmr f2, f0
fpmr f6, f0
fpmr f10, f0
fpmr f14, f0
fpmr f3, f0
fpmr f7, f0
fpmr f11, f0
fpmr f15, f0
nop
srawi. r0, TEMP, 2
fpmr f1, f0
mtspr CTR, r0
ble .L14
#endif
/* Preload the A/B streams for the software-pipelined loop; the
   remaining fpmr clears are interleaved with the loads. */
LFPDUX A1, AO, INC4
fpmr f5, f0
LFPDUX A3, AO, INC4
fpmr f9, f0
LFPDUX B1, BO, INC4
fpmr f13, f0
LFPDUX A5, AO, INC4
fpmr f2, f0
LFPDUX A6, AO, INC4
fpmr f6, f0
LFPDUX B3, BO, INC4
fpmr f10, f0
LFPDUX A7, AO, INC4
fpmr f14, f0
LFPDUX A8, AO, INC4
fpmr f3, f0
LFPDUX B5, BO, INC4
fpmr f7, f0
LFPDUX A9, AO, INC4
fpmr f11, f0
LFPDUX A2, AO2, INC4
fpmr f15, f0
LFPDUX B2, BO2, INC4
bdz- .L13
.align 4
/* .L12: 8x4 main loop, 4 K-steps per trip (sub-blocks "1".."4").
   Each sub-block does 16 paired FMAs into f0..f15 with the next
   iteration's loads and nop padding interleaved for dual issue. */
.L12:
## 1 ##
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
nop
fxcpmadd f8, B2, A1, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A1, f12
LFPDUX B6, BO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A10, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
nop
fxcpmadd f10, B2, A3, f10
nop
fxcsmadd f14, B2, A3, f14
nop
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
LFPDUX A1, AO, INC4
fxcsmadd f15, B2, A4, f15
nop
## 2 ##
fxcpmadd f0, B3, A5, f0
nop
fxcsmadd f4, B3, A5, f4
nop
fxcpmadd f8, B4, A5, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A5, f12
LFPDUX B1, BO, INC4
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
LFPDUX A3, AO, INC4
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B3, A6, f2
nop
fxcsmadd f6, B3, A6, f6
nop
fxcpmadd f10, B4, A6, f10
nop
fxcsmadd f14, B4, A6, f14
nop
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B4, A4, f11
LFPDUX A5, AO, INC4
fxcsmadd f15, B4, A4, f15
nop
## 3 ##
fxcpmadd f0, B5, A7, f0
nop
fxcsmadd f4, B5, A7, f4
nop
fxcpmadd f8, B2, A7, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A7, f12
LFPDUX B3, BO, INC4
fxcpmadd f1, B5, A2, f1
nop
fxcsmadd f5, B5, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A6, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B5, A8, f2
nop
fxcsmadd f6, B5, A8, f6
nop
fxcpmadd f10, B2, A8, f10
nop
fxcsmadd f14, B2, A8, f14
nop
fxcpmadd f3, B5, A4, f3
nop
fxcsmadd f7, B5, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
LFPDUX A7, AO, INC4
fxcsmadd f15, B2, A4, f15
nop
## 4 ##
fxcpmadd f0, B6, A9, f0
nop
fxcsmadd f4, B6, A9, f4
nop
fxcpmadd f8, B4, A9, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A9, f12
LFPDUX B5, BO, INC4
fxcpmadd f1, B6, A2, f1
nop
fxcsmadd f5, B6, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
LFPDUX A8, AO, INC4
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B6, A10, f2
nop
fxcsmadd f6, B6, A10, f6
nop
fxcpmadd f10, B4, A10, f10
nop
fxcsmadd f14, B4, A10, f14
nop
fxcpmadd f3, B6, A4, f3
LFPDUX A2, AO2, INC4
fxcsmadd f7, B6, A4, f7
LFPDUX A9, AO, INC4
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
bdnz+ .L12
.align 4
/* .L13: pipeline drain for .L12 — same four sub-blocks, but only the
   loads still needed to complete this iteration are issued. */
.L13:
## 1 ##
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
nop
fxcpmadd f8, B2, A1, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A1, f12
LFPDUX B6, BO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A10, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
nop
fxcpmadd f10, B2, A3, f10
nop
fxcsmadd f14, B2, A3, f14
nop
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
nop
fxcsmadd f15, B2, A4, f15
nop
## 2 ##
fxcpmadd f0, B3, A5, f0
nop
fxcsmadd f4, B3, A5, f4
nop
fxcpmadd f8, B4, A5, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A5, f12
nop
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
nop
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B3, A6, f2
nop
fxcsmadd f6, B3, A6, f6
nop
fxcpmadd f10, B4, A6, f10
nop
fxcsmadd f14, B4, A6, f14
nop
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
nop
## 3 ##
fxcpmadd f0, B5, A7, f0
nop
fxcsmadd f4, B5, A7, f4
nop
fxcpmadd f8, B2, A7, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A7, f12
nop
fxcpmadd f1, B5, A2, f1
nop
fxcsmadd f5, B5, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
nop
fxcsmadd f13, B2, A2, f13
fxcpmadd f2, B5, A8, f2
nop
fxcsmadd f6, B5, A8, f6
nop
fxcpmadd f10, B2, A8, f10
nop
fxcsmadd f14, B2, A8, f14
nop
fxcpmadd f3, B5, A4, f3
nop
fxcsmadd f7, B5, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
nop
fxcsmadd f15, B2, A4, f15
nop
## 4 ##
fxcpmadd f0, B6, A9, f0
nop
fxcsmadd f4, B6, A9, f4
nop
fxcpmadd f8, B4, A9, f8
nop
fxcsmadd f12, B4, A9, f12
nop
fxcpmadd f1, B6, A2, f1
nop
fxcsmadd f5, B6, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
nop
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B6, A10, f2
nop
fxcsmadd f6, B6, A10, f6
nop
fxcpmadd f10, B4, A10, f10
nop
fxcsmadd f14, B4, A10, f14
nop
fxcpmadd f3, B6, A4, f3
nop
fxcsmadd f7, B6, A4, f7
nop
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
nop
.align 4
/* .L14: K-remainder (mod 4) for the 8x4 tile; each trip of .L16
   consumes one K-step across all 8 rows (A loaded in two halves). */
.L14:
#if defined(LT) || defined(RN)
andi. r0, KK, 3
mtspr CTR, r0
ble+ .L18
#else
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L18
#endif
.align 4
.L15:
LFPDUX A2, AO, INC4
LFPDUX A4, AO2, INC4
LFPDUX A10, BO, INC4
LFPDUX B4, BO2, INC4
bdz- .L17
.align 4
.L16:
fxcpmadd f0, A10, A2, f0
fxcsmadd f4, A10, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX A2, AO, INC4
fxcpmadd f1, A10, A4, f1
fxcsmadd f5, A10, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
LFPDUX A4, AO2, INC4
fxcpmadd f2, A10, A2, f2
fxcsmadd f6, A10, A2, f6
fxcpmadd f10, B4, A2, f10
fxcsmadd f14, B4, A2, f14
LFPDUX A2, AO, INC4
fxcpmadd f3, A10, A4, f3
fxcsmadd f7, A10, A4, f7
LFPDUX A10, BO, INC4
fxcpmadd f11, B4, A4, f11
fxcsmadd f15, B4, A4, f15
LFPDUX A4, AO2, INC4
LFPDUX B4, BO2, INC4
bdnz+ .L16
.align 4
/* .L17: last remainder step (no B reload). */
.L17:
fxcpmadd f0, A10, A2, f0
fxcsmadd f4, A10, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX A2, AO, INC4
fxcpmadd f1, A10, A4, f1
fxcsmadd f5, A10, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
LFPDUX A4, AO2, INC4
fxcpmadd f2, A10, A2, f2
fxcsmadd f6, A10, A2, f6
fxcpmadd f10, B4, A2, f10
fxcsmadd f14, B4, A2, f14
fxcpmadd f3, A10, A4, f3
fxcsmadd f7, A10, A4, f7
fxcpmadd f11, B4, A4, f11
fxcsmadd f15, B4, A4, f15
.align 4
/* .L18: prepare the 8x4 tile for the triangular solve — rebase AO/BO
   on the diagonal block, transpose the paired accumulators (LN/LT),
   load the packed right-hand side, and form rhs - sum in f0..f15. */
.L18:
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 8
#else
subi r0, KK, 4
#endif
slwi TEMP, r0, 3 + BASE_SHIFT
slwi r0, r0, 2 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
addi BO, BO, - 4 * SIZE
addi BO2, BO, 2 * SIZE
#endif
#if defined(LN) || defined(LT)
/* Swap primary/secondary halves of each pair (via f24..f31 temps) so
   rows match the packed-B layout; B loads are interleaved throughout. */
fpmr f24, f0
LFPDUX f16, BO, INC4
fpmr f25, f1
nop
fpmr f26, f2
LFPDUX f17, BO2, INC4
fpmr f27, f3
nop
fpmr f28, f8
LFPDUX f18, BO, INC4
fpmr f29, f9
nop
fpmr f30, f10
LFPDUX f19, BO2, INC4
fpmr f31, f11
nop
fsmfp f0, f4
LFPDUX f20, BO, INC4
fsmfp f1, f5
nop
fsmfp f2, f6
LFPDUX f21, BO2, INC4
fsmfp f3, f7
nop
fsmfp f8, f12
LFPDUX f22, BO, INC4
fsmfp f9, f13
nop
fsmfp f10, f14
LFPDUX f23, BO2, INC4
fsmfp f11, f15
nop
fsmtp f4, f24
LFPDUX f24, BO, INC4
fsmtp f5, f25
nop
fsmtp f6, f26
LFPDUX f25, BO2, INC4
fsmtp f7, f27
nop
fsmtp f12, f28
LFPDUX f26, BO, INC4
fsmtp f13, f29
nop
fsmtp f14, f30
LFPDUX f27, BO2, INC4
fsmtp f15, f31
nop
/* rhs - accumulated sum for all 16 pairs; rewind BO/BO2 afterwards. */
fpsub f0, f16, f0
LFPDUX f28, BO, INC4
fpsub f8, f17, f8
nop
fpsub f4, f18, f4
LFPDUX f29, BO2, INC4
fpsub f12, f19, f12
nop
fpsub f1, f20, f1
LFPDUX f30, BO, INC4
fpsub f9, f21, f9
subi BO, BO, 32 * SIZE
fpsub f5, f22, f5
LFPDUX f31, BO2, INC4
fpsub f13, f23, f13
subi BO2, BO2, 32 * SIZE
fpsub f2, f24, f2
fpsub f10, f25, f10
fpsub f6, f26, f6
fpsub f14, f27, f14
fpsub f3, f28, f3
fpsub f11, f29, f11
fpsub f7, f30, f7
fpsub f15, f31, f15
#else
/* RN/RT: right-hand side comes from the packed A buffer; no transpose. */
LFPDUX f16, AO, INC4
LFPDUX f17, AO2, INC4
LFPDUX f18, AO, INC4
LFPDUX f19, AO2, INC4
LFPDUX f20, AO, INC4
LFPDUX f21, AO2, INC4
LFPDUX f22, AO, INC4
LFPDUX f23, AO2, INC4
fpsub f0, f16, f0
LFPDUX f24, AO, INC4
fpsub f1, f17, f1
LFPDUX f25, AO2, INC4
fpsub f2, f18, f2
LFPDUX f26, AO, INC4
fpsub f3, f19, f3
LFPDUX f27, AO2, INC4
fpsub f4, f20, f4
LFPDUX f28, AO, INC4
fpsub f5, f21, f5
LFPDUX f29, AO2, INC4
fpsub f6, f22, f6
LFPDUX f30, AO, INC4
fpsub f7, f23, f7
LFPDUX f31, AO2, INC4
fpsub f8, f24, f8
subi AO, AO, 32 * SIZE
fpsub f9, f25, f9
subi AO2, AO2, 32 * SIZE
fpsub f10, f26, f10
fpsub f11, f27, f11
fpsub f12, f28, f12
fpsub f13, f29, f13
fpsub f14, f30, f14
fpsub f15, f31, f15
#endif
/*------------------------------------------------------------------*/
/* Triangular solve of the 8x4 residual, one variant per TRSM case. */
/* Pattern in all four: fxpmul/fxsmul scale by the diagonal entry   */
/* (presumably pre-inverted during packing -- TODO confirm against  */
/* the corresponding trsm packing routine), fxcpnmsub/fxcsnmsub     */
/* eliminate that unknown from the remaining rows/columns.          */
/*------------------------------------------------------------------*/
#ifdef LN
/* LN: walk the packed 8x8 lower factor backwards (INCM4 = negative */
/* stride), solving rows 7 -> 0.  Registers A1..A8 hold successive  */
/* factor pairs; the mixed add/LFPDUX lines skip pairs that are not */
/* needed for the current row.                                      */
addi AO, AO, 68 * SIZE
addi AO2, AO2, 68 * SIZE
LFPDUX A1, AO2, INCM4
LFPDUX A2, AO, INCM4
LFPDUX A3, AO2, INCM4
LFPDUX A4, AO, INCM4
LFPDUX A5, AO2, INCM4
LFPDUX A6, AO, INCM4
LFPDUX A7, AO2, INCM4
LFPDUX A8, AO, INCM4
/* rows 7 and 6 */
fxsmul f7, A1, f7
fxsmul f15, A1, f15
fxcpnmsub f3, A1, f7, f3
fxcpnmsub f11, A1, f15, f11
fxcsnmsub f6, A2, f7, f6
fxcsnmsub f14, A2, f15, f14
fxcpnmsub f2, A2, f7, f2
fxcpnmsub f10, A2, f15, f10
fxcsnmsub f5, A3, f7, f5
fxcsnmsub f13, A3, f15, f13
fxcpnmsub f1, A3, f7, f1
fxcpnmsub f9, A3, f15, f9
fxcsnmsub f4, A4, f7, f4
fxcsnmsub f12, A4, f15, f12
fxcpnmsub f0, A4, f7, f0
fxcpnmsub f8, A4, f15, f8
fxpmul f3, A5, f3
fxpmul f11, A5, f11
fxcsnmsub f6, A6, f3, f6
fxcsnmsub f14, A6, f11, f14
fxcpnmsub f2, A6, f3, f2
fxcpnmsub f10, A6, f11, f10
fxcsnmsub f5, A7, f3, f5
fxcsnmsub f13, A7, f11, f13
fxcpnmsub f1, A7, f3, f1
fxcpnmsub f9, A7, f11, f9
fxcsnmsub f4, A8, f3, f4
fxcsnmsub f12, A8, f11, f12
fxcpnmsub f0, A8, f3, f0
fxcpnmsub f8, A8, f11, f8
/* rows 5 and 4 (the bare add ...,INCM4 lines skip unused pairs)    */
add AO2, AO2, INCM4
LFPDUX A1, AO, INCM4
LFPDUX A2, AO2, INCM4
LFPDUX A3, AO, INCM4
add AO2, AO2, INCM4
LFPDUX A4, AO, INCM4
LFPDUX A5, AO2, INCM4
LFPDUX A6, AO, INCM4
add AO2, AO2, INCM4
add AO, AO, INCM4
LFPDUX A7, AO2, INCM4
LFPDUX A8, AO, INCM4
fxsmul f6, A1, f6
fxsmul f14, A1, f14
fxcpnmsub f2, A1, f6, f2
fxcpnmsub f10, A1, f14, f10
fxcsnmsub f5, A2, f6, f5
fxcsnmsub f13, A2, f14, f13
fxcpnmsub f1, A2, f6, f1
fxcpnmsub f9, A2, f14, f9
fxcsnmsub f4, A3, f6, f4
fxcsnmsub f12, A3, f14, f12
fxcpnmsub f0, A3, f6, f0
fxcpnmsub f8, A3, f14, f8
fxpmul f2, A4, f2
fxpmul f10, A4, f10
fxcsnmsub f5, A5, f2, f5
fxcsnmsub f13, A5, f10, f13
fxcpnmsub f1, A5, f2, f1
fxcpnmsub f9, A5, f10, f9
fxcsnmsub f4, A6, f2, f4
fxcsnmsub f12, A6, f10, f12
fxcpnmsub f0, A6, f2, f0
fxcpnmsub f8, A6, f10, f8
fxsmul f5, A7, f5
fxsmul f13, A7, f13
fxcpnmsub f1, A7, f5, f1
fxcpnmsub f9, A7, f13, f9
fxcsnmsub f4, A8, f5, f4
fxcsnmsub f12, A8, f13, f12
fxcpnmsub f0, A8, f5, f0
fxcpnmsub f8, A8, f13, f8
/* rows 3..0, then rewind AO/AO2 to the block start */
add AO2, AO2, INCM4
add AO, AO, INCM4
LFPDUX A1, AO2, INCM4
LFPDUX A2, AO, INCM4
subi AO2, AO2, 8 * SIZE
add AO, AO, INCM4
LFPDUX A3, AO, INCM4
subi AO2, AO2, 8 * SIZE
add AO, AO, INCM4
LFPDUX A4, AO, INCM4
addi AO, AO, -4 * SIZE
addi AO2, AO2, -4 * SIZE
fxpmul f1, A1, f1
fxpmul f9, A1, f9
fxcsnmsub f4, A2, f1, f4
fxcsnmsub f12, A2, f9, f12
fxcpnmsub f0, A2, f1, f0
fxcpnmsub f8, A2, f9, f8
fxsmul f4, A3, f4
fxsmul f12, A3, f12
fxcpnmsub f0, A3, f4, f0
fxcpnmsub f8, A3, f12, f8
fxpmul f0, A4, f0
fxpmul f8, A4, f8
#endif
#ifdef LT
/* LT: forward substitution, rows 0 -> 7, walking the packed upper  */
/* factor forwards with INC4.                                       */
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A4, AO2, INC4
LFPDUX A5, AO, INC4
LFPDUX A6, AO2, INC4
LFPDUX A7, AO, INC4
LFPDUX A8, AO2, INC4
/* rows 0 and 1 */
fxpmul f0, A1, f0
fxpmul f8, A1, f8
fxcsnmsub f4, A1, f0, f4
fxcsnmsub f12, A1, f8, f12
fxcpnmsub f1, A2, f0, f1
fxcpnmsub f9, A2, f8, f9
fxcsnmsub f5, A2, f0, f5
fxcsnmsub f13, A2, f8, f13
fxcpnmsub f2, A3, f0, f2
fxcpnmsub f10, A3, f8, f10
fxcsnmsub f6, A3, f0, f6
fxcsnmsub f14, A3, f8, f14
fxcpnmsub f3, A4, f0, f3
fxcpnmsub f11, A4, f8, f11
fxcsnmsub f7, A4, f0, f7
fxcsnmsub f15, A4, f8, f15
fxsmul f4, A5, f4
fxsmul f12, A5, f12
fxcpnmsub f1, A6, f4, f1
fxcpnmsub f9, A6, f12, f9
fxcsnmsub f5, A6, f4, f5
fxcsnmsub f13, A6, f12, f13
fxcpnmsub f2, A7, f4, f2
fxcpnmsub f10, A7, f12, f10
fxcsnmsub f6, A7, f4, f6
fxcsnmsub f14, A7, f12, f14
fxcpnmsub f3, A8, f4, f3
fxcpnmsub f11, A8, f12, f11
fxcsnmsub f7, A8, f4, f7
fxcsnmsub f15, A8, f12, f15
/* rows 2 and 3 */
add AO, AO, INC4
LFPDUX A1, AO2, INC4
LFPDUX A2, AO, INC4
LFPDUX A3, AO2, INC4
add AO, AO, INC4
LFPDUX A4, AO2, INC4
LFPDUX A5, AO, INC4
LFPDUX A6, AO2, INC4
add AO, AO, INC4
add AO2, AO2, INC4
LFPDUX A7, AO, INC4
LFPDUX A8, AO2, INC4
fxpmul f1, A1, f1
fxpmul f9, A1, f9
fxcsnmsub f5, A1, f1, f5
fxcsnmsub f13, A1, f9, f13
fxcpnmsub f2, A2, f1, f2
fxcpnmsub f10, A2, f9, f10
fxcsnmsub f6, A2, f1, f6
fxcsnmsub f14, A2, f9, f14
fxcpnmsub f3, A3, f1, f3
fxcpnmsub f11, A3, f9, f11
fxcsnmsub f7, A3, f1, f7
fxcsnmsub f15, A3, f9, f15
fxsmul f5, A4, f5
fxsmul f13, A4, f13
fxcpnmsub f2, A5, f5, f2
fxcpnmsub f10, A5, f13, f10
fxcsnmsub f6, A5, f5, f6
fxcsnmsub f14, A5, f13, f14
fxcpnmsub f3, A6, f5, f3
fxcpnmsub f11, A6, f13, f11
fxcsnmsub f7, A6, f5, f7
fxcsnmsub f15, A6, f13, f15
fxpmul f2, A7, f2
fxpmul f10, A7, f10
fxcsnmsub f6, A7, f2, f6
fxcsnmsub f14, A7, f10, f14
fxcpnmsub f3, A8, f2, f3
fxcpnmsub f11, A8, f10, f11
fxcsnmsub f7, A8, f2, f7
fxcsnmsub f15, A8, f10, f15
/* rows 4..7, then rewind AO/AO2 */
add AO, AO, INC4
add AO2, AO2, INC4
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
addi AO, AO, 8 * SIZE
addi AO2, AO2, 4 * SIZE
LFPDUX A3, AO2, INC4
addi AO, AO, 8 * SIZE
addi AO2, AO2, 4 * SIZE
LFPDUX A4, AO2, INC4
subi AO, AO, 64 * SIZE
subi AO2, AO2, 64 * SIZE
fxsmul f6, A1, f6
fxsmul f14, A1, f14
fxcpnmsub f3, A2, f6, f3
fxcpnmsub f11, A2, f14, f11
fxcsnmsub f7, A2, f6, f7
fxcsnmsub f15, A2, f14, f15
fxpmul f3, A3, f3
fxpmul f11, A3, f11
fxcsnmsub f7, A3, f3, f7
fxcsnmsub f15, A3, f11, f15
fxsmul f7, A4, f7
fxsmul f15, A4, f15
#endif
#ifdef RN
/* RN: solve against the 4x4 upper factor of B, columns 0 -> 3;     */
/* each unknown column updates all eight rows at once.              */
LFPDUX A1, BO, INC4
LFPDUX A2, BO2, INC4
LFPDUX A3, BO, INC4
LFPDUX A4, BO2, INC4
add BO, BO, INC4
LFPDUX A5, BO2, INC4
add BO, BO, INC4
LFPDUX A6, BO2, INC4
subi BO, BO, 16 * SIZE
subi BO2, BO2, 16 * SIZE
fxpmul f0, A1, f0
fxpmul f1, A1, f1
fxpmul f2, A1, f2
fxpmul f3, A1, f3
fxcsnmsub f4, A1, f0, f4
fxcsnmsub f5, A1, f1, f5
fxcsnmsub f6, A1, f2, f6
fxcsnmsub f7, A1, f3, f7
fxcpnmsub f8, A2, f0, f8
fxcpnmsub f9, A2, f1, f9
fxcpnmsub f10, A2, f2, f10
fxcpnmsub f11, A2, f3, f11
fxcsnmsub f12, A2, f0, f12
fxcsnmsub f13, A2, f1, f13
fxcsnmsub f14, A2, f2, f14
fxcsnmsub f15, A2, f3, f15
fxsmul f4, A3, f4
fxsmul f5, A3, f5
fxsmul f6, A3, f6
fxsmul f7, A3, f7
fxcpnmsub f8, A4, f4, f8
fxcpnmsub f9, A4, f5, f9
fxcpnmsub f10, A4, f6, f10
fxcpnmsub f11, A4, f7, f11
fxcsnmsub f12, A4, f4, f12
fxcsnmsub f13, A4, f5, f13
fxcsnmsub f14, A4, f6, f14
fxcsnmsub f15, A4, f7, f15
fxpmul f8, A5, f8
fxpmul f9, A5, f9
fxpmul f10, A5, f10
fxpmul f11, A5, f11
fxcsnmsub f12, A5, f8, f12
fxcsnmsub f13, A5, f9, f13
fxcsnmsub f14, A5, f10, f14
fxcsnmsub f15, A5, f11, f15
fxsmul f12, A6, f12
fxsmul f13, A6, f13
fxsmul f14, A6, f14
fxsmul f15, A6, f15
#endif
#ifdef RT
/* RT: same as RN but columns 3 -> 0, walking B backwards (INCM4).  */
addi BO, BO, 20 * SIZE
addi BO2, BO2, 20 * SIZE
LFPDUX A1, BO2, INCM4
LFPDUX A2, BO, INCM4
LFPDUX A3, BO2, INCM4
LFPDUX A4, BO, INCM4
add BO2, BO2, INCM4
LFPDUX A5, BO, INCM4
add BO2, BO2, INCM4
LFPDUX A6, BO, INCM4
subi BO, BO, 4 * SIZE
subi BO2, BO2, 4 * SIZE
fxsmul f12, A1, f12
fxsmul f13, A1, f13
fxsmul f14, A1, f14
fxsmul f15, A1, f15
fxcpnmsub f8, A1, f12, f8
fxcpnmsub f9, A1, f13, f9
fxcpnmsub f10, A1, f14, f10
fxcpnmsub f11, A1, f15, f11
fxcsnmsub f4, A2, f12, f4
fxcsnmsub f5, A2, f13, f5
fxcsnmsub f6, A2, f14, f6
fxcsnmsub f7, A2, f15, f7
fxcpnmsub f0, A2, f12, f0
fxcpnmsub f1, A2, f13, f1
fxcpnmsub f2, A2, f14, f2
fxcpnmsub f3, A2, f15, f3
fxpmul f8, A3, f8
fxpmul f9, A3, f9
fxpmul f10, A3, f10
fxpmul f11, A3, f11
fxcsnmsub f4, A4, f8, f4
fxcsnmsub f5, A4, f9, f5
fxcsnmsub f6, A4, f10, f6
fxcsnmsub f7, A4, f11, f7
fxcpnmsub f0, A4, f8, f0
fxcpnmsub f1, A4, f9, f1
fxcpnmsub f2, A4, f10, f2
fxcpnmsub f3, A4, f11, f3
fxsmul f4, A5, f4
fxsmul f5, A5, f5
fxsmul f6, A5, f6
fxsmul f7, A5, f7
fxcpnmsub f0, A5, f4, f0
fxcpnmsub f1, A5, f5, f1
fxcpnmsub f2, A5, f6, f2
fxcpnmsub f3, A5, f7, f3
fxpmul f0, A6, f0
fxpmul f1, A6, f1
fxpmul f2, A6, f2
fxpmul f3, A6, f3
#endif
/*------------------------------------------------------------------*/
/* Write back the solved 8x4 tile: once into the packed buffer      */
/* (BO for LN/LT, AO otherwise) and once into the C matrix via      */
/* CO1..CO4 (primary halves with STFDUX, secondary halves with      */
/* STFSDUX).  Then advance the pointers for the next 8-row tile     */
/* and loop back to .L11 while I > 0.                               */
/*------------------------------------------------------------------*/
#ifdef LN
/* LN stores walk C backwards: step the column pointers back first. */
subi CO1, CO1, 8 * SIZE
subi CO2, CO2, 8 * SIZE
subi CO3, CO3, 8 * SIZE
subi CO4, CO4, 8 * SIZE
#endif
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC4
STFPDUX f8, BO2, INC4
STFPDUX f4, BO, INC4
STFPDUX f12, BO2, INC4
STFPDUX f1, BO, INC4
STFPDUX f9, BO2, INC4
STFPDUX f5, BO, INC4
STFPDUX f13, BO2, INC4
STFPDUX f2, BO, INC4
STFPDUX f10, BO2, INC4
STFPDUX f6, BO, INC4
STFPDUX f14, BO2, INC4
STFPDUX f3, BO, INC4
STFPDUX f11, BO2, INC4
STFPDUX f7, BO, INC4
STFPDUX f15, BO2, INC4
subi BO, BO, 32 * SIZE
subi BO2, BO2, 32 * SIZE
/* columns 1/2 of C from the primary halves ... */
STFDUX f0, CO1, INC
STFDUX f4, CO1, INC
STFDUX f1, CO1, INC
STFDUX f5, CO1, INC
STFDUX f2, CO1, INC
STFDUX f6, CO1, INC
STFDUX f3, CO1, INC
STFDUX f7, CO1, INC
STFSDUX f0, CO2, INC
STFSDUX f4, CO2, INC
STFSDUX f1, CO2, INC
STFSDUX f5, CO2, INC
STFSDUX f2, CO2, INC
STFSDUX f6, CO2, INC
STFSDUX f3, CO2, INC
STFSDUX f7, CO2, INC
/* ... columns 3/4 from f8-f15. */
STFDUX f8, CO3, INC
STFDUX f12, CO3, INC
STFDUX f9, CO3, INC
STFDUX f13, CO3, INC
STFDUX f10, CO3, INC
STFDUX f14, CO3, INC
STFDUX f11, CO3, INC
STFDUX f15, CO3, INC
STFSDUX f8, CO4, INC
STFSDUX f12, CO4, INC
STFSDUX f9, CO4, INC
STFSDUX f13, CO4, INC
STFSDUX f10, CO4, INC
STFSDUX f14, CO4, INC
STFSDUX f11, CO4, INC
STFSDUX f15, CO4, INC
#else
STFPDUX f0, AO, INC4
STFPDUX f1, AO2, INC4
STFPDUX f2, AO, INC4
STFPDUX f3, AO2, INC4
STFPDUX f4, AO, INC4
STFPDUX f5, AO2, INC4
STFPDUX f6, AO, INC4
STFPDUX f7, AO2, INC4
STFPDUX f8, AO, INC4
STFPDUX f9, AO2, INC4
STFPDUX f10, AO, INC4
STFPDUX f11, AO2, INC4
STFPDUX f12, AO, INC4
STFPDUX f13, AO2, INC4
STFPDUX f14, AO, INC4
STFPDUX f15, AO2, INC4
subi AO, AO, 32 * SIZE
subi AO2, AO2, 32 * SIZE
/* RN/RT layout: each register pair spans two C rows in the same   */
/* column, so primary and secondary halves go to the same pointer. */
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
STFDUX f6, CO2, INC
STFSDUX f6, CO2, INC
STFDUX f7, CO2, INC
STFSDUX f7, CO2, INC
STFDUX f8, CO3, INC
STFSDUX f8, CO3, INC
STFDUX f9, CO3, INC
STFSDUX f9, CO3, INC
STFDUX f10, CO3, INC
STFSDUX f10, CO3, INC
STFDUX f11, CO3, INC
STFSDUX f11, CO3, INC
STFDUX f12, CO4, INC
STFSDUX f12, CO4, INC
STFDUX f13, CO4, INC
STFSDUX f13, CO4, INC
STFDUX f14, CO4, INC
STFSDUX f14, CO4, INC
STFDUX f15, CO4, INC
STFSDUX f15, CO4, INC
#endif
#ifdef LN
/* Undo the post-increments so CO* again point before this tile.    */
subi CO1, CO1, 8 * SIZE
subi CO2, CO2, 8 * SIZE
subi CO3, CO3, 8 * SIZE
subi CO4, CO4, 8 * SIZE
#endif
#ifdef RT
slwi r0, K, 3 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
/* Skip the untouched K-KK tail of the packed panels.               */
sub TEMP, K, KK
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 8
#endif
#ifdef LN
subi KK, KK, 8
#endif
/* Next 8-row tile; f0 is re-zeroed from the FZERO stack slot.      */
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L11
.align 4
.L49:
/* End of the N/4 column loop: advance B and KK for this 4-column   */
/* panel, then loop to .L10 while J > 0.                            */
#ifdef LN
slwi r0, K, 2 + BASE_SHIFT
add B, B, r0
#endif
#if defined(LT) || defined(RN)
addi B, BO, 4 * SIZE
#endif
#ifdef RN
addi KK, KK, 4
#endif
#ifdef RT
subi KK, KK, 4
#endif
addic. J, J, -1
bgt+ .L10
.align 4
/*------------------------------------------------------------------*/
/* .L50: remainder panel of N & 2 columns.  Set up C/KK/A pointers, */
/* then handle the M & 1 row remainder (1x2 micro-kernel) before    */
/* falling through to the wider row cases at .L60/.L70/.L80.        */
/*------------------------------------------------------------------*/
.L50:
andi. J, N, 2
beq .L90
#ifdef RT
slwi r0, K, 1 + BASE_SHIFT
sub B, B, r0
slwi r0, LDC, 1
sub C, C, r0
#endif
mr CO1, C
add CO2, C, LDC
#ifdef LN
add KK, M, OFFSET
#endif
#ifdef LT
mr KK, OFFSET
#endif
#if defined(LN) || defined(RT)
addi AORIG, A, -2 * SIZE
#else
addi AO, A, -2 * SIZE
#endif
#ifndef RT
add C, CO2, LDC
#endif
/* Zero the accumulator seed from the FZERO stack slot.             */
li r0, FZERO
lfpsx f0, SP, r0
andi. I, M, 1
beq .L60
/* --- 1x2 tile: accumulate over K in 4 interleaved accumulators    */
/* f0-f3, unrolled by 8 (CTR = K/8 or (K-KK)/8).                    */
#if defined(LT) || defined(RN)
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 3
mtspr CTR, r0
ble .L84
#else
#ifdef LN
slwi r0, K, 0 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 0 + BASE_SHIFT
slwi TEMP, KK, 1 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L84
#endif
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX B4, BO, INC2
bdz- .L83
.align 4
.L82:
/* Unrolled-by-8 body (each A register covers two K values).        */
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A1, B2, f1
LFPDUX B2, BO, INC2
LFPDUX A1, AO, INC2
fxcpmadd f2, A2, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A2, B4, f3
LFPDUX B4, BO, INC2
LFPDUX A2, AO, INC2
fxcpmadd f0, A3, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A3, B2, f1
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
fxcpmadd f2, A4, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A4, B4, f3
LFPDUX B4, BO, INC2
LFPDUX A4, AO, INC2
bdnz+ .L82
.align 4
.L83:
/* Drain copy of .L82 (no trailing A reloads).                      */
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A1, B2, f1
LFPDUX B2, BO, INC2
fxcpmadd f2, A2, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A2, B4, f3
LFPDUX B4, BO, INC2
fxcpmadd f0, A3, B1, f0
fxcsmadd f1, A3, B2, f1
fxcpmadd f2, A4, B3, f2
fxcsmadd f3, A4, B4, f3
.align 4
.L84:
/* K mod 8 remainder: one scalar A value (LFDX + manual INC bump)   */
/* against one B pair per iteration.                                */
#if defined(LT) || defined(RN)
andi. r0, KK, 7
mtspr CTR, r0
ble+ .L88
#else
andi. r0, TEMP, 7
mtspr CTR, r0
ble+ .L88
#endif
LFDX A1, AO, INC2
LFPDUX B1, BO, INC2
add AO, AO, INC
bdz- .L87
.align 4
.L86:
fxcpmadd f0, A1, B1, f0
LFDX A1, AO, INC2
LFPDUX B1, BO, INC2
add AO, AO, INC
bdnz+ .L86
.align 4
.L87:
fxcpmadd f0, A1, B1, f0
.align 4
.L88:
/* Collapse the four partial accumulators into f0.                  */
fpadd f0, f0, f1
fpadd f2, f2, f3
fpadd f0, f0, f2
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 1
#else
subi r0, KK, 2
#endif
slwi TEMP, r0, 0 + BASE_SHIFT
slwi r0, r0, 1 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
/* Residual = packed value - accumulated product.                   */
#if defined(LN) || defined(LT)
LFPDX f16, BO, INC2
fpsub f0, f16, f0
#else
LFPDX f16, AO, INC2
fpsub f0, f16, f0
#endif
/* 1x1 left solve: a single diagonal multiply.                      */
#ifdef LN
LFPDX A1, AO, INC2
fxpmul f0, A1, f0
#endif
#ifdef LT
LFPDX A1, AO, INC2
fxpmul f0, A1, f0
#endif
#ifdef RN
/* 2x2 right solve done in scalar form (fsmtp splits the pair).     */
LFD A1, (2 + 0) * SIZE(BO)
LFD A2, (2 + 1) * SIZE(BO)
LFD A3, (2 + 3) * SIZE(BO)
fsmtp f1, f0
fmul f0, A1, f0
fnmsub f1, A2, f0, f1
fmul f1, A3, f1
fsmfp f0, f1
#endif
#ifdef RT
LFD A1, (2 + 3) * SIZE(BO)
LFD A2, (2 + 2) * SIZE(BO)
LFD A3, (2 + 0) * SIZE(BO)
fsmtp f1, f0
fmul f1, A1, f1
fnmsub f0, A2, f1, f0
fmul f0, A3, f0
fsmfp f0, f1
#endif
#ifdef LN
subi CO1, CO1, 1 * SIZE
subi CO2, CO2, 1 * SIZE
#endif
/* Store the 1x2 result into the packed buffer and into C.          */
#if defined(LN) || defined(LT)
STFPDX f0, BO, INC2
STFDUX f0, CO1, INC
STFSDUX f0, CO2, INC
#else
STFPDX f0, AO, INC2
STFDUX f0, CO1, INC
STFDUX f1, CO2, INC
#endif
#ifdef LN
subi CO1, CO1, 1 * SIZE
subi CO2, CO2, 1 * SIZE
#endif
#ifdef RT
slwi r0, K, 0 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 1
#endif
#ifdef LN
subi KK, KK, 1
#endif
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/*------------------------------------------------------------------*/
/* .L60: M & 2 row remainder against the 2-column panel (2x2 tile). */
/* GEMM loop unrolled by 8, then K mod 8 remainder, 2x2 solve, and  */
/* store.                                                           */
/*------------------------------------------------------------------*/
.L60:
andi. I, M, 2
beq .L70
#if defined(LT) || defined(RN)
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 3
mtspr CTR, r0
ble .L74
#else
#ifdef LN
slwi r0, K, 1 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 1 + BASE_SHIFT
slwi TEMP, KK, 1 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L74
#endif
/* Preload eight A/B pairs (A9/A10 serve as extra B registers).     */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX B5, BO, INC2
LFPDUX A6, AO, INC2
LFPDUX B6, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A9, BO, INC2
LFPDUX A8, AO, INC2
LFPDUX A10, BO, INC2
bdz- .L73
.align 4
.L72:
/* Unrolled-by-8 body with reloads folded between the madds.        */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f2, B2, A2, f2
fxcsmadd f3, B2, A2, f3
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A3, f0
fxcsmadd f1, B3, A3, f1
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f2, B4, A4, f2
fxcsmadd f3, B4, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
fxcpmadd f0, B5, A5, f0
fxcsmadd f1, B5, A5, f1
LFPDUX A5, AO, INC2
LFPDUX B5, BO, INC2
fxcpmadd f2, B6, A6, f2
fxcsmadd f3, B6, A6, f3
LFPDUX A6, AO, INC2
LFPDUX B6, BO, INC2
fxcpmadd f0, A9, A7, f0
fxcsmadd f1, A9, A7, f1
LFPDUX A7, AO, INC2
LFPDUX A9, BO, INC2
fxcpmadd f2, A10, A8, f2
fxcsmadd f3, A10, A8, f3
LFPDUX A8, AO, INC2
LFPDUX A10, BO, INC2
bdnz+ .L72
.align 4
.L73:
/* Drain copy of .L72 without the reloads.                          */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A2, f2
fxcsmadd f3, B2, A2, f3
fxcpmadd f0, B3, A3, f0
fxcsmadd f1, B3, A3, f1
fxcpmadd f2, B4, A4, f2
fxcsmadd f3, B4, A4, f3
fxcpmadd f0, B5, A5, f0
fxcsmadd f1, B5, A5, f1
fxcpmadd f2, B6, A6, f2
fxcsmadd f3, B6, A6, f3
fxcpmadd f0, A9, A7, f0
fxcsmadd f1, A9, A7, f1
fxcpmadd f2, A10, A8, f2
fxcsmadd f3, A10, A8, f3
.align 4
.L74:
/* K mod 8 remainder loop.                                          */
#if defined(LT) || defined(RN)
andi. r0, KK, 7
mtspr CTR, r0
ble+ .L78
#else
andi. r0, TEMP, 7
mtspr CTR, r0
ble+ .L78
#endif
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
bdz- .L77
.align 4
.L76:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
bdnz+ .L76
.align 4
.L77:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
.align 4
.L78:
/* Fold the duplicated accumulators, rewind pointers, then form the */
/* residual and run the 2x2 triangular solve for the active case.   */
fpadd f0, f0, f2
fpadd f1, f1, f3
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 2
#else
subi r0, KK, 2
#endif
slwi TEMP, r0, 1 + BASE_SHIFT
slwi r0, r0, 1 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
/* Left case: swap pair halves into row order before subtracting.   */
fpmr f24, f0
fsmfp f0, f1
fsmtp f1, f24
LFPDUX f16, BO, INC2
LFPDUX f17, BO, INC2
subi BO, BO, 4 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
#else
LFPDUX f16, AO, INC2
LFPDUX f17, AO, INC2
subi AO, AO, 4 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
#endif
#ifdef LN
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
addi AO, AO, -4 * SIZE
fxsmul f1, A2, f1
fxcpnmsub f0, A2, f1, f0
fxpmul f0, A1, f0
#endif
#ifdef LT
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
addi AO, AO, -4 * SIZE
fxpmul f0, A1, f0
fxcsnmsub f1, A1, f0, f1
fxsmul f1, A2, f1
#endif
#ifdef RN
LFPDUX A1, BO, INC2
LFPDUX A2, BO, INC2
subi BO, BO, 4 * SIZE
fxpmul f0, A1, f0
fxcsnmsub f1, A1, f0, f1
fxsmul f1, A2, f1
#endif
#ifdef RT
LFPDUX A2, BO, INC2
LFPDUX A1, BO, INC2
subi BO, BO, 4 * SIZE
fxsmul f1, A1, f1
fxcpnmsub f0, A1, f1, f0
fxpmul f0, A2, f0
#endif
#ifdef LN
subi CO1, CO1, 2 * SIZE
subi CO2, CO2, 2 * SIZE
#endif
/* Store into the packed buffer and into the two C columns.         */
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC2
STFPDUX f1, BO, INC2
subi BO, BO, 4 * SIZE
STFDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f0, CO2, INC
STFSDUX f1, CO2, INC
#else
STFPDUX f0, AO, INC2
STFPDUX f1, AO, INC2
subi AO, AO, 4 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO2, INC
STFSDUX f1, CO2, INC
#endif
#ifdef LN
subi CO1, CO1, 2 * SIZE
subi CO2, CO2, 2 * SIZE
#endif
#ifdef RT
slwi r0, K, 1 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 2
#endif
#ifdef LN
subi KK, KK, 2
#endif
li r0, FZERO
lfpsx f0, SP, r0
/*------------------------------------------------------------------*/
/* .L70: M & 4 row remainder against the 2-column panel (4x2 tile). */
/* GEMM loop unrolled by 4, K mod 4 remainder, 4x2 solve, store.    */
/*------------------------------------------------------------------*/
.align 4
.L70:
andi. I, M, 4
beq .L80
#if defined(LT) || defined(RN)
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 2
mtspr CTR, r0
ble .L64
#else
#ifdef LN
slwi r0, K, 2 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 2 + BASE_SHIFT
slwi TEMP, KK, 1 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 2
mtspr CTR, r0
ble .L64
#endif
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX B4, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L63
.align 4
.L62:
/* Unrolled-by-4 body (two A pairs per B pair per K step).          */
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A3, f0
fxcsmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f1, B2, A4, f1
fxcsmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A5, f0
fxcsmadd f2, B3, A5, f2
LFPDUX A5, AO, INC2
fxcpmadd f1, B3, A6, f1
fxcsmadd f3, B3, A6, f3
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f0, B4, A7, f0
fxcsmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcpmadd f1, B4, A8, f1
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L62
.align 4
.L63:
/* Drain copy of .L62.                                              */
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
fxcpmadd f0, B2, A3, f0
fxcsmadd f2, B2, A3, f2
fxcpmadd f1, B2, A4, f1
fxcsmadd f3, B2, A4, f3
fxcpmadd f0, B3, A5, f0
fxcsmadd f2, B3, A5, f2
fxcpmadd f1, B3, A6, f1
fxcsmadd f3, B3, A6, f3
fxcpmadd f0, B4, A7, f0
fxcsmadd f2, B4, A7, f2
fxcpmadd f1, B4, A8, f1
fxcsmadd f3, B4, A8, f3
.align 4
.L64:
/* K mod 4 remainder loop.                                          */
#if defined(LT) || defined(RN)
andi. r0, KK, 3
mtspr CTR, r0
ble+ .L68
#else
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L68
#endif
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
bdz- .L67
.align 4
.L66:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
bdnz+ .L66
.align 4
.L67:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
.align 4
.L68:
/* Rewind pointers, form the residual, then run the 4x4/2x2 solve.  */
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 4
#else
subi r0, KK, 2
#endif
slwi TEMP, r0, 2 + BASE_SHIFT
slwi r0, r0, 1 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
/* Left case: transpose pair halves into row order first.           */
fpmr f24, f0
fpmr f25, f1
fsmfp f0, f2
fsmfp f1, f3
fsmtp f2, f24
fsmtp f3, f25
LFPDUX f16, BO, INC2
LFPDUX f17, BO, INC2
LFPDUX f18, BO, INC2
LFPDUX f19, BO, INC2
subi BO, BO, 8 * SIZE
fpsub f0, f16, f0
fpsub f2, f17, f2
fpsub f1, f18, f1
fpsub f3, f19, f3
#else
LFPDUX f16, AO, INC2
LFPDUX f17, AO, INC2
LFPDUX f18, AO, INC2
LFPDUX f19, AO, INC2
subi AO, AO, 8 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
fpsub f2, f18, f2
fpsub f3, f19, f3
#endif
#ifdef LN
/* Back-substitution over the packed 4x4 lower factor (rows 3..0).  */
addi AO, AO, 18 * SIZE
LFPDUX A1, AO, INCM2
LFPDUX A2, AO, INCM2
LFPDUX A3, AO, INCM2
LFPDUX A4, AO, INCM2
add AO, AO, INCM2
LFPDUX A5, AO, INCM2
add AO, AO, INCM2
LFPDUX A6, AO, INCM2
subi AO, AO, 2 * SIZE
fxsmul f3, A1, f3
fxcpnmsub f1, A1, f3, f1
fxcsnmsub f2, A2, f3, f2
fxcpnmsub f0, A2, f3, f0
fxpmul f1, A3, f1
fxcsnmsub f2, A4, f1, f2
fxcpnmsub f0, A4, f1, f0
fxsmul f2, A5, f2
fxcpnmsub f0, A5, f2, f0
fxpmul f0, A6, f0
#endif
#ifdef LT
/* Forward substitution over the packed 4x4 upper factor (0..3).    */
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
add AO, AO, INC2
LFPDUX A5, AO, INC2
add AO, AO, INC2
LFPDUX A6, AO, INC2
subi AO, AO, 16 * SIZE
fxpmul f0, A1, f0
fxcsnmsub f2, A1, f0, f2
fxcpnmsub f1, A2, f0, f1
fxcsnmsub f3, A2, f0, f3
fxsmul f2, A3, f2
fxcpnmsub f1, A4, f2, f1
fxcsnmsub f3, A4, f2, f3
fxpmul f1, A5, f1
fxcsnmsub f3, A5, f1, f3
fxsmul f3, A6, f3
#endif
#ifdef RN
/* 2x2 right solve applied to both accumulator pairs.               */
LFPDUX A1, BO, INC2
LFPDUX A2, BO, INC2
subi BO, BO, 4 * SIZE
fxpmul f0, A1, f0
fxpmul f1, A1, f1
fxcsnmsub f2, A1, f0, f2
fxcsnmsub f3, A1, f1, f3
fxsmul f2, A2, f2
fxsmul f3, A2, f3
#endif
#ifdef RT
LFPDUX A2, BO, INC2
LFPDUX A1, BO, INC2
subi BO, BO, 4 * SIZE
fxsmul f2, A1, f2
fxsmul f3, A1, f3
fxcpnmsub f0, A1, f2, f0
fxcpnmsub f1, A1, f3, f1
fxpmul f0, A2, f0
fxpmul f1, A2, f1
#endif
#ifdef LN
subi CO1, CO1, 4 * SIZE
subi CO2, CO2, 4 * SIZE
#endif
/* Store into the packed buffer and into C.                         */
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC2
STFPDUX f2, BO, INC2
STFPDUX f1, BO, INC2
STFPDUX f3, BO, INC2
subi BO, BO, 8 * SIZE
STFDUX f0, CO1, INC
STFDUX f2, CO1, INC
STFDUX f1, CO1, INC
STFDUX f3, CO1, INC
STFSDUX f0, CO2, INC
STFSDUX f2, CO2, INC
STFSDUX f1, CO2, INC
STFSDUX f3, CO2, INC
#else
STFPDUX f0, AO, INC2
STFPDUX f1, AO, INC2
STFPDUX f2, AO, INC2
STFPDUX f3, AO, INC2
subi AO, AO, 8 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO2, INC
STFSDUX f2, CO2, INC
STFDUX f3, CO2, INC
STFSDUX f3, CO2, INC
#endif
#ifdef LN
subi CO1, CO1, 4 * SIZE
subi CO2, CO2, 4 * SIZE
#endif
#ifdef RT
slwi r0, K, 2 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 4
#endif
#ifdef LN
subi KK, KK, 4
#endif
li r0, FZERO
lfpsx f0, SP, r0
/*------------------------------------------------------------------*/
/* .L80/.L51: main 8x2 tile loop over I = M/8 for the 2-column      */
/* panel.  Accumulators f0-f7; GEMM loop unrolled by 4 with nops    */
/* placed for dual-issue pairing on the double FPU.                 */
/*------------------------------------------------------------------*/
.align 4
.L80:
srawi. I, M, 3
ble .L89
.align 4
.L51:
#if defined(LT) || defined(RN)
fpmr f4, f0
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
srawi. r0, KK, 2
fpmr f3, f0
mtspr CTR, r0
fpmr f7, f0
ble .L54
#else
#ifdef LN
slwi r0, K, 3 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 3 + BASE_SHIFT
slwi TEMP, KK, 1 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
fpmr f4, f0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
srawi. r0, TEMP, 2
fpmr f3, f0
mtspr CTR, r0
fpmr f7, f0
ble .L54
#endif
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L53
.align 4
.L52:
/* Unrolled-by-4 body: four A pairs per B pair, reloads folded in.  */
fxcpmadd f0, B1, A1, f0
LFPDUX B4, BO, INC2
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX B1, BO, INC2
fxcsmadd f4, B2, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
nop
fxcsmadd f5, B2, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B2, A7, f2
nop
fxcsmadd f6, B2, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B2, A8, f3
nop
fxcsmadd f7, B2, A8, f7
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
LFPDUX B2, BO, INC2
fxcsmadd f4, B3, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B3, A3, f2
nop
fxcsmadd f6, B3, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B4, A5, f0
LFPDUX B3, BO, INC2
fxcsmadd f4, B4, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B4, A6, f1
nop
fxcsmadd f5, B4, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B4, A7, f2
nop
fxcsmadd f6, B4, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B4, A8, f3
nop
fxcsmadd f7, B4, A8, f7
LFPDUX A8, AO, INC2
bdnz+ .L52
.align 4
.L53:
/* Drain: first half still reloads (B4/A*); second half does not.   */
fxcpmadd f0, B1, A1, f0
LFPDUX B4, BO, INC2
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
nop
fxcsmadd f4, B2, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
nop
fxcsmadd f5, B2, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B2, A7, f2
nop
fxcsmadd f6, B2, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B2, A8, f3
nop
fxcsmadd f7, B2, A8, f7
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
fxcsmadd f4, B3, A1, f4
fxcpmadd f1, B3, A2, f1
fxcsmadd f5, B3, A2, f5
fxcpmadd f2, B3, A3, f2
fxcsmadd f6, B3, A3, f6
fxcpmadd f3, B3, A4, f3
fxcsmadd f7, B3, A4, f7
fxcpmadd f0, B4, A5, f0
fxcsmadd f4, B4, A5, f4
fxcpmadd f1, B4, A6, f1
fxcsmadd f5, B4, A6, f5
fxcpmadd f2, B4, A7, f2
fxcsmadd f6, B4, A7, f6
fxcpmadd f3, B4, A8, f3
fxcsmadd f7, B4, A8, f7
.align 4
.L54:
/* K mod 4 remainder loop (four A pairs per iteration).             */
#if defined(LT) || defined(RN)
andi. r0, KK, 3
mtspr CTR, r0
ble+ .L58
#else
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L58
#endif
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
bdz- .L57
.align 4
.L56:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
LFPDUX B1, BO, INC2
bdnz+ .L56
.align 4
.L57:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f2, B1, A3, f2
fxcsmadd f6, B1, A3, f6
fxcpmadd f3, B1, A4, f3
fxcsmadd f7, B1, A4, f7
.align 4
.L58:
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 8
#else
subi r0, KK, 2
#endif
slwi TEMP, r0, 3 + BASE_SHIFT
slwi r0, r0, 1 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
fpmr f24, f0
fpmr f25, f1
fpmr f26, f2
fpmr f27, f3
fsmfp f0, f4
fsmfp f1, f5
fsmfp f2, f6
fsmfp f3, f7
fsmtp f4, f24
fsmtp f5, f25
fsmtp f6, f26
fsmtp f7, f27
LFPDUX f16, BO, INC2
LFPDUX f17, BO, INC2
LFPDUX f18, BO, INC2
LFPDUX f19, BO, INC2
LFPDUX f20, BO, INC2
LFPDUX f21, BO, INC2
LFPDUX f22, BO, INC2
LFPDUX f23, BO, INC2
subi BO, BO, 16 * SIZE
fpsub f0, f16, f0
fpsub f4, f17, f4
fpsub f1, f18, f1
fpsub f5, f19, f5
fpsub f2, f20, f2
fpsub f6, f21, f6
fpsub f3, f22, f3
fpsub f7, f23, f7
#else
LFPDUX f16, AO, INC2
LFPDUX f17, AO, INC2
LFPDUX f18, AO, INC2
LFPDUX f19, AO, INC2
LFPDUX f20, AO, INC2
LFPDUX f21, AO, INC2
LFPDUX f22, AO, INC2
LFPDUX f23, AO, INC2
subi AO, AO, 16 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
fpsub f2, f18, f2
fpsub f3, f19, f3
fpsub f4, f20, f4
fpsub f5, f21, f5
fpsub f6, f22, f6
fpsub f7, f23, f7
#endif
#ifdef LN
addi AO, AO, 66 * SIZE
LFPDUX A1, AO, INCM2
LFPDUX A2, AO, INCM2
LFPDUX A3, AO, INCM2
LFPDUX A4, AO, INCM2
LFPDUX A5, AO, INCM2
LFPDUX A6, AO, INCM2
LFPDUX A7, AO, INCM2
LFPDUX A8, AO, INCM2
fxsmul f7, A1, f7
fxcpnmsub f3, A1, f7, f3
fxcsnmsub f6, A2, f7, f6
fxcpnmsub f2, A2, f7, f2
fxcsnmsub f5, A3, f7, f5
fxcpnmsub f1, A3, f7, f1
fxcsnmsub f4, A4, f7, f4
fxcpnmsub f0, A4, f7, f0
fxpmul f3, A5, f3
fxcsnmsub f6, A6, f3, f6
fxcpnmsub f2, A6, f3, f2
fxcsnmsub f5, A7, f3, f5
fxcpnmsub f1, A7, f3, f1
fxcsnmsub f4, A8, f3, f4
fxcpnmsub f0, A8, f3, f0
add AO, AO, INCM2
LFPDUX A1, AO, INCM2
LFPDUX A2, AO, INCM2
LFPDUX A3, AO, INCM2
add AO, AO, INCM2
LFPDUX A4, AO, INCM2
LFPDUX A5, AO, INCM2
LFPDUX A6, AO, INCM2
add AO, AO, INCM2
add AO, AO, INCM2
LFPDUX A7, AO, INCM2
LFPDUX A8, AO, INCM2
fxsmul f6, A1, f6
fxcpnmsub f2, A1, f6, f2
fxcsnmsub f5, A2, f6, f5
fxcpnmsub f1, A2, f6, f1
fxcsnmsub f4, A3, f6, f4
fxcpnmsub f0, A3, f6, f0
fxpmul f2, A4, f2
fxcsnmsub f5, A5, f2, f5
fxcpnmsub f1, A5, f2, f1
fxcsnmsub f4, A6, f2, f4
fxcpnmsub f0, A6, f2, f0
fxsmul f5, A7, f5
fxcpnmsub f1, A7, f5, f1
fxcsnmsub f4, A8, f5, f4
fxcpnmsub f0, A8, f5, f0
add AO, AO, INCM2
add AO, AO, INCM2
LFPDUX A1, AO, INCM2
LFPDUX A2, AO, INCM2
subi AO, AO, 6 * SIZE
LFPDUX A3, AO, INCM2
subi AO, AO, 6 * SIZE
LFPDUX A4, AO, INCM2
addi AO, AO, -2 * SIZE
fxpmul f1, A1, f1
fxcsnmsub f4, A2, f1, f4
fxcpnmsub f0, A2, f1, f0
fxsmul f4, A3, f4
fxcpnmsub f0, A3, f4, f0
fxpmul f0, A4, f0
#endif
#ifdef LT
/* LT: forward substitution, mirror image of the LN path above.  Rows of the
   packed triangular block are read front-to-back with the positive stride
   INC2, solving f0 first and eliminating it from f1..f7, then f4, etc. */
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
fxpmul f0, A1, f0
fxcsnmsub f4, A1, f0, f4
fxcpnmsub f1, A2, f0, f1
fxcsnmsub f5, A2, f0, f5
fxcpnmsub f2, A3, f0, f2
fxcsnmsub f6, A3, f0, f6
fxcpnmsub f3, A4, f0, f3
fxcsnmsub f7, A4, f0, f7
fxsmul f4, A5, f4
fxcpnmsub f1, A6, f4, f1
fxcsnmsub f5, A6, f4, f5
fxcpnmsub f2, A7, f4, f2
fxcsnmsub f6, A7, f4, f6
fxcpnmsub f3, A8, f4, f3
fxcsnmsub f7, A8, f4, f7
/* skip the already-consumed leading entries of the next rows */
add AO, AO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
add AO, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
add AO, AO, INC2
add AO, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
fxpmul f1, A1, f1
fxcsnmsub f5, A1, f1, f5
fxcpnmsub f2, A2, f1, f2
fxcsnmsub f6, A2, f1, f6
fxcpnmsub f3, A3, f1, f3
fxcsnmsub f7, A3, f1, f7
fxsmul f5, A4, f5
fxcpnmsub f2, A5, f5, f2
fxcsnmsub f6, A5, f5, f6
fxcpnmsub f3, A6, f5, f3
fxcsnmsub f7, A6, f5, f7
fxpmul f2, A7, f2
fxcsnmsub f6, A7, f2, f6
fxcpnmsub f3, A8, f2, f3
fxcsnmsub f7, A8, f2, f7
/* last four rows, then rewind AO to the start of the 8x8 block */
add AO, AO, INC2
add AO, AO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
addi AO, AO, 6 * SIZE
LFPDUX A3, AO, INC2
addi AO, AO, 6 * SIZE
LFPDUX A4, AO, INC2
subi AO, AO, 64 * SIZE
fxsmul f6, A1, f6
fxcpnmsub f3, A2, f6, f3
fxcsnmsub f7, A2, f6, f7
fxpmul f3, A3, f3
fxcsnmsub f7, A3, f3, f7
fxsmul f7, A4, f7
#endif
#ifdef RN
/* RN: solve against the 2x2 upper-triangular B block, forward order.
   A1 holds B's row 0 (diagonal in primary slot), A2 row 1. */
LFPDUX A1, BO, INC2
LFPDUX A2, BO, INC2
subi BO, BO, 4 * SIZE           /* rewind BO to the block start */
fxpmul f0, A1, f0
fxpmul f1, A1, f1
fxpmul f2, A1, f2
fxpmul f3, A1, f3
fxcsnmsub f4, A1, f0, f4
fxcsnmsub f5, A1, f1, f5
fxcsnmsub f6, A1, f2, f6
fxcsnmsub f7, A1, f3, f7
fxsmul f4, A2, f4
fxsmul f5, A2, f5
fxsmul f6, A2, f6
fxsmul f7, A2, f7
#endif
#ifdef RT
/* RT: same 2x2 block solved in reverse order (second row first). */
LFPDUX A2, BO, INC2
LFPDUX A1, BO, INC2
subi BO, BO, 4 * SIZE
fxsmul f4, A1, f4
fxsmul f5, A1, f5
fxsmul f6, A1, f6
fxsmul f7, A1, f7
fxcpnmsub f0, A1, f4, f0
fxcpnmsub f1, A1, f5, f1
fxcpnmsub f2, A1, f6, f2
fxcpnmsub f3, A1, f7, f3
fxpmul f0, A2, f0
fxpmul f1, A2, f1
fxpmul f2, A2, f2
fxpmul f3, A2, f3
#endif
/* Write the solved 8x2 tile back to the packed buffer and to C (CO1/CO2 are
   the two C columns).  For LN, C is walked backwards: step CO1/CO2 back
   before the stores, then back again afterwards so the net movement is one
   8-row tile to the left. */
#ifdef LN
subi CO1, CO1, 8 * SIZE
subi CO2, CO2, 8 * SIZE
#endif
#if defined(LN) || defined(LT)
/* packed solution goes back to BO, interleaved f0/f4, f1/f5, ... */
STFPDUX f0, BO, INC2
STFPDUX f4, BO, INC2
STFPDUX f1, BO, INC2
STFPDUX f5, BO, INC2
STFPDUX f2, BO, INC2
STFPDUX f6, BO, INC2
STFPDUX f3, BO, INC2
STFPDUX f7, BO, INC2
subi BO, BO, 16 * SIZE
/* column 0 of C from the primary halves, column 1 from the secondary */
STFDUX f0, CO1, INC
STFDUX f4, CO1, INC
STFDUX f1, CO1, INC
STFDUX f5, CO1, INC
STFDUX f2, CO1, INC
STFDUX f6, CO1, INC
STFDUX f3, CO1, INC
STFDUX f7, CO1, INC
STFSDUX f0, CO2, INC
STFSDUX f4, CO2, INC
STFSDUX f1, CO2, INC
STFSDUX f5, CO2, INC
STFSDUX f2, CO2, INC
STFSDUX f6, CO2, INC
STFSDUX f3, CO2, INC
STFSDUX f7, CO2, INC
#else
/* RN/RT keep the packed copy in AO; each register spans both C columns */
STFPDUX f0, AO, INC2
STFPDUX f1, AO, INC2
STFPDUX f2, AO, INC2
STFPDUX f3, AO, INC2
STFPDUX f4, AO, INC2
STFPDUX f5, AO, INC2
STFPDUX f6, AO, INC2
STFPDUX f7, AO, INC2
subi AO, AO, 16 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
STFDUX f6, CO2, INC
STFSDUX f6, CO2, INC
STFDUX f7, CO2, INC
STFSDUX f7, CO2, INC
#endif
#ifdef LN
subi CO1, CO1, 8 * SIZE
subi CO2, CO2, 8 * SIZE
#endif
/* advance the packed-operand pointers to the next 8-row tile */
#ifdef RT
slwi r0, K, 3 + BASE_SHIFT      /* r0 = K * 8 * SIZE */
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 3 + BASE_SHIFT   /* remaining-K * 8 elements of A */
slwi TEMP, TEMP, 1 + BASE_SHIFT /* remaining-K * 2 elements of B */
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 8
#endif
#ifdef LN
subi KK, KK, 8
#endif
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0                /* reload paired zero for the next tile */
bgt+ .L51
.align 4
.L89:
/* End of the N=2 column pass: advance B past the consumed panel and
   step KK by the two columns just solved. */
#ifdef LN
slwi r0, K, 1 + BASE_SHIFT      /* r0 = K * 2 * SIZE */
add B, B, r0
#endif
#if defined(LT) || defined(RN)
addi B, BO, 2 * SIZE
#endif
#ifdef RN
addi KK, KK, 2
#endif
#ifdef RT
subi KK, KK, 2
#endif
.align 4
.L90:
/* Remaining single column of B (N odd)? If not, we are done. */
andi. J, N, 1
beq .L999
/* Set up the N=1 pass: position B/C (RT walks backwards), reset KK and the
   A-side base pointer, and load the paired-zero accumulator seed. */
#ifdef RT
slwi r0, K, 0 + BASE_SHIFT      /* one column of B = K elements */
sub B, B, r0
sub C, C, LDC
#endif
mr CO1, C
#ifdef LN
add KK, M, OFFSET
#endif
#ifdef LT
mr KK, OFFSET
#endif
#if defined(LN) || defined(RT)
addi AORIG, A, -2 * SIZE        /* pre-biased for the LFPDUX update forms */
#else
addi AO, A, -2 * SIZE
#endif
#ifndef RT
add C, CO1, LDC
#endif
li r0, FZERO
lfpsx f0, SP, r0                /* f0 = packed zero from the stack slot */
andi. I, M, 1
beq .L100                       /* no odd row -> try the M&2 tile */
/* 1x1 tile: a dot product of one row of A with the single B column,
   accumulated four-wide in f0..f3 (paired registers, unrolled by 8). */
#if defined(LT) || defined(RN)
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 3                /* CTR = KK / 8 */
mtspr CTR, r0
ble .L124
#else
#ifdef LN
slwi r0, K, 0 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
/* rebase AO/BO at element KK of the packed operands */
slwi r0 , KK, 0 + BASE_SHIFT
slwi TEMP, KK, 0 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK                 /* TEMP = remaining k-loop length */
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L124
#endif
/* software-pipelined main loop: preload 4 pairs from each operand */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
bdz- .L123
.align 4
.L122:
fpmadd f0, A1, B1, f0
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
fpmadd f1, A2, B2, f1
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
fpmadd f2, A3, B3, f2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
fpmadd f3, A4, B4, f3
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L122
.align 4
.L123:
/* drain the last preloaded group */
fpmadd f0, A1, B1, f0
fpmadd f1, A2, B2, f1
fpmadd f2, A3, B3, f2
fpmadd f3, A4, B4, f3
.align 4
.L124:
/* scalar tail: up to 7 leftover k iterations, one element at a time */
#if defined(LT) || defined(RN)
andi. r0, KK, 7
mtspr CTR, r0
ble+ .L128
#else
andi. r0, TEMP, 7
mtspr CTR, r0
ble+ .L128
#endif
LFDX A1, AO, INC2
LFDX B1, BO, INC2
add AO, AO, INC
add BO, BO, INC
bdz- .L127
.align 4
.L126:
fmadd f0, A1, B1, f0
LFDX A1, AO, INC2
LFDX B1, BO, INC2
add AO, AO, INC
add BO, BO, INC
bdnz+ .L126
.align 4
.L127:
fmadd f0, A1, B1, f0
.align 4
.L128:
/* horizontal reduction of the four paired accumulators to scalar f0 */
fpadd f0, f0, f1
fpadd f2, f2, f3
fpadd f0, f0, f2
fsmtp f1, f0                    /* move secondary half into f1 */
fadd f0, f0, f1
/* 1x1 solve and store.  With a single row and column the "triangular solve"
   degenerates to one subtract and one multiply by the (reciprocal) diagonal
   element. */
#if defined(LN) || defined(RT)
/* Rebase AO/BO at element KK-1.  The original code wrapped this subi in an
   "#ifdef LN / #else" whose two arms were byte-identical (both shifts below
   are 0 + BASE_SHIFT for a 1x1 tile), so the conditional was dead and has
   been collapsed. */
subi r0, KK, 1
slwi TEMP, r0, 0 + BASE_SHIFT
slwi r0, r0, 0 + BASE_SHIFT
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
LFDX f16, BO, INC2
fsub f0, f16, f0                /* residual = stored value - accumulated product */
#else
LFDX f16, AO, INC2
fsub f0, f16, f0
#endif
/* NOTE(review): the LN and LT arms below are identical; they are kept as
   separate #ifdef blocks to mirror the structure of the larger tiles. */
#ifdef LN
LFD A1, (2 + 0) * SIZE(AO)
fmul f0, A1, f0
#endif
#ifdef LT
LFD A1, (2 + 0) * SIZE(AO)
fmul f0, A1, f0
#endif
#ifdef RN
LFDX A1, BO, INC2
fmul f0, A1, f0
#endif
#ifdef RT
LFDX A1, BO, INC2
fmul f0, A1, f0
#endif
#ifdef LN
subi CO1, CO1, 1 * SIZE         /* LN walks C backwards */
#endif
#if defined(LN) || defined(LT)
STFDX f0, BO, INC2              /* refresh the packed copy */
STFDUX f0, CO1, INC             /* and the C entry */
#else
STFDX f0, AO, INC2
STFDUX f0, CO1, INC
#endif
#ifdef LN
subi CO1, CO1, 1 * SIZE
#endif
/* advance packed-operand pointers past this 1x1 tile */
#ifdef RT
slwi r0, K, 0 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 1
#endif
#ifdef LN
subi KK, KK, 1
#endif
li r0, FZERO
lfpsx f0, SP, r0                /* reload paired zero for the next tile */
.align 4
.L100:
andi. I, M, 2
beq .L110
#if defined(LT) || defined(RN)
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 3
mtspr CTR, r0
ble .L114
#else
#ifdef LN
slwi r0, K, 1 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 1 + BASE_SHIFT
slwi TEMP, KK, 0 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L114
#endif
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdz- .L113
.align 4
.L112:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcsmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B3, A6, f1
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L112
.align 4
.L113:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A2, f1
fxcpmadd f2, B2, A3, f2
fxcsmadd f3, B2, A4, f3
fxcpmadd f0, B3, A5, f0
fxcsmadd f1, B3, A6, f1
fxcpmadd f2, B4, A7, f2
fxcsmadd f3, B4, A8, f3
.align 4
.L114:
#if defined(LT) || defined(RN)
andi. r0, KK, 7
mtspr CTR, r0
ble+ .L118
#else
andi. r0, TEMP, 7
mtspr CTR, r0
ble+ .L118
#endif
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
add BO, BO, INC
bdz- .L117
.align 4
.L116:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
add BO, BO, INC
bdnz+ .L116
.align 4
.L117:
fxcpmadd f0, B1, A1, f0
.align 4
.L118:
fpadd f0, f0, f1
fpadd f2, f3, f2
fpadd f0, f0, f2
/* 2x1 solve: a 2x2 triangular system on the two rows held in f0's pair. */
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 2
#else
subi r0, KK, 1
#endif
slwi TEMP, r0, 1 + BASE_SHIFT   /* A offset scales by 2 rows */
slwi r0, r0, 0 + BASE_SHIFT     /* B offset scales by 1 column */
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
LFPDX f16, BO, INC2
fpsub f0, f16, f0               /* residual = stored - accumulated */
#else
LFPDX f16, AO, INC2
fpsub f0, f16, f0
#endif
#ifdef LN
/* backward: solve row 1 (secondary half, moved to f4), eliminate, solve row 0 */
fsmtp f4, f0
LFD A1, (2 + 3) * SIZE(AO)
LFD A2, (2 + 2) * SIZE(AO)
LFD A3, (2 + 0) * SIZE(AO)
fmul f4, A1, f4
fnmsub f0, A2, f4, f0
fmul f0, A3, f0
fsmfp f0, f4                    /* re-pack f4 into f0's secondary half */
#endif
#ifdef LT
/* forward: solve row 0, eliminate it from row 1, solve row 1 */
fsmtp f4, f0
LFD A1, (2 + 0) * SIZE(AO)
LFD A2, (2 + 1) * SIZE(AO)
LFD A3, (2 + 3) * SIZE(AO)
fmul f0, A1, f0
fnmsub f4, A2, f0, f4
fmul f4, A3, f4
fsmfp f0, f4
#endif
#ifdef RN
LFPDX A1, BO, INC2
fxpmul f0, A1, f0               /* scale both rows by the single B diagonal */
#endif
#ifdef RT
LFPDX A1, BO, INC2
fxpmul f0, A1, f0
#endif
#ifdef LN
subi CO1, CO1, 2 * SIZE
#endif
#if defined(LN) || defined(LT)
STFPDX f0, BO, INC2             /* refresh packed copy, then C */
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
#else
STFPDX f0, AO, INC2
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
#endif
#ifdef LN
subi CO1, CO1, 2 * SIZE
#endif
/* advance the packed-operand pointers past this 2x1 tile */
#ifdef RT
slwi r0, K, 1 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 2
#endif
#ifdef LN
subi KK, KK, 2
#endif
li r0, FZERO
lfpsx f0, SP, r0
.align 4
.L110:
andi. I, M, 4
beq .L120
#if defined(LT) || defined(RN)
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 3
mtspr CTR, r0
ble .L104
#else
#ifdef LN
slwi r0, K, 2 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 2 + BASE_SHIFT
slwi TEMP, KK, 0 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L104
#endif
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX B4, BO, INC2
bdz- .L103
.align 4
.L102:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B3, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B3, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B3, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f0, B4, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B4, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L102
.align 4
.L103:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
fxcpmadd f1, B3, A2, f1
fxcsmadd f2, B3, A3, f2
fxcsmadd f3, B3, A4, f3
fxcpmadd f0, B4, A5, f0
fxcpmadd f1, B4, A6, f1
fxcsmadd f2, B4, A7, f2
fxcsmadd f3, B4, A8, f3
.align 4
.L104:
#if defined(LT) || defined(RN)
andi. r0, KK, 7
mtspr CTR, r0
ble+ .L108
#else
andi. r0, TEMP, 7
mtspr CTR, r0
ble+ .L108
#endif
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
LFPDUX A2, AO, INC2
add BO, BO, INC
bdz- .L107
.align 4
.L106:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFDX B1, BO, INC2
LFPDUX A2, AO, INC2
add BO, BO, INC
bdnz+ .L106
.align 4
.L107:
fxcpmadd f0, B1, A1, f0
fxcpmadd f1, B1, A2, f1
.align 4
.L108:
fpadd f0, f0, f2
fpadd f1, f1, f3
/* 4x1 solve: 4x4 triangular system on rows f0(lo,hi)=x0,x4'... unpacked via
   fsmtp into scalars f0,f4,f1,f5 and re-packed with fsmfp afterwards. */
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 4
#else
subi r0, KK, 1
#endif
slwi TEMP, r0, 2 + BASE_SHIFT   /* A offset scales by 4 rows */
slwi r0, r0, 0 + BASE_SHIFT     /* B offset scales by 1 column */
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
LFPDUX f16, BO, INC2
LFPDUX f17, BO, INC2
subi BO, BO, 4 * SIZE
fpsub f0, f16, f0               /* residual = stored - accumulated */
fpsub f1, f17, f1
#else
LFPDUX f16, AO, INC2
LFPDUX f17, AO, INC2
subi AO, AO, 4 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
#endif
#ifdef LN
/* backward substitution from row 3 (f5) to row 0 (f0); the fixed offsets
   index the packed 4x4 block column-major from its top (AO+2*SIZE) */
fsmtp f4, f0
fsmtp f5, f1
LFD A1, (2 + 15) * SIZE(AO)
LFD A2, (2 + 14) * SIZE(AO)
LFD A3, (2 + 13) * SIZE(AO)
LFD A4, (2 + 12) * SIZE(AO)
fmul f5, A1, f5
fnmsub f1, A2, f5, f1
fnmsub f4, A3, f5, f4
fnmsub f0, A4, f5, f0
LFD A1, (2 + 10) * SIZE(AO)
LFD A2, (2 + 9) * SIZE(AO)
LFD A3, (2 + 8) * SIZE(AO)
fmul f1, A1, f1
fnmsub f4, A2, f1, f4
fnmsub f0, A3, f1, f0
LFD A1, (2 + 5) * SIZE(AO)
LFD A2, (2 + 4) * SIZE(AO)
fmul f4, A1, f4
fnmsub f0, A2, f4, f0
LFD A1, (2 + 0) * SIZE(AO)
fmul f0, A1, f0
fsmfp f0, f4                    /* re-pack scalars into paired registers */
fsmfp f1, f5
#endif
#ifdef LT
/* forward substitution from row 0 to row 3 */
fsmtp f4, f0
fsmtp f5, f1
LFD A1, (2 + 0) * SIZE(AO)
LFD A2, (2 + 1) * SIZE(AO)
LFD A3, (2 + 2) * SIZE(AO)
LFD A4, (2 + 3) * SIZE(AO)
fmul f0, A1, f0
fnmsub f4, A2, f0, f4
fnmsub f1, A3, f0, f1
fnmsub f5, A4, f0, f5
LFD A1, (2 + 5) * SIZE(AO)
LFD A2, (2 + 6) * SIZE(AO)
LFD A3, (2 + 7) * SIZE(AO)
fmul f4, A1, f4
fnmsub f1, A2, f4, f1
fnmsub f5, A3, f4, f5
LFD A1, (2 + 10) * SIZE(AO)
LFD A2, (2 + 11) * SIZE(AO)
fmul f1, A1, f1
fnmsub f5, A2, f1, f5
LFD A1, (2 + 15) * SIZE(AO)
fmul f5, A1, f5
fsmfp f0, f4
fsmfp f1, f5
#endif
#ifdef RN
LFPDX A1, BO, INC2
fxpmul f0, A1, f0               /* scale all four rows by B's diagonal */
fxpmul f1, A1, f1
#endif
#ifdef RT
LFPDX A1, BO, INC2
fxpmul f0, A1, f0
fxpmul f1, A1, f1
#endif
#ifdef LN
subi CO1, CO1, 4 * SIZE
#endif
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC2            /* refresh packed copy, then C */
STFPDUX f1, BO, INC2
subi BO, BO, 4 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
#else
STFPDUX f0, AO, INC2
STFPDUX f1, AO, INC2
subi AO, AO, 4 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
#endif
#ifdef LN
subi CO1, CO1, 4 * SIZE
#endif
/* advance packed-operand pointers past this 4x1 tile */
#ifdef RT
slwi r0, K, 2 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 4
#endif
#ifdef LN
subi KK, KK, 4
#endif
li r0, FZERO
lfpsx f0, SP, r0
.align 4
.L120:
srawi. I, M, 3
ble .L129
.align 4
.L91:
#if defined(LT) || defined(RN)
fpmr f1, f0
addi BO, B, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
srawi. r0, KK, 2
mtspr CTR, r0
ble .L94
#else
#ifdef LN
slwi r0, K, 3 + BASE_SHIFT
sub AORIG, AORIG, r0
#endif
slwi r0 , KK, 3 + BASE_SHIFT
slwi TEMP, KK, 0 + BASE_SHIFT
add AO, AORIG, r0
add BO, B, TEMP
sub TEMP, K, KK
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
srawi. r0, TEMP, 2
mtspr CTR, r0
ble .L94
#endif
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L93
.align 4
.L92:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B1, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B1, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B1, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B1, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B2, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B2, BO, INC2
bdnz+ .L92
.align 4
.L93:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B1, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B1, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B1, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B1, A8, f3
LFPDUX A8, AO, INC2
fxcpmadd f0, B2, A1, f0
fxcpmadd f1, B2, A2, f1
fxcpmadd f2, B2, A3, f2
fxcpmadd f3, B2, A4, f3
fxcsmadd f0, B2, A5, f0
fxcsmadd f1, B2, A6, f1
fxcsmadd f2, B2, A7, f2
fxcsmadd f3, B2, A8, f3
.align 4
.L94:
#if defined(LT) || defined(RN)
andi. r0, KK, 3
mtspr CTR, r0
ble+ .L98
#else
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L98
#endif
LFDX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
add BO, BO, INC
bdz- .L97
.align 4
.L96:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFDX B1, BO, INC2
LFPDUX A4, AO, INC2
add BO, BO, INC
bdnz+ .L96
.align 4
.L97:
fxcpmadd f0, B1, A1, f0
fxcpmadd f1, B1, A2, f1
fxcpmadd f2, B1, A3, f2
fxcpmadd f3, B1, A4, f3
.align 4
.L98:
/* 8x1 solve: rebase the operand pointers, reload the stored tile, subtract
   the accumulated product, then run the 8x8 triangular substitution. */
#if defined(LN) || defined(RT)
#ifdef LN
subi r0, KK, 8
#else
subi r0, KK, 1
#endif
slwi TEMP, r0, 3 + BASE_SHIFT   /* A offset scales by 8 rows */
slwi r0, r0, 0 + BASE_SHIFT     /* B offset scales by 1 column */
add AO, AORIG, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
#endif
#if defined(LN) || defined(LT)
LFPDUX f16, BO, INC2
LFPDUX f17, BO, INC2
LFPDUX f18, BO, INC2
LFPDUX f19, BO, INC2
subi BO, BO, 8 * SIZE
fpsub f0, f16, f0               /* residual = stored - accumulated */
fpsub f1, f17, f1
fpsub f2, f18, f2
fpsub f3, f19, f3
#else
LFPDUX f16, AO, INC2
LFPDUX f17, AO, INC2
LFPDUX f18, AO, INC2
LFPDUX f19, AO, INC2
subi AO, AO, 8 * SIZE
fpsub f0, f16, f0
fpsub f1, f17, f1
fpsub f2, f18, f2
fpsub f3, f19, f3
#endif
#ifdef LN
/* backward substitution over 8 unknowns.  Paired registers are unpacked
   into scalars (f0..f7) with fsmtp; fixed offsets (2+k)*SIZE index the
   packed 8x8 triangular block relative to its base at AO+2*SIZE.  Row 7
   first, eliminating each solved unknown from all rows above it. */
fsmtp f4, f0
fsmtp f5, f1
fsmtp f6, f2
fsmtp f7, f3
LFD A1, (2 + 63) * SIZE(AO)
LFD A2, (2 + 62) * SIZE(AO)
LFD A3, (2 + 61) * SIZE(AO)
LFD A4, (2 + 60) * SIZE(AO)
LFD A5, (2 + 59) * SIZE(AO)
LFD A6, (2 + 58) * SIZE(AO)
LFD A7, (2 + 57) * SIZE(AO)
LFD A8, (2 + 56) * SIZE(AO)
fmul f7, A1, f7
fnmsub f3, A2, f7, f3
fnmsub f6, A3, f7, f6
fnmsub f2, A4, f7, f2
fnmsub f5, A5, f7, f5
fnmsub f1, A6, f7, f1
fnmsub f4, A7, f7, f4
fnmsub f0, A8, f7, f0
LFD A1, (2 + 54) * SIZE(AO)
LFD A2, (2 + 53) * SIZE(AO)
LFD A3, (2 + 52) * SIZE(AO)
LFD A4, (2 + 51) * SIZE(AO)
LFD A5, (2 + 50) * SIZE(AO)
LFD A6, (2 + 49) * SIZE(AO)
LFD A7, (2 + 48) * SIZE(AO)
fmul f3, A1, f3
fnmsub f6, A2, f3, f6
fnmsub f2, A3, f3, f2
fnmsub f5, A4, f3, f5
fnmsub f1, A5, f3, f1
fnmsub f4, A6, f3, f4
fnmsub f0, A7, f3, f0
LFD A1, (2 + 45) * SIZE(AO)
LFD A2, (2 + 44) * SIZE(AO)
LFD A3, (2 + 43) * SIZE(AO)
LFD A4, (2 + 42) * SIZE(AO)
LFD A5, (2 + 41) * SIZE(AO)
LFD A6, (2 + 40) * SIZE(AO)
fmul f6, A1, f6
fnmsub f2, A2, f6, f2
fnmsub f5, A3, f6, f5
fnmsub f1, A4, f6, f1
fnmsub f4, A5, f6, f4
fnmsub f0, A6, f6, f0
LFD A1, (2 + 36) * SIZE(AO)
LFD A2, (2 + 35) * SIZE(AO)
LFD A3, (2 + 34) * SIZE(AO)
LFD A4, (2 + 33) * SIZE(AO)
LFD A5, (2 + 32) * SIZE(AO)
fmul f2, A1, f2
fnmsub f5, A2, f2, f5
fnmsub f1, A3, f2, f1
fnmsub f4, A4, f2, f4
fnmsub f0, A5, f2, f0
LFD A1, (2 + 27) * SIZE(AO)
LFD A2, (2 + 26) * SIZE(AO)
LFD A3, (2 + 25) * SIZE(AO)
LFD A4, (2 + 24) * SIZE(AO)
fmul f5, A1, f5
fnmsub f1, A2, f5, f1
fnmsub f4, A3, f5, f4
fnmsub f0, A4, f5, f0
LFD A1, (2 + 18) * SIZE(AO)
LFD A2, (2 + 17) * SIZE(AO)
LFD A3, (2 + 16) * SIZE(AO)
fmul f1, A1, f1
fnmsub f4, A2, f1, f4
fnmsub f0, A3, f1, f0
LFD A1, (2 + 9) * SIZE(AO)
LFD A2, (2 + 8) * SIZE(AO)
fmul f4, A1, f4
fnmsub f0, A2, f4, f0
LFD A1, (2 + 0) * SIZE(AO)
fmul f0, A1, f0
fsmfp f0, f4                    /* re-pack solved scalars into pairs */
fsmfp f1, f5
fsmfp f2, f6
fsmfp f3, f7
#endif
#ifdef LT
/* forward substitution over 8 unknowns, mirror of the LN path: row 0 first,
   each solved unknown eliminated from all rows below it.  Offsets walk the
   packed triangular block top-down ((2+0), (2+9), (2+18), ... are the
   successive diagonal entries). */
fsmtp f4, f0
fsmtp f5, f1
fsmtp f6, f2
fsmtp f7, f3
LFD A1, (2 + 0) * SIZE(AO)
LFD A2, (2 + 1) * SIZE(AO)
LFD A3, (2 + 2) * SIZE(AO)
LFD A4, (2 + 3) * SIZE(AO)
LFD A5, (2 + 4) * SIZE(AO)
LFD A6, (2 + 5) * SIZE(AO)
LFD A7, (2 + 6) * SIZE(AO)
LFD A8, (2 + 7) * SIZE(AO)
fmul f0, A1, f0
fnmsub f4, A2, f0, f4
fnmsub f1, A3, f0, f1
fnmsub f5, A4, f0, f5
fnmsub f2, A5, f0, f2
fnmsub f6, A6, f0, f6
fnmsub f3, A7, f0, f3
fnmsub f7, A8, f0, f7
LFD A1, (2 + 9) * SIZE(AO)
LFD A2, (2 + 10) * SIZE(AO)
LFD A3, (2 + 11) * SIZE(AO)
LFD A4, (2 + 12) * SIZE(AO)
LFD A5, (2 + 13) * SIZE(AO)
LFD A6, (2 + 14) * SIZE(AO)
LFD A7, (2 + 15) * SIZE(AO)
fmul f4, A1, f4
fnmsub f1, A2, f4, f1
fnmsub f5, A3, f4, f5
fnmsub f2, A4, f4, f2
fnmsub f6, A5, f4, f6
fnmsub f3, A6, f4, f3
fnmsub f7, A7, f4, f7
LFD A1, (2 + 18) * SIZE(AO)
LFD A2, (2 + 19) * SIZE(AO)
LFD A3, (2 + 20) * SIZE(AO)
LFD A4, (2 + 21) * SIZE(AO)
LFD A5, (2 + 22) * SIZE(AO)
LFD A6, (2 + 23) * SIZE(AO)
fmul f1, A1, f1
fnmsub f5, A2, f1, f5
fnmsub f2, A3, f1, f2
fnmsub f6, A4, f1, f6
fnmsub f3, A5, f1, f3
fnmsub f7, A6, f1, f7
LFD A1, (2 + 27) * SIZE(AO)
LFD A2, (2 + 28) * SIZE(AO)
LFD A3, (2 + 29) * SIZE(AO)
LFD A4, (2 + 30) * SIZE(AO)
LFD A5, (2 + 31) * SIZE(AO)
fmul f5, A1, f5
fnmsub f2, A2, f5, f2
fnmsub f6, A3, f5, f6
fnmsub f3, A4, f5, f3
fnmsub f7, A5, f5, f7
LFD A1, (2 + 36) * SIZE(AO)
LFD A2, (2 + 37) * SIZE(AO)
LFD A3, (2 + 38) * SIZE(AO)
LFD A4, (2 + 39) * SIZE(AO)
fmul f2, A1, f2
fnmsub f6, A2, f2, f6
fnmsub f3, A3, f2, f3
fnmsub f7, A4, f2, f7
LFD A1, (2 + 45) * SIZE(AO)
LFD A2, (2 + 46) * SIZE(AO)
LFD A3, (2 + 47) * SIZE(AO)
fmul f6, A1, f6
fnmsub f3, A2, f6, f3
fnmsub f7, A3, f6, f7
LFD A1, (2 + 54) * SIZE(AO)
LFD A2, (2 + 55) * SIZE(AO)
fmul f3, A1, f3
fnmsub f7, A2, f3, f7
LFD A1, (2 + 63) * SIZE(AO)
fmul f7, A1, f7
fsmfp f0, f4                    /* re-pack solved scalars into pairs */
fsmfp f1, f5
fsmfp f2, f6
fsmfp f3, f7
#endif
/* RN/RT with a single column: the "solve" is a scale by B's one diagonal
   entry, applied to all eight rows. */
#ifdef RN
LFPDX A1, BO, INC2
fxpmul f0, A1, f0
fxpmul f1, A1, f1
fxpmul f2, A1, f2
fxpmul f3, A1, f3
#endif
#ifdef RT
LFPDX A1, BO, INC2
fxpmul f0, A1, f0
fxpmul f1, A1, f1
fxpmul f2, A1, f2
fxpmul f3, A1, f3
#endif
#ifdef LN
subi CO1, CO1, 8 * SIZE         /* LN stores the tile one step to the left */
#endif
#if defined(LN) || defined(LT)
STFPDUX f0, BO, INC2            /* refresh packed copy in BO */
STFPDUX f1, BO, INC2
STFPDUX f2, BO, INC2
STFPDUX f3, BO, INC2
subi BO, BO, 8 * SIZE
STFDUX f0, CO1, INC             /* then write the 8 C entries */
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
#else
STFPDUX f0, AO, INC2            /* RN/RT keep the packed copy in AO */
STFPDUX f1, AO, INC2
STFPDUX f2, AO, INC2
STFPDUX f3, AO, INC2
subi AO, AO, 8 * SIZE
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
#endif
#ifdef LN
subi CO1, CO1, 8 * SIZE
#endif
/* advance packed-operand pointers, update KK, and loop over M/8 tiles */
#ifdef RT
slwi r0, K, 3 + BASE_SHIFT
add AORIG, AORIG, r0
#endif
#if defined(LT) || defined(RN)
sub TEMP, K, KK
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LT
addi KK, KK, 8
#endif
#ifdef LN
subi KK, KK, 8
#endif
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0                /* reload paired zero for the next tile */
bgt+ .L91
.align 4
.align 4
.L129:
#ifdef LN
slwi r0, K, 0 + BASE_SHIFT
add B, B, r0
#endif
#if defined(LT) || defined(RN)
addi B, BO, 2 * SIZE
#endif
#ifdef RN
addi KK, KK, 1
#endif
#ifdef RT
subi KK, KK, 1
#endif
.align 4
.L999:
addi SP, SP, 12
lwzu r14, 4(SP)
lwzu r15, 4(SP)
lwzu r16, 4(SP)
lwzu r17, 4(SP)
lwzu r18, 4(SP)
lwzu r19, 4(SP)
lwzu r20, 4(SP)
lwzu r21, 4(SP)
lwzu r22, 4(SP)
lwzu r23, 4(SP)
lwzu r24, 4(SP)
lwzu r25, 4(SP)
lwzu r26, 4(SP)
lwzu r27, 4(SP)
lwzu r28, 4(SP)
lwzu r29, 4(SP)
lwzu r30, 4(SP)
lwzu r31, 4(SP)
subi SP, SP, 12
li r0, 16
lfpdux f31, SP, r0
lfpdux f30, SP, r0
lfpdux f29, SP, r0
lfpdux f28, SP, r0
lfpdux f27, SP, r0
lfpdux f26, SP, r0
lfpdux f25, SP, r0
lfpdux f24, SP, r0
lfpdux f23, SP, r0
lfpdux f22, SP, r0
lfpdux f21, SP, r0
lfpdux f20, SP, r0
lfpdux f19, SP, r0
lfpdux f18, SP, r0
lfpdux f17, SP, r0
lfpdux f16, SP, r0
lfpdux f15, SP, r0
lfpdux f14, SP, r0
addi SP, SP, 16
blr
EPILOGUE
#endif