/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
/* Byte offsets from SP, valid once the prologue has finished:          */
/*   ALPHA - the double stored from f1; reloaded into AP before each    */
/*           tile of C is written back.                                 */
/*   FZERO - eight zero bytes; loaded into f0 to clear the accumulators.*/
#define ALPHA 0
#define FZERO 8
/* Problem extents.  In the code reviewed, M is consumed in blocks of   */
/* 8/4/2/1 (counter I), N in panels of 4 and 2 (counter J), and K is    */
/* the trip count of the multiply-add loops (loaded into CTR).          */
#define M r3
#define N r4
#define K r5
/* Pointer, stride and offset arguments.  They are bound to registers   */
/* here only when `linux` is defined.                                   */
/* NOTE(review): no binding for other targets is visible in this part   */
/* of the file - confirm where non-linux builds obtain these names.     */
#ifdef linux
#define A r6
#define B r7
#define C r8
#define LDC r9
#define OFFSET r10
#endif
/* TEMP is scratch; KK is the running offset used only by the           */
/* TRMMKERNEL paths.                                                    */
#define TEMP r11
#define KK r14
/* Negative byte strides used to step the C pointers backwards.  The    */
/* names do not match the multipliers the code loads: INCM3 = -2*SIZE,  */
/* INCM5 = -5*SIZE, INCM7 = -6*SIZE.  INCM1 and INCM5 are not           */
/* referenced in the portion of the file reviewed.                      */
#define INCM1 r15
#define INCM3 r16
#define INCM5 r17
#define INCM7 r18
/* Positive byte strides: INC = 1*SIZE, INC2 = 2*SIZE, INC4 = 4*SIZE.   */
#define INC2 r19
#define INC r20
#define INC4 r21
/* Loop counters: I walks the M direction, J walks the N direction.     */
#define I r22
#define J r23
/* Streaming pointers into the A and B panels.  AO2/BO2 are kept        */
/* 2*SIZE ahead of AO/BO so that two register pairs can be fetched per  */
/* step with the same stride.                                           */
#define AO r24
#define BO r25
#define AO2 r26
#define BO2 r27
/* Pointers to the (up to) four columns of C being updated; each is     */
/* LDC bytes after the previous one.                                    */
#define CO1 r28
#define CO2 r29
#define CO3 r30
#define CO4 r31
#ifndef NEEDPARAM
/* Operand registers for the kernels.  f0-f15 are used as accumulators, */
/* so the operand aliases occupy f16-f31 (all saved by the prologue).   */
/* The A* names are mostly loaded through AO/AO2 and the B* names       */
/* through BO/BO2, but the code also reuses them freely: some tails     */
/* load B-panel data into A5-A10, and several of them hold values of C  */
/* just before the write-back.                                          */
#define A1 f16
#define A2 f17
#define A3 f18
#define A4 f19
#define A5 f20
#define A6 f21
#define A7 f22
#define A8 f23
#define A9 f24
#define A10 f25
#define B1 f26
#define B2 f27
#define B3 f28
#define B4 f29
#define B5 f30
#define B6 f31
/* AP (the scalar loaded from ALPHA(SP)) shares f31 with B6.  In the    */
/* code reviewed it is loaded only after the unrolled loops that use    */
/* B6 have finished, so the two uses do not overlap.                    */
#define AP B6
/* Entry: save registers, build a small scratch area on the stack,      */
/* validate the extents and pick the code path.                         */
/* PROLOGUE and PROFCODE are macros defined outside this file section   */
/* (presumably in common.h).                                            */
PROLOGUE
PROFCODE
/* Push f14-f31.  Each stfpdux moves SP down by 16 bytes (r0 = -16)     */
/* and stores one register there: 18 registers, 288 bytes.              */
li r0, -16
stfpdux f14, SP, r0
stfpdux f15, SP, r0
stfpdux f16, SP, r0
stfpdux f17, SP, r0
stfpdux f18, SP, r0
stfpdux f19, SP, r0
stfpdux f20, SP, r0
stfpdux f21, SP, r0
stfpdux f22, SP, r0
stfpdux f23, SP, r0
stfpdux f24, SP, r0
stfpdux f25, SP, r0
stfpdux f26, SP, r0
stfpdux f27, SP, r0
stfpdux f28, SP, r0
stfpdux f29, SP, r0
stfpdux f30, SP, r0
stfpdux f31, SP, r0
/* Push r31 down to r14, 4 bytes each (72 bytes).                       */
stwu r31, -4(SP)
stwu r30, -4(SP)
stwu r29, -4(SP)
stwu r28, -4(SP)
stwu r27, -4(SP)
stwu r26, -4(SP)
stwu r25, -4(SP)
stwu r24, -4(SP)
stwu r23, -4(SP)
stwu r22, -4(SP)
stwu r21, -4(SP)
stwu r20, -4(SP)
stwu r19, -4(SP)
stwu r18, -4(SP)
stwu r17, -4(SP)
stwu r16, -4(SP)
stwu r15, -4(SP)
stwu r14, -4(SP) # dummy
/* Scratch area: two zero words followed (at a lower address) by f1.    */
/* After the last store f1 sits at 0(SP) == ALPHA and the eight zero    */
/* bytes at 8(SP) == FZERO.  f1 is presumably the alpha argument,       */
/* going by the name of the offset.                                     */
li r0, 0
stwu r0, -4(SP)
stwu r0, -4(SP)
stfdu f1, -8(SP)
/* Scale LDC by BASE_SHIFT; from here on it is added directly to byte   */
/* addresses (BASE_SHIFT is defined elsewhere, presumably log2(SIZE)).  */
slwi LDC, LDC, BASE_SHIFT
/* Nothing to do when any extent is <= 0.  .L999 is outside the part    */
/* of the file reviewed (presumably the common exit).                   */
cmpwi cr0, M, 0
ble .L999
cmpwi cr0, N, 0
ble .L999
cmpwi cr0, K, 0
ble .L999
/* Byte strides for one, two and four elements.                         */
li INC, 1 * SIZE
li INC2, 2 * SIZE
li INC4, 4 * SIZE
#if defined(TRMMKERNEL) && !defined(LEFT)
/* Right-side TRMM starts its running offset at -OFFSET.                */
neg KK, OFFSET
#endif
/* The code below accesses C two elements at a time.  If either C or    */
/* the byte stride LDC is not a multiple of 2*SIZE, branch to .L1000    */
/* (outside the part reviewed; presumably a variant that tolerates      */
/* that alignment).                                                     */
andi. r0, C, 2 * SIZE - 1
bne .L1000
andi. r0, LDC, 2 * SIZE - 1
bne .L1000
/* High performance version */
/* Backward strides for the C pointers; note that the multipliers are   */
/* -2, -5 and -6, not the -3/-5/-7 the register names suggest.          */
li INCM3, -2 * SIZE
li INCM5, -5 * SIZE
li INCM7, -6 * SIZE
/* Bias C by one pair: the column pointers are always advanced by the   */
/* indexed access itself (first access is C - 2*SIZE + INC2).           */
addi C, C, - 2 * SIZE
/* J = N / 4 panels of four columns; skip to the narrower-panel code    */
/* at .L50 when there are none.                                         */
srawi. J, N, 2
ble .L50
.align 4
/* Start of one 4-column panel: runs J times.                           */
.L10:
/* CO1..CO4 address four consecutive columns of C (LDC bytes apart);    */
/* C itself is moved on to the first column of the next panel.          */
mr CO1, C
add CO2, C, LDC
add CO3, CO2, LDC
add CO4, CO3, LDC
add C, CO4, LDC
#if defined(TRMMKERNEL) && defined(LEFT)
/* Left-side TRMM restarts its running offset for every panel.          */
mr KK, OFFSET
#endif
/* AO is biased by -4*SIZE because the A panel is read with             */
/* pre-incrementing loads that step by INC4.                            */
addi AO, A, -4 * SIZE
/* Clear f0 from the zero bytes at FZERO(SP); the other accumulators    */
/* are then cleared by copying f0.                                      */
li r0, FZERO
lfpsx f0, SP, r0
/* I = M / 8 blocks of eight rows; go to the 4-row tail at .L20 when    */
/* there are none.                                                      */
srawi. I, M, 3
ble .L20
.align 4
.L11:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
#else
slwi TEMP, KK, 3 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 4
#endif
srawi. TEMP, TEMP, 2
fpmr f1, f0
mtspr CTR, TEMP
ble .L14
#else
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
srawi. r0, K, 2
fpmr f1, f0
mtspr CTR, r0
ble .L14
#endif
LFPDUX A1, AO, INC4
fpmr f5, f0
LFPDUX A3, AO, INC4
fpmr f9, f0
LFPDUX B1, BO, INC4
fpmr f13, f0
LFPDUX A5, AO, INC4
fpmr f2, f0
LFPDUX A6, AO, INC4
fpmr f6, f0
LFPDUX B3, BO, INC4
fpmr f10, f0
LFPDUX A7, AO, INC4
fpmr f14, f0
LFPDUX A8, AO, INC4
fpmr f3, f0
LFPDUX B5, BO, INC4
fpmr f7, f0
LFPDUX A9, AO, INC4
fpmr f11, f0
LFPDUX A2, AO2, INC4
fpmr f15, f0
LFPDUX B2, BO2, INC4
bdz- .L13
.align 4
.L12:
## 1 ##
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
nop
fxcpmadd f8, B2, A1, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A1, f12
LFPDUX B6, BO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A10, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
nop
fxcpmadd f10, B2, A3, f10
nop
fxcsmadd f14, B2, A3, f14
nop
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
LFPDUX A1, AO, INC4
fxcsmadd f15, B2, A4, f15
nop
## 2 ##
fxcpmadd f0, B3, A5, f0
nop
fxcsmadd f4, B3, A5, f4
nop
fxcpmadd f8, B4, A5, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A5, f12
LFPDUX B1, BO, INC4
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
LFPDUX A3, AO, INC4
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B3, A6, f2
nop
fxcsmadd f6, B3, A6, f6
nop
fxcpmadd f10, B4, A6, f10
nop
fxcsmadd f14, B4, A6, f14
nop
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B4, A4, f11
LFPDUX A5, AO, INC4
fxcsmadd f15, B4, A4, f15
nop
## 3 ##
fxcpmadd f0, B5, A7, f0
nop
fxcsmadd f4, B5, A7, f4
nop
fxcpmadd f8, B2, A7, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A7, f12
LFPDUX B3, BO, INC4
fxcpmadd f1, B5, A2, f1
nop
fxcsmadd f5, B5, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A6, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B5, A8, f2
nop
fxcsmadd f6, B5, A8, f6
nop
fxcpmadd f10, B2, A8, f10
nop
fxcsmadd f14, B2, A8, f14
nop
fxcpmadd f3, B5, A4, f3
nop
fxcsmadd f7, B5, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
LFPDUX A7, AO, INC4
fxcsmadd f15, B2, A4, f15
nop
## 4 ##
fxcpmadd f0, B6, A9, f0
nop
fxcsmadd f4, B6, A9, f4
nop
fxcpmadd f8, B4, A9, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A9, f12
LFPDUX B5, BO, INC4
fxcpmadd f1, B6, A2, f1
nop
fxcsmadd f5, B6, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
LFPDUX A8, AO, INC4
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B6, A10, f2
nop
fxcsmadd f6, B6, A10, f6
nop
fxcpmadd f10, B4, A10, f10
nop
fxcsmadd f14, B4, A10, f14
nop
fxcpmadd f3, B6, A4, f3
LFPDUX A2, AO2, INC4
fxcsmadd f7, B6, A4, f7
LFPDUX A9, AO, INC4
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
bdnz+ .L12
.align 4
.L13:
## 1 ##
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
nop
fxcpmadd f8, B2, A1, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A1, f12
LFPDUX B6, BO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A10, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
nop
fxcpmadd f10, B2, A3, f10
nop
fxcsmadd f14, B2, A3, f14
nop
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
#else
nop
#endif
fxcsmadd f15, B2, A4, f15
nop
## 2 ##
fxcpmadd f0, B3, A5, f0
nop
fxcsmadd f4, B3, A5, f4
nop
fxcpmadd f8, B4, A5, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A5, f12
#ifndef TRMMKERNEL
LFPDUX B1, CO1, INC4
#else
nop
#endif
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
#ifndef TRMMKERNEL
LFPDUX A3, CO2, INC2
#else
nop
#endif
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B3, A6, f2
nop
fxcsmadd f6, B3, A6, f6
nop
fxcpmadd f10, B4, A6, f10
nop
fxcsmadd f14, B4, A6, f14
nop
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B4, A4, f11
#ifndef TRMMKERNEL
LFPDUX A5, CO2, INC4
#else
nop
#endif
fxcsmadd f15, B4, A4, f15
nop
## 3 ##
fxcpmadd f0, B5, A7, f0
nop
fxcsmadd f4, B5, A7, f4
nop
fxcpmadd f8, B2, A7, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A7, f12
#ifndef TRMMKERNEL
LFPDUX B3, CO3, INC2
#else
nop
#endif
fxcpmadd f1, B5, A2, f1
nop
fxcsmadd f5, B5, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
#ifndef TRMMKERNEL
LFPDUX A6, CO3, INC4
#else
nop
#endif
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B5, A8, f2
nop
fxcsmadd f6, B5, A8, f6
nop
fxcpmadd f10, B2, A8, f10
nop
fxcsmadd f14, B2, A8, f14
nop
fxcpmadd f3, B5, A4, f3
nop
fxcsmadd f7, B5, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
#ifndef TRMMKERNEL
LFPDUX A7, CO4, INC2
#else
nop
#endif
fxcsmadd f15, B2, A4, f15
nop
## 4 ##
fxcpmadd f0, B6, A9, f0
nop
fxcsmadd f4, B6, A9, f4
nop
fxcpmadd f8, B4, A9, f8
nop
fxcsmadd f12, B4, A9, f12
#ifndef TRMMKERNEL
LFPDUX B2, CO4, INC4
#else
nop
#endif
fxcpmadd f1, B6, A2, f1
nop
fxcsmadd f5, B6, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
#ifndef TRMMKERNEL
LFPDUX B5, CO1, INCM3
#else
nop
#endif
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B6, A10, f2
nop
fxcsmadd f6, B6, A10, f6
nop
fxcpmadd f10, B4, A10, f10
nop
fxcsmadd f14, B4, A10, f14
#ifndef TRMMKERNEL
LFPDUX A8, CO1, INC4
#else
nop
#endif
fxcpmadd f3, B6, A4, f3
nop
fxcsmadd f7, B6, A4, f7
nop
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
#ifndef TRMMKERNEL
LFPDUX A9, CO2, INCM3
#else
nop
#endif
.align 4
.L14:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 4
#endif
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L18
cmpwi cr0, TEMP, 3
bgt+ .L15
#else
andi. r0, K, 3
mtspr CTR, r0
ble+ .L18
cmpwi cr0, K, 3
bgt+ .L15
#endif
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
fpmr f5, f0
LFPDUX B1, CO1, INC4
fpmr f9, f0
LFPDUX A3, CO2, INC2
fpmr f13, f0
LFPDUX A5, CO2, INC4
fpmr f2, f0
LFPDUX B3, CO3, INC2
fpmr f6, f0
LFPDUX A6, CO3, INC4
fpmr f10, f0
LFPDUX A7, CO4, INC2
fpmr f14, f0
LFPDUX B2, CO4, INC4
fpmr f3, f0
LFPDUX B5, CO1, INCM3
fpmr f7, f0
LFPDUX A8, CO1, INC4
fpmr f11, f0
LFPDUX A9, CO2, INCM3
fpmr f15, f0
#else
fpmr f5, f0
fpmr f9, f0
fpmr f13, f0
fpmr f2, f0
fpmr f6, f0
fpmr f10, f0
fpmr f14, f0
fpmr f3, f0
fpmr f7, f0
fpmr f11, f0
fpmr f15, f0
nop
#endif
.align 4
.L15:
LFPDUX A2, AO, INC4
LFPDUX A4, AO2, INC4
LFPDUX A10, BO, INC4
LFPDUX B4, BO2, INC4
bdz- .L17
.align 4
.L16:
fxcpmadd f0, A10, A2, f0
fxcsmadd f4, A10, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX A2, AO, INC4
fxcpmadd f1, A10, A4, f1
fxcsmadd f5, A10, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
LFPDUX A4, AO2, INC4
fxcpmadd f2, A10, A2, f2
fxcsmadd f6, A10, A2, f6
fxcpmadd f10, B4, A2, f10
fxcsmadd f14, B4, A2, f14
LFPDUX A2, AO, INC4
fxcpmadd f3, A10, A4, f3
fxcsmadd f7, A10, A4, f7
LFPDUX A10, BO, INC4
fxcpmadd f11, B4, A4, f11
fxcsmadd f15, B4, A4, f15
LFPDUX A4, AO2, INC4
LFPDUX B4, BO2, INC4
bdnz+ .L16
.align 4
.L17:
fxcpmadd f0, A10, A2, f0
fxcsmadd f4, A10, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX A2, AO, INC4
fxcpmadd f1, A10, A4, f1
fxcsmadd f5, A10, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
LFPDUX A4, AO2, INC4
fxcpmadd f2, A10, A2, f2
fxcsmadd f6, A10, A2, f6
fxcpmadd f10, B4, A2, f10
fxcsmadd f14, B4, A2, f14
fxcpmadd f3, A10, A4, f3
fxcsmadd f7, A10, A4, f7
fxcpmadd f11, B4, A4, f11
fxcsmadd f15, B4, A4, f15
.align 4
.L18:
#ifndef TRMMKERNEL
fxcpmadd f0, AP, f0, A1
LFPDUX B4, CO2, INC4
fxcpmadd f1, AP, f1, B5
LFPDUX A2, CO3, INCM3
fxcpmadd f2, AP, f2, B1
LFPDUX A4, CO3, INC4
fxcpmadd f3, AP, f3, A8
LFPDUX A10, CO4, INCM3
fxcpmadd f4, AP, f4, A3
LFPDUX A1, CO4, INC4
fxcpmadd f5, AP, f5, A9
STFPDUX f0, CO1, INCM7
fxcpmadd f6, AP, f6, A5
STFPDUX f1, CO1, INC2
fxcpmadd f7, AP, f7, B4
STFPDUX f2, CO1, INC2
fxcpmadd f8, AP, f8, B3
STFPDUX f3, CO1, INC2
fxcpmadd f9, AP, f9, A2
STFPDUX f4, CO2, INCM7
fxcpmadd f10, AP, f10, A6
STFPDUX f5, CO2, INC2
fxcpmadd f11, AP, f11, A4
STFPDUX f6, CO2, INC2
fxcpmadd f12, AP, f12, A7
STFPDUX f7, CO2, INC2
fxcpmadd f13, AP, f13, A10
STFPDUX f8, CO3, INCM7
fxcpmadd f14, AP, f14, B2
STFPDUX f9, CO3, INC2
fxcpmadd f15, AP, f15, A1
STFPDUX f10, CO3, INC2
STFPDUX f11, CO3, INC2
STFPDUX f12, CO4, INCM7
STFPDUX f13, CO4, INC2
STFPDUX f14, CO4, INC2
STFPDUX f15, CO4, INC2
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
fpmul f4, AP, f4
fpmul f5, AP, f5
STFPDUX f0, CO1, INC2
fpmul f6, AP, f6
STFPDUX f1, CO1, INC2
fpmul f7, AP, f7
STFPDUX f2, CO1, INC2
fpmul f8, AP, f8
STFPDUX f3, CO1, INC2
fpmul f9, AP, f9
STFPDUX f4, CO2, INC2
fpmul f10, AP, f10
STFPDUX f5, CO2, INC2
fpmul f11, AP, f11
STFPDUX f6, CO2, INC2
fpmul f12, AP, f12
STFPDUX f7, CO2, INC2
fpmul f13, AP, f13
STFPDUX f8, CO3, INC2
fpmul f14, AP, f14
STFPDUX f9, CO3, INC2
fpmul f15, AP, f15
STFPDUX f10, CO3, INC2
STFPDUX f11, CO3, INC2
STFPDUX f12, CO4, INC2
STFPDUX f13, CO4, INC2
STFPDUX f14, CO4, INC2
STFPDUX f15, CO4, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -8
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 8
#endif
#endif
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L11
.align 4
.L20:
andi. I, M, 4
beq .L30
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
#else
slwi TEMP, KK, 2 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 4
#endif
srawi. TEMP, TEMP, 2
fpmr f1, f0
fpmr f5, f0
fpmr f9, f0
mtspr CTR, TEMP
fpmr f13, f0
ble .L24
#else
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
srawi. r0, K, 2
fpmr f1, f0
fpmr f5, f0
fpmr f9, f0
mtspr CTR, r0
fpmr f13, f0
ble .L24
#endif
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX A2, AO2, INC4
LFPDUX B2, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX B3, BO, INC4
LFPDUX A4, AO2, INC4
LFPDUX B4, BO2, INC4
LFPDUX A5, AO, INC4
LFPDUX B5, BO, INC4
LFPDUX A6, AO2, INC4
LFPDUX B6, BO2, INC4
LFPDUX A7, AO, INC4
LFPDUX A9, BO, INC4
LFPDUX A10, BO2, INC4
bdz- .L23
.align 4
.L22:
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
LFPDUX A8, AO2, INC4
fxcpmadd f8, B2, A1, f8
nop
fxcsmadd f12, B2, A1, f12
LFPDUX A1, AO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX B1, BO, INC4
fxcpmadd f9, B2, A2, f9
nop
fxcsmadd f13, B2, A2, f13
LFPDUX B2, BO2, INC4
fxcpmadd f0, B3, A3, f0
nop
fxcsmadd f4, B3, A3, f4
LFPDUX A2, AO2, INC4
fxcpmadd f8, B4, A3, f8
nop
fxcsmadd f12, B4, A3, f12
LFPDUX A3, AO, INC4
fxcpmadd f1, B3, A4, f1
nop
fxcsmadd f5, B3, A4, f5
LFPDUX B3, BO, INC4
fxcpmadd f9, B4, A4, f9
nop
fxcsmadd f13, B4, A4, f13
LFPDUX B4, BO2, INC4
fxcpmadd f0, B5, A5, f0
nop
fxcsmadd f4, B5, A5, f4
LFPDUX A4, AO2, INC4
fxcpmadd f8, B6, A5, f8
nop
fxcsmadd f12, B6, A5, f12
LFPDUX A5, AO, INC4
fxcpmadd f1, B5, A6, f1
nop
fxcsmadd f5, B5, A6, f5
LFPDUX B5, BO, INC4
fxcpmadd f9, B6, A6, f9
nop
fxcsmadd f13, B6, A6, f13
LFPDUX B6, BO2, INC4
fxcpmadd f0, A9, A7, f0
nop
fxcsmadd f4, A9, A7, f4
LFPDUX A6, AO2, INC4
fxcpmadd f8, A10, A7, f8
nop
fxcsmadd f12, A10, A7, f12
LFPDUX A7, AO, INC4
fxcpmadd f1, A9, A8, f1
nop
fxcsmadd f5, A9, A8, f5
LFPDUX A9, BO, INC4
fxcpmadd f9, A10, A8, f9
nop
fxcsmadd f13, A10, A8, f13
LFPDUX A10, BO2, INC4
bdnz+ .L22
.align 4
.L23:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX A8, AO2, INC4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
fxcpmadd f0, B3, A3, f0
fxcsmadd f4, B3, A3, f4
fxcpmadd f8, B4, A3, f8
fxcsmadd f12, B4, A3, f12
fxcpmadd f1, B3, A4, f1
fxcsmadd f5, B3, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
fxcpmadd f0, B5, A5, f0
fxcsmadd f4, B5, A5, f4
fxcpmadd f8, B6, A5, f8
fxcsmadd f12, B6, A5, f12
fxcpmadd f1, B5, A6, f1
fxcsmadd f5, B5, A6, f5
fxcpmadd f9, B6, A6, f9
fxcsmadd f13, B6, A6, f13
fxcpmadd f0, A9, A7, f0
fxcsmadd f4, A9, A7, f4
fxcpmadd f8, A10, A7, f8
fxcsmadd f12, A10, A7, f12
fxcpmadd f1, A9, A8, f1
fxcsmadd f5, A9, A8, f5
fxcpmadd f9, A10, A8, f9
fxcsmadd f13, A10, A8, f13
.align 4
.L24:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 4
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
ble+ .L28
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
bdz- .L27
.align 4
.L26:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
LFPDUX A1, AO, INC4
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
LFPDUX B1, BO, INC4
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
LFPDUX A2, AO2, INC4
LFPDUX B2, BO2, INC4
bdnz+ .L26
.align 4
.L27:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
.align 4
.L28:
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
LFPDUX B1, CO1, INC2
LFPDUX B3, CO2, INC2
LFPDUX A6, CO2, INC2
LFPDUX B5, CO3, INC2
LFPDUX A8, CO3, INC2
LFPDUX A2, CO4, INC2
LFPDUX A4, CO4, INC2
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B1
fxcpmadd f4, AP, f4, B3
fxcpmadd f5, AP, f5, A6
fxcpmadd f8, AP, f8, B5
fxcpmadd f9, AP, f9, A8
STFPDUX f0, CO1, INCM3
fxcpmadd f12, AP, f12, A2
STFPDUX f1, CO1, INC2
fxcpmadd f13, AP, f13, A4
STFPDUX f4, CO2, INCM3
STFPDUX f5, CO2, INC2
STFPDUX f8, CO3, INCM3
STFPDUX f9, CO3, INC2
STFPDUX f12, CO4, INCM3
STFPDUX f13, CO4, INC2
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f4, AP, f4
fpmul f5, AP, f5
fpmul f8, AP, f8
fpmul f9, AP, f9
STFPDUX f0, CO1, INC2
fpmul f12, AP, f12
STFPDUX f1, CO1, INC2
fpmul f13, AP, f13
STFPDUX f4, CO2, INC2
STFPDUX f5, CO2, INC2
STFPDUX f8, CO3, INC2
STFPDUX f9, CO3, INC2
STFPDUX f12, CO4, INC2
STFPDUX f13, CO4, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -4
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 4
#endif
#endif
li r0, FZERO
lfpsx f0, SP, r0
.align 4
.L30:
andi. I, M, 2
beq .L40
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
#else
slwi TEMP, KK, 1 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, BO, - 4 * SIZE
fpmr f2, f0
addi BO2, BO, 2 * SIZE
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 4
#endif
srawi. r0, TEMP, 2
mtspr CTR, r0
ble .L34
#else
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
srawi. r0, K, 2
mtspr CTR, r0
ble .L34
#endif
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
LFPDUX A2, AO2, INC4
LFPDUX B3, BO, INC4
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A5, BO, INC4
LFPDUX A6, BO2, INC4
LFPDUX A4, AO2, INC4
LFPDUX A7, BO, INC4
LFPDUX A8, BO2, INC4
bdz- .L33
.align 4
.L32:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX B1, BO, INC4
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
LFPDUX B2, BO2, INC4
LFPDUX A1, AO, INC4
fxcpmadd f0, B3, A2, f0
fxcsmadd f1, B3, A2, f1
LFPDUX B3, BO, INC4
fxcpmadd f2, B4, A2, f2
fxcsmadd f3, B4, A2, f3
LFPDUX B4, BO2, INC4
LFPDUX A2, AO2, INC4
fxcpmadd f0, A5, A3, f0
fxcsmadd f1, A5, A3, f1
LFPDUX A5, BO, INC4
fxcpmadd f2, A6, A3, f2
fxcsmadd f3, A6, A3, f3
LFPDUX A6, BO2, INC4
LFPDUX A3, AO, INC4
fxcpmadd f0, A7, A4, f0
fxcsmadd f1, A7, A4, f1
LFPDUX A7, BO, INC4
fxcpmadd f2, A8, A4, f2
fxcsmadd f3, A8, A4, f3
LFPDUX A8, BO2, INC4
LFPDUX A4, AO2, INC4
bdnz+ .L32
.align 4
.L33:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
fxcpmadd f0, B3, A2, f0
fxcsmadd f1, B3, A2, f1
fxcpmadd f2, B4, A2, f2
fxcsmadd f3, B4, A2, f3
fxcpmadd f0, A5, A3, f0
fxcsmadd f1, A5, A3, f1
fxcpmadd f2, A6, A3, f2
fxcsmadd f3, A6, A3, f3
fxcpmadd f0, A7, A4, f0
fxcsmadd f1, A7, A4, f1
fxcpmadd f2, A8, A4, f2
fxcsmadd f3, A8, A4, f3
.align 4
.L34:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 4
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
ble+ .L38
LFPDX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC2
bdz- .L37
.align 4
.L36:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX B1, BO, INC4
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
LFPDX A1, AO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC2
bdnz+ .L36
.align 4
.L37:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
.align 4
.L38:
#ifndef TRMMKERNEL
LFPDX A1, CO1, INC2
LFPDX A2, CO2, INC2
LFPDX A3, CO3, INC2
LFPDX A4, CO4, INC2
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, A2
fxcpmadd f2, AP, f2, A3
fxcpmadd f3, AP, f3, A4
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
#endif
STFPDUX f0, CO1, INC2
STFPDUX f1, CO2, INC2
STFPDUX f2, CO3, INC2
STFPDUX f3, CO4, INC2
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -2
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 2
#endif
#endif
li r0, FZERO
lfpsx f0, SP, r0
.align 4
.L40:
andi. I, M, 1
beq .L49
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
#else
slwi TEMP, KK, 0 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, BO, - 4 * SIZE
fpmr f2, f0
addi BO2, BO, 2 * SIZE
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 4
#endif
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L44
#else
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L44
#endif
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
LFPDUX A2, AO2, INC4
LFPDUX B3, BO, INC4
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A5, BO, INC4
LFPDUX A6, BO2, INC4
LFPDUX A4, AO2, INC4
LFPDUX A7, BO, INC4
LFPDUX A8, BO2, INC4
bdz- .L43
.align 4
.L42:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A1, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A1, B4, f3
LFPDUX B4, BO2, INC4
LFPDUX A1, AO, INC4
fxcpmadd f0, A2, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A2, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A2, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A2, A8, f3
LFPDUX A8, BO2, INC4
LFPDUX A2, AO2, INC4
fxcpmadd f0, A3, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A3, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A3, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A3, B4, f3
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
fxcpmadd f0, A4, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A4, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A4, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A4, A8, f3
LFPDUX A8, BO2, INC4
LFPDUX A4, AO2, INC4
bdnz+ .L42
.align 4
.L43:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A1, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A1, B4, f3
LFPDUX B4, BO2, INC4
fxcpmadd f0, A2, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A2, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A2, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A2, A8, f3
LFPDUX A8, BO2, INC4
fxcpmadd f0, A3, B1, f0
fxcpmadd f1, A3, B2, f1
fxcsmadd f2, A3, B3, f2
fxcsmadd f3, A3, B4, f3
fxcpmadd f0, A4, A5, f0
fxcpmadd f1, A4, A6, f1
fxcsmadd f2, A4, A7, f2
fxcsmadd f3, A4, A8, f3
.align 4
.L44:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 4
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L48
LFDX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC
bdz- .L47
.align 4
.L46:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFDX A1, AO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC
bdnz+ .L46
.align 4
.L47:
fxcpmadd f0, A1, B1, f0
fxcpmadd f1, A1, B2, f1
.align 4
/* Write-back for the single-row tail (M & 1) of a 4-column panel,      */
/* followed by the close of the panel loop.                             */
/* On entry f0/f1 and f2/f3 hold two partial sums each: the unrolled    */
/* loop accumulates products taken from the primary half of the A       */
/* register into f0/f1 and those from the secondary half into f2/f3.    */
/* NOTE(review): the lane descriptions below rely on the FP2 (double    */
/* hummer) instruction semantics and on the upper-case load/store       */
/* macros mapping to the like-named instructions - confirm against the  */
/* ISA reference and common.h.                                          */
.L48:
#ifndef TRMMKERNEL
/* Fetch the current C value of this row from each of the four columns. */
/* These are non-updating forms, so CO1..CO4 are left unchanged.        */
LFDX A1, CO1, INC2
LFDX A2, CO2, INC2
LFDX A3, CO3, INC2
LFDX A4, CO4, INC2
/* Merge the partial sums: f0 covers columns 1-2, f1 columns 3-4.       */
fpadd f0, f0, f2
fpadd f1, f1, f3
/* Pack the column-2 value into the secondary half of A1 and the        */
/* column-4 value into the secondary half of A3, matching f0/f1.        */
fsmfp A1, A2
fsmfp A3, A4
/* result = AP * sum + C, for both halves at once.                      */
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, A3
#else
/* TRMM: C is not read; result = AP * sum.                              */
fpadd f0, f0, f2
fpadd f1, f1, f3
fpmul f0, AP, f0
fpmul f1, AP, f1
#endif
/* Scatter the four results: primary halves go to columns 1 and 3,      */
/* secondary halves to columns 2 and 4.                                 */
STFDX f0, CO1, INC2
STFSDX f0, CO2, INC2
STFDX f1, CO3, INC2
STFSDX f1, CO4, INC2
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - (tile extent on the active side: 1 row or 4 cols).   */
/* AO is advanced by TEMP elements and BO by 4*TEMP elements,           */
/* presumably to step over the part of the packed panels that this      */
/* tile did not consume.                                                */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -1
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
/* One more row handled.                                                */
addi KK, KK, 1
#endif
#endif
.align 4
/* End of the 4-column panel.                                           */
.L49:
#if defined(TRMMKERNEL) && !defined(LEFT)
/* Four more columns handled.                                           */
addi KK, KK, 4
#endif
/* BO runs 4*SIZE behind the data it has consumed (it was started at    */
/* B - 4*SIZE), so BO + 4*SIZE is where the next panel of B begins.     */
addi B, BO, 4 * SIZE
/* Next panel while J > 0.                                              */
addic. J, J, -1
bgt+ .L10
.align 4
.L50:
andi. J, N, 2
beq .L90
mr CO1, C
add CO2, C, LDC
add C, CO2, LDC
#if defined(TRMMKERNEL) && defined(LEFT)
mr KK, OFFSET
#endif
addi AO, A, -2 * SIZE
li r0, FZERO
lfpsx f0, SP, r0
srawi. I, M, 3
ble .L60
.align 4
.L51:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
fpmr f4, f0
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
#else
slwi TEMP, KK, 3 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f4, f0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 2
#endif
srawi. r0, TEMP, 2
fpmr f3, f0
mtspr CTR, r0
fpmr f7, f0
ble .L54
#else
fpmr f4, f0
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
srawi. r0, K, 2
fpmr f3, f0
mtspr CTR, r0
fpmr f7, f0
ble .L54
#endif
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L53
.align 4
.L52:
fxcpmadd f0, B1, A1, f0
LFPDUX B4, BO, INC2
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX B1, BO, INC2
fxcsmadd f4, B2, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
nop
fxcsmadd f5, B2, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B2, A7, f2
nop
fxcsmadd f6, B2, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B2, A8, f3
nop
fxcsmadd f7, B2, A8, f7
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
LFPDUX B2, BO, INC2
fxcsmadd f4, B3, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B3, A3, f2
nop
fxcsmadd f6, B3, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B4, A5, f0
LFPDUX B3, BO, INC2
fxcsmadd f4, B4, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B4, A6, f1
nop
fxcsmadd f5, B4, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B4, A7, f2
nop
fxcsmadd f6, B4, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B4, A8, f3
nop
fxcsmadd f7, B4, A8, f7
LFPDUX A8, AO, INC2
bdnz+ .L52
.align 4
.L53:
fxcpmadd f0, B1, A1, f0
LFPDUX B4, BO, INC2
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
nop
fxcsmadd f4, B2, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
nop
fxcsmadd f5, B2, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B2, A7, f2
nop
fxcsmadd f6, B2, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B2, A8, f3
nop
fxcsmadd f7, B2, A8, f7
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
fxcsmadd f4, B3, A1, f4
fxcpmadd f1, B3, A2, f1
fxcsmadd f5, B3, A2, f5
fxcpmadd f2, B3, A3, f2
fxcsmadd f6, B3, A3, f6
fxcpmadd f3, B3, A4, f3
fxcsmadd f7, B3, A4, f7
fxcpmadd f0, B4, A5, f0
fxcsmadd f4, B4, A5, f4
fxcpmadd f1, B4, A6, f1
fxcsmadd f5, B4, A6, f5
fxcpmadd f2, B4, A7, f2
fxcsmadd f6, B4, A7, f6
fxcpmadd f3, B4, A8, f3
fxcsmadd f7, B4, A8, f7
.align 4
.L54:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
ble+ .L58
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
bdz- .L57
.align 4
.L56:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
LFPDUX B1, BO, INC2
bdnz+ .L56
.align 4
.L57:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f2, B1, A3, f2
fxcsmadd f6, B1, A3, f6
fxcpmadd f3, B1, A4, f3
fxcsmadd f7, B1, A4, f7
.align 4
/*
 * .L58: write-back of the accumulators f0-f7, then loop control.
 * f0-f3 go to the column addressed by CO1, f4-f7 to the one addressed
 * by CO2.
 *   GEMM: four pairs are loaded from each of CO1 and CO2, each
 *         accumulator becomes AP * acc + C (fxcpmadd), and the results
 *         are stored back.  The first store of each column uses INCM7,
 *         which is set outside this view (presumably a negative stride
 *         that rewinds the pointer after the four updating loads).
 *   TRMM: accumulators are scaled with fpmul and stored; C is not read.
 */
.L58:
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
LFPDUX B1, CO1, INC2
LFPDUX A3, CO1, INC2
LFPDUX A5, CO1, INC2
LFPDUX B3, CO2, INC2
LFPDUX A6, CO2, INC2
LFPDUX A7, CO2, INC2
LFPDUX B2, CO2, INC2
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B1
fxcpmadd f2, AP, f2, A3
fxcpmadd f3, AP, f3, A5
fxcpmadd f4, AP, f4, B3
fxcpmadd f5, AP, f5, A6
STFPDUX f0, CO1, INCM7
fxcpmadd f6, AP, f6, A7
STFPDUX f1, CO1, INC2
fxcpmadd f7, AP, f7, B2
STFPDUX f2, CO1, INC2
STFPDUX f3, CO1, INC2
STFPDUX f4, CO2, INCM7
STFPDUX f5, CO2, INC2
STFPDUX f6, CO2, INC2
STFPDUX f7, CO2, INC2
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
fpmul f4, AP, f4
fpmul f5, AP, f5
STFPDUX f0, CO1, INC2
fpmul f6, AP, f6
STFPDUX f1, CO1, INC2
fpmul f7, AP, f7
STFPDUX f2, CO1, INC2
STFPDUX f3, CO1, INC2
STFPDUX f4, CO2, INC2
STFPDUX f5, CO2, INC2
STFPDUX f6, CO2, INC2
STFPDUX f7, CO2, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/*
 * Advance AO and BO past the part of the panels that this tile did not
 * consume: TEMP = K - KK - (8 or 2), scaled by 8 elements for AO and by
 * 2 elements for BO.
 */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -8
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 8
#endif
#endif
/*
 * Decrement the tile counter I and loop to .L51 while it is positive.
 * The li/lfpsx pair between addic. and bgt reloads f0 from the FZERO
 * stack slot (presumably 0.0) for the next tile; neither instruction is
 * record-form, so CR0 from addic. is still valid at the branch.
 */
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L51
.align 4
/*
 * .L60: handled only when bit 2 of M is set (M & 4); otherwise control
 * goes to .L70.  Accumulators f0-f3 are cleared by copying f0, which the
 * code before this label reloaded from the FZERO slot.
 * TRMM builds first position AO/BO: either BO = B - 2*SIZE, or AO is
 * advanced by KK*4 elements and BO set to B + KK*2 elements - 2*SIZE.
 * The unrolled loop runs (trip count) >> 2 times, where the trip count
 * is K (GEMM) or K-KK / KK+4 / KK+2 (TRMM); a non-positive count skips
 * to the remainder code at .L64.
 */
.L60:
andi. I, M, 4
beq .L70
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
#else
slwi TEMP, KK, 2 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 2
#endif
fpmr f2, f0
srawi. r0, TEMP, 2
mtspr CTR, r0
fpmr f3, f0
ble .L64
#else
srawi. r0, K, 2
fpmr f1, f0
addi BO, B, - 2 * SIZE
fpmr f2, f0
mtspr CTR, r0
fpmr f3, f0
ble .L64
#endif
/* Preload four B pairs and eight A pairs for the first iteration. */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX B4, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
/* Single iteration: go directly to the load-free drain at .L63. */
bdz- .L63
.align 4
/*
 * .L62: each B pair (B1..B4) is combined with two A pairs; every operand
 * is reloaded right after its last use so the next iteration is ready.
 */
.L62:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A3, f0
fxcsmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f1, B2, A4, f1
fxcsmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A5, f0
fxcsmadd f2, B3, A5, f2
LFPDUX A5, AO, INC2
fxcpmadd f1, B3, A6, f1
fxcsmadd f3, B3, A6, f3
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f0, B4, A7, f0
fxcsmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcpmadd f1, B4, A8, f1
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L62
.align 4
/* .L63: last iteration of the unrolled body, arithmetic only. */
.L63:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
fxcpmadd f0, B2, A3, f0
fxcsmadd f2, B2, A3, f2
fxcpmadd f1, B2, A4, f1
fxcsmadd f3, B2, A4, f3
fxcpmadd f0, B3, A5, f0
fxcsmadd f2, B3, A5, f2
fxcpmadd f1, B3, A6, f1
fxcsmadd f3, B3, A6, f3
fxcpmadd f0, B4, A7, f0
fxcsmadd f2, B4, A7, f2
fxcpmadd f1, B4, A8, f1
fxcsmadd f3, B4, A8, f3
.align 4
/*
 * .L64: K remainder for the accumulators f0-f3 (M & 4 section).
 * AP is loaded from the ALPHA stack slot (and replicated with fsmfp in
 * TRMM builds).  CTR = (trip count) & 3; zero goes straight to .L68.
 */
.L64:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
ble+ .L68
/* Preload one step: two A pairs and one B pair. */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
bdz- .L67
.align 4
/* .L66: one remainder step per iteration. */
.L66:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
bdnz+ .L66
.align 4
/* .L67: final step without loads. */
.L67:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
.align 4
/*
 * .L68: write-back for the M & 4 section.  f0/f1 go to CO1, f2/f3 to CO2.
 *   GEMM: two pairs are loaded per column, acc = AP * acc + C, then
 *         stored; the first store per column uses INCM3 (set outside this
 *         view, presumably the negative stride that undoes both loads).
 *   TRMM: acc = AP * acc via fpmul, then stored with stride INC2.
 */
.L68:
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
LFPDUX A2, CO1, INC2
LFPDUX A3, CO2, INC2
LFPDUX A4, CO2, INC2
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, A2
fxcpmadd f2, AP, f2, A3
fxcpmadd f3, AP, f3, A4
STFPDUX f0, CO1, INCM3
STFPDUX f1, CO1, INC2
STFPDUX f2, CO2, INCM3
STFPDUX f3, CO2, INC2
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
STFPDUX f0, CO1, INC2
STFPDUX f1, CO1, INC2
STFPDUX f2, CO2, INC2
STFPDUX f3, CO2, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Skip the unused panel tail: TEMP = K - KK - (4 or 2). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -4
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 4
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next section. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/*
 * .L70: handled only when bit 1 of M is set (M & 2); otherwise control
 * goes to .L80.  f1-f3 are cleared from f0.
 * TRMM builds position AO/BO from KK (both scaled by 2 elements) unless
 * the first #if arm applies, in which case only BO = B - 2*SIZE is set.
 * The unrolled loop runs (trip count) >> 3 times; the remainder is
 * handled at .L74.
 * NOTE(review): the "#elif defined(LEFT)" and "#else" arms of the trip
 * count selection are identical here (both KK + 2).
 */
.L70:
andi. I, M, 2
beq .L80
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
#else
slwi TEMP, KK, 1 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 2
#endif
srawi. r0, TEMP, 3
fpmr f2, f0
mtspr CTR, r0
fpmr f3, f0
ble .L74
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
srawi. r0, K, 3
fpmr f2, f0
mtspr CTR, r0
fpmr f3, f0
ble .L74
#endif
/*
 * Preload eight A pairs and eight B pairs.  The last two B pairs are
 * held in the registers named A9 and A10.
 */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX B5, BO, INC2
LFPDUX A6, AO, INC2
LFPDUX B6, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A9, BO, INC2
LFPDUX A8, AO, INC2
LFPDUX A10, BO, INC2
bdz- .L73
.align 4
/*
 * .L72: eight (A, B) pair products per iteration.  Products alternate
 * between the accumulator pairs f0/f1 and f2/f3; the two sets are summed
 * at .L78.
 */
.L72:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f2, B2, A2, f2
fxcsmadd f3, B2, A2, f3
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A3, f0
fxcsmadd f1, B3, A3, f1
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f2, B4, A4, f2
fxcsmadd f3, B4, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
fxcpmadd f0, B5, A5, f0
fxcsmadd f1, B5, A5, f1
LFPDUX A5, AO, INC2
LFPDUX B5, BO, INC2
fxcpmadd f2, B6, A6, f2
fxcsmadd f3, B6, A6, f3
LFPDUX A6, AO, INC2
LFPDUX B6, BO, INC2
fxcpmadd f0, A9, A7, f0
fxcsmadd f1, A9, A7, f1
LFPDUX A7, AO, INC2
LFPDUX A9, BO, INC2
fxcpmadd f2, A10, A8, f2
fxcsmadd f3, A10, A8, f3
LFPDUX A8, AO, INC2
LFPDUX A10, BO, INC2
bdnz+ .L72
.align 4
/* .L73: last unrolled iteration, arithmetic only. */
.L73:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A2, f2
fxcsmadd f3, B2, A2, f3
fxcpmadd f0, B3, A3, f0
fxcsmadd f1, B3, A3, f1
fxcpmadd f2, B4, A4, f2
fxcsmadd f3, B4, A4, f3
fxcpmadd f0, B5, A5, f0
fxcsmadd f1, B5, A5, f1
fxcpmadd f2, B6, A6, f2
fxcsmadd f3, B6, A6, f3
fxcpmadd f0, A9, A7, f0
fxcsmadd f1, A9, A7, f1
fxcpmadd f2, A10, A8, f2
fxcsmadd f3, A10, A8, f3
.align 4
/*
 * .L74: K remainder for the M & 2 section.  AP is loaded from the ALPHA
 * slot (replicated by fsmfp in TRMM builds).  CTR = (trip count) & 7;
 * zero branches to the write-back at .L78.  Only f0/f1 are updated here.
 */
.L74:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L78
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
bdz- .L77
.align 4
/* .L76: one step per iteration, one A pair and one B pair. */
.L76:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
bdnz+ .L76
.align 4
/* .L77: final step without loads. */
.L77:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
.align 4
/*
 * .L78: write-back for the M & 2 section.
 * The two accumulator sets are first combined (f0 += f2, f1 += f3).
 *   GEMM: C pairs are read with the non-updating LFPDX at CO1+INC2 and
 *         CO2+INC2, then f0/f1 = AP * acc + C.
 *   TRMM: f0/f1 = AP * acc via fpmul.
 * The updating stores then write to the same CO1+INC2 / CO2+INC2
 * addresses and advance both pointers.
 */
.L78:
#ifndef TRMMKERNEL
LFPDX A1, CO1, INC2
LFPDX B3, CO2, INC2
fpadd f0, f0, f2
fpadd f1, f1, f3
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B3
#else
fpadd f0, f0, f2
fpadd f1, f1, f3
fpmul f0, AP, f0
fpmul f1, AP, f1
#endif
STFPDUX f0, CO1, INC2
STFPDUX f1, CO2, INC2
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Skip the unused panel tail: TEMP = K - KK - 2 in either arm. */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -2
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 2
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next section. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/*
 * .L80: handled only when bit 0 of M is set (M & 1); otherwise control
 * goes to .L89.  f1-f3 are cleared from f0.
 * TRMM builds advance AO by KK elements and set BO = B + KK*2 elements
 * - 2*SIZE, unless the first #if arm applies (BO = B - 2*SIZE only).
 * The unrolled loop runs (trip count) >> 3 times; the remainder is
 * handled at .L84.
 * Note the operand order: here the A register is the first multiplicand
 * (its primary half is used by fxcpmadd, its secondary half by fxcsmadd)
 * and each A pair is combined with two consecutive B pairs.
 */
.L80:
andi. I, M, 1
beq .L89
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
slwi TEMP, KK, 0 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 2
#endif
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L84
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L84
#endif
/* Preload four A pairs and the first four B pairs. */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX B4, BO, INC2
bdz- .L83
.align 4
/*
 * .L82: B1..B4 are consumed and reloaded twice per iteration, A1..A4
 * once, so eight B pairs and four A pairs are used per trip.
 */
.L82:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A1, B2, f1
LFPDUX B2, BO, INC2
LFPDUX A1, AO, INC2
fxcpmadd f2, A2, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A2, B4, f3
LFPDUX B4, BO, INC2
LFPDUX A2, AO, INC2
fxcpmadd f0, A3, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A3, B2, f1
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
fxcpmadd f2, A4, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A4, B4, f3
LFPDUX B4, BO, INC2
LFPDUX A4, AO, INC2
bdnz+ .L82
.align 4
/*
 * .L83: last unrolled iteration.  The second group of B pairs is still
 * loaded here, but no further A pairs are fetched.
 */
.L83:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A1, B2, f1
LFPDUX B2, BO, INC2
fxcpmadd f2, A2, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A2, B4, f3
LFPDUX B4, BO, INC2
fxcpmadd f0, A3, B1, f0
fxcsmadd f1, A3, B2, f1
fxcpmadd f2, A4, B3, f2
fxcsmadd f3, A4, B4, f3
.align 4
/*
 * .L84: K remainder for the M & 1 section.  AP is loaded from the ALPHA
 * slot (replicated by fsmfp in TRMM builds).  CTR = (trip count) & 7;
 * zero branches to .L88.
 * A is read with the non-updating LFDX at AO+INC2 and AO is then
 * advanced separately by INC, while B is read as a pair with update.
 */
.L84:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L88
LFDX A1, AO, INC2
LFPDUX B1, BO, INC2
add AO, AO, INC
bdz- .L87
.align 4
/* .L86: one step per iteration, accumulating into f0 only. */
.L86:
fxcpmadd f0, A1, B1, f0
LFDX A1, AO, INC2
LFPDUX B1, BO, INC2
add AO, AO, INC
bdnz+ .L86
.align 4
/* .L87: final step without loads. */
.L87:
fxcpmadd f0, A1, B1, f0
.align 4
/*
 * .L88: write-back for the M & 1 section.
 * The four accumulators are reduced into f0 (f0+f1, f2+f3, then summed).
 *   GEMM: one value is loaded from each of CO1 and CO2 (non-updating
 *         LFDX), fsmfp merges them into the single pair A1, and
 *         f0 = AP * f0 + A1.
 *   TRMM: f0 = AP * f0 via fpmul.
 * f0 is then written with STFDX to CO1+INC2 and STFSDX to CO2+INC2;
 * neither store updates its pointer.
 * NOTE(review): in the TRMM arm A1/A2 are not loaded from C and A1 is
 * not read afterwards in this block, so the fsmfp there appears to have
 * no effect on the stored result.  Confirm before removing.
 */
.L88:
#ifndef TRMMKERNEL
LFDX A1, CO1, INC2
LFDX A2, CO2, INC2
fpadd f0, f0, f1
fpadd f2, f2, f3
fsmfp A1, A2
fpadd f0, f0, f2
fxcpmadd f0, AP, f0, A1
#else
fpadd f0, f0, f1
fpadd f2, f2, f3
fsmfp A1, A2
fpadd f0, f0, f2
fpmul f0, AP, f0
#endif
STFDX f0, CO1, INC2
STFSDX f0, CO2, INC2
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Skip the unused panel tail: TEMP = K - KK - (1 or 2). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -1
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 1
#endif
#endif
.align 4
/*
 * .L89: end of the two-column section.  For TRMM without LEFT, KK is
 * advanced by 2.  B is set to BO + 2*SIZE, undoing the -2*SIZE bias that
 * the sections above apply when they derive BO from B.
 */
.L89:
#if defined(TRMMKERNEL) && !defined(LEFT)
addi KK, KK, 2
#endif
addi B, BO, 2 * SIZE
.align 4
/*
 * .L90: single-column section, executed only when N & 1 is set
 * (otherwise control goes to the epilogue at .L999).
 * Setup: KK = OFFSET for TRMM with LEFT, CO1 = C, AO = A - 2*SIZE,
 * f0 reloaded from the FZERO slot, I = M >> 3.  If I <= 0 the eight-row
 * loop is skipped (.L100).
 */
.L90:
andi. J, N, 1
beq .L999
#if defined(TRMMKERNEL) && defined(LEFT)
mr KK, OFFSET
#endif
mr CO1, C
addi AO, A, -2 * SIZE
li r0, FZERO
lfpsx f0, SP, r0
srawi. I, M, 3
ble .L100
.align 4
/*
 * .L91: start of one eight-row tile.  f1-f3 are cleared from f0.
 * TRMM builds position AO (KK*8 elements) and BO (KK elements, minus the
 * 2*SIZE bias) unless the first #if arm applies.  The unrolled loop runs
 * (trip count) >> 2 times; the remainder is handled at .L94.
 */
.L91:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
#else
slwi TEMP, KK, 3 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 1
#endif
fpmr f2, f0
srawi. r0, TEMP, 2
fpmr f3, f0
mtspr CTR, r0
ble .L94
#else
srawi. r0, K, 2
fpmr f1, f0
addi BO, B, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
mtspr CTR, r0
ble .L94
#endif
/* Preload two B pairs and eight A pairs. */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L93
.align 4
/*
 * .L92: for each B pair, fxcpmadd combines it with A1..A4 and fxcsmadd
 * with A5..A8, all accumulating into f0-f3.  A1..A8 are reloaded after
 * use, so sixteen A pairs and two B pairs are consumed per trip.
 */
.L92:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B1, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B1, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B1, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B1, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B2, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B2, BO, INC2
bdnz+ .L92
.align 4
/*
 * .L93: last unrolled iteration.  The A pairs for the B2 half are still
 * loaded during the B1 half; the B2 half issues no loads.
 */
.L93:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B1, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B1, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B1, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B1, A8, f3
LFPDUX A8, AO, INC2
fxcpmadd f0, B2, A1, f0
fxcpmadd f1, B2, A2, f1
fxcpmadd f2, B2, A3, f2
fxcpmadd f3, B2, A4, f3
fxcsmadd f0, B2, A5, f0
fxcsmadd f1, B2, A6, f1
fxcsmadd f2, B2, A7, f2
fxcsmadd f3, B2, A8, f3
.align 4
/*
 * .L94: K remainder for the eight-row, single-column tile.
 * AP is loaded from the ALPHA slot (replicated by fsmfp in TRMM builds).
 * CTR = (trip count) & 3; zero branches to the write-back at .L98.
 * B is read with the non-updating LFDX at BO+INC2 and BO is advanced
 * separately by INC; A is read as four pairs per step.
 */
.L94:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
ble+ .L98
LFDX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
add BO, BO, INC
bdz- .L97
.align 4
/* .L96: one step per iteration. */
.L96:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFDX B1, BO, INC2
LFPDUX A4, AO, INC2
add BO, BO, INC
bdnz+ .L96
.align 4
/* .L97: final step without loads. */
.L97:
fxcpmadd f0, B1, A1, f0
fxcpmadd f1, B1, A2, f1
fxcpmadd f2, B1, A3, f2
fxcpmadd f3, B1, A4, f3
.align 4
/*
 * .L98: write-back of f0-f3 to the column addressed by CO1, then loop
 * control for the eight-row tiles.
 *   GEMM: four pairs are loaded from CO1, acc = AP * acc + C, then
 *         stored; the first store uses INCM7 (set outside this view,
 *         presumably the negative stride that undoes the four loads).
 *   TRMM: acc = AP * acc via fpmul, stored with stride INC2.
 */
.L98:
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
LFPDUX B1, CO1, INC2
LFPDUX A3, CO1, INC2
LFPDUX A5, CO1, INC2
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B1
fxcpmadd f2, AP, f2, A3
fxcpmadd f3, AP, f3, A5
STFPDUX f0, CO1, INCM7
STFPDUX f1, CO1, INC2
STFPDUX f2, CO1, INC2
STFPDUX f3, CO1, INC2
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
STFPDUX f0, CO1, INC2
STFPDUX f1, CO1, INC2
STFPDUX f2, CO1, INC2
STFPDUX f3, CO1, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Skip the unused panel tail: TEMP = K - KK - (8 or 1). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -8
#else
addi TEMP, TEMP, -1
#endif
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 8
#endif
#endif
/*
 * Decrement I and loop to .L91 while positive.  f0 is reloaded from the
 * FZERO slot in between; li and lfpsx do not change CR0.
 */
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L91
.align 4
/*
 * .L100: single-column section for M & 4; otherwise control goes to
 * .L110.  f1-f3 are cleared from f0.
 * TRMM builds position AO (KK*4 elements) and BO (KK elements, minus the
 * 2*SIZE bias) unless the first #if arm applies.  The unrolled loop runs
 * (trip count) >> 3 times; the remainder is handled at .L104.
 */
.L100:
andi. I, M, 4
beq .L110
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
slwi TEMP, KK, 2 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 1
#endif
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L104
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L104
#endif
/* Preload four B pairs and eight A pairs. */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX B4, BO, INC2
bdz- .L103
.align 4
/*
 * .L102: each B pair is used with four A pairs: fxcpmadd into f0/f1 and
 * fxcsmadd into f2/f3.  The two accumulator sets are summed at .L108.
 */
.L102:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B3, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B3, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B3, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f0, B4, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B4, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L102
.align 4
/*
 * .L103: last unrolled iteration.  A pairs for the B3/B4 half are still
 * loaded during the B1/B2 half; the second half issues no loads.
 */
.L103:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
fxcpmadd f1, B3, A2, f1
fxcsmadd f2, B3, A3, f2
fxcsmadd f3, B3, A4, f3
fxcpmadd f0, B4, A5, f0
fxcpmadd f1, B4, A6, f1
fxcsmadd f2, B4, A7, f2
fxcsmadd f3, B4, A8, f3
.align 4
/*
 * .L104: K remainder for the single-column M & 4 section.
 * AP is loaded from the ALPHA slot (replicated by fsmfp in TRMM builds).
 * CTR = (trip count) & 7; zero branches to .L108.
 * B is read with the non-updating LFDX at BO+INC2 and BO is advanced by
 * INC; two A pairs are read per step.  Only f0/f1 are updated here.
 */
.L104:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L108
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
LFPDUX A2, AO, INC2
add BO, BO, INC
bdz- .L107
.align 4
/* .L106: one step per iteration. */
.L106:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFDX B1, BO, INC2
LFPDUX A2, AO, INC2
add BO, BO, INC
bdnz+ .L106
.align 4
/* .L107: final step without loads. */
.L107:
fxcpmadd f0, B1, A1, f0
fxcpmadd f1, B1, A2, f1
.align 4
/*
 * .L108: write-back for the single-column M & 4 section.
 * The accumulator sets are combined first (f0 += f2, f1 += f3).
 *   GEMM: two pairs are loaded from CO1, acc = AP * acc + C, then
 *         stored; the first store uses INCM3 (set outside this view,
 *         presumably the negative stride that undoes both loads).
 *   TRMM: acc = AP * acc via fpmul, stored with stride INC2.
 */
.L108:
#ifndef TRMMKERNEL
LFPDUX A1, CO1, INC2
LFPDUX B1, CO1, INC2
fpadd f0, f0, f2
fpadd f1, f1, f3
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B1
STFPDUX f0, CO1, INCM3
STFPDUX f1, CO1, INC2
#else
fpadd f0, f0, f2
fpadd f1, f1, f3
fpmul f0, AP, f0
fpmul f1, AP, f1
STFPDUX f0, CO1, INC2
STFPDUX f1, CO1, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Skip the unused panel tail: TEMP = K - KK - (4 or 1). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -4
#else
addi TEMP, TEMP, -1
#endif
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 4
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next section. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
/*
 * .L110: single-column section for M & 2; otherwise control goes to
 * .L120.  f1-f3 are cleared from f0.
 * TRMM builds position AO (KK*2 elements) and BO (KK elements, minus the
 * 2*SIZE bias) unless the first #if arm applies.  The unrolled loop runs
 * (trip count) >> 3 times; the remainder is handled at .L114.
 */
.L110:
andi. I, M, 2
beq .L120
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
slwi TEMP, KK, 1 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 1
#endif
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L114
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L114
#endif
/* Preload eight A pairs and four B pairs. */
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdz- .L113
.align 4
/*
 * .L112: each B pair is used with two A pairs (fxcpmadd with the first,
 * fxcsmadd with the second), spread over the four accumulators f0-f3,
 * which are reduced at .L118.
 */
.L112:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcsmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B3, A6, f1
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L112
.align 4
/* .L113: last unrolled iteration, arithmetic only. */
.L113:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A2, f1
fxcpmadd f2, B2, A3, f2
fxcsmadd f3, B2, A4, f3
fxcpmadd f0, B3, A5, f0
fxcsmadd f1, B3, A6, f1
fxcpmadd f2, B4, A7, f2
fxcsmadd f3, B4, A8, f3
.align 4
/*
 * .L114: K remainder for the single-column M & 2 section.
 * AP is loaded from the ALPHA slot (replicated by fsmfp in TRMM builds).
 * CTR = (trip count) & 7; zero branches to .L118.
 * B is read with the non-updating LFDX at BO+INC2 and BO is advanced by
 * INC; one A pair is read per step.  Only f0 is updated here.
 */
.L114:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L118
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
add BO, BO, INC
bdz- .L117
.align 4
/* .L116: one step per iteration. */
.L116:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
add BO, BO, INC
bdnz+ .L116
.align 4
/* .L117: final step without loads. */
.L117:
fxcpmadd f0, B1, A1, f0
.align 4
/*
 * .L118: write-back for the single-column M & 2 section.
 * f0-f3 are reduced into f0 (f0+f1, f3+f2, then summed).  The scaled
 * result is placed in f1 so that f0 can be reloaded from the FZERO slot
 * before the store is issued.
 *   GEMM: f1 = AP * f0 + C, with C read by the non-updating LFPDX.
 *   TRMM: f1 = AP * f0 via fpmul.
 * The updating store then writes f1 at CO1+INC2.
 */
.L118:
#ifndef TRMMKERNEL
LFPDX A1, CO1, INC2
fpadd f0, f0, f1
fpadd f2, f3, f2
fpadd f0, f0, f2
fxcpmadd f1, AP, f0, A1
li r0, FZERO
lfpsx f0, SP, r0
STFPDUX f1, CO1, INC2
#else
fpadd f0, f0, f1
fpadd f2, f3, f2
fpadd f0, f0, f2
fpmul f1, AP, f0
li r0, FZERO
lfpsx f0, SP, r0
STFPDUX f1, CO1, INC2
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Skip the unused panel tail: TEMP = K - KK - (2 or 1). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -2
#else
addi TEMP, TEMP, -1
#endif
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 2
#endif
#endif
.align 4
/*
 * .L120: single-column section for M & 1; otherwise control goes to the
 * epilogue at .L999.  f1-f3 are cleared from f0.
 * TRMM builds advance AO by KK elements and set BO = B + KK elements
 * - 2*SIZE unless the first #if arm applies.  The unrolled loop runs
 * (trip count) >> 3 times; the remainder is handled at .L124.
 * NOTE(review): the "#elif defined(LEFT)" and "#else" arms of the trip
 * count selection are identical here (both KK + 1).
 */
.L120:
andi. I, M, 1
beq .L999
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
slwi TEMP, KK, 0 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 1
#endif
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L124
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L124
#endif
/* Preload four A pairs and four B pairs. */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
bdz- .L123
.align 4
/*
 * .L122: four paired multiply-adds per iteration (fpmadd), one into each
 * of f0-f3.  The partial sums are reduced at .L128.
 */
.L122:
fpmadd f0, A1, B1, f0
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
fpmadd f1, A2, B2, f1
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
fpmadd f2, A3, B3, f2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
fpmadd f3, A4, B4, f3
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L122
.align 4
/* .L123: last unrolled iteration, arithmetic only. */
.L123:
fpmadd f0, A1, B1, f0
fpmadd f1, A2, B2, f1
fpmadd f2, A3, B3, f2
fpmadd f3, A4, B4, f3
.align 4
/*
 * .L124: K remainder for the single-column M & 1 section.
 * AP is loaded from the ALPHA slot (replicated by fsmfp in TRMM builds).
 * CTR = (trip count) & 7; zero branches to .L128.
 * Both operands are read one element at a time with the non-updating
 * LFDX at pointer+INC2, and AO/BO are advanced separately by INC.  The
 * products are accumulated with the scalar fmadd into f0.
 */
.L124:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L128
LFDX A1, AO, INC2
LFDX B1, BO, INC2
add AO, AO, INC
add BO, BO, INC
bdz- .L127
.align 4
/* .L126: one scalar step per iteration. */
.L126:
fmadd f0, A1, B1, f0
LFDX A1, AO, INC2
LFDX B1, BO, INC2
add AO, AO, INC
add BO, BO, INC
bdnz+ .L126
.align 4
/* .L127: final step without loads. */
.L127:
fmadd f0, A1, B1, f0
.align 4
/*
 * .L128: write-back for the single-column M & 1 section.
 * f0-f3 are summed pairwise into f0; fsmtp then copies one half of f0
 * into f1 (presumably secondary to primary; confirm) so that the scalar
 * fadd folds both halves into a single value.
 *   GEMM: f0 = AP * f0 + C (scalar fmadd), C read with LFDX.
 *   TRMM: f0 = AP * f0 via fpmul.
 * The scalar result is stored with update at CO1+INC2.
 */
.L128:
#ifndef TRMMKERNEL
LFDX A1, CO1, INC2
fpadd f0, f0, f1
fpadd f2, f2, f3
fpadd f0, f0, f2
fsmtp f1, f0
fadd f0, f0, f1
fmadd f0, AP, f0, A1
#else
fpadd f0, f0, f1
fpadd f2, f2, f3
fpadd f0, f0, f2
fsmtp f1, f0
fadd f0, f0, f1
fpmul f0, AP, f0
#endif
STFDUX f0, CO1, INC2
.align 4
/*
 * .L999: common exit path.
 * Reloads r14-r31 and f14-f31 from the stack and returns; presumably
 * this mirrors the save sequence in the prologue (outside this view).
 * SP is first biased by +12 so that the pre-incrementing lwzu (step 4)
 * walks the eighteen integer slots, then the bias is removed and the
 * eighteen floating-point pairs are read with lfpdux in 16-byte steps.
 * A final SP += 16 precedes the return.
 */
.L999:
addi SP, SP, 12
lwzu r14, 4(SP)
lwzu r15, 4(SP)
lwzu r16, 4(SP)
lwzu r17, 4(SP)
lwzu r18, 4(SP)
lwzu r19, 4(SP)
lwzu r20, 4(SP)
lwzu r21, 4(SP)
lwzu r22, 4(SP)
lwzu r23, 4(SP)
lwzu r24, 4(SP)
lwzu r25, 4(SP)
lwzu r26, 4(SP)
lwzu r27, 4(SP)
lwzu r28, 4(SP)
lwzu r29, 4(SP)
lwzu r30, 4(SP)
lwzu r31, 4(SP)
subi SP, SP, 12
/* r0 = 16: stride between consecutive saved floating-point pairs. */
li r0, 16
lfpdux f31, SP, r0
lfpdux f30, SP, r0
lfpdux f29, SP, r0
lfpdux f28, SP, r0
lfpdux f27, SP, r0
lfpdux f26, SP, r0
lfpdux f25, SP, r0
lfpdux f24, SP, r0
lfpdux f23, SP, r0
lfpdux f22, SP, r0
lfpdux f21, SP, r0
lfpdux f20, SP, r0
lfpdux f19, SP, r0
lfpdux f18, SP, r0
lfpdux f17, SP, r0
lfpdux f16, SP, r0
lfpdux f15, SP, r0
lfpdux f14, SP, r0
addi SP, SP, 16
blr
.align 4
/*
 * .L1000: entry of a second code path (the branch that reaches it is
 * outside this view).  This path accesses C one element at a time
 * (LFDUX/LFSDUX/STFDUX/STFSDUX) instead of in pairs.
 * Sets the negative strides INCM1/3/5/7 to -1/-3/-5/-7 elements, biases
 * C by one element, and computes J = N >> 2; if J <= 0 the four-column
 * loop is skipped (.L1050).
 */
.L1000:
li INCM1, -1 * SIZE
li INCM3, -3 * SIZE
li INCM5, -5 * SIZE
li INCM7, -7 * SIZE
addi C, C, - 1 * SIZE
srawi. J, N, 2
ble .L1050
.align 4
/*
 * .L1010: start of one four-column panel.  CO1..CO4 address four
 * consecutive columns LDC apart, and C is advanced past them.
 * KK = OFFSET for TRMM with LEFT.  AO = A - 4*SIZE, f0 is reloaded from
 * the FZERO slot, and I = M >> 3; if I <= 0 the eight-row loop is
 * skipped (.L1020).
 */
.L1010:
mr CO1, C
add CO2, C, LDC
add CO3, CO2, LDC
add CO4, CO3, LDC
add C, CO4, LDC
#if defined(TRMMKERNEL) && defined(LEFT)
mr KK, OFFSET
#endif
addi AO, A, -4 * SIZE
li r0, FZERO
lfpsx f0, SP, r0
srawi. I, M, 3
ble .L1020
.align 4
/*
 * .L1011: setup for one tile that uses the sixteen accumulators f0-f15.
 * Two interleaved pointers are used per panel, each stepped by INC4:
 * AO2 = AO + 2*SIZE and BO2 = BO + 2*SIZE, with BO = B - 4*SIZE (plus
 * the KK offset in the TRMM arm that applies it).
 * The unrolled loop runs (trip count) >> 2 times; a non-positive count
 * branches to .L1014 with only f1, f4, f8 and f12 cleared, and the
 * remaining accumulators are cleared on that path instead.
 */
.L1011:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
#else
slwi TEMP, KK, 3 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 4
#endif
srawi. TEMP, TEMP, 2
fpmr f1, f0
mtspr CTR, TEMP
ble .L1014
#else
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
srawi. r0, K, 2
fpmr f1, f0
mtspr CTR, r0
ble .L1014
#endif
/*
 * Preload the operands for the first iteration, interleaved with the
 * clearing of the remaining accumulators (f2, f3, f5-f7, f9-f11,
 * f13-f15).
 */
LFPDUX A1, AO, INC4
fpmr f5, f0
LFPDUX A3, AO, INC4
fpmr f9, f0
LFPDUX B1, BO, INC4
fpmr f13, f0
LFPDUX A5, AO, INC4
fpmr f2, f0
LFPDUX A6, AO, INC4
fpmr f6, f0
LFPDUX B3, BO, INC4
fpmr f10, f0
LFPDUX A7, AO, INC4
fpmr f14, f0
LFPDUX A8, AO, INC4
fpmr f3, f0
LFPDUX B5, BO, INC4
fpmr f7, f0
LFPDUX A9, AO, INC4
fpmr f11, f0
LFPDUX A2, AO2, INC4
fpmr f15, f0
LFPDUX B2, BO2, INC4
/* Single iteration: go directly to the drain at .L1013. */
bdz- .L1013
.align 4
/*
 * .L1012: unrolled main loop over the sixteen accumulators f0-f15.
 * The body has four groups, marked "## 1 ##" to "## 4 ##".  Each group
 * combines two B pairs with four A pairs: fxcpmadd updates f0-f3 and
 * f8-f11, fxcsmadd updates f4-f7 and f12-f15.
 * Loads for later groups and for the next iteration are interleaved
 * with the arithmetic; the nop instructions fill the slots that have no
 * load.  Presumably this keeps a fixed arithmetic/load issue pairing;
 * do not reorder without measuring.
 */
.L1012:
## 1 ##
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
nop
fxcpmadd f8, B2, A1, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A1, f12
LFPDUX B6, BO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A10, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
nop
fxcpmadd f10, B2, A3, f10
nop
fxcsmadd f14, B2, A3, f14
nop
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
LFPDUX A1, AO, INC4
fxcsmadd f15, B2, A4, f15
nop
## 2 ##
fxcpmadd f0, B3, A5, f0
nop
fxcsmadd f4, B3, A5, f4
nop
fxcpmadd f8, B4, A5, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A5, f12
LFPDUX B1, BO, INC4
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
LFPDUX A3, AO, INC4
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B3, A6, f2
nop
fxcsmadd f6, B3, A6, f6
nop
fxcpmadd f10, B4, A6, f10
nop
fxcsmadd f14, B4, A6, f14
nop
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B4, A4, f11
LFPDUX A5, AO, INC4
fxcsmadd f15, B4, A4, f15
nop
## 3 ##
fxcpmadd f0, B5, A7, f0
nop
fxcsmadd f4, B5, A7, f4
nop
fxcpmadd f8, B2, A7, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A7, f12
LFPDUX B3, BO, INC4
fxcpmadd f1, B5, A2, f1
nop
fxcsmadd f5, B5, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A6, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B5, A8, f2
nop
fxcsmadd f6, B5, A8, f6
nop
fxcpmadd f10, B2, A8, f10
nop
fxcsmadd f14, B2, A8, f14
nop
fxcpmadd f3, B5, A4, f3
nop
fxcsmadd f7, B5, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
LFPDUX A7, AO, INC4
fxcsmadd f15, B2, A4, f15
nop
## 4 ##
fxcpmadd f0, B6, A9, f0
nop
fxcsmadd f4, B6, A9, f4
nop
fxcpmadd f8, B4, A9, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A9, f12
LFPDUX B5, BO, INC4
fxcpmadd f1, B6, A2, f1
nop
fxcsmadd f5, B6, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
LFPDUX A8, AO, INC4
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B6, A10, f2
nop
fxcsmadd f6, B6, A10, f6
nop
fxcpmadd f10, B4, A10, f10
nop
fxcsmadd f14, B4, A10, f14
nop
fxcpmadd f3, B6, A4, f3
LFPDUX A2, AO2, INC4
fxcsmadd f7, B6, A4, f7
LFPDUX A9, AO, INC4
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
bdnz+ .L1012
.align 4
/*
 * .L1013: final pass of the unrolled body (fall-through from .L1012 or
 * direct entry when the loop count is one).  The arithmetic is the same
 * four-group sequence on f0-f15, but loads that would fetch operands for
 * a following iteration are replaced:
 *   GEMM: by element loads of C.  CO1 values go into A1, B1, A3, A5
 *         (LFDUX, then LFSDUX after stepping back by INCM5), and the
 *         first four CO2 values go into B3, A6, A7, B2.
 *   TRMM: by nop, so the instruction count stays the same.
 * Because A1, B1, A3, A5, B3, A6, A7 and B2 hold C data on exit in GEMM
 * builds, they must not be reused as operands before the write-back.
 */
.L1013:
## 1 ##
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
nop
fxcpmadd f8, B2, A1, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A1, f12
LFPDUX B6, BO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
LFPDUX A10, AO, INC4
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
nop
fxcpmadd f10, B2, A3, f10
nop
fxcsmadd f14, B2, A3, f14
nop
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
#ifndef TRMMKERNEL
LFDUX A1, CO1, INC
#else
nop
#endif
fxcsmadd f15, B2, A4, f15
nop
## 2 ##
fxcpmadd f0, B3, A5, f0
nop
fxcsmadd f4, B3, A5, f4
nop
fxcpmadd f8, B4, A5, f8
LFPDUX B2, BO2, INC4
fxcsmadd f12, B4, A5, f12
#ifndef TRMMKERNEL
LFDUX B1, CO1, INC2
#else
nop
#endif
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
#ifndef TRMMKERNEL
LFDUX A3, CO1, INC2
#else
nop
#endif
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B3, A6, f2
nop
fxcsmadd f6, B3, A6, f6
nop
fxcpmadd f10, B4, A6, f10
nop
fxcsmadd f14, B4, A6, f14
nop
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B4, A4, f11
#ifndef TRMMKERNEL
LFDUX A5, CO1, INC2
#else
nop
#endif
fxcsmadd f15, B4, A4, f15
nop
## 3 ##
fxcpmadd f0, B5, A7, f0
nop
fxcsmadd f4, B5, A7, f4
nop
fxcpmadd f8, B2, A7, f8
LFPDUX B4, BO2, INC4
fxcsmadd f12, B2, A7, f12
#ifndef TRMMKERNEL
LFSDUX A1, CO1, INCM5
#else
nop
#endif
fxcpmadd f1, B5, A2, f1
nop
fxcsmadd f5, B5, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B2, A2, f9
#ifndef TRMMKERNEL
LFSDUX B1, CO1, INC2
#else
nop
#endif
fxcsmadd f13, B2, A2, f13
nop
fxcpmadd f2, B5, A8, f2
nop
fxcsmadd f6, B5, A8, f6
nop
fxcpmadd f10, B2, A8, f10
nop
fxcsmadd f14, B2, A8, f14
nop
fxcpmadd f3, B5, A4, f3
nop
fxcsmadd f7, B5, A4, f7
LFPDUX A2, AO2, INC4
fxcpmadd f11, B2, A4, f11
#ifndef TRMMKERNEL
LFSDUX A3, CO1, INC2
#else
nop
#endif
fxcsmadd f15, B2, A4, f15
nop
## 4 ##
fxcpmadd f0, B6, A9, f0
nop
fxcsmadd f4, B6, A9, f4
nop
fxcpmadd f8, B4, A9, f8
#ifndef TRMMKERNEL
LFSDUX A5, CO1, INC2
#else
nop
#endif
fxcsmadd f12, B4, A9, f12
#ifndef TRMMKERNEL
LFDUX B3, CO2, INC
#else
nop
#endif
fxcpmadd f1, B6, A2, f1
nop
fxcsmadd f5, B6, A2, f5
LFPDUX A4, AO2, INC4
fxcpmadd f9, B4, A2, f9
#ifndef TRMMKERNEL
LFDUX A6, CO2, INC2
#else
nop
#endif
fxcsmadd f13, B4, A2, f13
nop
fxcpmadd f2, B6, A10, f2
nop
fxcsmadd f6, B6, A10, f6
nop
fxcpmadd f10, B4, A10, f10
nop
fxcsmadd f14, B4, A10, f14
#ifndef TRMMKERNEL
LFDUX A7, CO2, INC2
#else
nop
#endif
fxcpmadd f3, B6, A4, f3
nop
fxcsmadd f7, B6, A4, f7
nop
fxcpmadd f11, B4, A4, f11
nop
fxcsmadd f15, B4, A4, f15
#ifndef TRMMKERNEL
LFDUX B2, CO2, INC2
#else
nop
#endif
.align 4
/*
 * .L1014: K remainder dispatch for the sixteen-accumulator tile.
 * AP is loaded from the ALPHA slot (replicated by fsmfp in TRMM builds).
 * CTR = (trip count) & 3; zero branches to the write-back at .L1018.
 * If the trip count is greater than 3 the unrolled path has already run,
 * so the accumulators are initialised and (GEMM) the first C values are
 * already loaded: branch to .L1015.
 * Otherwise the unrolled path was skipped, and the code below does the
 * work it would have done: clear f2, f3, f5-f7, f9-f11, f13-f15 and, in
 * GEMM builds, load the CO1 and CO2 elements into the same registers
 * that .L1013 uses for them.
 */
.L1014:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 4
#endif
andi. r0, TEMP, 3
mtspr CTR, r0
ble+ .L1018
cmpwi cr0, TEMP, 3
bgt+ .L1015
#else
andi. r0, K, 3
mtspr CTR, r0
ble+ .L1018
cmpwi cr0, K, 3
bgt+ .L1015
#endif
#ifndef TRMMKERNEL
LFDUX A1, CO1, INC
fpmr f5, f0
LFDUX B1, CO1, INC2
fpmr f9, f0
LFDUX A3, CO1, INC2
fpmr f13, f0
LFDUX A5, CO1, INC2
fpmr f2, f0
LFSDUX A1, CO1, INCM5
fpmr f6, f0
LFSDUX B1, CO1, INC2
fpmr f10, f0
LFSDUX A3, CO1, INC2
fpmr f14, f0
LFSDUX A5, CO1, INC2
fpmr f3, f0
LFDUX B3, CO2, INC
fpmr f7, f0
LFDUX A6, CO2, INC2
fpmr f11, f0
LFDUX A7, CO2, INC2
fpmr f15, f0
LFDUX B2, CO2, INC2
#else
fpmr f5, f0
fpmr f9, f0
fpmr f13, f0
fpmr f2, f0
fpmr f6, f0
fpmr f10, f0
fpmr f14, f0
fpmr f3, f0
fpmr f7, f0
fpmr f11, f0
fpmr f15, f0
nop
#endif
/*
 * .L1015/.L1016/.L1017: remainder loop for the f0-f15 tile, one K step
 * per iteration, CTR iterations in total (set at .L1014).
 *
 * Only A2, A4, A10 and B4 are used as operand registers here; A1, B1,
 * A3, A5, B3, A6, A7 and B2 are left untouched because (non-TRMM) they
 * already hold C values that .L1018 consumes.
 */
.align 4
.L1015:
/* Preload the first operands from the A streams (AO, AO2) and the B
   streams (BO, BO2). */
LFPDUX A2, AO, INC4
LFPDUX A4, AO2, INC4
LFPDUX A10, BO, INC4
LFPDUX B4, BO2, INC4
/* CTR == 1: only the final step at .L1017 is needed. */
bdz- .L1017
.align 4
.L1016:
/* For each A pair: the primary half of A10/B4 feeds f0-f3 / f8-f11
   (fxcpmadd) and the secondary half feeds f4-f7 / f12-f15 (fxcsmadd).
   Loads for the following operands are interleaved with the FMAs. */
fxcpmadd f0, A10, A2, f0
fxcsmadd f4, A10, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX A2, AO, INC4
fxcpmadd f1, A10, A4, f1
fxcsmadd f5, A10, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
LFPDUX A4, AO2, INC4
fxcpmadd f2, A10, A2, f2
fxcsmadd f6, A10, A2, f6
fxcpmadd f10, B4, A2, f10
fxcsmadd f14, B4, A2, f14
LFPDUX A2, AO, INC4
fxcpmadd f3, A10, A4, f3
fxcsmadd f7, A10, A4, f7
/* A10 is dead after the FMA above, so it can be reloaded for the next
   step while B4 is still in use. */
LFPDUX A10, BO, INC4
fxcpmadd f11, B4, A4, f11
fxcsmadd f15, B4, A4, f15
LFPDUX A4, AO2, INC4
LFPDUX B4, BO2, INC4
bdnz+ .L1016
.align 4
.L1017:
/* Final step: same arithmetic as .L1016 but without preloading
   operands for a following step. */
fxcpmadd f0, A10, A2, f0
fxcsmadd f4, A10, A2, f4
fxcpmadd f8, B4, A2, f8
fxcsmadd f12, B4, A2, f12
LFPDUX A2, AO, INC4
fxcpmadd f1, A10, A4, f1
fxcsmadd f5, A10, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
LFPDUX A4, AO2, INC4
fxcpmadd f2, A10, A2, f2
fxcsmadd f6, A10, A2, f6
fxcpmadd f10, B4, A2, f10
fxcsmadd f14, B4, A2, f14
fxcpmadd f3, A10, A4, f3
fxcsmadd f7, A10, A4, f7
fxcpmadd f11, B4, A4, f11
fxcsmadd f15, B4, A4, f15
/*
 * .L1018: write-back for the f0-f15 tile.
 *
 *   non-TRMM: result = alpha * acc + C.  The remaining C values are
 *             loaded here (the CO1 values and the primary halves of the
 *             CO2 values were loaded earlier), then each accumulator is
 *             scaled, added to its C pair and stored element by element.
 *   TRMM:     result = alpha * acc, stored without reading C.
 *
 * Accumulator-to-pointer mapping visible in the stores:
 *   f0-f3 -> CO1, f4-f7 -> CO2, f8-f11 -> CO3, f12-f15 -> CO4.
 * Afterwards the TRMM bookkeeping is updated and the loop over I
 * continues at .L1011.
 */
.align 4
.L1018:
#ifndef TRMMKERNEL
/* Secondary halves of the CO2 values; INCM5 steps CO2 back first. */
LFSDUX B3, CO2, INCM5
LFSDUX A6, CO2, INC2
LFSDUX A7, CO2, INC2
LFSDUX B2, CO2, INC2
/* CO3 values: primary halves, then secondary halves. */
LFDUX B5, CO3, INC
LFDUX A8, CO3, INC2
LFDUX A9, CO3, INC2
LFDUX B4, CO3, INC2
LFSDUX B5, CO3, INCM5
LFSDUX A8, CO3, INC2
LFSDUX A9, CO3, INC2
LFSDUX B4, CO3, INC2
/* CO4 values, interleaved with the first alpha*acc + C operations. */
LFDUX A2, CO4, INC
LFDUX A4, CO4, INC2
fxcpmadd f0, AP, f0, A1
LFDUX A10, CO4, INC2
/* A1 was consumed by the FMA above, so it is reused for a CO4 value. */
LFDUX A1, CO4, INC2
fxcpmadd f1, AP, f1, B1
LFSDUX A2, CO4, INCM5
LFSDUX A4, CO4, INC2
fxcpmadd f2, AP, f2, A3
LFSDUX A10, CO4, INC2
LFSDUX A1, CO4, INC2
fxcpmadd f3, AP, f3, A5
/* INCM7 rewinds each CO pointer to the start of the tile before the
   first store; every later store advances by INC.  STFDUX writes the
   primary half, STFSDUX the secondary half. */
STFDUX f0, CO1, INCM7
STFSDUX f0, CO1, INC
fxcpmadd f4, AP, f4, B3
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
fxcpmadd f5, AP, f5, A6
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
fxcpmadd f6, AP, f6, A7
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
fxcpmadd f7, AP, f7, B2
STFDUX f4, CO2, INCM7
STFSDUX f4, CO2, INC
fxcpmadd f8, AP, f8, B5
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
fxcpmadd f9, AP, f9, A8
STFDUX f6, CO2, INC
STFSDUX f6, CO2, INC
fxcpmadd f10, AP, f10, A9
STFDUX f7, CO2, INC
STFSDUX f7, CO2, INC
fxcpmadd f11, AP, f11, B4
STFDUX f8, CO3, INCM7
STFSDUX f8, CO3, INC
fxcpmadd f12, AP, f12, A2
STFDUX f9, CO3, INC
STFSDUX f9, CO3, INC
fxcpmadd f13, AP, f13, A4
STFDUX f10, CO3, INC
STFSDUX f10, CO3, INC
fxcpmadd f14, AP, f14, A10
STFDUX f11, CO3, INC
STFSDUX f11, CO3, INC
fxcpmadd f15, AP, f15, A1
STFDUX f12, CO4, INCM7
#else
/* TRMM: scale by alpha (both halves of AP hold alpha) and store; the
   CO pointers were never advanced by loads, so plain INC is used. */
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
fpmul f4, AP, f4
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
fpmul f5, AP, f5
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
fpmul f6, AP, f6
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
fpmul f7, AP, f7
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
fpmul f8, AP, f8
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
fpmul f9, AP, f9
STFDUX f6, CO2, INC
STFSDUX f6, CO2, INC
fpmul f10, AP, f10
STFDUX f7, CO2, INC
STFSDUX f7, CO2, INC
fpmul f11, AP, f11
STFDUX f8, CO3, INC
STFSDUX f8, CO3, INC
fpmul f12, AP, f12
STFDUX f9, CO3, INC
STFSDUX f9, CO3, INC
fpmul f13, AP, f13
STFDUX f10, CO3, INC
STFSDUX f10, CO3, INC
fpmul f14, AP, f14
STFDUX f11, CO3, INC
STFSDUX f11, CO3, INC
fpmul f15, AP, f15
STFDUX f12, CO4, INC
#endif
/* Remaining CO4 stores are common to both variants. */
STFSDUX f12, CO4, INC
STFDUX f13, CO4, INC
STFSDUX f13, CO4, INC
STFDUX f14, CO4, INC
STFSDUX f14, CO4, INC
STFDUX f15, CO4, INC
STFSDUX f15, CO4, INC
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - (8 if LEFT else 4); advance AO by TEMP scaled by
   2^(3+BASE_SHIFT) and BO by TEMP scaled by 2^(2+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -8
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 8
#endif
#endif
/* Decrement the tile counter I (sets CR0), reload f0 from the FZERO
   stack slot for the next tile, and loop while I > 0.  The li/lfpsx
   pair sits between addic. and bgt but does not modify CR0. */
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L1011
/*
 * .L1020: setup for the tile taken when bit 2 of M is set (M & 4),
 * still writing through four column pointers CO1-CO4.
 * Initialises the operand pointers, copies f0 into the accumulators
 * f1, f4, f5, f8, f9, f12, f13, sets CTR = count / 4 and preloads the
 * first operands for the unrolled loop at .L1022.
 */
.align 4
.L1020:
andi. I, M, 4
/* Bit clear: nothing to do for this tile size. */
beq .L1030
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
/* AO2 trails AO by 2 elements; BO/BO2 are biased below B because every
   load pre-increments its pointer by INC4. */
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
#else
/* Skip the first KK steps of both operands: AO and BO each advance by
   KK scaled by 2^(2+BASE_SHIFT). */
slwi TEMP, KK, 2 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, BO, - 4 * SIZE
fpmr f8, f0
addi BO2, BO, 2 * SIZE
fpmr f12, f0
#endif
/* TEMP = trip count under TRMM.  Both the LEFT and the non-LEFT
   fallback add 4 here, so the #elif/#else arms are identical. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 4
#endif
/* CTR = TEMP / 4 (TEMP itself is overwritten; .L1024 recomputes it). */
srawi. TEMP, TEMP, 2
fpmr f1, f0
fpmr f5, f0
fpmr f9, f0
mtspr CTR, TEMP
fpmr f13, f0
/* No full group of 4 steps: go to the remainder handling. */
ble .L1024
#else
addi AO2, AO, 2 * SIZE
fpmr f4, f0
addi BO, B, - 4 * SIZE
fpmr f8, f0
addi BO2, B, - 2 * SIZE
fpmr f12, f0
/* CTR = K / 4. */
srawi. r0, K, 2
fpmr f1, f0
fpmr f5, f0
fpmr f9, f0
mtspr CTR, r0
fpmr f13, f0
ble .L1024
#endif
/* Preload operands for the first unrolled iteration.  A8 is not loaded
   here; the loop body loads it as its first memory operation. */
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX A2, AO2, INC4
LFPDUX B2, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX B3, BO, INC4
LFPDUX A4, AO2, INC4
LFPDUX B4, BO2, INC4
LFPDUX A5, AO, INC4
LFPDUX B5, BO, INC4
LFPDUX A6, AO2, INC4
LFPDUX B6, BO2, INC4
LFPDUX A7, AO, INC4
LFPDUX A9, BO, INC4
LFPDUX A10, BO2, INC4
/* CTR == 1: only the drain iteration at .L1023 is needed. */
bdz- .L1023
/*
 * .L1022/.L1023: unrolled loop for the M & 4 tile, four K steps per
 * iteration.
 *
 * Operand pairing per step: (B1,B2 | A1,A2), (B3,B4 | A3,A4),
 * (B5,B6 | A5,A6), (A9,A10 | A7,A8) - A9/A10 hold B-stream data here.
 * Accumulators: f0/f1 and f4/f5 take the primary/secondary half of the
 * BO operand, f8/f9 and f12/f13 the primary/secondary half of the BO2
 * operand.  Each operand register is reloaded right after its last use.
 * The nops are presumably issue-slot padding - TODO confirm.
 */
.align 4
.L1022:
fxcpmadd f0, B1, A1, f0
nop
fxcsmadd f4, B1, A1, f4
LFPDUX A8, AO2, INC4
fxcpmadd f8, B2, A1, f8
nop
fxcsmadd f12, B2, A1, f12
LFPDUX A1, AO, INC4
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX B1, BO, INC4
fxcpmadd f9, B2, A2, f9
nop
fxcsmadd f13, B2, A2, f13
LFPDUX B2, BO2, INC4
fxcpmadd f0, B3, A3, f0
nop
fxcsmadd f4, B3, A3, f4
LFPDUX A2, AO2, INC4
fxcpmadd f8, B4, A3, f8
nop
fxcsmadd f12, B4, A3, f12
LFPDUX A3, AO, INC4
fxcpmadd f1, B3, A4, f1
nop
fxcsmadd f5, B3, A4, f5
LFPDUX B3, BO, INC4
fxcpmadd f9, B4, A4, f9
nop
fxcsmadd f13, B4, A4, f13
LFPDUX B4, BO2, INC4
fxcpmadd f0, B5, A5, f0
nop
fxcsmadd f4, B5, A5, f4
LFPDUX A4, AO2, INC4
fxcpmadd f8, B6, A5, f8
nop
fxcsmadd f12, B6, A5, f12
LFPDUX A5, AO, INC4
fxcpmadd f1, B5, A6, f1
nop
fxcsmadd f5, B5, A6, f5
LFPDUX B5, BO, INC4
fxcpmadd f9, B6, A6, f9
nop
fxcsmadd f13, B6, A6, f13
LFPDUX B6, BO2, INC4
fxcpmadd f0, A9, A7, f0
nop
fxcsmadd f4, A9, A7, f4
LFPDUX A6, AO2, INC4
fxcpmadd f8, A10, A7, f8
nop
fxcsmadd f12, A10, A7, f12
LFPDUX A7, AO, INC4
fxcpmadd f1, A9, A8, f1
nop
fxcsmadd f5, A9, A8, f5
LFPDUX A9, BO, INC4
fxcpmadd f9, A10, A8, f9
nop
fxcsmadd f13, A10, A8, f13
LFPDUX A10, BO2, INC4
bdnz+ .L1022
.align 4
.L1023:
/* Drain iteration: consumes the operands already in registers.  The
   only load is A8, which the preload sequence does not cover. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX A8, AO2, INC4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
fxcpmadd f0, B3, A3, f0
fxcsmadd f4, B3, A3, f4
fxcpmadd f8, B4, A3, f8
fxcsmadd f12, B4, A3, f12
fxcpmadd f1, B3, A4, f1
fxcsmadd f5, B3, A4, f5
fxcpmadd f9, B4, A4, f9
fxcsmadd f13, B4, A4, f13
fxcpmadd f0, B5, A5, f0
fxcsmadd f4, B5, A5, f4
fxcpmadd f8, B6, A5, f8
fxcsmadd f12, B6, A5, f12
fxcpmadd f1, B5, A6, f1
fxcsmadd f5, B5, A6, f5
fxcpmadd f9, B6, A6, f9
fxcsmadd f13, B6, A6, f13
fxcpmadd f0, A9, A7, f0
fxcsmadd f4, A9, A7, f4
fxcpmadd f8, A10, A7, f8
fxcsmadd f12, A10, A7, f12
fxcpmadd f1, A9, A8, f1
fxcsmadd f5, A9, A8, f5
fxcpmadd f9, A10, A8, f9
fxcsmadd f13, A10, A8, f13
/*
 * .L1024/.L1026/.L1027: remainder (count % 4) pass for the M & 4 tile.
 * Loads alpha, sets CTR to the remainder and runs one K step per
 * iteration into the same accumulators as the unrolled loop.
 */
.align 4
.L1024:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* fpmul in the TRMM write-back needs alpha in both halves of AP. */
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
/* Recompute the TRMM trip count (the setup code shifted TEMP in place). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 4
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
/* Masked value is non-negative, so this branches only when it is zero. */
ble+ .L1028
LFPDUX A1, AO, INC4
LFPDUX A2, AO2, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
/* CTR == 1: single step, handled at .L1027. */
bdz- .L1027
.align 4
.L1026:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
LFPDUX A1, AO, INC4
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
LFPDUX B1, BO, INC4
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
LFPDUX A2, AO2, INC4
LFPDUX B2, BO2, INC4
bdnz+ .L1026
.align 4
.L1027:
/* Last step: no further preloads. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f8, B2, A1, f8
fxcsmadd f12, B2, A1, f12
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f9, B2, A2, f9
fxcsmadd f13, B2, A2, f13
/*
 * .L1028: write-back for the M & 4 tile.
 *   f0,f1 -> CO1   f4,f5 -> CO2   f8,f9 -> CO3   f12,f13 -> CO4
 * non-TRMM computes alpha*acc + C, TRMM computes alpha*acc.
 * Ends by updating the TRMM bookkeeping and reloading f0 from FZERO(SP).
 */
.align 4
.L1028:
#ifndef TRMMKERNEL
/* Gather C: LFDUX fills primary halves, then INCM1 steps back one
   element so LFSDUX can fill the matching secondary halves. */
LFDUX A1, CO1, INC
LFDUX B1, CO1, INC2
LFDUX B3, CO2, INC
LFDUX A6, CO2, INC2
LFSDUX A1, CO1, INCM1
LFSDUX B1, CO1, INC2
LFSDUX B3, CO2, INCM1
LFSDUX A6, CO2, INC2
LFDUX B5, CO3, INC
LFDUX A8, CO3, INC2
LFDUX A2, CO4, INC
LFDUX A4, CO4, INC2
fxcpmadd f0, AP, f0, A1
LFSDUX B5, CO3, INCM1
LFSDUX A8, CO3, INC2
fxcpmadd f1, AP, f1, B1
LFSDUX A2, CO4, INCM1
LFSDUX A4, CO4, INC2
fxcpmadd f4, AP, f4, B3
/* INCM3 rewinds each pointer to the tile start before the first store. */
STFDUX f0, CO1, INCM3
STFSDUX f0, CO1, INC
fxcpmadd f5, AP, f5, A6
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
fxcpmadd f8, AP, f8, B5
STFDUX f4, CO2, INCM3
STFSDUX f4, CO2, INC
fxcpmadd f9, AP, f9, A8
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
fxcpmadd f12, AP, f12, A2
STFDUX f8, CO3, INCM3
STFSDUX f8, CO3, INC
fxcpmadd f13, AP, f13, A4
STFDUX f9, CO3, INC
STFSDUX f9, CO3, INC
STFDUX f12, CO4, INCM3
STFSDUX f12, CO4, INC
STFDUX f13, CO4, INC
STFSDUX f13, CO4, INC
#else
/* TRMM: scale and store; C is not read. */
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f4, AP, f4
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
fpmul f5, AP, f5
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
fpmul f8, AP, f8
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
fpmul f9, AP, f9
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
fpmul f12, AP, f12
STFDUX f8, CO3, INC
STFSDUX f8, CO3, INC
fpmul f13, AP, f13
STFDUX f9, CO3, INC
STFSDUX f9, CO3, INC
STFDUX f12, CO4, INC
STFSDUX f12, CO4, INC
STFDUX f13, CO4, INC
STFSDUX f13, CO4, INC
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - 4 (same constant for LEFT and non-LEFT); AO and BO
   both advance by TEMP scaled by 2^(2+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -4
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 4
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
/*
 * .L1030: setup for the tile taken when bit 1 of M is set (M & 2),
 * with four column pointers CO1-CO4.  Accumulators are f0-f3.
 * CTR = count / 4; operands for the first unrolled iteration are
 * preloaded before falling into .L1032.
 */
.align 4
.L1030:
andi. I, M, 2
beq .L1040
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
#else
/* Skip KK steps: AO advances by KK scaled by 2^(1+BASE_SHIFT), BO by
   KK scaled by 2^(2+BASE_SHIFT). */
slwi TEMP, KK, 1 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, BO, - 4 * SIZE
fpmr f2, f0
addi BO2, BO, 2 * SIZE
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 4
#endif
/* CTR = TEMP / 4; none -> remainder handling. */
srawi. r0, TEMP, 2
mtspr CTR, r0
ble .L1034
#else
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
/* CTR = K / 4. */
srawi. r0, K, 2
mtspr CTR, r0
ble .L1034
#endif
/* Preload four steps of operands.  The A stream alternates between AO
   and AO2 (A1, A2, A3, A4); A5-A8 are used for B-stream data. */
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
LFPDUX A2, AO2, INC4
LFPDUX B3, BO, INC4
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A5, BO, INC4
LFPDUX A6, BO2, INC4
LFPDUX A4, AO2, INC4
LFPDUX A7, BO, INC4
LFPDUX A8, BO2, INC4
bdz- .L1033
/*
 * .L1032/.L1033: unrolled loop for the M & 2 tile, four K steps per
 * iteration.  For each step the BO operand's primary/secondary halves
 * feed f0/f1 and the BO2 operand's halves feed f2/f3, all multiplied by
 * the same A pair.  Operands are reloaded immediately after last use.
 */
.align 4
.L1032:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX B1, BO, INC4
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
LFPDUX B2, BO2, INC4
LFPDUX A1, AO, INC4
fxcpmadd f0, B3, A2, f0
fxcsmadd f1, B3, A2, f1
LFPDUX B3, BO, INC4
fxcpmadd f2, B4, A2, f2
fxcsmadd f3, B4, A2, f3
LFPDUX B4, BO2, INC4
LFPDUX A2, AO2, INC4
fxcpmadd f0, A5, A3, f0
fxcsmadd f1, A5, A3, f1
LFPDUX A5, BO, INC4
fxcpmadd f2, A6, A3, f2
fxcsmadd f3, A6, A3, f3
LFPDUX A6, BO2, INC4
LFPDUX A3, AO, INC4
fxcpmadd f0, A7, A4, f0
fxcsmadd f1, A7, A4, f1
LFPDUX A7, BO, INC4
fxcpmadd f2, A8, A4, f2
fxcsmadd f3, A8, A4, f3
LFPDUX A8, BO2, INC4
LFPDUX A4, AO2, INC4
bdnz+ .L1032
.align 4
.L1033:
/* Drain iteration: arithmetic only, on the operands already loaded. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
fxcpmadd f0, B3, A2, f0
fxcsmadd f1, B3, A2, f1
fxcpmadd f2, B4, A2, f2
fxcsmadd f3, B4, A2, f3
fxcpmadd f0, A5, A3, f0
fxcsmadd f1, A5, A3, f1
fxcpmadd f2, A6, A3, f2
fxcsmadd f3, A6, A3, f3
fxcpmadd f0, A7, A4, f0
fxcsmadd f1, A7, A4, f1
fxcpmadd f2, A8, A4, f2
fxcsmadd f3, A8, A4, f3
/*
 * .L1034/.L1036/.L1037: remainder (count % 4) pass for the M & 2 tile.
 * A advances by INC2 per step here, so it is read with a non-updating
 * indexed load at offset INC4 followed by an explicit "add AO, AO, INC2".
 */
.align 4
.L1034:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* fpmul in the TRMM write-back needs alpha in both halves of AP. */
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 4
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
/* Remainder is zero: go to write-back. */
ble+ .L1038
LFPDX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC2
bdz- .L1037
.align 4
.L1036:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX B1, BO, INC4
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
LFPDX A1, AO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC2
bdnz+ .L1036
.align 4
.L1037:
/* Last step: no further preloads. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A1, f2
fxcsmadd f3, B2, A1, f3
/*
 * .L1038: write-back for the M & 2 tile: one register pair per column
 * pointer (f0 -> CO1, f1 -> CO2, f2 -> CO3, f3 -> CO4).
 * non-TRMM computes alpha*acc + C, TRMM computes alpha*acc.
 */
.align 4
.L1038:
#ifndef TRMMKERNEL
/* Primary then secondary half of each column's C pair. */
LFDUX A1, CO1, INC
LFDUX A2, CO2, INC
LFDUX A3, CO3, INC
LFDUX A4, CO4, INC
LFSDUX A1, CO1, INC
LFSDUX A2, CO2, INC
LFSDUX A3, CO3, INC
LFSDUX A4, CO4, INC
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, A2
fxcpmadd f2, AP, f2, A3
fxcpmadd f3, AP, f3, A4
/* INCM1 steps each pointer back to the first element before storing. */
STFDUX f0, CO1, INCM1
STFSDUX f0, CO1, INC
STFDUX f1, CO2, INCM1
STFSDUX f1, CO2, INC
STFDUX f2, CO3, INCM1
STFSDUX f2, CO3, INC
STFDUX f3, CO4, INCM1
STFSDUX f3, CO4, INC
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
fpmul f3, AP, f3
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO2, INC
STFSDUX f1, CO2, INC
STFDUX f2, CO3, INC
STFSDUX f2, CO3, INC
STFDUX f3, CO4, INC
STFSDUX f3, CO4, INC
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - (2 if LEFT else 4); AO advances by TEMP scaled by
   2^(1+BASE_SHIFT), BO by TEMP scaled by 2^(2+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -2
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 2
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
/*
 * .L1040: setup for the tile taken when bit 0 of M is set (M & 1),
 * with four column pointers CO1-CO4.  The unrolled loop handles eight
 * K steps per iteration (CTR = count / 8) using accumulators f0-f3,
 * which are combined at .L1048.
 */
.align 4
.L1040:
andi. I, M, 1
beq .L1049
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
#else
/* Skip KK steps: AO advances by KK scaled by 2^(0+BASE_SHIFT), BO by
   KK scaled by 2^(2+BASE_SHIFT). */
slwi TEMP, KK, 0 + BASE_SHIFT
slwi r0, KK, 2 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, BO, - 4 * SIZE
fpmr f2, f0
addi BO2, BO, 2 * SIZE
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 4
#endif
/* CTR = TEMP / 8; none -> remainder handling. */
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L1044
#else
addi AO2, AO, 2 * SIZE
fpmr f1, f0
addi BO, B, - 4 * SIZE
fpmr f2, f0
addi BO2, B, - 2 * SIZE
fpmr f3, f0
/* CTR = K / 8. */
srawi. r0, K, 3
mtspr CTR, r0
ble .L1044
#endif
/* Preload: A1-A4 from the A stream (alternating AO/AO2), and eight
   B-stream pairs in B1-B4 and A5-A8. */
LFPDUX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
LFPDUX A2, AO2, INC4
LFPDUX B3, BO, INC4
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
LFPDUX A5, BO, INC4
LFPDUX A6, BO2, INC4
LFPDUX A4, AO2, INC4
LFPDUX A7, BO, INC4
LFPDUX A8, BO2, INC4
bdz- .L1043
/*
 * .L1042/.L1043: unrolled loop for the M & 1 tile, eight K steps per
 * iteration.  Each A register carries two consecutive A values: its
 * primary half multiplies one pair of B registers into f0/f1
 * (fxcpmadd) and its secondary half multiplies the next pair of B
 * registers into f2/f3 (fxcsmadd).  f0+f2 and f1+f3 are summed later at
 * .L1048.  The B registers B1-B4 and A5-A8 are each reloaded twice per
 * iteration.
 */
.align 4
.L1042:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A1, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A1, B4, f3
LFPDUX B4, BO2, INC4
LFPDUX A1, AO, INC4
fxcpmadd f0, A2, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A2, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A2, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A2, A8, f3
LFPDUX A8, BO2, INC4
LFPDUX A2, AO2, INC4
fxcpmadd f0, A3, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A3, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A3, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A3, B4, f3
LFPDUX B4, BO2, INC4
LFPDUX A3, AO, INC4
fxcpmadd f0, A4, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A4, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A4, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A4, A8, f3
LFPDUX A8, BO2, INC4
LFPDUX A4, AO2, INC4
bdnz+ .L1042
.align 4
.L1043:
/* Drain iteration: the first half still reloads the B registers that
   the second half needs; no A-stream loads are issued. */
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFPDUX B2, BO2, INC4
fxcsmadd f2, A1, B3, f2
LFPDUX B3, BO, INC4
fxcsmadd f3, A1, B4, f3
LFPDUX B4, BO2, INC4
fxcpmadd f0, A2, A5, f0
LFPDUX A5, BO, INC4
fxcpmadd f1, A2, A6, f1
LFPDUX A6, BO2, INC4
fxcsmadd f2, A2, A7, f2
LFPDUX A7, BO, INC4
fxcsmadd f3, A2, A8, f3
LFPDUX A8, BO2, INC4
fxcpmadd f0, A3, B1, f0
fxcpmadd f1, A3, B2, f1
fxcsmadd f2, A3, B3, f2
fxcsmadd f3, A3, B4, f3
fxcpmadd f0, A4, A5, f0
fxcpmadd f1, A4, A6, f1
fxcsmadd f2, A4, A7, f2
fxcsmadd f3, A4, A8, f3
/*
 * .L1044/.L1046/.L1047: remainder (count % 8) pass for the M & 1 tile.
 * One A value per step is read with a non-updating scalar load at
 * offset INC4, after which AO is advanced by INC; only the primary
 * half of A1 is used (fxcpmadd), accumulating into f0 and f1.
 */
.align 4
.L1044:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* fpmul in the TRMM write-back needs alpha in both halves of AP. */
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 4
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
/* Remainder is zero: go to write-back. */
ble+ .L1048
LFDX A1, AO, INC4
LFPDUX B1, BO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC
bdz- .L1047
.align 4
.L1046:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC4
fxcpmadd f1, A1, B2, f1
LFDX A1, AO, INC4
LFPDUX B2, BO2, INC4
add AO, AO, INC
bdnz+ .L1046
.align 4
.L1047:
/* Last step: no further preloads. */
fxcpmadd f0, A1, B1, f0
fxcpmadd f1, A1, B2, f1
/*
 * .L1048: write-back for the M & 1 tile.  The partial sums are folded
 * (f0 += f2, f1 += f3); the primary/secondary halves of f0 go to
 * CO1/CO2 and those of f1 go to CO3/CO4.
 */
.align 4
.L1048:
#ifndef TRMMKERNEL
/* Pack the four C values into two pairs: A1 = (CO1, CO2) and
   B3 = (CO3, CO4).  These are non-updating loads, so the stores below
   advance the pointers themselves. */
LFDX A1, CO1, INC
LFDX B3, CO3, INC
LFSDX A1, CO2, INC
LFSDX B3, CO4, INC
fpadd f0, f0, f2
fpadd f1, f1, f3
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B3
#else
fpadd f0, f0, f2
fpadd f1, f1, f3
fpmul f0, AP, f0
fpmul f1, AP, f1
#endif
STFDUX f0, CO1, INC
STFSDUX f0, CO2, INC
STFDUX f1, CO3, INC
STFSDUX f1, CO4, INC
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - (1 if LEFT else 4); AO advances by TEMP scaled by
   2^(0+BASE_SHIFT), BO by TEMP scaled by 2^(2+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -1
#else
addi TEMP, TEMP, -4
#endif
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 2 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 1
#endif
#endif
/*
 * .L1049: end of one pass over the four-pointer (CO1-CO4) column group.
 * Advances KK (TRMM, non-LEFT only), sets B from the current BO
 * position (BO is kept 4 elements behind the next load address, hence
 * the +4 * SIZE), and repeats from .L1010 while J > 0.
 */
.align 4
.L1049:
#if defined(TRMMKERNEL) && !defined(LEFT)
addi KK, KK, 4
#endif
addi B, BO, 4 * SIZE
addic. J, J, -1
bgt+ .L1010
/*
 * .L1050: entry to the two-column-pointer group, taken when bit 1 of N
 * is set (N & 2).  Sets CO1/CO2 from C, advances C by 2 * LDC, resets
 * AO (biased 2 elements below A because loads pre-increment by INC2),
 * reloads f0 from FZERO(SP) and sets I = M / 8 for the loop at .L1051.
 */
.align 4
.L1050:
andi. J, N, 2
beq .L1090
mr CO1, C
add CO2, C, LDC
add C, CO2, LDC
#if defined(TRMMKERNEL) && defined(LEFT)
/* Restart the TRMM offset for this column group. */
mr KK, OFFSET
#endif
addi AO, A, -2 * SIZE
li r0, FZERO
lfpsx f0, SP, r0
srawi. I, M, 3
/* M < 8: skip to the M & 4 tile. */
ble .L1060
/*
 * .L1051: setup for one tile of the I = M / 8 loop in the two-pointer
 * (CO1/CO2) group.  Copies f0 into accumulators f1-f7, positions BO,
 * sets CTR = count / 4 and preloads operands for .L1052.  A single A
 * pointer (AO) and a single B pointer (BO) are used, both stepping by
 * INC2.
 */
.align 4
.L1051:
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
fpmr f4, f0
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
#else
/* Skip KK steps: AO advances by KK scaled by 2^(3+BASE_SHIFT), BO by
   KK scaled by 2^(1+BASE_SHIFT). */
slwi TEMP, KK, 3 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f4, f0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 2
#endif
/* CTR = TEMP / 4; none -> remainder handling. */
srawi. r0, TEMP, 2
fpmr f3, f0
mtspr CTR, r0
fpmr f7, f0
ble .L1054
#else
fpmr f4, f0
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f5, f0
fpmr f2, f0
fpmr f6, f0
/* CTR = K / 4. */
srawi. r0, K, 2
fpmr f3, f0
mtspr CTR, r0
fpmr f7, f0
ble .L1054
#endif
/* Preload B1-B3 and A1-A8.  B4 is loaded inside the loop body. */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L1053
/*
 * .L1052/.L1053: unrolled loop for the M / 8 tile of the two-pointer
 * group, four K steps per iteration.  Each step multiplies one B pair
 * (B1, B2, B3, B4 in turn) by four A pairs: the primary half of B
 * accumulates into f0-f3 and the secondary half into f4-f7.
 * A1-A4 and A5-A8 alternate as the A operands of successive steps.
 * The nops are presumably issue-slot padding - TODO confirm.
 */
.align 4
.L1052:
fxcpmadd f0, B1, A1, f0
LFPDUX B4, BO, INC2
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX B1, BO, INC2
fxcsmadd f4, B2, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
nop
fxcsmadd f5, B2, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B2, A7, f2
nop
fxcsmadd f6, B2, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B2, A8, f3
nop
fxcsmadd f7, B2, A8, f7
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
LFPDUX B2, BO, INC2
fxcsmadd f4, B3, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B3, A2, f1
nop
fxcsmadd f5, B3, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B3, A3, f2
nop
fxcsmadd f6, B3, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B3, A4, f3
nop
fxcsmadd f7, B3, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B4, A5, f0
LFPDUX B3, BO, INC2
fxcsmadd f4, B4, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B4, A6, f1
nop
fxcsmadd f5, B4, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B4, A7, f2
nop
fxcsmadd f6, B4, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B4, A8, f3
nop
fxcsmadd f7, B4, A8, f7
LFPDUX A8, AO, INC2
bdnz+ .L1052
.align 4
.L1053:
/* Drain iteration: the first two steps still load the A operands (and
   B4) needed by the last two steps; the last two steps are arithmetic
   only. */
fxcpmadd f0, B1, A1, f0
LFPDUX B4, BO, INC2
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
nop
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
nop
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
nop
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
nop
fxcsmadd f4, B2, A5, f4
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
nop
fxcsmadd f5, B2, A6, f5
LFPDUX A6, AO, INC2
fxcpmadd f2, B2, A7, f2
nop
fxcsmadd f6, B2, A7, f6
LFPDUX A7, AO, INC2
fxcpmadd f3, B2, A8, f3
nop
fxcsmadd f7, B2, A8, f7
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
fxcsmadd f4, B3, A1, f4
fxcpmadd f1, B3, A2, f1
fxcsmadd f5, B3, A2, f5
fxcpmadd f2, B3, A3, f2
fxcsmadd f6, B3, A3, f6
fxcpmadd f3, B3, A4, f3
fxcsmadd f7, B3, A4, f7
fxcpmadd f0, B4, A5, f0
fxcsmadd f4, B4, A5, f4
fxcpmadd f1, B4, A6, f1
fxcsmadd f5, B4, A6, f5
fxcpmadd f2, B4, A7, f2
fxcsmadd f6, B4, A7, f6
fxcpmadd f3, B4, A8, f3
fxcsmadd f7, B4, A8, f7
/*
 * .L1054/.L1056/.L1057: remainder (count % 4) pass for the M / 8 tile
 * of the two-pointer group: one B pair times four A pairs per step,
 * accumulating into f0-f7 exactly as the unrolled loop does.
 */
.align 4
.L1054:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* fpmul in the TRMM write-back needs alpha in both halves of AP. */
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
/* Remainder is zero: go to write-back. */
ble+ .L1058
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
bdz- .L1057
.align 4
.L1056:
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
fxcsmadd f6, B1, A3, f6
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
fxcsmadd f7, B1, A4, f7
LFPDUX A4, AO, INC2
LFPDUX B1, BO, INC2
bdnz+ .L1056
.align 4
.L1057:
/* Last step: no further preloads. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f4, B1, A1, f4
fxcpmadd f1, B1, A2, f1
fxcsmadd f5, B1, A2, f5
fxcpmadd f2, B1, A3, f2
fxcsmadd f6, B1, A3, f6
fxcpmadd f3, B1, A4, f3
fxcsmadd f7, B1, A4, f7
/*
 * .L1058: write-back for the M / 8 tile of the two-pointer group.
 *   f0-f3 -> CO1, f4-f7 -> CO2.
 * non-TRMM computes alpha*acc + C, TRMM computes alpha*acc.  Then the
 * TRMM bookkeeping is updated and the loop over I continues at .L1051.
 */
.align 4
.L1058:
#ifndef TRMMKERNEL
/* Gather C: primary halves first, then INCM5 steps back so the
   secondary halves can be filled. */
LFDUX A1, CO1, INC
LFDUX B1, CO1, INC2
LFDUX A3, CO1, INC2
LFDUX A5, CO1, INC2
LFSDUX A1, CO1, INCM5
LFSDUX B1, CO1, INC2
LFSDUX A3, CO1, INC2
LFSDUX A5, CO1, INC2
LFDUX B3, CO2, INC
LFDUX A6, CO2, INC2
LFDUX A7, CO2, INC2
LFDUX B2, CO2, INC2
fxcpmadd f0, AP, f0, A1
LFSDUX B3, CO2, INCM5
LFSDUX A6, CO2, INC2
fxcpmadd f1, AP, f1, B1
LFSDUX A7, CO2, INC2
LFSDUX B2, CO2, INC2
fxcpmadd f2, AP, f2, A3
/* INCM7 rewinds the pointer to the tile start before the first store. */
STFDUX f0, CO1, INCM7
STFSDUX f0, CO1, INC
fxcpmadd f3, AP, f3, A5
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
fxcpmadd f4, AP, f4, B3
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
fxcpmadd f5, AP, f5, A6
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
fxcpmadd f6, AP, f6, A7
STFDUX f4, CO2, INCM7
STFSDUX f4, CO2, INC
fxcpmadd f7, AP, f7, B2
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
STFDUX f6, CO2, INC
STFSDUX f6, CO2, INC
STFDUX f7, CO2, INC
STFSDUX f7, CO2, INC
#else
/* TRMM: scale and store; C is not read. */
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
fpmul f3, AP, f3
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
fpmul f4, AP, f4
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
fpmul f5, AP, f5
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
fpmul f6, AP, f6
STFDUX f4, CO2, INC
STFSDUX f4, CO2, INC
fpmul f7, AP, f7
STFDUX f5, CO2, INC
STFSDUX f5, CO2, INC
STFDUX f6, CO2, INC
STFSDUX f6, CO2, INC
STFDUX f7, CO2, INC
STFSDUX f7, CO2, INC
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - (8 if LEFT else 2); AO advances by TEMP scaled by
   2^(3+BASE_SHIFT), BO by TEMP scaled by 2^(1+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -8
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 8
#endif
#endif
/* Decrement I (sets CR0), reload f0 from FZERO(SP) - which leaves CR0
   untouched - and loop while I > 0. */
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L1051
/*
 * .L1060: setup for the M & 4 tile of the two-pointer (CO1/CO2) group.
 * Accumulators f0-f3; CTR = count / 4; operands for the first unrolled
 * iteration are preloaded before falling into .L1062.
 */
.align 4
.L1060:
andi. I, M, 4
beq .L1070
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
#else
/* Skip KK steps: AO advances by KK scaled by 2^(2+BASE_SHIFT), BO by
   KK scaled by 2^(1+BASE_SHIFT). */
slwi TEMP, KK, 2 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 2
#endif
fpmr f2, f0
/* CTR = TEMP / 4.  The fpmr between srawi. and ble does not touch CR0. */
srawi. r0, TEMP, 2
mtspr CTR, r0
fpmr f3, f0
ble .L1064
#else
/* CTR = K / 4; the interleaved fpmr/addi do not touch CR0. */
srawi. r0, K, 2
fpmr f1, f0
addi BO, B, - 2 * SIZE
fpmr f2, f0
mtspr CTR, r0
fpmr f3, f0
ble .L1064
#endif
/* Preload four steps: B1-B4 and A1-A8 (two A pairs per step). */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX B4, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
bdz- .L1063
/*
 * .L1062/.L1063: unrolled loop for the M & 4 tile of the two-pointer
 * group, four K steps per iteration.  Per step, the primary half of
 * the B pair accumulates into f0/f1 and the secondary half into f2/f3,
 * each against two A pairs.
 */
.align 4
.L1062:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A3, f0
fxcsmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f1, B2, A4, f1
fxcsmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A5, f0
fxcsmadd f2, B3, A5, f2
LFPDUX A5, AO, INC2
fxcpmadd f1, B3, A6, f1
fxcsmadd f3, B3, A6, f3
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f0, B4, A7, f0
fxcsmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcpmadd f1, B4, A8, f1
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L1062
.align 4
.L1063:
/* Drain iteration: arithmetic only, on the operands already loaded. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
fxcpmadd f0, B2, A3, f0
fxcsmadd f2, B2, A3, f2
fxcpmadd f1, B2, A4, f1
fxcsmadd f3, B2, A4, f3
fxcpmadd f0, B3, A5, f0
fxcsmadd f2, B3, A5, f2
fxcpmadd f1, B3, A6, f1
fxcsmadd f3, B3, A6, f3
fxcpmadd f0, B4, A7, f0
fxcsmadd f2, B4, A7, f2
fxcpmadd f1, B4, A8, f1
fxcsmadd f3, B4, A8, f3
/*
 * .L1064/.L1066/.L1067: remainder (count % 4) pass for the M & 4 tile
 * of the two-pointer group: one B pair times two A pairs per step.
 */
.align 4
.L1064:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* fpmul in the TRMM write-back needs alpha in both halves of AP. */
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
/* Remainder is zero: go to write-back. */
ble+ .L1068
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
bdz- .L1067
.align 4
.L1066:
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
bdnz+ .L1066
.align 4
.L1067:
/* Last step: no further preloads. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f2, B1, A1, f2
fxcpmadd f1, B1, A2, f1
fxcsmadd f3, B1, A2, f3
/*
 * .L1068: write-back for the M & 4 tile of the two-pointer group.
 *   f0,f1 -> CO1   f2,f3 -> CO2
 * non-TRMM computes alpha*acc + C, TRMM computes alpha*acc.
 */
.align 4
.L1068:
#ifndef TRMMKERNEL
/* Gather C: primary halves, then INCM1 steps back one element so the
   secondary halves can be filled. */
LFDUX A1, CO1, INC
LFDUX A2, CO1, INC2
LFDUX A3, CO2, INC
LFDUX A4, CO2, INC2
LFSDUX A1, CO1, INCM1
LFSDUX A2, CO1, INC2
LFSDUX A3, CO2, INCM1
LFSDUX A4, CO2, INC2
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, A2
fxcpmadd f2, AP, f2, A3
/* INCM3 rewinds the pointer to the tile start before the first store. */
STFDUX f0, CO1, INCM3
STFSDUX f0, CO1, INC
fxcpmadd f3, AP, f3, A4
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO2, INCM3
STFSDUX f2, CO2, INC
STFDUX f3, CO2, INC
STFSDUX f3, CO2, INC
#else
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
fpmul f3, AP, f3
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO2, INC
STFSDUX f2, CO2, INC
STFDUX f3, CO2, INC
STFSDUX f3, CO2, INC
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - (4 if LEFT else 2); AO advances by TEMP scaled by
   2^(2+BASE_SHIFT), BO by TEMP scaled by 2^(1+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -4
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 4
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
/*
 * .L1070: setup for the M & 2 tile of the two-pointer (CO1/CO2) group.
 * The unrolled loop handles eight K steps per iteration (CTR =
 * count / 8) and splits the work over two accumulator sets, f0/f1 and
 * f2/f3, which .L1078 adds together.
 */
.align 4
.L1070:
andi. I, M, 2
beq .L1080
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
#else
/* Skip KK steps: AO and BO both advance by KK scaled by
   2^(1+BASE_SHIFT). */
slwi TEMP, KK, 1 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
#endif
/* Both fallback arms add 2, so the #elif/#else arms are identical. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 2
#endif
/* CTR = TEMP / 8; none -> remainder handling. */
srawi. r0, TEMP, 3
fpmr f2, f0
mtspr CTR, r0
fpmr f3, f0
ble .L1074
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
/* CTR = K / 8. */
srawi. r0, K, 3
fpmr f2, f0
mtspr CTR, r0
fpmr f3, f0
ble .L1074
#endif
/* Preload eight steps: A1-A8 from the A stream; B1-B6, A9 and A10 from
   the B stream. */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX B5, BO, INC2
LFPDUX A6, AO, INC2
LFPDUX B6, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A9, BO, INC2
LFPDUX A8, AO, INC2
LFPDUX A10, BO, INC2
bdz- .L1073
/*
 * .L1072/.L1073: unrolled loop for the M & 2 tile of the two-pointer
 * group, eight K steps per iteration.  Steps alternate between the
 * accumulator sets f0/f1 and f2/f3; within a set the primary half of
 * the B pair goes to the first register and the secondary half to the
 * second.  Both operands of a step are reloaded right after use.
 */
.align 4
.L1072:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f2, B2, A2, f2
fxcsmadd f3, B2, A2, f3
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A3, f0
fxcsmadd f1, B3, A3, f1
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f2, B4, A4, f2
fxcsmadd f3, B4, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
fxcpmadd f0, B5, A5, f0
fxcsmadd f1, B5, A5, f1
LFPDUX A5, AO, INC2
LFPDUX B5, BO, INC2
fxcpmadd f2, B6, A6, f2
fxcsmadd f3, B6, A6, f3
LFPDUX A6, AO, INC2
LFPDUX B6, BO, INC2
fxcpmadd f0, A9, A7, f0
fxcsmadd f1, A9, A7, f1
LFPDUX A7, AO, INC2
LFPDUX A9, BO, INC2
fxcpmadd f2, A10, A8, f2
fxcsmadd f3, A10, A8, f3
LFPDUX A8, AO, INC2
LFPDUX A10, BO, INC2
bdnz+ .L1072
.align 4
.L1073:
/* Drain iteration: arithmetic only, on the operands already loaded. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
fxcpmadd f2, B2, A2, f2
fxcsmadd f3, B2, A2, f3
fxcpmadd f0, B3, A3, f0
fxcsmadd f1, B3, A3, f1
fxcpmadd f2, B4, A4, f2
fxcsmadd f3, B4, A4, f3
fxcpmadd f0, B5, A5, f0
fxcsmadd f1, B5, A5, f1
fxcpmadd f2, B6, A6, f2
fxcsmadd f3, B6, A6, f3
fxcpmadd f0, A9, A7, f0
fxcsmadd f1, A9, A7, f1
fxcpmadd f2, A10, A8, f2
fxcsmadd f3, A10, A8, f3
/*
 * .L1074/.L1076/.L1077: remainder (count % 8) pass for the M & 2 tile
 * of the two-pointer group.  One A pair and one B pair per step, all
 * accumulated into f0/f1.
 */
.align 4
.L1074:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* fpmul in the TRMM write-back needs alpha in both halves of AP. */
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
/* Remainder is zero: go to write-back. */
ble+ .L1078
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
bdz- .L1077
.align 4
.L1076:
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
bdnz+ .L1076
.align 4
.L1077:
/* Last step: no further preloads. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A1, f1
/*
 * .L1078: write-back for the M & 2 tile of the two-pointer group.
 * Folds the two accumulator sets (f0 += f2, f1 += f3), then stores
 * f0 through CO1 and f1 through CO2.
 * non-TRMM computes alpha*acc + C, TRMM computes alpha*acc.
 */
.align 4
.L1078:
#ifndef TRMMKERNEL
/* Primary then secondary half of each column's C pair. */
LFDUX A1, CO1, INC
LFDUX B3, CO2, INC
LFSDUX A1, CO1, INC
LFSDUX B3, CO2, INC
fpadd f0, f0, f2
fpadd f1, f1, f3
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B3
/* INCM1 steps each pointer back to the first element before storing. */
STFDUX f0, CO1, INCM1
STFSDUX f0, CO1, INC
STFDUX f1, CO2, INCM1
STFSDUX f1, CO2, INC
#else
fpadd f0, f0, f2
fpadd f1, f1, f3
fpmul f0, AP, f0
fpmul f1, AP, f1
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
STFDUX f1, CO2, INC
STFSDUX f1, CO2, INC
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* TEMP = K - KK - 2 (same constant for LEFT and non-LEFT); AO and BO
   both advance by TEMP scaled by 2^(1+BASE_SHIFT). */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -2
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 2
#endif
#endif
/* Reload f0 from the FZERO stack slot for the next tile. */
li r0, FZERO
lfpsx f0, SP, r0
/*
 * .L1080: setup for the M & 1 tile of the two-pointer (CO1/CO2) group.
 * The unrolled loop handles eight K steps per iteration (CTR =
 * count / 8) and spreads the partial sums over f0-f3, which .L1088
 * adds together.
 */
.align 4
.L1080:
andi. I, M, 1
beq .L1089
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
/* Skip KK steps: AO advances by KK scaled by 2^(0+BASE_SHIFT), BO by
   KK scaled by 2^(1+BASE_SHIFT). */
slwi TEMP, KK, 0 + BASE_SHIFT
slwi r0, KK, 1 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#endif
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 2
#endif
/* CTR = TEMP / 8; none -> remainder handling. */
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L1084
#else
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
/* CTR = K / 8. */
srawi. r0, K, 3
mtspr CTR, r0
ble .L1084
#endif
/* Preload A1-A4 (two A values each) and B1-B4. */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX B4, BO, INC2
bdz- .L1083
.align 4
.L1082:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A1, B2, f1
LFPDUX B2, BO, INC2
LFPDUX A1, AO, INC2
fxcpmadd f2, A2, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A2, B4, f3
LFPDUX B4, BO, INC2
LFPDUX A2, AO, INC2
fxcpmadd f0, A3, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A3, B2, f1
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
fxcpmadd f2, A4, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A4, B4, f3
LFPDUX B4, BO, INC2
LFPDUX A4, AO, INC2
bdnz+ .L1082
.align 4
.L1083:
fxcpmadd f0, A1, B1, f0
LFPDUX B1, BO, INC2
fxcsmadd f1, A1, B2, f1
LFPDUX B2, BO, INC2
fxcpmadd f2, A2, B3, f2
LFPDUX B3, BO, INC2
fxcsmadd f3, A2, B4, f3
LFPDUX B4, BO, INC2
fxcpmadd f0, A3, B1, f0
fxcsmadd f1, A3, B2, f1
fxcpmadd f2, A4, B3, f2
fxcsmadd f3, A4, B4, f3
.align 4
.L1084:
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 2
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L1088
LFDX A1, AO, INC2
LFPDUX B1, BO, INC2
add AO, AO, INC
bdz- .L1087
.align 4
.L1086:
fxcpmadd f0, A1, B1, f0
LFDX A1, AO, INC2
LFPDUX B1, BO, INC2
add AO, AO, INC
bdnz+ .L1086
.align 4
.L1087:
fxcpmadd f0, A1, B1, f0
.align 4
.L1088:
#ifndef TRMMKERNEL
LFDX A1, CO1, INC
LFDX A2, CO2, INC
fpadd f0, f0, f1
fpadd f2, f2, f3
fsmfp A1, A2
fpadd f0, f0, f2
fxcpmadd f0, AP, f0, A1
#else
fpadd f0, f0, f1
fpadd f2, f2, f3
fsmfp A1, A2
fpadd f0, f0, f2
fpmul f0, AP, f0
#endif
STFDUX f0, CO1, INC
STFSDUX f0, CO2, INC
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -1
#else
addi TEMP, TEMP, -2
#endif
slwi r0, TEMP, 0 + BASE_SHIFT
slwi TEMP, TEMP, 1 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
addi KK, KK, 1
#endif
#endif
.align 4
.L1089:
/* Join point after all row tiles of the current column pair. */
#if defined(TRMMKERNEL) && !defined(LEFT)
/* Non-LEFT TRMM build: advance KK by 2, matching the CO1/CO2 column pair. */
addi KK, KK, 2
#endif
/* B continues where BO stopped; +2*SIZE undoes the -2*SIZE bias that BO */
/* is set up with in the tile code. */
addi B, BO, 2 * SIZE
.align 4
.L1090:
/* Single leftover column: taken only when N is odd, otherwise go to the */
/* exit code at .L10999. */
andi. J, N, 1
beq .L10999
#if defined(TRMMKERNEL) && defined(LEFT)
/* LEFT TRMM build: restart KK from OFFSET for this column. */
mr KK, OFFSET
#endif
/* One output pointer only (CO1); AO restarts at A with the -2*SIZE bias */
/* used by the update-form loads in the tile code. */
mr CO1, C
addi AO, A, -2 * SIZE
/* f0 = the pair stored at FZERO(SP) (presumably zero - TODO confirm). */
li r0, FZERO
lfpsx f0, SP, r0
/* I = M / 8 eight-row tiles; none -> continue with the M & 4 tile. */
srawi. I, M, 3
ble .L10100
.align 4
.L1091:
/* 8 x 1 tile loop: eight rows of the single remaining column per pass of */
/* I. f0-f3 each accumulate one pair of rows. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
/* BO starts at the head of B (biased by -2*SIZE). */
addi BO, B, - 2 * SIZE
fpmr f1, f0
#else
/* Skip the first KK steps: 8 elements per step in A, 1 per step in B. */
slwi TEMP, KK, 3 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
addi BO, BO, - 2 * SIZE
fpmr f1, f0
#endif
/* TEMP = inner-product length: K - KK, or KK + 8 (LEFT) / KK + 1. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 1
#endif
/* CTR = TEMP / 4 unrolled passes; the accumulator seeding of f2/f3 is */
/* interleaved with the count computation. */
fpmr f2, f0
srawi. r0, TEMP, 2
fpmr f3, f0
mtspr CTR, r0
ble .L1094
#else
/* Plain GEMM: CTR = K / 4 unrolled passes, B from its head. */
srawi. r0, K, 2
fpmr f1, f0
addi BO, B, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
mtspr CTR, r0
ble .L1094
#endif
/* Preload: two B pairs (four steps of B) and eight A pairs (two steps */
/* of eight rows each). */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
/* Only one pass requested: go directly to the drain code. */
bdz- .L1093
.align 4
.L1092:
/* Steady-state pass (16 multiply-adds = 4 steps x 4 row pairs). The */
/* primary half of a B register multiplies A1-A4 (fxcpmadd), then its */
/* secondary half multiplies A5-A8 (fxcsmadd); each A register is reloaded */
/* right after use. (Half selection per the paired-FPU ISA - confirm.) */
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B1, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B1, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B1, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B1, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B2, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B2, BO, INC2
bdnz+ .L1092
.align 4
.L1093:
/* Drain pass: the B1 half reloads A1-A8 for the B2 half; the B2 half */
/* performs no further loads. */
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcsmadd f0, B1, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B1, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B1, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B1, A8, f3
LFPDUX A8, AO, INC2
fxcpmadd f0, B2, A1, f0
fxcpmadd f1, B2, A2, f1
fxcpmadd f2, B2, A3, f2
fxcpmadd f3, B2, A4, f3
fxcsmadd f0, B2, A5, f0
fxcsmadd f1, B2, A6, f1
fxcsmadd f2, B2, A7, f2
fxcsmadd f3, B2, A8, f3
.align 4
.L1094:
/* Load alpha from the stack slot at ALPHA(SP). */
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* Copy alpha into the other half of AP so fpmul scales both halves. */
fsmfp AP, AP
#endif
/* Remainder: the inner-product length modulo 4, one step per iteration. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 8
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 3
mtspr CTR, TEMP
#else
andi. r0, K, 3
mtspr CTR, r0
#endif
ble+ .L1098
/* B is read one element at a time (LFDX leaves BO alone, so BO is */
/* stepped by INC explicitly); A is read as four pairs with update. */
LFDX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
add BO, BO, INC
bdz- .L1097
.align 4
.L1096:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcpmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcpmadd f3, B1, A4, f3
LFDX B1, BO, INC2
LFPDUX A4, AO, INC2
add BO, BO, INC
bdnz+ .L1096
.align 4
.L1097:
/* Last remainder step: operands are already in registers. */
fxcpmadd f0, B1, A1, f0
fxcpmadd f1, B1, A2, f1
fxcpmadd f2, B1, A3, f2
fxcpmadd f3, B1, A4, f3
.align 4
.L1098:
#ifndef TRMMKERNEL
/* Gather eight C elements with scalar loads: the first four loads fill */
/* one half of A1/B1/A3/A5 (stride INC2), the INCM5 step moves CO1 back */
/* and the next four fill the other halves. */
LFDUX A1, CO1, INC
LFDUX B1, CO1, INC2
LFDUX A3, CO1, INC2
LFDUX A5, CO1, INC2
LFSDUX A1, CO1, INCM5
LFSDUX B1, CO1, INC2
LFSDUX A3, CO1, INC2
LFSDUX A5, CO1, INC2
/* fN = alpha * fN + C; INCM7 rewinds CO1 for the first store. */
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B1
fxcpmadd f2, AP, f2, A3
STFDUX f0, CO1, INCM7
STFSDUX f0, CO1, INC
fxcpmadd f3, AP, f3, A5
#else
/* TRMM: scale by alpha only; C is not read. */
fpmul f0, AP, f0
fpmul f1, AP, f1
fpmul f2, AP, f2
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
fpmul f3, AP, f3
#endif
/* Remaining six results, one scalar store per element, stepping by INC. */
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
STFDUX f2, CO1, INC
STFSDUX f2, CO1, INC
STFDUX f3, CO1, INC
STFSDUX f3, CO1, INC
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Step AO/BO over the K - KK - (8 or 1) steps this tile did not use: */
/* 8 elements per step for A, 1 element per step for B. */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -8
#else
addi TEMP, TEMP, -1
#endif
slwi r0, TEMP, 3 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
/* LEFT build: KK advances by the eight rows handled here. */
addi KK, KK, 8
#endif
#endif
/* Next eight-row tile; f0 is reloaded from FZERO(SP) for it (the reload */
/* sits between the counter update and the branch that tests it). */
addic. I, I, -1
li r0, FZERO
lfpsx f0, SP, r0
bgt+ .L1091
.align 4
.L10100:
/* 4 x 1 tile: runs only when bit 2 of M is set. */
andi. I, M, 4
beq .L10110
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
/* BO starts at the head of B (biased by -2*SIZE); f1-f3 seeded from f0. */
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
/* Skip the first KK steps: 4 elements per step in A, 1 per step in B. */
slwi TEMP, KK, 2 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
#endif
/* TEMP = inner-product length: K - KK, or KK + 4 (LEFT) / KK + 1. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 1
#endif
/* CTR = TEMP / 8 unrolled passes; none -> straight to the remainder. */
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L10104
#else
/* Plain GEMM: CTR = K / 8 unrolled passes, B from its head. */
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L10104
#endif
/* Preload: four B pairs (eight steps of B) and eight A pairs (four */
/* steps of four rows each). */
LFPDUX B1, BO, INC2
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX B4, BO, INC2
/* Only one pass requested: go directly to the drain code. */
bdz- .L10103
.align 4
.L10102:
/* Steady-state pass (16 multiply-adds = 8 steps x 2 row pairs). For each */
/* B register the primary half feeds f0/f1 (fxcpmadd) and the secondary */
/* half feeds f2/f3 (fxcsmadd); f0+f2 and f1+f3 are combined after the */
/* loop. (Half selection per the paired-FPU ISA - confirm.) */
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B3, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B3, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B3, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f0, B4, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B4, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L10102
.align 4
.L10103:
/* Drain pass: the B1/B2 half reloads A1-A8 for the B3/B4 half; no B */
/* loads and no A loads beyond that. */
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
fxcsmadd f2, B1, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B1, A4, f3
LFPDUX A4, AO, INC2
fxcpmadd f0, B2, A5, f0
LFPDUX A5, AO, INC2
fxcpmadd f1, B2, A6, f1
LFPDUX A6, AO, INC2
fxcsmadd f2, B2, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B2, A8, f3
LFPDUX A8, AO, INC2
fxcpmadd f0, B3, A1, f0
fxcpmadd f1, B3, A2, f1
fxcsmadd f2, B3, A3, f2
fxcsmadd f3, B3, A4, f3
fxcpmadd f0, B4, A5, f0
fxcpmadd f1, B4, A6, f1
fxcsmadd f2, B4, A7, f2
fxcsmadd f3, B4, A8, f3
.align 4
.L10104:
/* Load alpha from the stack slot at ALPHA(SP). */
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* Copy alpha into the other half of AP so fpmul scales both halves. */
fsmfp AP, AP
#endif
/* Remainder: the inner-product length modulo 8, one step per iteration. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 4
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L10108
/* B is read one element at a time (LFDX leaves BO alone, so BO is */
/* stepped by INC explicitly); A is read as two pairs with update. */
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
LFPDUX A2, AO, INC2
add BO, BO, INC
bdz- .L10107
.align 4
.L10106:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcpmadd f1, B1, A2, f1
LFDX B1, BO, INC2
LFPDUX A2, AO, INC2
add BO, BO, INC
bdnz+ .L10106
.align 4
.L10107:
/* Last remainder step: operands are already in registers. */
fxcpmadd f0, B1, A1, f0
fxcpmadd f1, B1, A2, f1
.align 4
.L10108:
#ifndef TRMMKERNEL
/* Gather four C elements with scalar loads: two loads fill one half of */
/* A1/B1 (stride INC2), INCM1 steps CO1 back, two more fill the other */
/* halves. */
LFDUX A1, CO1, INC
LFDUX B1, CO1, INC2
LFSDUX A1, CO1, INCM1
LFSDUX B1, CO1, INC2
/* Merge the split accumulators, then fN = alpha * fN + C. */
fpadd f0, f0, f2
fpadd f1, f1, f3
fxcpmadd f0, AP, f0, A1
fxcpmadd f1, AP, f1, B1
/* INCM3 rewinds CO1 for the first store. */
STFDUX f0, CO1, INCM3
STFSDUX f0, CO1, INC
#else
/* TRMM: merge the split accumulators and scale by alpha; C is not read. */
fpadd f0, f0, f2
fpadd f1, f1, f3
fpmul f0, AP, f0
fpmul f1, AP, f1
STFDUX f0, CO1, INC
STFSDUX f0, CO1, INC
#endif
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Step AO/BO over the K - KK - (4 or 1) steps this tile did not use: */
/* 4 elements per step for A, 1 element per step for B. */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -4
#else
addi TEMP, TEMP, -1
#endif
slwi r0, TEMP, 2 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
/* LEFT build: KK advances by the four rows handled here. */
addi KK, KK, 4
#endif
#endif
/* Reload f0 from FZERO(SP) so the next tile can seed its accumulators. */
li r0, FZERO
lfpsx f0, SP, r0
.align 4
.L10110:
/* 2 x 1 tile: runs only when bit 1 of M is set. */
andi. I, M, 2
beq .L10120
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
/* BO starts at the head of B (biased by -2*SIZE); f1-f3 seeded from f0. */
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
/* Skip the first KK steps: 2 elements per step in A, 1 per step in B. */
slwi TEMP, KK, 1 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
#endif
/* TEMP = inner-product length: K - KK, or KK + 2 (LEFT) / KK + 1. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 1
#endif
/* CTR = TEMP / 8 unrolled passes; none -> straight to the remainder. */
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L10114
#else
/* Plain GEMM: CTR = K / 8 unrolled passes, B from its head. */
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L10114
#endif
/* Preload everything one pass needs: eight A pairs and four B pairs. */
LFPDUX A1, AO, INC2
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A5, AO, INC2
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A7, AO, INC2
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
/* Only one pass requested: go directly to the drain code. */
bdz- .L10113
.align 4
.L10112:
/* Steady-state pass (8 multiply-adds). Each B register supplies two */
/* multipliers: its primary half via fxcpmadd and its secondary half via */
/* fxcsmadd, each against one A pair; every operand is reloaded after */
/* use. (Half selection per the paired-FPU ISA - confirm.) */
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
fxcsmadd f1, B1, A2, f1
LFPDUX A2, AO, INC2
LFPDUX B1, BO, INC2
fxcpmadd f2, B2, A3, f2
LFPDUX A3, AO, INC2
fxcsmadd f3, B2, A4, f3
LFPDUX A4, AO, INC2
LFPDUX B2, BO, INC2
fxcpmadd f0, B3, A5, f0
LFPDUX A5, AO, INC2
fxcsmadd f1, B3, A6, f1
LFPDUX A6, AO, INC2
LFPDUX B3, BO, INC2
fxcpmadd f2, B4, A7, f2
LFPDUX A7, AO, INC2
fxcsmadd f3, B4, A8, f3
LFPDUX A8, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L10112
.align 4
.L10113:
/* Drain pass: arithmetic only, all operands are already loaded. */
fxcpmadd f0, B1, A1, f0
fxcsmadd f1, B1, A2, f1
fxcpmadd f2, B2, A3, f2
fxcsmadd f3, B2, A4, f3
fxcpmadd f0, B3, A5, f0
fxcsmadd f1, B3, A6, f1
fxcpmadd f2, B4, A7, f2
fxcsmadd f3, B4, A8, f3
.align 4
.L10114:
/* Load alpha from the stack slot at ALPHA(SP). */
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
/* Copy alpha into the other half of AP so fpmul scales both halves. */
fsmfp AP, AP
#endif
/* Remainder: the inner-product length modulo 8, one step per iteration. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 2
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L10118
/* B is read one element at a time (LFDX leaves BO alone, so BO is */
/* stepped by INC explicitly); A is read as one pair with update. */
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
add BO, BO, INC
bdz- .L10117
.align 4
.L10116:
fxcpmadd f0, B1, A1, f0
LFPDUX A1, AO, INC2
LFDX B1, BO, INC2
add BO, BO, INC
bdnz+ .L10116
.align 4
.L10117:
/* Last remainder step: operands are already in A1/B1. */
fxcpmadd f0, B1, A1, f0
.align 4
.L10118:
#ifndef TRMMKERNEL
/* Load the two C elements with scalar update loads, fold f0-f3 into f0, */
/* pack the C values into one pair with fsmfp, then f1 = alpha * f0 + C. */
LFDUX A1, CO1, INC
LFDUX A2, CO1, INC
fpadd f0, f0, f1
fpadd f2, f3, f2
fsmfp A1, A2
fpadd f0, f0, f2
fxcpmadd f1, AP, f0, A1
/* f0 is reloaded from FZERO(SP) for the next tile; the result lives in f1. */
li r0, FZERO
lfpsx f0, SP, r0
/* INCM1 rewinds CO1 to the first element before storing both halves. */
STFDUX f1, CO1, INCM1
STFSDUX f1, CO1, INC
#else
/* TRMM: fold f0-f3 and scale by alpha into f1; C is not read. */
/* NOTE(review): A1/A2 are not loaded from C in this branch and the fsmfp */
/* result is not consumed by the visible code that follows - looks like a */
/* leftover from the GEMM branch; confirm before removing. */
fpadd f0, f0, f1
fpadd f2, f3, f2
fsmfp A1, A2
fpadd f0, f0, f2
fpmul f1, AP, f0
li r0, FZERO
lfpsx f0, SP, r0
STFDUX f1, CO1, INC
STFSDUX f1, CO1, INC
#endif
#ifdef TRMMKERNEL
#if ( defined(LEFT) && defined(TRANSA)) || \
(!defined(LEFT) && !defined(TRANSA))
/* Step AO/BO over the K - KK - (2 or 1) steps this tile did not use: */
/* 2 elements per step for A, 1 element per step for B. */
sub TEMP, K, KK
#ifdef LEFT
addi TEMP, TEMP, -2
#else
addi TEMP, TEMP, -1
#endif
slwi r0, TEMP, 1 + BASE_SHIFT
slwi TEMP, TEMP, 0 + BASE_SHIFT
add AO, AO, r0
add BO, BO, TEMP
#endif
#ifdef LEFT
/* LEFT build: KK advances by the two rows handled here. */
addi KK, KK, 2
#endif
#endif
.align 4
.L10120:
/* 1 x 1 tile: the last single element, computed as a dot product. Runs */
/* only when M is odd; otherwise go to the exit code. */
andi. I, M, 1
beq .L10999
#if defined(TRMMKERNEL)
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
/* BO starts at the head of B (biased by -2*SIZE); f1-f3 seeded from f0. */
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
#else
/* Skip the first KK steps: 1 element per step in both A and B. */
slwi TEMP, KK, 0 + BASE_SHIFT
slwi r0, KK, 0 + BASE_SHIFT
add AO, AO, TEMP
add BO, B, r0
fpmr f1, f0
addi BO, BO, - 2 * SIZE
fpmr f2, f0
fpmr f3, f0
#endif
/* TEMP = inner-product length: K - KK, or KK + 1 (same for both sides). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 1
#endif
/* CTR = TEMP / 8 unrolled passes; none -> straight to the remainder. */
srawi. r0, TEMP, 3
mtspr CTR, r0
ble .L10124
#else
/* Plain GEMM: CTR = K / 8 unrolled passes, B from its head. */
addi BO, B, - 2 * SIZE
fpmr f1, f0
fpmr f2, f0
fpmr f3, f0
srawi. r0, K, 3
mtspr CTR, r0
ble .L10124
#endif
/* Preload four A pairs and four B pairs (eight steps of each). */
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
/* Only one pass requested: go directly to the drain code. */
bdz- .L10123
.align 4
.L10122:
/* Steady-state pass: four paired multiply-adds (fpmadd works on both */
/* halves, so eight products per pass), each followed by its reloads. */
fpmadd f0, A1, B1, f0
LFPDUX A1, AO, INC2
LFPDUX B1, BO, INC2
fpmadd f1, A2, B2, f1
LFPDUX A2, AO, INC2
LFPDUX B2, BO, INC2
fpmadd f2, A3, B3, f2
LFPDUX A3, AO, INC2
LFPDUX B3, BO, INC2
fpmadd f3, A4, B4, f3
LFPDUX A4, AO, INC2
LFPDUX B4, BO, INC2
bdnz+ .L10122
.align 4
.L10123:
/* Drain pass: arithmetic only, all operands are already loaded. */
fpmadd f0, A1, B1, f0
fpmadd f1, A2, B2, f1
fpmadd f2, A3, B3, f2
fpmadd f3, A4, B4, f3
.align 4
.L10124:
/* Load alpha from the stack slot at ALPHA(SP). */
lfd AP, ALPHA(SP)
#ifdef TRMMKERNEL
fsmfp AP, AP
#endif
/* Remainder: the inner-product length modulo 8, one step per iteration. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
sub TEMP, K, KK
#elif defined(LEFT)
addi TEMP, KK, 1
#else
addi TEMP, KK, 1
#endif
andi. TEMP, TEMP, 7
mtspr CTR, TEMP
#else
andi. r0, K, 7
mtspr CTR, r0
#endif
ble+ .L10128
/* Both operands are read one element at a time; LFDX does not update */
/* the base register, so AO and BO are stepped by INC explicitly. */
LFDX A1, AO, INC2
LFDX B1, BO, INC2
add AO, AO, INC
add BO, BO, INC
bdz- .L10127
.align 4
.L10126:
/* Scalar multiply-add into f0. */
fmadd f0, A1, B1, f0
LFDX A1, AO, INC2
LFDX B1, BO, INC2
add AO, AO, INC
add BO, BO, INC
bdnz+ .L10126
.align 4
.L10127:
/* Last remainder step: operands are already in A1/B1. */
fmadd f0, A1, B1, f0
.align 4
.L10128:
#ifndef TRMMKERNEL
/* Load the C element (pointer not moved), fold f0-f3 into f0, then add */
/* the two halves of f0 together: fsmtp copies one half of f0 into f1 so */
/* the scalar fadd can combine them. Result = alpha * sum + C. */
LFDX A1, CO1, INC
fpadd f0, f0, f1
fpadd f2, f2, f3
fpadd f0, f0, f2
fsmtp f1, f0
fadd f0, f0, f1
fmadd f0, AP, f0, A1
STFDUX f0, CO1, INC
#else
/* TRMM: same reduction, result = alpha * sum; C is not read. */
fpadd f0, f0, f1
fpadd f2, f2, f3
fpadd f0, f0, f2
fsmtp f1, f0
fadd f0, f0, f1
fmul f0, AP, f0
STFDUX f0, CO1, INC
#endif
.align 4
.L10999:
/* Common exit: restore the saved registers from the stack frame and */
/* return. Presumably mirrors the save order of the prologue, which is */
/* outside this view - TODO confirm. */
/* lwzu adds 4 to SP before each load, so after the +12 bias the eighteen */
/* words for r14-r31 are read from SP+16 upward. */
addi SP, SP, 12
lwzu r14, 4(SP)
lwzu r15, 4(SP)
lwzu r16, 4(SP)
lwzu r17, 4(SP)
lwzu r18, 4(SP)
lwzu r19, 4(SP)
lwzu r20, 4(SP)
lwzu r21, 4(SP)
lwzu r22, 4(SP)
lwzu r23, 4(SP)
lwzu r24, 4(SP)
lwzu r25, 4(SP)
lwzu r26, 4(SP)
lwzu r27, 4(SP)
lwzu r28, 4(SP)
lwzu r29, 4(SP)
lwzu r30, 4(SP)
lwzu r31, 4(SP)
/* Drop the 12-byte bias again, then walk the floating-point save area: */
/* each lfpdux advances SP by r0 = 16 bytes and restores one register, */
/* from f31 down to f14. */
subi SP, SP, 12
li r0, 16
lfpdux f31, SP, r0
lfpdux f30, SP, r0
lfpdux f29, SP, r0
lfpdux f28, SP, r0
lfpdux f27, SP, r0
lfpdux f26, SP, r0
lfpdux f25, SP, r0
lfpdux f24, SP, r0
lfpdux f23, SP, r0
lfpdux f22, SP, r0
lfpdux f21, SP, r0
lfpdux f20, SP, r0
lfpdux f19, SP, r0
lfpdux f18, SP, r0
lfpdux f17, SP, r0
lfpdux f16, SP, r0
lfpdux f15, SP, r0
lfpdux f14, SP, r0
/* Step past the last restored slot and return to the caller. */
addi SP, SP, 16
blr
EPILOGUE
#endif