/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
/* ------------------------------------------------------------------ */
/* Register map used by the kernel below.                             */
/* ------------------------------------------------------------------ */
/* Problem sizes (integer registers). */
#define M $4
#define N $5
#define K $6
/* Base pointers of the A panel, the B panel and the output C. */
#define A $9
#define B $10
#define C $11
/* Column stride of C; the prologue loads it from the stack into $8. */
#define LDC $8
/* Running read pointers into A and B inside the inner loops. */
#define AO $12
#define BO $13
/* Loop counters: I over row tiles, J over column panels, L over K. */
#define I $2
#define J $3
#define L $7
/* CO1..CO8: write pointers for up to eight consecutive columns of C. */
#define CO1 $14
#define CO2 $15
#define CO3 $16
#define CO4 $17
#define CO5 $18
#define CO6 $19
#define CO7 $20
#define CO8 $21
/* Only defined for TRMMKERNEL builds; not referenced in the code below. */
#if defined(TRMMKERNEL)
#define OFFSET $22
#define KK $23
#define TEMP $24
#endif
/* Values loaded from A. */
#define a1 $f0
#define a2 $f1
#define a3 $f28
#define a4 $f29
/* Values loaded from B. */
#define b1 $f2
#define b2 $f3
#define b3 $f4
#define b4 $f5
#define b5 $f6
#define b6 $f7
#define b7 $f8
#define b8 $f9
/* a5 shares the register of b8 ($f9); used only in the 2-column path. */
#define a5 b8
/* Accumulators cXY: X = column (1..8) of the tile, Y = row (1..2). */
#define c11 $f10
#define c12 $f11
#define c21 $f12
#define c22 $f13
#define c31 $f14
#define c32 $f17
#define c41 $f18
#define c42 $f19
#define c51 $f20
#define c52 $f21
#define c61 $f22
#define c62 $f23
#define c71 $f24
#define c72 $f25
#define c81 $f26
#define c82 $f27
/* The two scaling factors applied at write-back; never written here. */
#define ALPHA_R $f15
#define ALPHA_I $f16
PROLOGUE
/* Reserve a 128-byte frame; offsets 0..88 hold the registers saved below. */
daddiu $sp, $sp, -128
/* $16-$21 are used as CO3..CO8, so the incoming values are preserved. */
SDARG $16, 0($sp)
SDARG $17, 8($sp)
SDARG $18, 16($sp)
SDARG $19, 24($sp)
SDARG $20, 32($sp)
SDARG $21, 40($sp)
/* $f24-$f27 are c71..c82 and $f28/$f29 are a3/a4. */
sdc1 $f24, 48($sp)
sdc1 $f25, 56($sp)
sdc1 $f26, 64($sp)
sdc1 $f27, 72($sp)
sdc1 $f28, 80($sp)
sdc1 $f29, 88($sp)
/* LDC is read from offset 128, i.e. the stack slot at the incoming $sp. */
LDARG LDC, 128($sp)
/* Scale LDC by ZBASE_SHIFT so it can be added directly to C pointers  */
/* (presumably elements -> bytes; ZBASE_SHIFT comes from common.h).    */
dsll LDC, LDC, ZBASE_SHIFT
/* J = N >> 3: number of full 8-column panels; none -> go to the tails. */
dsra J, N, 3
blez J, .L30
nop
/* Start of one 8-column panel: derive CO1..CO8 from C (one LDC apart), */
/* rewind AO to the start of A, advance C past the panel and zero the   */
/* first accumulators. The integer and FP instructions are interleaved. */
.L10:
move CO1, C
MTC $0, c11
daddu CO2, C, LDC
move AO, A
daddu CO3, CO2, LDC
daddiu J, J, -1
daddu CO4, CO3, LDC
MOV c21, c11
daddu CO5, CO4, LDC
MOV c31, c11
daddu CO6, CO5, LDC
MOV c41, c11
daddu CO7, CO6, LDC
MOV c51, c11
daddu CO8, CO7, LDC
/* I = M >> 1: number of 2-row tiles in this panel. */
dsra I, M, 1
daddu C, CO8, LDC
blez I, .L20
/* Branch delay slot: executed whether or not the branch is taken. */
MOV c61, c11
/* 2x8 tile setup: finish zeroing the 16 accumulators, preload the first */
/* A/B operands and compute L = K >> 2 (the main loop handles four k     */
/* steps per iteration).                                                 */
.L11:
LD a1, 0 * SIZE(AO)
MOV c71, c11
LD b1, 0 * SIZE(B)
MOV c81, c11
LD a3, 4 * SIZE(AO)
MOV c12, c11
LD b2, 1 * SIZE(B)
MOV c22, c11
dsra L, K, 2
MOV c32, c11
LD b3, 2 * SIZE(B)
MOV c42, c11
LD b4, 3 * SIZE(B)
MOV c52, c11
LD b5, 4 * SIZE(B)
MOV c62, c11
LD b6, 8 * SIZE(B)
MOV c72, c11
LD b7, 12 * SIZE(B)
MOV c82, c11
/* Fewer than four k steps: go straight to the remainder loop. */
blez L, .L15
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
/* Pipeline priming: the row-1 (a1) products for columns 1-4 of the     */
/* first k step are issued here; .L12/.L13 continue with the a2 half.   */
MADD c11, c11, a1, b1
LD a2, 1 * SIZE(AO)
MADD c21, c21, a1, b2
daddiu L, L, -1
MADD c31, c31, a1, b3
/* Only one unrolled block to do: skip the loop, run the drain at .L13. */
blez L, .L13
MADD c41, c41, a1, b4
NOP
.align 3
/* Main 2x8 loop, four k steps per iteration, software pipelined: on     */
/* entry the a1 x (b1..b4) products of step 0 are already accumulated.   */
/* Per iteration AO advances 8*SIZE and BO advances 32*SIZE. b2/b3/b4    */
/* are reused for columns 2-4 and 6-8 and reloaded between the halves.   */
.L12:
/* step 0, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b1
LD b1, 16 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 5 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 6 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 7 * SIZE(BO)
/* step 0, row 1 (a1), columns 5-8 */
MADD c51, c51, a1, b5
LD a4, 2 * SIZE(AO)
MADD c61, c61, a1, b2
NOP
MADD c71, c71, a1, b3
NOP
MADD c81, c81, a1, b4
LD a1, 8 * SIZE(AO)
/* step 0, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b5
LD b5, 20 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 9 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 10 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 11 * SIZE(BO)
/* step 1, row 1 (a4), columns 1-4 */
MADD c11, c11, a4, b6
LD a2, 3 * SIZE(AO)
MADD c21, c21, a4, b2
NOP
MADD c31, c31, a4, b3
NOP
MADD c41, c41, a4, b4
NOP
/* step 1, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b6
LD b6, 24 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 13 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 14 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 15 * SIZE(BO)
/* step 1, row 1 (a4), columns 5-8 */
MADD c51, c51, a4, b7
NOP
MADD c61, c61, a4, b2
NOP
MADD c71, c71, a4, b3
NOP
MADD c81, c81, a4, b4
NOP
/* step 1, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b7
LD b7, 28 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 17 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 18 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 19 * SIZE(BO)
/* step 2, row 1 (a3), columns 1-4 */
MADD c11, c11, a3, b1
LD a2, 5 * SIZE(AO)
MADD c21, c21, a3, b2
NOP
MADD c31, c31, a3, b3
NOP
MADD c41, c41, a3, b4
NOP
/* step 2, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b1
LD b1, 32 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 21 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 22 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 23 * SIZE(BO)
/* step 2, row 1 (a3), columns 5-8 */
MADD c51, c51, a3, b5
LD a4, 6 * SIZE(AO)
MADD c61, c61, a3, b2
NOP
MADD c71, c71, a3, b3
NOP
MADD c81, c81, a3, b4
LD a3, 12 * SIZE(AO)
/* step 2, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b5
LD b5, 36 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 25 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 26 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 27 * SIZE(BO)
/* step 3, row 1 (a4), columns 1-4 */
MADD c11, c11, a4, b6
LD a2, 7 * SIZE(AO)
MADD c21, c21, a4, b2
NOP
MADD c31, c31, a4, b3
NOP
MADD c41, c41, a4, b4
daddiu L, L, -1
/* step 3, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b6
LD b6, 40 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 29 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 30 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 31 * SIZE(BO)
/* step 3, row 1 (a4), columns 5-8; the pointers advance here, so the   */
/* loads that follow use offsets relative to the next block.            */
MADD c51, c51, a4, b7
daddiu BO, BO, 32 * SIZE
MADD c61, c61, a4, b2
daddiu AO, AO, 8 * SIZE
MADD c71, c71, a4, b3
NOP
MADD c81, c81, a4, b4
NOP
/* step 3, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b7
LD b7, 12 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 1 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 2 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 3 * SIZE(BO)
/* Next block's step 0, row 1 (a1), columns 1-4 - mirrors the priming   */
/* code that precedes the loop.                                         */
MADD c11, c11, a1, b1
LD a2, 1 * SIZE(AO)
MADD c21, c21, a1, b2
NOP
MADD c31, c31, a1, b3
bgtz L, .L12
/* Delay slot: executed on every pass, including the final one. */
MADD c41, c41, a1, b4
NOP
.align 3
/* Pipeline drain for the 2x8 tile: same four k steps as one .L12       */
/* iteration, but without the trailing a1 x (b1..b4) products, so the   */
/* remainder loop at .L15/.L16 can start from a clean k step.           */
.L13:
/* step 0, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b1
LD b1, 16 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 5 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 6 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 7 * SIZE(BO)
/* step 0, row 1 (a1), columns 5-8 */
MADD c51, c51, a1, b5
NOP
MADD c61, c61, a1, b2
LD a4, 2 * SIZE(AO)
MADD c71, c71, a1, b3
NOP
MADD c81, c81, a1, b4
LD a1, 8 * SIZE(AO)
/* step 0, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b5
LD b5, 20 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 9 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 10 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 11 * SIZE(BO)
/* step 1, row 1 (a4), columns 1-4 */
MADD c11, c11, a4, b6
LD a2, 3 * SIZE(AO)
MADD c21, c21, a4, b2
NOP
MADD c31, c31, a4, b3
NOP
MADD c41, c41, a4, b4
NOP
/* step 1, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b6
LD b6, 24 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 13 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 14 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 15 * SIZE(BO)
/* step 1, row 1 (a4), columns 5-8 */
MADD c51, c51, a4, b7
NOP
MADD c61, c61, a4, b2
NOP
MADD c71, c71, a4, b3
NOP
MADD c81, c81, a4, b4
NOP
/* step 1, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b7
LD b7, 28 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 17 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 18 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 19 * SIZE(BO)
/* step 2, row 1 (a3), columns 1-4 */
MADD c11, c11, a3, b1
LD a2, 5 * SIZE(AO)
MADD c21, c21, a3, b2
NOP
MADD c31, c31, a3, b3
NOP
MADD c41, c41, a3, b4
NOP
/* step 2, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b1
LD b1, 32 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 21 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 22 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 23 * SIZE(BO)
/* step 2, row 1 (a3), columns 5-8 */
MADD c51, c51, a3, b5
NOP
MADD c61, c61, a3, b2
LD a4, 6 * SIZE(AO)
MADD c71, c71, a3, b3
NOP
MADD c81, c81, a3, b4
LD a3, 12 * SIZE(AO)
/* step 2, row 2 (a2), columns 5-8 */
MADD c52, c52, a2, b5
LD b5, 36 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 25 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 26 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 27 * SIZE(BO)
/* step 3, row 1 (a4), columns 1-4 */
MADD c11, c11, a4, b6
LD a2, 7 * SIZE(AO)
MADD c21, c21, a4, b2
NOP
MADD c31, c31, a4, b3
NOP
MADD c41, c41, a4, b4
NOP
/* step 3, row 2 (a2), columns 1-4 */
MADD c12, c12, a2, b6
LD b6, 40 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 29 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 30 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 31 * SIZE(BO)
/* step 3, row 1 (a4), columns 5-8; AO/BO move to the next k block. */
MADD c51, c51, a4, b7
daddiu BO, BO, 32 * SIZE
MADD c61, c61, a4, b2
daddiu AO, AO, 8 * SIZE
MADD c71, c71, a4, b3
NOP
MADD c81, c81, a4, b4
NOP
/* step 3, row 2 (a2), columns 5-8; b2..b4/b7 are refilled for .L16. */
MADD c52, c52, a2, b7
LD b7, 12 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 1 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 2 * SIZE(BO)
MADD c82, c82, a2, b4
LD b4, 3 * SIZE(BO)
.align 3
/* K remainder for the 2x8 tile: L = K & 3 single k steps. */
.L15:
andi L, K, 3
NOP
blez L, .L18
NOP
.align 3
/* One k step: a1/a2 (two rows) times eight B values. Expects a1 and    */
/* b1..b5 already loaded; AO advances 2*SIZE and BO 8*SIZE per pass.    */
.L16:
MADD c11, c11, a1, b1
LD a2, 1 * SIZE(AO)
MADD c21, c21, a1, b2
NOP
MADD c31, c31, a1, b3
NOP
MADD c41, c41, a1, b4
NOP
MADD c12, c12, a2, b1
LD b1, 8 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 5 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 6 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 7 * SIZE(BO)
MADD c51, c51, a1, b5
daddiu L, L, -1
MADD c61, c61, a1, b2
daddiu AO, AO, 2 * SIZE
MADD c71, c71, a1, b3
daddiu BO, BO, 8 * SIZE
MADD c81, c81, a1, b4
/* AO/BO were advanced above, so these offsets address the next k step. */
LD a1, 0 * SIZE(AO)
MADD c52, c52, a2, b5
LD b5, 4 * SIZE(BO)
MADD c62, c62, a2, b2
LD b2, 1 * SIZE(BO)
MADD c72, c72, a2, b3
LD b3, 2 * SIZE(BO)
MADD c82, c82, a2, b4
bgtz L, .L16
/* Delay slot. */
LD b4, 3 * SIZE(BO)
/* Write-back of the 2x8 tile. For each column pointer COn four         */
/* consecutive C values are updated: slots 0/1 from cn1 and slots 2/3   */
/* from cn2, using ALPHA_R for the even slot and ALPHA_I for the odd    */
/* one. $f0-$f7 (a1, a2, b1..b6) are reused as temporaries for C.       */
/* MADD d, s, x, y is expected to yield d = s + x*y (macro defined in   */
/* common.h - confirm there). Loads, MADDs and stores of neighbouring   */
/* columns are interleaved.                                             */
.L18:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 2 * SIZE(CO1)
LD $f3, 3 * SIZE(CO1)
LD $f4, 0 * SIZE(CO2)
MADD $f0, $f0, ALPHA_R, c11
LD $f5, 1 * SIZE(CO2)
MADD $f1, $f1, ALPHA_I, c11
LD $f6, 2 * SIZE(CO2)
MADD $f2, $f2, ALPHA_R, c12
LD $f7, 3 * SIZE(CO2)
MADD $f3, $f3, ALPHA_I, c12
MADD $f4, $f4, ALPHA_R, c21
ST $f0, 0 * SIZE(CO1)
MADD $f5, $f5, ALPHA_I, c21
ST $f1, 1 * SIZE(CO1)
MADD $f6, $f6, ALPHA_R, c22
ST $f2, 2 * SIZE(CO1)
MADD $f7, $f7, ALPHA_I, c22
ST $f3, 3 * SIZE(CO1)
/* Columns 3 and 4. */
LD $f0, 0 * SIZE(CO3)
LD $f1, 1 * SIZE(CO3)
LD $f2, 2 * SIZE(CO3)
LD $f3, 3 * SIZE(CO3)
ST $f4, 0 * SIZE(CO2)
ST $f5, 1 * SIZE(CO2)
ST $f6, 2 * SIZE(CO2)
ST $f7, 3 * SIZE(CO2)
LD $f4, 0 * SIZE(CO4)
LD $f5, 1 * SIZE(CO4)
LD $f6, 2 * SIZE(CO4)
LD $f7, 3 * SIZE(CO4)
MADD $f0, $f0, ALPHA_R, c31
MADD $f1, $f1, ALPHA_I, c31
MADD $f2, $f2, ALPHA_R, c32
MADD $f3, $f3, ALPHA_I, c32
MADD $f4, $f4, ALPHA_R, c41
ST $f0, 0 * SIZE(CO3)
MADD $f5, $f5, ALPHA_I, c41
ST $f1, 1 * SIZE(CO3)
MADD $f6, $f6, ALPHA_R, c42
ST $f2, 2 * SIZE(CO3)
MADD $f7, $f7, ALPHA_I, c42
ST $f3, 3 * SIZE(CO3)
/* Columns 5 and 6; CO1..CO4 are finished and step to the next tile. */
LD $f0, 0 * SIZE(CO5)
LD $f1, 1 * SIZE(CO5)
LD $f2, 2 * SIZE(CO5)
LD $f3, 3 * SIZE(CO5)
ST $f4, 0 * SIZE(CO4)
ST $f5, 1 * SIZE(CO4)
ST $f6, 2 * SIZE(CO4)
ST $f7, 3 * SIZE(CO4)
LD $f4, 0 * SIZE(CO6)
LD $f5, 1 * SIZE(CO6)
LD $f6, 2 * SIZE(CO6)
LD $f7, 3 * SIZE(CO6)
MADD $f0, $f0, ALPHA_R, c51
daddiu CO1,CO1, 4 * SIZE
MADD $f1, $f1, ALPHA_I, c51
daddiu CO2,CO2, 4 * SIZE
MADD $f2, $f2, ALPHA_R, c52
daddiu CO3,CO3, 4 * SIZE
MADD $f3, $f3, ALPHA_I, c52
daddiu CO4,CO4, 4 * SIZE
MADD $f4, $f4, ALPHA_R, c61
ST $f0, 0 * SIZE(CO5)
MADD $f5, $f5, ALPHA_I, c61
ST $f1, 1 * SIZE(CO5)
MADD $f6, $f6, ALPHA_R, c62
ST $f2, 2 * SIZE(CO5)
MADD $f7, $f7, ALPHA_I, c62
ST $f3, 3 * SIZE(CO5)
/* Columns 7 and 8; the tile counter is decremented and c11 is re-zeroed */
/* for the next tile while the last results are still being stored.      */
LD $f0, 0 * SIZE(CO7)
LD $f1, 1 * SIZE(CO7)
LD $f2, 2 * SIZE(CO7)
LD $f3, 3 * SIZE(CO7)
ST $f4, 0 * SIZE(CO6)
ST $f5, 1 * SIZE(CO6)
ST $f6, 2 * SIZE(CO6)
ST $f7, 3 * SIZE(CO6)
LD $f4, 0 * SIZE(CO8)
daddiu I, I, -1
LD $f5, 1 * SIZE(CO8)
MTC $0, c11
LD $f6, 2 * SIZE(CO8)
LD $f7, 3 * SIZE(CO8)
MADD $f0, $f0, ALPHA_R, c71
daddiu CO5,CO5, 4 * SIZE
MADD $f1, $f1, ALPHA_I, c71
daddiu CO6,CO6, 4 * SIZE
MADD $f2, $f2, ALPHA_R, c72
daddiu CO7,CO7, 4 * SIZE
MADD $f3, $f3, ALPHA_I, c72
daddiu CO8,CO8, 4 * SIZE
/* CO7/CO8 have already been advanced, hence the negative offsets. */
MADD $f4, $f4, ALPHA_R, c81
ST $f0, -4 * SIZE(CO7)
MADD $f5, $f5, ALPHA_I, c81
ST $f1, -3 * SIZE(CO7)
MADD $f6, $f6, ALPHA_R, c82
ST $f2, -2 * SIZE(CO7)
MADD $f7, $f7, ALPHA_I, c82
ST $f3, -1 * SIZE(CO7)
ST $f4, -4 * SIZE(CO8)
MOV c21, c11
ST $f5, -3 * SIZE(CO8)
MOV c31, c11
ST $f6, -2 * SIZE(CO8)
MOV c41, c11
ST $f7, -1 * SIZE(CO8)
MOV c51, c11
/* More 2-row tiles left in this panel? */
bgtz I, .L11
/* Delay slot. */
MOV c61, c11
.align 3
/* Odd last row of the 8-column panel (M & 1): 1x8 tile. Only the cX1   */
/* accumulators are used; c11..c51 are already zero on both paths that  */
/* reach this label, c61..c81 are zeroed here.                          */
.L20:
andi I, M, 1
MOV c61, c11
blez I, .L29
/* Delay slot. */
MOV c71, c11
/* Four consecutive A values = four k steps of the single row. */
LD a1, 0 * SIZE(AO)
LD a2, 1 * SIZE(AO)
LD a3, 2 * SIZE(AO)
LD a4, 3 * SIZE(AO)
LD b1, 0 * SIZE(B)
LD b2, 1 * SIZE(B)
LD b3, 2 * SIZE(B)
LD b4, 3 * SIZE(B)
LD b5, 4 * SIZE(B)
LD b6, 8 * SIZE(B)
LD b7, 12 * SIZE(B)
dsra L, K, 2
MOV c81, c11
blez L, .L25
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 1x8 main loop, four k steps per iteration (a1, a2, a3, a4 in turn).  */
/* AO advances 4*SIZE and BO 32*SIZE per iteration; the pointer updates */
/* happen mid-iteration, so later offsets are relative to the new base. */
.L22:
/* step 0 (a1) */
MADD c11, c11, a1, b1
LD b1, 16 * SIZE(BO)
MADD c21, c21, a1, b2
LD b2, 5 * SIZE(BO)
MADD c31, c31, a1, b3
LD b3, 6 * SIZE(BO)
MADD c41, c41, a1, b4
LD b4, 7 * SIZE(BO)
MADD c51, c51, a1, b5
LD b5, 20 * SIZE(BO)
MADD c61, c61, a1, b2
LD b2, 9 * SIZE(BO)
MADD c71, c71, a1, b3
LD b3, 10 * SIZE(BO)
MADD c81, c81, a1, b4
LD b4, 11 * SIZE(BO)
LD a1, 4 * SIZE(AO)
daddiu L, L, -1
/* step 1 (a2) */
MADD c11, c11, a2, b6
LD b6, 24 * SIZE(BO)
MADD c21, c21, a2, b2
LD b2, 13 * SIZE(BO)
MADD c31, c31, a2, b3
LD b3, 14 * SIZE(BO)
MADD c41, c41, a2, b4
LD b4, 15 * SIZE(BO)
MADD c51, c51, a2, b7
LD b7, 28 * SIZE(BO)
MADD c61, c61, a2, b2
LD b2, 17 * SIZE(BO)
MADD c71, c71, a2, b3
LD b3, 18 * SIZE(BO)
MADD c81, c81, a2, b4
LD b4, 19 * SIZE(BO)
LD a2, 5 * SIZE(AO)
daddiu AO, AO, 4 * SIZE
/* step 2 (a3) */
MADD c11, c11, a3, b1
LD b1, 32 * SIZE(BO)
MADD c21, c21, a3, b2
LD b2, 21 * SIZE(BO)
MADD c31, c31, a3, b3
LD b3, 22 * SIZE(BO)
MADD c41, c41, a3, b4
LD b4, 23 * SIZE(BO)
MADD c51, c51, a3, b5
LD b5, 36 * SIZE(BO)
MADD c61, c61, a3, b2
LD b2, 25 * SIZE(BO)
MADD c71, c71, a3, b3
LD b3, 26 * SIZE(BO)
MADD c81, c81, a3, b4
LD b4, 27 * SIZE(BO)
LD a3, 2 * SIZE(AO)
daddiu BO, BO, 32 * SIZE
/* step 3 (a4); negative offsets reach back into the block just left. */
MADD c11, c11, a4, b6
LD b6, 8 * SIZE(BO)
MADD c21, c21, a4, b2
LD b2, -3 * SIZE(BO)
MADD c31, c31, a4, b3
LD b3, -2 * SIZE(BO)
MADD c41, c41, a4, b4
LD b4, -1 * SIZE(BO)
MADD c51, c51, a4, b7
LD b7, 12 * SIZE(BO)
MADD c61, c61, a4, b2
LD b2, 1 * SIZE(BO)
MADD c71, c71, a4, b3
LD b3, 2 * SIZE(BO)
MADD c81, c81, a4, b4
LD b4, 3 * SIZE(BO)
bgtz L, .L22
/* Delay slot. */
LD a4, 3 * SIZE(AO)
.align 3
/* K remainder for the 1x8 tile: L = K & 3 single k steps. */
.L25:
andi L, K, 3
NOP
blez L, .L28
NOP
.align 3
/* One k step: a1 times eight B values; AO advances 1*SIZE, BO 8*SIZE. */
.L26:
MADD c11, c11, a1, b1
LD b1, 8 * SIZE(BO)
MADD c21, c21, a1, b2
LD b2, 5 * SIZE(BO)
MADD c31, c31, a1, b3
LD b3, 6 * SIZE(BO)
MADD c41, c41, a1, b4
LD b4, 7 * SIZE(BO)
daddiu L, L, -1
/* Register-to-itself move: changes no value (apparently a filler). */
MOV a2, a2
daddiu AO, AO, 1 * SIZE
daddiu BO, BO, 8 * SIZE
MADD c51, c51, a1, b5
LD b5, 4 * SIZE(BO)
MADD c61, c61, a1, b2
LD b2, 1 * SIZE(BO)
MADD c71, c71, a1, b3
LD b3, 2 * SIZE(BO)
MADD c81, c81, a1, b4
LD a1, 0 * SIZE(AO)
bgtz L, .L26
/* Delay slot. */
LD b4, 3 * SIZE(BO)
/* Write-back of the 1x8 tile: for each of CO1..CO8 two consecutive C   */
/* values are updated from one accumulator, ALPHA_R*c into slot 0 and   */
/* ALPHA_I*c into slot 1. $f0-$f7 serve as temporaries.                 */
.L28:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 0 * SIZE(CO2)
LD $f3, 1 * SIZE(CO2)
LD $f4, 0 * SIZE(CO3)
MADD $f0, $f0, ALPHA_R, c11
LD $f5, 1 * SIZE(CO3)
MADD $f1, $f1, ALPHA_I, c11
LD $f6, 0 * SIZE(CO4)
MADD $f2, $f2, ALPHA_R, c21
LD $f7, 1 * SIZE(CO4)
MADD $f3, $f3, ALPHA_I, c21
MADD $f4, $f4, ALPHA_R, c31
ST $f0, 0 * SIZE(CO1)
MADD $f5, $f5, ALPHA_I, c31
ST $f1, 1 * SIZE(CO1)
MADD $f6, $f6, ALPHA_R, c41
ST $f2, 0 * SIZE(CO2)
MADD $f7, $f7, ALPHA_I, c41
ST $f3, 1 * SIZE(CO2)
/* Columns 5-8. */
LD $f0, 0 * SIZE(CO5)
LD $f1, 1 * SIZE(CO5)
LD $f2, 0 * SIZE(CO6)
LD $f3, 1 * SIZE(CO6)
ST $f4, 0 * SIZE(CO3)
ST $f5, 1 * SIZE(CO3)
ST $f6, 0 * SIZE(CO4)
ST $f7, 1 * SIZE(CO4)
LD $f4, 0 * SIZE(CO7)
MADD $f0, $f0, ALPHA_R, c51
LD $f5, 1 * SIZE(CO7)
MADD $f1, $f1, ALPHA_I, c51
LD $f6, 0 * SIZE(CO8)
MADD $f2, $f2, ALPHA_R, c61
LD $f7, 1 * SIZE(CO8)
MADD $f3, $f3, ALPHA_I, c61
MADD $f4, $f4, ALPHA_R, c71
ST $f0, 0 * SIZE(CO5)
MADD $f5, $f5, ALPHA_I, c71
ST $f1, 1 * SIZE(CO5)
MADD $f6, $f6, ALPHA_R, c81
ST $f2, 0 * SIZE(CO6)
MADD $f7, $f7, ALPHA_I, c81
ST $f3, 1 * SIZE(CO6)
ST $f4, 0 * SIZE(CO7)
ST $f5, 1 * SIZE(CO7)
ST $f6, 0 * SIZE(CO8)
ST $f7, 1 * SIZE(CO8)
.align 3
/* End of one 8-column panel: loop while J > 0. B takes over BO (the    */
/* position the inner loops reached) in the delay slot on every pass.   */
.L29:
bgtz J, .L10
move B, BO
.align 3
/* 4-column tail (N & 4): same structure as the 8-column panel, using   */
/* CO1..CO4 and accumulators c11..c42.                                  */
.L30:
andi J, N, 4
blez J, .L50
/* Delay slot: executed on both paths; rewinds AO to the start of A. */
move AO, A
move CO1, C
MTC $0, c11
daddu CO2, C, LDC
daddu CO3, CO2, LDC
daddu CO4, CO3, LDC
MOV c21, c11
daddu C, CO4, LDC
MOV c31, c11
/* I = M >> 1: number of 2-row tiles. */
dsra I, M, 1
blez I, .L40
/* Delay slot. */
MOV c41, c11
/* 2x4 tile setup: zero the row-2 accumulators, preload A/B operands    */
/* and compute L = K >> 2.                                              */
.L31:
LD a1, 0 * SIZE(AO)
LD a3, 4 * SIZE(AO)
LD b1, 0 * SIZE(B)
MOV c12, c11
LD b2, 1 * SIZE(B)
MOV c22, c11
LD b3, 2 * SIZE(B)
MOV c32, c11
LD b4, 3 * SIZE(B)
MOV c42, c11
LD b5, 4 * SIZE(B)
dsra L, K, 2
LD b6, 8 * SIZE(B)
LD b7, 12 * SIZE(B)
blez L, .L35
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 2x4 main loop, four k steps per iteration. Row 1 uses a1 (steps 0/1) */
/* and a3 (steps 2/3), row 2 always uses a2. AO advances 8*SIZE and BO  */
/* 16*SIZE per iteration.                                               */
.L32:
/* step 0 */
MADD c11, c11, a1, b1
LD a2, 1 * SIZE(AO)
MADD c21, c21, a1, b2
daddiu L, L, -1
MADD c31, c31, a1, b3
NOP
MADD c41, c41, a1, b4
LD a1, 2 * SIZE(AO)
MADD c12, c12, a2, b1
LD b1, 16 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 5 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 6 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 7 * SIZE(BO)
/* step 1 */
MADD c11, c11, a1, b5
LD a2, 3 * SIZE(AO)
MADD c21, c21, a1, b2
NOP
MADD c31, c31, a1, b3
NOP
MADD c41, c41, a1, b4
LD a1, 8 * SIZE(AO)
MADD c12, c12, a2, b5
LD b5, 20 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 9 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 10 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 11 * SIZE(BO)
/* step 2 */
MADD c11, c11, a3, b6
LD a2, 5 * SIZE(AO)
MADD c21, c21, a3, b2
NOP
MADD c31, c31, a3, b3
NOP
MADD c41, c41, a3, b4
LD a3, 6 * SIZE(AO)
MADD c12, c12, a2, b6
LD b6, 24 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 13 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 14 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 15 * SIZE(BO)
/* step 3; AO/BO advance here, later offsets refer to the next block. */
MADD c11, c11, a3, b7
LD a2, 7 * SIZE(AO)
MADD c21, c21, a3, b2
daddiu AO, AO, 8 * SIZE
MADD c31, c31, a3, b3
daddiu BO, BO, 16 * SIZE
MADD c41, c41, a3, b4
LD a3, 4 * SIZE(AO)
MADD c12, c12, a2, b7
LD b7, 12 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 1 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 2 * SIZE(BO)
MADD c42, c42, a2, b4
NOP
bgtz L, .L32
/* Delay slot. */
LD b4, 3 * SIZE(BO)
.align 3
/* K remainder for the 2x4 tile: L = K & 3 single k steps. */
.L35:
andi L, K, 3
NOP
blez L, .L38
NOP
.align 3
/* One k step: a1/a2 times b1..b4. AO advances 2*SIZE inside the body;  */
/* BO advances 4*SIZE in the delay slot, so the B loads above it use    */
/* offsets 4..7 relative to the old BO.                                 */
.L36:
MADD c11, c11, a1, b1
LD a2, 1 * SIZE(AO)
MADD c21, c21, a1, b2
daddiu L, L, -1
MADD c31, c31, a1, b3
daddiu AO, AO, 2 * SIZE
MADD c41, c41, a1, b4
LD a1, 0 * SIZE(AO)
MADD c12, c12, a2, b1
LD b1, 4 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 5 * SIZE(BO)
MADD c32, c32, a2, b3
LD b3, 6 * SIZE(BO)
MADD c42, c42, a2, b4
LD b4, 7 * SIZE(BO)
bgtz L, .L36
daddiu BO, BO, 4 * SIZE
/* Write-back of the 2x4 tile: four C values per column pointer         */
/* (slots 0/1 from cn1, slots 2/3 from cn2; ALPHA_R for even slots,     */
/* ALPHA_I for odd slots). Then advance CO1..CO4, re-zero c11..c41 and  */
/* loop over the remaining 2-row tiles.                                 */
.L38:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 2 * SIZE(CO1)
LD $f3, 3 * SIZE(CO1)
LD $f4, 0 * SIZE(CO2)
LD $f5, 1 * SIZE(CO2)
LD $f6, 2 * SIZE(CO2)
LD $f7, 3 * SIZE(CO2)
MADD $f0, $f0, ALPHA_R, c11
MADD $f1, $f1, ALPHA_I, c11
MADD $f2, $f2, ALPHA_R, c12
MADD $f3, $f3, ALPHA_I, c12
MADD $f4, $f4, ALPHA_R, c21
ST $f0, 0 * SIZE(CO1)
MADD $f5, $f5, ALPHA_I, c21
ST $f1, 1 * SIZE(CO1)
MADD $f6, $f6, ALPHA_R, c22
ST $f2, 2 * SIZE(CO1)
MADD $f7, $f7, ALPHA_I, c22
ST $f3, 3 * SIZE(CO1)
/* Columns 3 and 4. */
LD $f0, 0 * SIZE(CO3)
LD $f1, 1 * SIZE(CO3)
LD $f2, 2 * SIZE(CO3)
LD $f3, 3 * SIZE(CO3)
ST $f4, 0 * SIZE(CO2)
MADD $f0, $f0, ALPHA_R, c31
ST $f5, 1 * SIZE(CO2)
MADD $f1, $f1, ALPHA_I, c31
ST $f6, 2 * SIZE(CO2)
MADD $f2, $f2, ALPHA_R, c32
ST $f7, 3 * SIZE(CO2)
MADD $f3, $f3, ALPHA_I, c32
LD $f4, 0 * SIZE(CO4)
LD $f5, 1 * SIZE(CO4)
LD $f6, 2 * SIZE(CO4)
LD $f7, 3 * SIZE(CO4)
MADD $f4, $f4, ALPHA_R, c41
daddiu CO1,CO1, 4 * SIZE
MADD $f5, $f5, ALPHA_I, c41
daddiu CO2,CO2, 4 * SIZE
MADD $f6, $f6, ALPHA_R, c42
daddiu CO3,CO3, 4 * SIZE
MADD $f7, $f7, ALPHA_I, c42
daddiu CO4,CO4, 4 * SIZE
/* CO3/CO4 have already been advanced, hence the negative offsets. */
ST $f0, -4 * SIZE(CO3)
daddiu I, I, -1
ST $f1, -3 * SIZE(CO3)
ST $f2, -2 * SIZE(CO3)
ST $f3, -1 * SIZE(CO3)
ST $f4, -4 * SIZE(CO4)
MTC $0, c11
ST $f5, -3 * SIZE(CO4)
MOV c21, c11
ST $f6, -2 * SIZE(CO4)
MOV c31, c11
ST $f7, -1 * SIZE(CO4)
bgtz I, .L31
/* Delay slot. */
MOV c41, c11
.align 3
/* Odd last row of the 4-column tail (M & 1): 1x4 tile using c11..c41,  */
/* which are already zero on both paths that reach this label.          */
.L40:
andi I, M, 1
blez I, .L49
/* Delay slot. c61/c71/c81 are zeroed here but not read by the 1x4 code. */
MOV c61, c11
LD a1, 0 * SIZE(AO)
MOV c71, c11
LD a2, 1 * SIZE(AO)
MOV c81, c11
LD b1, 0 * SIZE(B)
LD b2, 1 * SIZE(B)
LD b3, 2 * SIZE(B)
LD b4, 3 * SIZE(B)
LD b5, 4 * SIZE(B)
LD b6, 8 * SIZE(B)
LD b7, 12 * SIZE(B)
dsra L, K, 2
blez L, .L45
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 1x4 main loop, four k steps per iteration. a1 carries step 0, a2 is  */
/* reloaded for each of steps 1-3. AO advances 4*SIZE, BO 16*SIZE.      */
.L42:
/* step 0 (a1) */
MADD c11, c11, a1, b1
LD b1, 16 * SIZE(BO)
MADD c21, c21, a1, b2
LD b2, 5 * SIZE(BO)
MADD c31, c31, a1, b3
LD b3, 6 * SIZE(BO)
MADD c41, c41, a1, b4
LD b4, 7 * SIZE(BO)
LD a1, 4 * SIZE(AO)
daddiu L, L, -1
/* step 1 (a2 = A[1]) */
MADD c11, c11, a2, b5
LD b5, 20 * SIZE(BO)
MADD c21, c21, a2, b2
LD b2, 9 * SIZE(BO)
MADD c31, c31, a2, b3
LD b3, 10 * SIZE(BO)
MADD c41, c41, a2, b4
LD b4, 11 * SIZE(BO)
LD a2, 2 * SIZE(AO)
daddiu AO, AO, 4 * SIZE
/* step 2 (a2 = A[2]) */
MADD c11, c11, a2, b6
LD b6, 24 * SIZE(BO)
MADD c21, c21, a2, b2
LD b2, 13 * SIZE(BO)
MADD c31, c31, a2, b3
LD b3, 14 * SIZE(BO)
MADD c41, c41, a2, b4
LD b4, 15 * SIZE(BO)
/* AO was already advanced by 4, so offset -1 is element 3 of the block. */
LD a2, -1 * SIZE(AO)
daddiu BO, BO, 16 * SIZE
/* step 3 (a2 = A[3]) */
MADD c11, c11, a2, b7
LD b7, 12 * SIZE(BO)
MADD c21, c21, a2, b2
LD b2, 1 * SIZE(BO)
MADD c31, c31, a2, b3
LD b3, 2 * SIZE(BO)
MADD c41, c41, a2, b4
LD b4, 3 * SIZE(BO)
bgtz L, .L42
/* Delay slot. */
LD a2, 1 * SIZE(AO)
.align 3
/* K remainder for the 1x4 tile: L = K & 3 single k steps. */
.L45:
andi L, K, 3
NOP
blez L, .L48
NOP
.align 3
/* One k step: a1 times b1..b4; AO advances 1*SIZE, BO 4*SIZE. */
.L46:
MADD c11, c11, a1, b1
LD b1, 4 * SIZE(BO)
MADD c21, c21, a1, b2
LD b2, 5 * SIZE(BO)
MADD c31, c31, a1, b3
LD b3, 6 * SIZE(BO)
MADD c41, c41, a1, b4
LD a1, 1 * SIZE(AO)
LD b4, 7 * SIZE(BO)
daddiu L, L, -1
daddiu AO, AO, 1 * SIZE
/* Register-to-itself move: changes no value (apparently a filler). */
MOV a2, a2
bgtz L, .L46
/* Delay slot. */
daddiu BO, BO, 4 * SIZE
/* Write-back of the 1x4 tile: two C values per column pointer,         */
/* ALPHA_R*c into slot 0 and ALPHA_I*c into slot 1.                     */
.L48:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 0 * SIZE(CO2)
LD $f3, 1 * SIZE(CO2)
LD $f4, 0 * SIZE(CO3)
MADD $f0, $f0, ALPHA_R, c11
LD $f5, 1 * SIZE(CO3)
MADD $f1, $f1, ALPHA_I, c11
LD $f6, 0 * SIZE(CO4)
MADD $f2, $f2, ALPHA_R, c21
LD $f7, 1 * SIZE(CO4)
MADD $f3, $f3, ALPHA_I, c21
MADD $f4, $f4, ALPHA_R, c31
ST $f0, 0 * SIZE(CO1)
MADD $f5, $f5, ALPHA_I, c31
ST $f1, 1 * SIZE(CO1)
MADD $f6, $f6, ALPHA_R, c41
ST $f2, 0 * SIZE(CO2)
MADD $f7, $f7, ALPHA_I, c41
ST $f3, 1 * SIZE(CO2)
ST $f4, 0 * SIZE(CO3)
ST $f5, 1 * SIZE(CO3)
ST $f6, 0 * SIZE(CO4)
ST $f7, 1 * SIZE(CO4)
.align 3
/* End of the 4-column tail: B takes over BO, the position the inner    */
/* loops reached in the B panel.                                        */
.L49:
move B, BO
.align 3
/* 2-column tail (N & 2): uses CO1/CO2 and accumulators c11..c22. */
.L50:
andi J, N, 2
blez J, .L70
/* Delay slot: executed on both paths; rewinds AO to the start of A. */
move AO, A
move CO1, C
daddu CO2, C, LDC
/* I = M >> 1: number of 2-row tiles. */
dsra I, M, 1
blez I, .L60
/* Delay slot: C moves past the two columns handled here. */
daddu C, CO2, LDC
/* 2x2 tile setup for the 2-column tail: zero c11/c21/c12/c22, preload  */
/* the operands the unrolled loop at .L52 reads on entry (a1, a2, a5,   */
/* b1, b2, b3, b5) and compute L = K >> 2.                              */
/* b6 and b7 are deliberately not preloaded: no instruction in the      */
/* 2-column path (.L52, .L56, .L58) reads them, so loading them from    */
/* 8*SIZE(B) and 12*SIZE(B) was dead work and reached further into B    */
/* than this tile consumes for small K.                                 */
.L51:
LD a1, 0 * SIZE(AO)
MTC $0, c11
LD a2, 1 * SIZE(AO)
MOV c21, c11
LD a5, 4 * SIZE(AO)
LD b1, 0 * SIZE(B)
MOV c12, c11
LD b2, 1 * SIZE(B)
MOV c22, c11
LD b3, 2 * SIZE(B)
LD b5, 4 * SIZE(B)
dsra L, K, 2
blez L, .L55
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 2x2 main loop, four k steps per iteration: row 1 / row 2 operands    */
/* are (a1,a2), (a3,a4), (a5,a2), (a3,a4) for steps 0..3. AO and BO     */
/* each advance 8*SIZE per iteration.                                   */
.L52:
/* step 0 */
MADD c11, c11, a1, b1
LD a3, 2 * SIZE(AO)
MADD c21, c21, a1, b2
LD b4, 3 * SIZE(BO)
MADD c12, c12, a2, b1
LD a4, 3 * SIZE(AO)
MADD c22, c22, a2, b2
LD b1, 8 * SIZE(BO)
/* step 1 */
MADD c11, c11, a3, b3
LD a1, 8 * SIZE(AO)
MADD c21, c21, a3, b4
LD b2, 5 * SIZE(BO)
MADD c12, c12, a4, b3
LD a2, 5 * SIZE(AO)
MADD c22, c22, a4, b4
LD b3, 6 * SIZE(BO)
/* step 2 */
MADD c11, c11, a5, b5
LD a3, 6 * SIZE(AO)
MADD c21, c21, a5, b2
LD b4, 7 * SIZE(BO)
MADD c12, c12, a2, b5
LD a4, 7 * SIZE(AO)
MADD c22, c22, a2, b2
LD b5, 12 * SIZE(BO)
/* step 3; the trailing loads refill operands for the next iteration. */
MADD c11, c11, a3, b3
LD a5, 12 * SIZE(AO)
MADD c21, c21, a3, b4
LD b2, 9 * SIZE(BO)
MADD c12, c12, a4, b3
LD a2, 9 * SIZE(AO)
MADD c22, c22, a4, b4
LD b3, 10 * SIZE(BO)
daddiu AO, AO, 8 * SIZE
daddiu L, L, -1
bgtz L, .L52
/* Delay slot. */
daddiu BO, BO, 8 * SIZE
.align 3
/* K remainder for the 2x2 tile: L = K & 3 single k steps. */
.L55:
andi L, K, 3
NOP
blez L, .L58
NOP
.align 3
/* One k step: a1/a2 times b1/b2; AO and BO each advance 2*SIZE. */
.L56:
MADD c11, c11, a1, b1
LD a2, 1 * SIZE(AO)
MADD c21, c21, a1, b2
LD a1, 2 * SIZE(AO)
MADD c12, c12, a2, b1
LD b1, 2 * SIZE(BO)
MADD c22, c22, a2, b2
LD b2, 3 * SIZE(BO)
daddiu L, L, -1
daddiu AO, AO, 2 * SIZE
bgtz L, .L56
/* Delay slot. */
daddiu BO, BO, 2 * SIZE
/* Write-back of the 2x2 tile: four C values per column pointer         */
/* (slots 0/1 from cn1, slots 2/3 from cn2; ALPHA_R for even slots,     */
/* ALPHA_I for odd slots), then loop over the remaining 2-row tiles.    */
.L58:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 2 * SIZE(CO1)
LD $f3, 3 * SIZE(CO1)
LD $f4, 0 * SIZE(CO2)
LD $f5, 1 * SIZE(CO2)
LD $f6, 2 * SIZE(CO2)
LD $f7, 3 * SIZE(CO2)
MADD $f0, $f0, ALPHA_R, c11
daddiu I, I, -1
MADD $f1, $f1, ALPHA_I, c11
daddiu CO1,CO1, 4 * SIZE
MADD $f2, $f2, ALPHA_R, c12
daddiu CO2,CO2, 4 * SIZE
MADD $f3, $f3, ALPHA_I, c12
MADD $f4, $f4, ALPHA_R, c21
MADD $f5, $f5, ALPHA_I, c21
MADD $f6, $f6, ALPHA_R, c22
MADD $f7, $f7, ALPHA_I, c22
/* CO1/CO2 have already been advanced, hence the negative offsets. */
ST $f0, -4 * SIZE(CO1)
ST $f1, -3 * SIZE(CO1)
ST $f2, -2 * SIZE(CO1)
ST $f3, -1 * SIZE(CO1)
ST $f4, -4 * SIZE(CO2)
ST $f5, -3 * SIZE(CO2)
ST $f6, -2 * SIZE(CO2)
bgtz I, .L51
/* Delay slot: the last store completes on every pass. */
ST $f7, -1 * SIZE(CO2)
.align 3
/* Odd last row of the 2-column tail (M & 1): 1x2 tile. Four            */
/* accumulators are zeroed because the unrolled loop at .L62 splits     */
/* alternate k steps between (c11, c21) and (c31, c41); .L68 adds the   */
/* two pairs together.                                                  */
/* Only a1..a4 and b1..b4 are preloaded. b5, b6 and b7 are never read   */
/* on this path (.L62, .L66, .L68), so their loads from 4, 8 and        */
/* 12*SIZE(B) were dead and have been dropped.                          */
.L60:
andi I, M, 1
blez I, .L69
NOP
dsra L, K, 2
LD a1, 0 * SIZE(AO)
MTC $0, c11
LD a2, 1 * SIZE(AO)
MOV c21, c11
LD a3, 2 * SIZE(AO)
MOV c31, c11
LD a4, 3 * SIZE(AO)
MOV c41, c11
LD b1, 0 * SIZE(B)
LD b2, 1 * SIZE(B)
LD b3, 2 * SIZE(B)
LD b4, 3 * SIZE(B)
blez L, .L65
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 1x2 main loop, four k steps per iteration. Steps 0 and 2 (a1, a3)    */
/* accumulate into c11/c21, steps 1 and 3 (a2, a4) into c31/c41.        */
/* AO advances 4*SIZE and BO 8*SIZE per iteration.                      */
.L62:
MADD c11, c11, a1, b1
LD b1, 4 * SIZE(BO)
MADD c21, c21, a1, b2
LD b2, 5 * SIZE(BO)
MADD c31, c31, a2, b3
LD b3, 6 * SIZE(BO)
MADD c41, c41, a2, b4
LD b4, 7 * SIZE(BO)
LD a1, 4 * SIZE(AO)
LD a2, 5 * SIZE(AO)
MADD c11, c11, a3, b1
LD b1, 8 * SIZE(BO)
MADD c21, c21, a3, b2
LD b2, 9 * SIZE(BO)
MADD c31, c31, a4, b3
LD b3, 10 * SIZE(BO)
MADD c41, c41, a4, b4
LD b4, 11 * SIZE(BO)
LD a3, 6 * SIZE(AO)
LD a4, 7 * SIZE(AO)
daddiu L, L, -1
daddiu AO, AO, 4 * SIZE
bgtz L, .L62
/* Delay slot. */
daddiu BO, BO, 8 * SIZE
.align 3
/* K remainder for the 1x2 tile: L = K & 3 single k steps. */
.L65:
andi L, K, 3
NOP
blez L, .L68
NOP
.align 3
/* One k step: a1 times b1/b2; AO advances 1*SIZE, BO 2*SIZE. */
.L66:
MADD c11, c11, a1, b1
LD b1, 2 * SIZE(BO)
MADD c21, c21, a1, b2
LD b2, 3 * SIZE(BO)
LD a1, 1 * SIZE(AO)
daddiu L, L, -1
daddiu AO, AO, 1 * SIZE
bgtz L, .L66
/* Delay slot. */
daddiu BO, BO, 2 * SIZE
/* Write-back of the 1x2 tile. The partial sums kept in c31/c41 by the  */
/* unrolled loop are first folded into c11/c21; then two C values per   */
/* column are updated (ALPHA_R*c into slot 0, ALPHA_I*c into slot 1).   */
.L68:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 0 * SIZE(CO2)
LD $f3, 1 * SIZE(CO2)
ADD c11, c11, c31
ADD c21, c21, c41
MADD $f0, $f0, ALPHA_R, c11
MADD $f1, $f1, ALPHA_I, c11
MADD $f2, $f2, ALPHA_R, c21
MADD $f3, $f3, ALPHA_I, c21
ST $f0, 0 * SIZE(CO1)
ST $f1, 1 * SIZE(CO1)
ST $f2, 0 * SIZE(CO2)
ST $f3, 1 * SIZE(CO2)
.align 3
/* End of the 2-column tail: B takes over BO, the position the inner    */
/* loops reached in the B panel.                                        */
.L69:
move B, BO
.align 3
/* Single-column tail (N & 1): uses CO1 only. */
.L70:
andi J, N, 1
blez J, .L999
/* Delay slot: executed on both paths; rewinds AO to the start of A. */
move AO, A
move CO1, C
/* I = M >> 1: number of 2-row tiles. */
dsra I, M, 1
blez I, .L80
/* Delay slot: C moves past the column handled here. */
daddu C, CO1, LDC
/* 2x1 tile setup for the single-column tail: zero the accumulators and */
/* compute L = K >> 2.                                                  */
/* No A/B operand is preloaded here. The loops at .L72 and .L76 load    */
/* a1, a2 and b1 themselves before every use, and a5/b2..b7 are never   */
/* read on this path, so the former preloads were dead. Several of them */
/* (up to 12*SIZE(B) and 4*SIZE(AO)) also reached past the data this    */
/* tile consumes when K is small.                                       */
/* c21/c22 are still zeroed because .L78 adds them to c11/c12.          */
.L71:
MTC $0, c11
/* An integer instruction is kept between MTC and the first read of     */
/* c11, matching the spacing used elsewhere in this file (older MIPS    */
/* ISA levels do not guarantee the value to the very next instruction). */
dsra L, K, 2
MOV c21, c11
MOV c12, c11
MOV c22, c11
blez L, .L75
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 2x1 main loop, four k steps per iteration, not software pipelined:   */
/* each step loads its own a1/a2 (two rows) and b1 and accumulates into */
/* c11/c12. AO advances 8*SIZE and BO 4*SIZE per iteration.             */
.L72:
LD a1, 0 * SIZE(AO)
LD a2, 1 * SIZE(AO)
LD b1, 0 * SIZE(BO)
MADD c11, c11, a1, b1
MADD c12, c12, a2, b1
LD a1, 2 * SIZE(AO)
LD a2, 3 * SIZE(AO)
LD b1, 1 * SIZE(BO)
MADD c11, c11, a1, b1
MADD c12, c12, a2, b1
LD a1, 4 * SIZE(AO)
LD a2, 5 * SIZE(AO)
LD b1, 2 * SIZE(BO)
MADD c11, c11, a1, b1
MADD c12, c12, a2, b1
LD a1, 6 * SIZE(AO)
LD a2, 7 * SIZE(AO)
LD b1, 3 * SIZE(BO)
MADD c11, c11, a1, b1
MADD c12, c12, a2, b1
daddiu L, L, -1
daddiu AO, AO, 8 * SIZE
bgtz L, .L72
/* Delay slot. */
daddiu BO, BO, 4 * SIZE
.align 3
/* K remainder for the 2x1 tile: L = K & 3 single k steps. */
.L75:
andi L, K, 3
NOP
blez L, .L78
NOP
.align 3
/* One k step: loads a1/a2 and b1, then accumulates into c11/c12.       */
/* AO advances 2*SIZE, BO 1*SIZE.                                       */
.L76:
LD a1, 0 * SIZE(AO)
LD a2, 1 * SIZE(AO)
LD b1, 0 * SIZE(BO)
MADD c11, c11, a1, b1
MADD c12, c12, a2, b1
daddiu L, L, -1
daddiu AO, AO, 2 * SIZE
bgtz L, .L76
/* Delay slot. */
daddiu BO, BO, 1 * SIZE
/* Write-back of the 2x1 tile: four consecutive C values at CO1         */
/* (slots 0/1 from c11, slots 2/3 from c12; ALPHA_R for even slots,     */
/* ALPHA_I for odd slots). c21/c22 are added in first; the 2x1 loops    */
/* never accumulate into them, so they still hold their initial zero.   */
.L78:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
LD $f2, 2 * SIZE(CO1)
LD $f3, 3 * SIZE(CO1)
ADD c11, c11, c21
daddiu I, I, -1
ADD c12, c12, c22
daddiu CO1,CO1, 4 * SIZE
MADD $f0, $f0, ALPHA_R, c11
MADD $f1, $f1, ALPHA_I, c11
MADD $f2, $f2, ALPHA_R, c12
MADD $f3, $f3, ALPHA_I, c12
/* CO1 has already been advanced, hence the negative offsets. */
ST $f0, -4 * SIZE(CO1)
ST $f1, -3 * SIZE(CO1)
ST $f2, -2 * SIZE(CO1)
bgtz I, .L71
/* Delay slot: the last store completes on every pass. */
ST $f3, -1 * SIZE(CO1)
.align 3
/* Odd last row of the single-column tail (M & 1): 1x1 tile, i.e. a dot */
/* product over K. c11 and c21 are zeroed because the unrolled loop at  */
/* .L82 alternates between them; .L88 adds them together.               */
/* No A/B operand is preloaded here: .L82 and .L86 load a1 and b1       */
/* themselves before every use, and a2..a4/b2..b7 are never read on     */
/* this path, so the former preloads were dead (and reached up to       */
/* 12*SIZE(B) and 3*SIZE(AO), past what this tile consumes for small K).*/
.L80:
andi I, M, 1
blez I, .L89
NOP
MTC $0, c11
/* An integer instruction is kept between MTC and the read of c11,      */
/* matching the spacing used elsewhere in this file.                    */
dsra L, K, 2
MOV c21, c11
blez L, .L85
/* Delay slot: BO restarts at the beginning of the current B panel. */
move BO, B
.align 3
/* 1x1 main loop, four k steps per iteration: products alternate        */
/* between c11 (steps 0, 2) and c21 (steps 1, 3). AO and BO each        */
/* advance 4*SIZE per iteration.                                        */
.L82:
LD a1, 0 * SIZE(AO)
LD b1, 0 * SIZE(BO)
MADD c11, c11, a1, b1
LD a1, 1 * SIZE(AO)
LD b1, 1 * SIZE(BO)
MADD c21, c21, a1, b1
LD a1, 2 * SIZE(AO)
LD b1, 2 * SIZE(BO)
MADD c11, c11, a1, b1
LD a1, 3 * SIZE(AO)
LD b1, 3 * SIZE(BO)
MADD c21, c21, a1, b1
daddiu L, L, -1
daddiu AO, AO, 4 * SIZE
bgtz L, .L82
/* Delay slot. */
daddiu BO, BO, 4 * SIZE
.align 3
/* K remainder for the 1x1 tile: L = K & 3 single k steps. */
.L85:
andi L, K, 3
NOP
blez L, .L88
NOP
.align 3
/* One k step: c11 += a1 * b1; AO and BO each advance 1*SIZE. */
.L86:
LD a1, 0 * SIZE(AO)
LD b1, 0 * SIZE(BO)
MADD c11, c11, a1, b1
daddiu L, L, -1
daddiu AO, AO, 1 * SIZE
bgtz L, .L86
/* Delay slot. */
daddiu BO, BO, 1 * SIZE
/* Write-back of the 1x1 tile: fold c21 into c11, then update two       */
/* consecutive C values at CO1 (ALPHA_R*c11 into slot 0, ALPHA_I*c11    */
/* into slot 1).                                                        */
.L88:
LD $f0, 0 * SIZE(CO1)
LD $f1, 1 * SIZE(CO1)
ADD c11, c11, c21
MADD $f0, $f0, ALPHA_R, c11
MADD $f1, $f1, ALPHA_I, c11
ST $f0, 0 * SIZE(CO1)
ST $f1, 1 * SIZE(CO1)
.align 3
/* End of the single-column tail: B takes over BO. */
.L89:
move B, BO
.align 3
/* Epilogue: restore the registers saved by the prologue from the same  */
/* frame offsets, release the 128-byte frame and return.                */
.L999:
LDARG $16, 0($sp)
LDARG $17, 8($sp)
LDARG $18, 16($sp)
LDARG $19, 24($sp)
LDARG $20, 32($sp)
LDARG $21, 40($sp)
ldc1 $f24, 48($sp)
ldc1 $f25, 56($sp)
ldc1 $f26, 64($sp)
ldc1 $f27, 72($sp)
ldc1 $f28, 80($sp)
ldc1 $f29, 88($sp)
j $31
/* Delay slot: the stack pointer is restored as the jump executes. */
daddiu $sp, $sp, 128
EPILOGUE