|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ASSEMBLER
|
|
kusano |
2b45e8 |
#include "common.h"
|
|
kusano |
2b45e8 |
#include "version.h"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Build guard: at least one of the CPU-generation macros EV4, EV5 or EV6
   must be defined; otherwise preprocessing stops with the #error below. */
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
|
|
kusano |
2b45e8 |
#error "Architecture is not specified."
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* EV6 tuning.
   PREFETCHSIZE is a look-ahead distance in elements: the inner loop issues
   loads into $31 at PREFETCHSIZE * SIZE past AO and BO (presumably as
   prefetch hints, since the loaded value is discarded - confirm).
   UNOP expands to a real `unop` instruction on this target. */
#ifdef EV6
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 56
|
|
kusano |
2b45e8 |
#define UNOP unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* EV5 tuning: shorter look-ahead, and UNOP expands to nothing, so the
   UNOP slots in the inner loop disappear on this target. */
#ifdef EV5
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 48
|
|
kusano |
2b45e8 |
#define UNOP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* EV4: only the (empty) UNOP is defined.  PREFETCHSIZE is left undefined;
   the uses visible in the inner loop are all inside `#ifndef EV4`. */
#ifdef EV4
|
|
kusano |
2b45e8 |
#define UNOP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Assembler mode setup for the kernel that follows. */
/* noat: the code uses $at explicitly (AO is defined as $at further down),
   so the assembler must not treat it as its own temporary. */
.set noat
|
|
kusano |
2b45e8 |
/* noreorder: keep instructions in the hand-scheduled order written here. */
.set noreorder
|
|
kusano |
2b45e8 |
/* NOTE(review): the instruction set is fixed to ev6 unconditionally, even
   though the file also accepts EV4 and EV5 as build targets - confirm that
   this is intended. */
.arch ev6
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.text
|
|
kusano |
2b45e8 |
/* Alignment of the entry point; presumably a power-of-two exponent
   (2^5 = 32 bytes) as in GNU as for Alpha - confirm. */
.align 5
|
|
kusano |
2b45e8 |
.globl CNAME
|
|
kusano |
2b45e8 |
.ent CNAME
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Stack frame size in bytes.  The prologue uses offsets 0..56 to save
   $f2-$f9 and offsets 64 and 72 (ALPHA_R / ALPHA_I) to spill the two alpha
   components: ten 8-byte slots in total. */
#define STACKSIZE 80
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Problem dimensions and matrix pointers.
   M, N, K and A are used as they arrive in their registers (the prologue
   never loads them); B, C and LDC are loaded by the prologue from the
   caller's stack area at STACKSIZE + 0 / 8 / 16 ($sp).
   LDC is then scaled by ZBASE_SHIFT, i.e. converted to a byte stride. */
#define M $16
|
|
kusano |
2b45e8 |
#define N $17
|
|
kusano |
2b45e8 |
#define K $18
|
|
kusano |
2b45e8 |
#define A $21
|
|
kusano |
2b45e8 |
#define B $22
|
|
kusano |
2b45e8 |
#define C $20
|
|
kusano |
2b45e8 |
#define LDC $23
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Output pointers for the current pair of C columns: at $L01 the code sets
   C1 = C and C2 = C + LDC, then advances C to C2 + LDC for the next pair.
   Note C1 is the integer register $19, unrelated to the FP register $f19. */
#define C1 $19
|
|
kusano |
2b45e8 |
#define C2 $24
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Running pointers and loop counters.
   AO / BO : current read positions in the A and B panels.
   I       : row-block counter (M >> 1 for the 2-row tiles, then M & 1).
   J       : column-pair counter, initialised to N >> 1.
   L       : inner-product counter, started at (K or TMP1) - 2 and
             decremented by 2 per pass of the unrolled loop $L12. */
#define AO $at
|
|
kusano |
2b45e8 |
#define BO $5
|
|
kusano |
2b45e8 |
#define I $6
|
|
kusano |
2b45e8 |
#define J $7
|
|
kusano |
2b45e8 |
#define L $8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* a1-a4: operands loaded from the A panel (via AO).
   After the inner loop some of them are reused to hold values loaded from
   C1/C2 for the final update.
   a4 is $f19, one of the two registers the prologue spills to the stack
   (ALPHA_R) before the kernel overwrites it. */
#define a1 $f16
|
|
kusano |
2b45e8 |
#define a2 $f17
|
|
kusano |
2b45e8 |
#define a3 $f18
|
|
kusano |
2b45e8 |
#define a4 $f19
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* b1-b4: operands loaded from the B panel (via B / BO).
   b1 is $f20, the other register the prologue spills (ALPHA_I). */
#define b1 $f20
|
|
kusano |
2b45e8 |
#define b2 $f21
|
|
kusano |
2b45e8 |
#define b3 $f22
|
|
kusano |
2b45e8 |
#define b4 $f23
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* t1-t4: product temporaries.  Each MUL writes one of them and a later
   ADD1..ADD4 folds it into an accumulator; they are cleared with fclr
   before a tile starts. */
#define t1 $f24
|
|
kusano |
2b45e8 |
#define t2 $f25
|
|
kusano |
2b45e8 |
#define t3 $f26
|
|
kusano |
2b45e8 |
#define t4 $f27
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Extra operand registers used by the unrolled loop body so that the next
   A/B values can be loaded while the current ones are still in use. */
#define a5 $f28
|
|
kusano |
2b45e8 |
#define a6 $f30
|
|
kusano |
2b45e8 |
#define b5 $f29
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* alpha_i / alpha_r deliberately alias b5 ($f29) and a6 ($f30): alpha is
   only reloaded from ALPHA_I / ALPHA_R on the stack at $L15 / $L18, after
   the unrolled loop has finished with b5 and a6. */
#define alpha_i $f29
|
|
kusano |
2b45e8 |
#define alpha_r $f30
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* c01-c16: the sixteen partial-sum accumulators of a tile, mapped onto
   $f0-$f15.  They are zeroed with fclr before each tile and, after the
   inner loop, combined pairwise (c01+c06, c02+c05, c03+c08, c04+c07,
   c09+c14, c10+c13, c11+c16, c12+c15) before scaling by alpha.
   $f2-$f9 (c03..c10) are the registers the prologue saves at 0..56($sp). */
#define c01 $f0
|
|
kusano |
2b45e8 |
#define c02 $f1
|
|
kusano |
2b45e8 |
#define c03 $f2
|
|
kusano |
2b45e8 |
#define c04 $f3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define c05 $f4
|
|
kusano |
2b45e8 |
#define c06 $f5
|
|
kusano |
2b45e8 |
#define c07 $f6
|
|
kusano |
2b45e8 |
#define c08 $f7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define c09 $f8
|
|
kusano |
2b45e8 |
#define c10 $f9
|
|
kusano |
2b45e8 |
#define c11 $f10
|
|
kusano |
2b45e8 |
#define c12 $f11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define c13 $f12
|
|
kusano |
2b45e8 |
#define c14 $f13
|
|
kusano |
2b45e8 |
#define c15 $f14
|
|
kusano |
2b45e8 |
#define c16 $f15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Integer scratch and TRMM bookkeeping registers.
   TMP1   : scratch; in TRMM builds it also carries the effective inner
            length from which L is initialised.
   TMP2   : scratch (not referenced in the part of the kernel shown here).
   KK     : TRMM running offset, derived from OFFSET; only referenced in
            TRMMKERNEL code paths.
   BB     : look-ahead pointer into B, advanced 16 * SIZE per tile while
            loads into $31 touch the data (non-EV4 builds only).
   OFFSET : TRMM offset argument, loaded from the stack under TRMMKERNEL.
   Note that $0, $1 and $2 are also used by their raw names for the
   M/N/K <= 0 early-exit test in the prologue. */
#define TMP1 $0
|
|
kusano |
2b45e8 |
#define TMP2 $1
|
|
kusano |
2b45e8 |
#define KK $2
|
|
kusano |
2b45e8 |
#define BB $3
|
|
kusano |
2b45e8 |
#define OFFSET $4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Stack slots where the prologue spills the incoming alpha components
   ($f19 -> ALPHA_R, $f20 -> ALPHA_I); they sit just above the eight
   saved FP registers inside the STACKSIZE-byte frame. */
#define ALPHA_R 64($sp)
|
|
kusano |
2b45e8 |
#define ALPHA_I 72($sp)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Select, per kernel variant, whether each of ADD1..ADD4 is an ADD or a SUB.
   In the kernel loops every ADDn folds one of the product temporaries
   t1..t4 into an accumulator, so turning an ADDn into SUB flips the sign of
   that product's contribution.
   ADD and SUB themselves are not defined in this part of the file
   (presumably they come from common.h - confirm).
   The two-letter macros (NN, NT, ..., TC) presumably encode the
   transpose/conjugate variant of the two operands - confirm against the
   build system.  ADD1 is ADD in every variant. */
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
|
kusano |
2b45e8 |
#define ADD1 ADD
|
|
kusano |
2b45e8 |
#define ADD2 SUB
|
|
kusano |
2b45e8 |
#define ADD3 ADD
|
|
kusano |
2b45e8 |
#define ADD4 ADD
|
|
kusano |
2b45e8 |
/* Variants RN / RT / CN / CT: only ADD3 subtracts. */
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
|
kusano |
2b45e8 |
#define ADD1 ADD
|
|
kusano |
2b45e8 |
#define ADD2 ADD
|
|
kusano |
2b45e8 |
#define ADD3 SUB
|
|
kusano |
2b45e8 |
#define ADD4 ADD
|
|
kusano |
2b45e8 |
/* Variants NR / NC / TR / TC: only ADD4 subtracts. */
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
|
kusano |
2b45e8 |
#define ADD1 ADD
|
|
kusano |
2b45e8 |
#define ADD2 ADD
|
|
kusano |
2b45e8 |
#define ADD3 ADD
|
|
kusano |
2b45e8 |
#define ADD4 SUB
|
|
kusano |
2b45e8 |
/* Every remaining variant (none of the macros above defined):
   ADD2, ADD3 and ADD4 all subtract. */
#else
|
|
kusano |
2b45e8 |
#define ADD1 ADD
|
|
kusano |
2b45e8 |
#define ADD2 SUB
|
|
kusano |
2b45e8 |
#define ADD3 SUB
|
|
kusano |
2b45e8 |
#define ADD4 SUB
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
CNAME:
|
|
kusano |
2b45e8 |
.frame $sp, STACKSIZE, $26, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef PROFILE
|
|
kusano |
2b45e8 |
ldgp $gp, 0($27)
|
|
kusano |
2b45e8 |
lda $at, _mcount
|
|
kusano |
2b45e8 |
jsr $at, ($at), _mcount
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef PROFILE
|
|
kusano |
2b45e8 |
.prologue 0
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
.prologue 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda $sp, -STACKSIZE($sp)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ldq B, 0 + STACKSIZE($sp)
|
|
kusano |
2b45e8 |
ldq C, 8 + STACKSIZE($sp)
|
|
kusano |
2b45e8 |
ldq LDC, 16 + STACKSIZE($sp)
|
|
kusano |
2b45e8 |
#ifdef TRMMKERNEL
|
|
kusano |
2b45e8 |
ldq OFFSET, 24 + STACKSIZE($sp)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sll LDC, ZBASE_SHIFT, LDC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stt $f2, 0($sp)
|
|
kusano |
2b45e8 |
stt $f3, 8($sp)
|
|
kusano |
2b45e8 |
stt $f4, 16($sp)
|
|
kusano |
2b45e8 |
stt $f5, 24($sp)
|
|
kusano |
2b45e8 |
stt $f6, 32($sp)
|
|
kusano |
2b45e8 |
stt $f7, 40($sp)
|
|
kusano |
2b45e8 |
stt $f8, 48($sp)
|
|
kusano |
2b45e8 |
stt $f9, 56($sp)
|
|
kusano |
2b45e8 |
stt $f19, ALPHA_R
|
|
kusano |
2b45e8 |
stt $f20, ALPHA_I
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
cmple M, 0, $0
|
|
kusano |
2b45e8 |
cmple N, 0, $1
|
|
kusano |
2b45e8 |
cmple K, 0, $2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
or $0, $1, $0
|
|
kusano |
2b45e8 |
or $0, $2, $0
|
|
kusano |
2b45e8 |
bne $0, $L999
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && !defined(LEFT)
|
|
kusano |
2b45e8 |
subq $31, OFFSET, KK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sra N, 1, J
|
|
kusano |
2b45e8 |
ble J, $L30
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L01:
|
|
kusano |
2b45e8 |
mov C, C1
|
|
kusano |
2b45e8 |
addq C, LDC, C2
|
|
kusano |
2b45e8 |
mov A, AO
|
|
kusano |
2b45e8 |
s4addq K, 0, BB
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
mov OFFSET, KK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SXADDQ BB, B, BB
|
|
kusano |
2b45e8 |
addq C2, LDC, C
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sra M, 1, I
|
|
kusano |
2b45e8 |
fclr t1
|
|
kusano |
2b45e8 |
fclr t2
|
|
kusano |
2b45e8 |
fclr t3
|
|
kusano |
2b45e8 |
fclr t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fclr c01
|
|
kusano |
2b45e8 |
fclr c05
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ble I, $L20
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L11:
|
|
kusano |
2b45e8 |
#ifndef EV4
|
|
kusano |
2b45e8 |
ldl $31, 0 * SIZE(BB)
|
|
kusano |
2b45e8 |
ldl $31, 8 * SIZE(BB)
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
lda BB, 16 * SIZE(BB)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(TRMMKERNEL) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TRMMKERNEL
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
addq KK, 2, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
addq KK, 2, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c09
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c13
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c10
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c03
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c07
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c11
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lds $f31, 4 * SIZE(C1)
|
|
kusano |
2b45e8 |
fclr c04
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
lda L, -2(K)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
fclr c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lds $f31, 4 * SIZE(C2)
|
|
kusano |
2b45e8 |
fclr c12
|
|
kusano |
2b45e8 |
fclr c16
|
|
kusano |
2b45e8 |
ble L, $L15
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
sll KK, ZBASE_SHIFT + 1, TMP1
|
|
kusano |
2b45e8 |
addq AO, TMP1, AO
|
|
kusano |
2b45e8 |
addq B, TMP1, BO
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c09
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c13
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c10
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c03
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c07
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c11
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lds $f31, 4 * SIZE(C1)
|
|
kusano |
2b45e8 |
fclr c04
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
fclr c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lds $f31, 4 * SIZE(C2)
|
|
kusano |
2b45e8 |
fclr c12
|
|
kusano |
2b45e8 |
fclr c16
|
|
kusano |
2b45e8 |
ble L, $L15
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L12:
|
|
kusano |
2b45e8 |
/* 1 */
|
|
kusano |
2b45e8 |
ADD1 c11, t1, c11
|
|
kusano |
2b45e8 |
#ifndef EV4
|
|
kusano |
2b45e8 |
ldq $31, PREFETCHSIZE * SIZE(AO)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
MUL b1, a1, t1
|
|
kusano |
2b45e8 |
#ifndef EV4
|
|
kusano |
2b45e8 |
ldl $31, PREFETCHSIZE * SIZE(BO)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c12, t2, c12
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b1, a2, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c16, t3, c16
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a2, t3
|
|
kusano |
2b45e8 |
LD a5, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c15, t4, c15
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a1, t4
|
|
kusano |
2b45e8 |
LD b5, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 2 */
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
UNOP
|
|
kusano |
2b45e8 |
MUL b1, a3, t1
|
|
kusano |
2b45e8 |
UNOP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
UNOP
|
|
kusano |
2b45e8 |
MUL b1, a4, t2
|
|
kusano |
2b45e8 |
UNOP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t3, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a4, t3
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t4, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a1, t4
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 3 */
|
|
kusano |
2b45e8 |
ADD1 c03, t1, c03
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a1, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t2, c04
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a2, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t3, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a2, t3
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t4, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a3, t4
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 4 */
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a3, t1
|
|
kusano |
2b45e8 |
LD a6, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a4, t2
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t3, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a4, t3
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t4, c07
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a3, t4
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 5 */
|
|
kusano |
2b45e8 |
ADD1 c11, t1, c11
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b5, a5, t1
|
|
kusano |
2b45e8 |
LD a1, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c12, t2, c12
|
|
kusano |
2b45e8 |
lda L, -2(L)
|
|
kusano |
2b45e8 |
MUL b5, a2, t2
|
|
kusano |
2b45e8 |
LD b1, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c16, t3, c16
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a2, t3
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c15, t4, c15
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a5, t4
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 6 */
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b5, a6, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b5, a4, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t3, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a4, t3
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t4, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a5, t4
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 7 */
|
|
kusano |
2b45e8 |
ADD1 c03, t1, c03
|
|
kusano |
2b45e8 |
lda AO, 8 * SIZE(AO)
|
|
kusano |
2b45e8 |
MUL b3, a5, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t2, c04
|
|
kusano |
2b45e8 |
lda BO, 8 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL b3, a2, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t3, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a2, t3
|
|
kusano |
2b45e8 |
LD a2, -3 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t4, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a6, t4
|
|
kusano |
2b45e8 |
LD b2, -3 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* 8 */
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a6, t1
|
|
kusano |
2b45e8 |
LD a3, -2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a4, t2
|
|
kusano |
2b45e8 |
LD b3, -2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t3, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a4, t3
|
|
kusano |
2b45e8 |
LD a4, -1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t4, c07
|
|
kusano |
2b45e8 |
MUL b4, a6, t4
|
|
kusano |
2b45e8 |
LD b4, -1 * SIZE(BO)
|
|
kusano |
2b45e8 |
bgt L, $L12
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L15:
|
|
kusano |
2b45e8 |
ADD1 c11, t1, c11
|
|
kusano |
2b45e8 |
ldt alpha_r, ALPHA_R
|
|
kusano |
2b45e8 |
MUL b1, a1, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
blbs K, $L18
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
blbs TMP1, $L18
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c12, t2, c12
|
|
kusano |
2b45e8 |
MUL b1, a2, t2
|
|
kusano |
2b45e8 |
ADD2 c16, t3, c16
|
|
kusano |
2b45e8 |
MUL b2, a2, t3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c15, t4, c15
|
|
kusano |
2b45e8 |
MUL b2, a1, t4
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
MUL b1, a3, t1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b1, a4, t2
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t3, c06
|
|
kusano |
2b45e8 |
MUL b2, a4, t3
|
|
kusano |
2b45e8 |
ADD4 c05, t4, c05
|
|
kusano |
2b45e8 |
MUL b4, a1, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c03, t1, c03
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a1, t1
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t2, c04
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a2, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t3, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a2, t3
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t4, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a3, t4
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a3, t1
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a4, t2
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t3, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a4, t3
|
|
kusano |
2b45e8 |
LD a4, -1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t4, c07
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a3, t4
|
|
kusano |
2b45e8 |
LD a3, -2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c11, t1, c11
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL b1, a1, t1
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L18:
|
|
kusano |
2b45e8 |
ADD3 c12, t2, c12
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b1, a2, t2
|
|
kusano |
2b45e8 |
ldt alpha_i, ALPHA_I
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c16, t3, c16
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a2, t3
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD a5, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c15, t4, c15
|
|
kusano |
2b45e8 |
MUL b2, a1, t4
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
MUL b1, a3, t1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b1, a4, t2
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD b1, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t3, c06
|
|
kusano |
2b45e8 |
MUL b2, a4, t3
|
|
kusano |
2b45e8 |
ADD4 c05, t4, c05
|
|
kusano |
2b45e8 |
MUL b4, a1, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c03, t1, c03
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a1, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD a1, 2 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t2, c04
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a2, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t3, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a2, t3
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD a2, 3 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t4, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b2, a3, t4
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD b2, 0 * SIZE(C2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
lda I, -1(I)
|
|
kusano |
2b45e8 |
MUL b3, a3, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b3, a4, t2
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD b3, 1 * SIZE(C2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t3, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a4, t3
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD a4, 2 * SIZE(C2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t4, c07
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL b4, a3, t4
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD a3, 3 * SIZE(C2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c11, t1, c11
|
|
kusano |
2b45e8 |
ADD3 c12, t2, c12
|
|
kusano |
2b45e8 |
ADD2 c16, t3, c16
|
|
kusano |
2b45e8 |
ADD4 c15, t4, c15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c01, c06, c01
|
|
kusano |
2b45e8 |
ADD c02, c05, c02
|
|
kusano |
2b45e8 |
ADD c03, c08, c03
|
|
kusano |
2b45e8 |
ADD c04, c07, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c09, c14, c09
|
|
kusano |
2b45e8 |
MUL alpha_r, c01, t1
|
|
kusano |
2b45e8 |
ADD c10, c13, c10
|
|
kusano |
2b45e8 |
MUL alpha_r, c02, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c11, c16, c11
|
|
kusano |
2b45e8 |
MUL alpha_r, c03, t3
|
|
kusano |
2b45e8 |
ADD c12, c15, c12
|
|
kusano |
2b45e8 |
MUL alpha_r, c04, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
ADD a5, t1, a5
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t1
|
|
kusano |
2b45e8 |
ADD b1, t2, b1
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD a1, t3, a1
|
|
kusano |
2b45e8 |
MUL alpha_i, c04, t3
|
|
kusano |
2b45e8 |
ADD a2, t4, a2
|
|
kusano |
2b45e8 |
MUL alpha_i, c03, t4
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ADD $f31, t1, a5
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t1
|
|
kusano |
2b45e8 |
ADD $f31, t2, b1
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD $f31, t3, a1
|
|
kusano |
2b45e8 |
MUL alpha_i, c04, t3
|
|
kusano |
2b45e8 |
ADD $f31, t4, a2
|
|
kusano |
2b45e8 |
MUL alpha_i, c03, t4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB a5, t1, a5
|
|
kusano |
2b45e8 |
MUL alpha_r, c09, t1
|
|
kusano |
2b45e8 |
ADD b1, t2, b1
|
|
kusano |
2b45e8 |
MUL alpha_r, c10, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB a1, t3, a1
|
|
kusano |
2b45e8 |
MUL alpha_r, c11, t3
|
|
kusano |
2b45e8 |
ADD a2, t4, a2
|
|
kusano |
2b45e8 |
MUL alpha_r, c12, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
ADD b2, t1, b2
|
|
kusano |
2b45e8 |
MUL alpha_i, c10, t1
|
|
kusano |
2b45e8 |
ADD b3, t2, b3
|
|
kusano |
2b45e8 |
MUL alpha_i, c09, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD a4, t3, a4
|
|
kusano |
2b45e8 |
MUL alpha_i, c12, t3
|
|
kusano |
2b45e8 |
ADD a3, t4, a3
|
|
kusano |
2b45e8 |
MUL alpha_i, c11, t4
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ADD $f31, t1, b2
|
|
kusano |
2b45e8 |
MUL alpha_i, c10, t1
|
|
kusano |
2b45e8 |
ADD $f31, t2, b3
|
|
kusano |
2b45e8 |
MUL alpha_i, c09, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD $f31, t3, a4
|
|
kusano |
2b45e8 |
MUL alpha_i, c12, t3
|
|
kusano |
2b45e8 |
ADD $f31, t4, a3
|
|
kusano |
2b45e8 |
MUL alpha_i, c11, t4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB b2, t1, b2
|
|
kusano |
2b45e8 |
ST a5, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
fclr t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD b3, t2, b3
|
|
kusano |
2b45e8 |
ST b1, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
fclr t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB a4, t3, a4
|
|
kusano |
2b45e8 |
ST a1, 2 * SIZE(C1)
|
|
kusano |
2b45e8 |
fclr t3
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD a3, t4, a3
|
|
kusano |
2b45e8 |
ST a2, 3 * SIZE(C1)
|
|
kusano |
2b45e8 |
fclr t4
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST b2, 0 * SIZE(C2)
|
|
kusano |
2b45e8 |
fclr c01
|
|
kusano |
2b45e8 |
ST b3, 1 * SIZE(C2)
|
|
kusano |
2b45e8 |
fclr c05
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST a4, 2 * SIZE(C2)
|
|
kusano |
2b45e8 |
lda C1, 4 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST a3, 3 * SIZE(C2)
|
|
kusano |
2b45e8 |
lda C2, 4 * SIZE(C2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
subq TMP1, 2, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
subq TMP1, 2, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
sll TMP1, ZBASE_SHIFT + 1, TMP1
|
|
kusano |
2b45e8 |
addq AO, TMP1, AO
|
|
kusano |
2b45e8 |
addq BO, TMP1, BO
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
addq KK, 2, KK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
bgt I, $L11
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L20:
|
|
kusano |
2b45e8 |
and M, 1, I
|
|
kusano |
2b45e8 |
ble I, $L29
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(TRMMKERNEL) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TRMMKERNEL
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
addq KK, 1, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
addq KK, 2, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c09
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c13
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c10
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(B)
|
|
kusano |
2b45e8 |
lda AO, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(B)
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
lda L, -2(K)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
ble L, $L25
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
sll KK, ZBASE_SHIFT + 0, TMP1
|
|
kusano |
2b45e8 |
addq AO, TMP1, AO
|
|
kusano |
2b45e8 |
sll KK, ZBASE_SHIFT + 1, TMP1
|
|
kusano |
2b45e8 |
addq B, TMP1, BO
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c09
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c13
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c10
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
lda AO, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
ble L, $L25
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L22:
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t3, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t3
|
|
kusano |
2b45e8 |
lda BO, 8 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t4, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t4
|
|
kusano |
2b45e8 |
LD b2, -7 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b3, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b3, t2
|
|
kusano |
2b45e8 |
LD b3, -6 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b4, t3
|
|
kusano |
2b45e8 |
LD a1, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
MUL a2, b4, t4
|
|
kusano |
2b45e8 |
LD b5, -5 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b1, t1
|
|
kusano |
2b45e8 |
LD a2, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b1, t2
|
|
kusano |
2b45e8 |
LD b1, -4 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t3, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b2, t3
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t4, c14
|
|
kusano |
2b45e8 |
MUL a4, b2, t4
|
|
kusano |
2b45e8 |
LD b2, -3 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
lda L, -2(L)
|
|
kusano |
2b45e8 |
MUL a3, b3, t1
|
|
kusano |
2b45e8 |
LD b4, -1 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b3, t2
|
|
kusano |
2b45e8 |
LD b3, -2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b5, t3
|
|
kusano |
2b45e8 |
LD a3, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
MUL a4, b5, t4
|
|
kusano |
2b45e8 |
LD a4, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
bgt L, $L22
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L25:
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
ldt alpha_r, ALPHA_R
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
blbs K, $L28
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
blbs TMP1, $L28
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t3, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t3
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t4, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t4
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b3, t1
|
|
kusano |
2b45e8 |
lda AO, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b3, t2
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b4, t3
|
|
kusano |
2b45e8 |
LD a1, -2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b4, t4
|
|
kusano |
2b45e8 |
LD a2, -1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L28:
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
ldt alpha_i, ALPHA_I
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c13, t3, c13
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t3
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c03, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c14, t4, c14
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t4
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c04, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b3, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c11, 0 * SIZE(C2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b3, t2
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c12, 1 * SIZE(C2)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
MUL a1, b4, t3
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
MUL a2, b4, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c09, t1, c09
|
|
kusano |
2b45e8 |
ADD3 c10, t2, c10
|
|
kusano |
2b45e8 |
ADD4 c13, t3, c13
|
|
kusano |
2b45e8 |
ADD2 c14, t4, c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c01, c06, c01
|
|
kusano |
2b45e8 |
ADD c02, c05, c02
|
|
kusano |
2b45e8 |
ADD c09, c14, c09
|
|
kusano |
2b45e8 |
ADD c10, c13, c10
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MUL alpha_r, c01, t1
|
|
kusano |
2b45e8 |
MUL alpha_r, c02, t2
|
|
kusano |
2b45e8 |
MUL alpha_r, c09, t3
|
|
kusano |
2b45e8 |
MUL alpha_r, c10, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
ADD c03, t1, c03
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t1
|
|
kusano |
2b45e8 |
ADD c04, t2, c04
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c11, t3, c11
|
|
kusano |
2b45e8 |
MUL alpha_i, c10, t3
|
|
kusano |
2b45e8 |
ADD c12, t4, c12
|
|
kusano |
2b45e8 |
MUL alpha_i, c09, t4
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ADD $f31, t1, c03
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t1
|
|
kusano |
2b45e8 |
ADD $f31, t2, c04
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD $f31, t3, c11
|
|
kusano |
2b45e8 |
MUL alpha_i, c10, t3
|
|
kusano |
2b45e8 |
ADD $f31, t4, c12
|
|
kusano |
2b45e8 |
MUL alpha_i, c09, t4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB c03, t1, c03
|
|
kusano |
2b45e8 |
ADD c04, t2, c04
|
|
kusano |
2b45e8 |
SUB c11, t3, c11
|
|
kusano |
2b45e8 |
ADD c12, t4, c12
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST c03, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST c04, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST c11, 0 * SIZE(C2)
|
|
kusano |
2b45e8 |
ST c12, 1 * SIZE(C2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
subq TMP1, 1, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
subq TMP1, 2, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
sll TMP1, ZBASE_SHIFT + 0, TMP2
|
|
kusano |
2b45e8 |
addq AO, TMP2, AO
|
|
kusano |
2b45e8 |
sll TMP1, ZBASE_SHIFT + 1, TMP2
|
|
kusano |
2b45e8 |
addq BO, TMP2, BO
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
addq KK, 1, KK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* $L29: bottom of the loop whose head is $L01 (defined outside this   */
/* chunk).  Commits the advanced B pointer, counts J down by one and   */
/* branches back to $L01 while J is still positive.                    */
$L29:
|
|
kusano |
2b45e8 |
/* B = BO: the next pass starts reading B where this pass stopped.     */
mov BO, B
|
|
kusano |
2b45e8 |
/* J = J - 1 (lda with a -1 displacement is used as a decrement).      */
lda J, -1(J)
|
|
kusano |
2b45e8 |
/* TRMM build without LEFT: KK is advanced by 2 per pass of this loop. */
#if defined(TRMMKERNEL) && !defined(LEFT)
|
|
kusano |
2b45e8 |
addq KK, 2, KK
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
/* Every other build emits a no-op here, so both variants contain      */
/* exactly one instruction at this position.                           */
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
/* Loop back while J > 0; otherwise fall through to $L30.              */
bgt J, $L01
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* $L30: entry to the remainder path taken when N is odd.  Resets the  */
/* working pointers and dispatches on M: $L41 runs I = M >> 1 times,   */
/* $L50 handles the leftover M & 1.                                    */
$L30:
|
|
kusano |
2b45e8 |
/* J = N & 1; when it is zero nothing is left to do, so jump straight  */
/* to the epilogue at $L999.                                           */
and N, 1, J
|
|
kusano |
2b45e8 |
ble J, $L999
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
/* C1 = C and AO = A: restart the output and A working pointers.       */
mov C, C1
|
|
kusano |
2b45e8 |
mov A, AO
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
/* TRMM build with LEFT: KK restarts from OFFSET for this path.        */
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
mov OFFSET, KK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |

|
kusano |
2b45e8 |
/* I = M >> 1 (arithmetic shift) is the trip count of the $L41 loop;   */
/* when it is not positive, skip directly to $L50.                     */
sra M, 1, I
|
|
kusano |
2b45e8 |
ble I, $L50
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L41:
|
|
kusano |
2b45e8 |
#if !defined(TRMMKERNEL) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TRMMKERNEL
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
addq KK, 2, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
addq KK, 1, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t1
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t2
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t3
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c01
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c05
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda BO, 2 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c03
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c07
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
lda L, -2(K)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
fclr c04
|
|
kusano |
2b45e8 |
fclr c08
|
|
kusano |
2b45e8 |
ble L, $L45
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
sll KK, ZBASE_SHIFT + 1, TMP1
|
|
kusano |
2b45e8 |
addq AO, TMP1, AO
|
|
kusano |
2b45e8 |
sll KK, ZBASE_SHIFT + 0, TMP1
|
|
kusano |
2b45e8 |
addq B, TMP1, BO
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t1
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t2
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t3
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c01
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c05
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda BO, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c03
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr c07
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
fclr c04
|
|
kusano |
2b45e8 |
fclr c08
|
|
kusano |
2b45e8 |
ble L, $L45
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L42:
|
|
kusano |
2b45e8 |
ADD4 c05, t1, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t2, c06
|
|
kusano |
2b45e8 |
lda L, -2(L)
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t3, c07
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b1, t3
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t4, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b1, t4
|
|
kusano |
2b45e8 |
LD b1, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t1
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL a2, b2, t2
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c03, t3, c03
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b2, t3
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t4, c04
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b2, t4
|
|
kusano |
2b45e8 |
LD a5, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t1, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b3, t1
|
|
kusano |
2b45e8 |
LD b2, -1 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t2, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b3, t2
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t3, c07
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b3, t3
|
|
kusano |
2b45e8 |
lda AO, 8 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t4, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a5, b3, t4
|
|
kusano |
2b45e8 |
LD b3, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b4, t1
|
|
kusano |
2b45e8 |
LD a1, -4 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b4, t2
|
|
kusano |
2b45e8 |
LD a2, -3 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c03, t3, c03
|
|
kusano |
2b45e8 |
LD a4, -1 * SIZE(AO)
|
|
kusano |
2b45e8 |
MUL a3, b4, t3
|
|
kusano |
2b45e8 |
LD a3, -2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t4, c04
|
|
kusano |
2b45e8 |
MUL a5, b4, t4
|
|
kusano |
2b45e8 |
LD b4, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
bgt L, $L42
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L45:
|
|
kusano |
2b45e8 |
ADD4 c05, t1, c05
|
|
kusano |
2b45e8 |
ldt alpha_r, ALPHA_R
|
|
kusano |
2b45e8 |
MUL b1, a1, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
blbs K, $L48
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
blbs TMP1, $L48
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t2, c06
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
ADD4 c07, t3, c07
|
|
kusano |
2b45e8 |
MUL a3, b1, t3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t4, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b1, t4
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t1
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t2
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c03, t3, c03
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b2, t3
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c04, t4, c04
|
|
kusano |
2b45e8 |
MUL a4, b2, t4
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t1, c05
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
lda BO, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L48:
|
|
kusano |
2b45e8 |
ADD2 c06, t2, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
ldt alpha_i, ALPHA_I
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c07, t3, c07
|
|
kusano |
2b45e8 |
lda I, -1(I)
|
|
kusano |
2b45e8 |
MUL a3, b1, t3
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c09, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c08, t4, c08
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b1, t4
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c10, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c11, 2 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t2
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c12, 3 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c03, t3, c03
|
|
kusano |
2b45e8 |
MUL a3, b2, t3
|
|
kusano |
2b45e8 |
ADD3 c04, t4, c04
|
|
kusano |
2b45e8 |
MUL a4, b2, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t1, c05
|
|
kusano |
2b45e8 |
ADD2 c06, t2, c06
|
|
kusano |
2b45e8 |
ADD4 c07, t3, c07
|
|
kusano |
2b45e8 |
ADD2 c08, t4, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c01, c06, c01
|
|
kusano |
2b45e8 |
ADD c02, c05, c02
|
|
kusano |
2b45e8 |
ADD c03, c08, c03
|
|
kusano |
2b45e8 |
ADD c04, c07, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MUL alpha_r, c01, t1
|
|
kusano |
2b45e8 |
MUL alpha_r, c02, t2
|
|
kusano |
2b45e8 |
MUL alpha_r, c03, t3
|
|
kusano |
2b45e8 |
MUL alpha_r, c04, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
ADD c09, t1, c09
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t1
|
|
kusano |
2b45e8 |
ADD c10, t2, c10
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c11, t3, c11
|
|
kusano |
2b45e8 |
MUL alpha_i, c04, t3
|
|
kusano |
2b45e8 |
ADD c12, t4, c12
|
|
kusano |
2b45e8 |
MUL alpha_i, c03, t4
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ADD $f31, t1, c09
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t1
|
|
kusano |
2b45e8 |
ADD $f31, t2, c10
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD $f31, t3, c11
|
|
kusano |
2b45e8 |
MUL alpha_i, c04, t3
|
|
kusano |
2b45e8 |
ADD $f31, t4, c12
|
|
kusano |
2b45e8 |
MUL alpha_i, c03, t4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB c09, t1, c09
|
|
kusano |
2b45e8 |
ADD c10, t2, c10
|
|
kusano |
2b45e8 |
SUB c11, t3, c11
|
|
kusano |
2b45e8 |
ADD c12, t4, c12
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST c09, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST c10, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST c11, 2 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST c12, 3 * SIZE(C1)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda C1, 4 * SIZE(C1)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if (defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
subq TMP1, 2, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
subq TMP1, 1, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
sll TMP1, ZBASE_SHIFT + 1, TMP2
|
|
kusano |
2b45e8 |
addq AO, TMP2, AO
|
|
kusano |
2b45e8 |
sll TMP1, ZBASE_SHIFT + 0, TMP2
|
|
kusano |
2b45e8 |
addq BO, TMP2, BO
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(TRMMKERNEL) && defined(LEFT)
|
|
kusano |
2b45e8 |
addq KK, 2, KK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bgt I, $L41
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L50:
|
|
kusano |
2b45e8 |
and M, 1, I
|
|
kusano |
2b45e8 |
ble I, $L999
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(TRMMKERNEL) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && defined(LEFT) && defined(TRANSA)) || \
|
|
kusano |
2b45e8 |
(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TRMMKERNEL
|
|
kusano |
2b45e8 |
#ifdef LEFT
|
|
kusano |
2b45e8 |
addq KK, 1, TMP1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
addq KK, 1, TMP1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t1
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t2
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t3
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c01
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c05
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(B)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda AO, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
lda BO, 2 * SIZE(B)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
lda L, -2(K)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
ble L, $L55
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
sll KK, ZBASE_SHIFT + 0, TMP1
|
|
kusano |
2b45e8 |
addq AO, TMP1, AO
|
|
kusano |
2b45e8 |
addq B, TMP1, BO
|
|
kusano |
2b45e8 |
subq K, KK, TMP1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t1
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t2
|
|
kusano |
2b45e8 |
LD a3, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t3
|
|
kusano |
2b45e8 |
LD a4, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
fclr t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c01
|
|
kusano |
2b45e8 |
LD b2, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c05
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b3, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c02
|
|
kusano |
2b45e8 |
LD b4, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
fclr c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda AO, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
lda BO, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lda L, -2(TMP1)
|
|
kusano |
2b45e8 |
ble L, $L55
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L52:
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
lda AO, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
LD b1, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
lda L, -2(L)
|
|
kusano |
2b45e8 |
MUL a1, b2, t3
|
|
kusano |
2b45e8 |
LD a1, -2 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t4
|
|
kusano |
2b45e8 |
LD a2, -1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
LD b2, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL a3, b3, t1
|
|
kusano |
2b45e8 |
lda BO, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a4, b3, t2
|
|
kusano |
2b45e8 |
LD b3, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a3, b4, t3
|
|
kusano |
2b45e8 |
LD a3, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
MUL a4, b4, t4
|
|
kusano |
2b45e8 |
LD b4, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a4, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
bgt L, $L52
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L55:
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
ldt alpha_r, ALPHA_R
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
blbs K, $L58
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
blbs TMP1, $L58
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
lda BO, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL a1, b2, t3
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t4
|
|
kusano |
2b45e8 |
LD a2, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
LD b2, -1 * SIZE(BO)
|
|
kusano |
2b45e8 |
MUL a1, b1, t1
|
|
kusano |
2b45e8 |
lda AO, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
$L58:
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b1, t2
|
|
kusano |
2b45e8 |
ldt alpha_i, ALPHA_I
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a1, b2, t3
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c03, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
MUL a2, b2, t4
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
LD c04, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
unop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD1 c01, t1, c01
|
|
kusano |
2b45e8 |
ADD3 c02, t2, c02
|
|
kusano |
2b45e8 |
ADD4 c05, t3, c05
|
|
kusano |
2b45e8 |
ADD2 c06, t4, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ADD c01, c06, c01
|
|
kusano |
2b45e8 |
ADD c02, c05, c02
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MUL alpha_r, c01, t1
|
|
kusano |
2b45e8 |
MUL alpha_r, c02, t2
|
|
kusano |
2b45e8 |
MUL alpha_i, c02, t3
|
|
kusano |
2b45e8 |
MUL alpha_i, c01, t4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef TRMMKERNEL
|
|
kusano |
2b45e8 |
ADD c03, t1, c03
|
|
kusano |
2b45e8 |
ADD c04, t2, c04
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ADD $f31, t1, c03
|
|
kusano |
2b45e8 |
ADD $f31, t2, c04
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
SUB c03, t3, c03
|
|
kusano |
2b45e8 |
ADD c04, t4, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST c03, 0 * SIZE(C1)
|
|
kusano |
2b45e8 |
ST c04, 1 * SIZE(C1)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* $L999: common exit of the routine.  Reloads floating-point          */
/* registers $f2..$f9 from offsets 0..56 of the stack frame, zeroes    */
/* $0, releases STACKSIZE bytes of stack and returns.                  */
/* NOTE(review): the matching stores of $f2..$f9 are presumably in the */
/* prologue, which is outside this chunk -- confirm the offsets agree. */
$L999:
|
|
kusano |
2b45e8 |
ldt $f2, 0($sp)
|
|
kusano |
2b45e8 |
ldt $f3, 8($sp)
|
|
kusano |
2b45e8 |
ldt $f4, 16($sp)
|
|
kusano |
2b45e8 |
ldt $f5, 24($sp)
|
|
kusano |
2b45e8 |
ldt $f6, 32($sp)
|
|
kusano |
2b45e8 |
ldt $f7, 40($sp)
|
|
kusano |
2b45e8 |
ldt $f8, 48($sp)
|
|
kusano |
2b45e8 |
ldt $f9, 56($sp)
|
|
kusano |
2b45e8 |
/* $0 = 0 ($0 is the integer return-value register in the Alpha        */
/* calling standard, so the routine returns zero).                     */
clr $0
|
|
kusano |
2b45e8 |
/* $sp = $sp + STACKSIZE: pop the frame.                               */
lda $sp, STACKSIZE($sp)
|
|
kusano |
2b45e8 |
ret
|
|
kusano |
2b45e8 |
.ident VERSION
|
|
kusano |
2b45e8 |
.end CNAME
|