|
kusano |
2b45e8 |
/*****************************************************************************
|
|
kusano |
2b45e8 |
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
|
kusano |
2b45e8 |
All rights reserved.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
Redistribution and use in source and binary forms, with or without
|
|
kusano |
2b45e8 |
modification, are permitted provided that the following conditions are
|
|
kusano |
2b45e8 |
met:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
1. Redistributions of source code must retain the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
2. Redistributions in binary form must reproduce the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer in
|
|
kusano |
2b45e8 |
the documentation and/or other materials provided with the
|
|
kusano |
2b45e8 |
distribution.
|
|
kusano |
2b45e8 |
3. Neither the name of the ISCAS nor the names of its contributors may
|
|
kusano |
2b45e8 |
be used to endorse or promote products derived from this software
|
|
kusano |
2b45e8 |
without specific prior written permission.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
kusano |
2b45e8 |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
kusano |
2b45e8 |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
kusano |
2b45e8 |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
kusano |
2b45e8 |
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
kusano |
2b45e8 |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
kusano |
2b45e8 |
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
kusano |
2b45e8 |
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
kusano |
2b45e8 |
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
kusano |
2b45e8 |
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
**********************************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ASSEMBLER
|
|
kusano |
2b45e8 |
#include "common.h"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Look-ahead used in the PREFETCHD operands of the unrolled loops. It is
   multiplied by SIZE at every use, so the unit is elements, not bytes. */
#define PREFETCH_DISTANCE 2016
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Integer register aliases: element count (N), the two vector pointers (X, Y),
   their strides (INCX, INCY), loop counter (I), scratch (TEMP) and a second
   Y pointer (YY).
   NOTE(review): N, X, INCX, Y, INCY are presumably the incoming argument
   registers of the kernel calling convention - confirm against the caller. */
#define N $4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define X $8
|
|
kusano |
2b45e8 |
#define INCX $9
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define Y $10
|
|
kusano |
2b45e8 |
#define INCY $11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define I $2
|
|
kusano |
2b45e8 |
#define TEMP $3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define YY $5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ALPHA $f15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define a1 $f0
|
|
kusano |
2b45e8 |
#define a2 $f1
|
|
kusano |
2b45e8 |
#define a3 $f2
|
|
kusano |
2b45e8 |
#define a4 $f3
|
|
kusano |
2b45e8 |
#define a5 $f4
|
|
kusano |
2b45e8 |
#define a6 $f5
|
|
kusano |
2b45e8 |
#define a7 $f6
|
|
kusano |
2b45e8 |
#define a8 $f7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define a9 $f8
|
|
kusano |
2b45e8 |
#define a10 $f9
|
|
kusano |
2b45e8 |
#define a11 $f10
|
|
kusano |
2b45e8 |
#define a12 $f11
|
|
kusano |
2b45e8 |
#define a13 $f12
|
|
kusano |
2b45e8 |
#define a14 $f13
|
|
kusano |
2b45e8 |
#define a15 $f14
|
|
kusano |
2b45e8 |
#define a16 $f17
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define t1 $f18
|
|
kusano |
2b45e8 |
#define t2 $f19
|
|
kusano |
2b45e8 |
#define t3 $f20
|
|
kusano |
2b45e8 |
#define t4 $f21
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define b1 $f22
|
|
kusano |
2b45e8 |
#define b2 $f23
|
|
kusano |
2b45e8 |
#define b3 $f24
|
|
kusano |
2b45e8 |
#define b4 $f25
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define b5 $f26
|
|
kusano |
2b45e8 |
#define b6 $f27
|
|
kusano |
2b45e8 |
#define b7 $f28
|
|
kusano |
2b45e8 |
#define b8 $f29
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Bare FP register numbers mirroring the a1..a16, t1..t4 and b1..b8 aliases
   above (A1 = 0 for $f0 ... B8 = 29 for $f29). The gsLQC1/gsSQC1 macros build
   raw instruction words and therefore need plain integers, not $f names.
   Keep both sets of defines in sync. */
#define A1 0
|
|
kusano |
2b45e8 |
#define A2 1
|
|
kusano |
2b45e8 |
#define A3 2
|
|
kusano |
2b45e8 |
#define A4 3
|
|
kusano |
2b45e8 |
#define A5 4
|
|
kusano |
2b45e8 |
#define A6 5
|
|
kusano |
2b45e8 |
#define A7 6
|
|
kusano |
2b45e8 |
#define A8 7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define A9 8
|
|
kusano |
2b45e8 |
#define A10 9
|
|
kusano |
2b45e8 |
#define A11 10
|
|
kusano |
2b45e8 |
#define A12 11
|
|
kusano |
2b45e8 |
#define A13 12
|
|
kusano |
2b45e8 |
#define A14 13
|
|
kusano |
2b45e8 |
#define A15 14
|
|
kusano |
2b45e8 |
#define A16 17
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define T1 18
|
|
kusano |
2b45e8 |
#define T2 19
|
|
kusano |
2b45e8 |
#define T3 20
|
|
kusano |
2b45e8 |
#define T4 21
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define B1 22
|
|
kusano |
2b45e8 |
#define B2 23
|
|
kusano |
2b45e8 |
#define B3 24
|
|
kusano |
2b45e8 |
#define B4 25
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define B5 26
|
|
kusano |
2b45e8 |
#define B6 27
|
|
kusano |
2b45e8 |
#define B7 28
|
|
kusano |
2b45e8 |
#define B8 29
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* GPR numbers of X ($8) and Y ($10), used as the base-register field of the
   gsLQC1/gsSQC1 encodings. Must match the X and Y defines. */
#define X_BASE 8
|
|
kusano |
2b45e8 |
#define Y_BASE 10
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Emits one hand-assembled instruction word: 0x32 in bits 31..26, the base
   GPR number at bit 21, ft at bit 16, bit 15 set, offset starting at bit 6,
   bit 5 set, and fq in the low bits. All arguments are plain integers.
   At the call sites ft receives the element at the lower address and fq the
   following one, and eight consecutive offsets cover a 16-element block.
   NOTE(review): presumably the Loongson gslqc1 quad-word FP load, which the
   assembler in use may not know by mnemonic - confirm against the CPU manual. */
#define gsLQC1_(base,fq,ft,offset) .word (0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
|
kusano |
2b45e8 |
/* Public form of the quad load: wraps every argument in parentheses before
   it is pasted into the shift/or expression of gsLQC1_. */
#define gsLQC1(base,fq,ft,offset) gsLQC1_((base), (fq), (ft), (offset))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Store counterpart of gsLQC1_: identical field layout, but with 0x3A in
   bits 31..26. The call sites use it to write a pair of results (ft, fq)
   relative to the base register.
   NOTE(review): presumably the Loongson gssqc1 quad-word FP store - confirm
   against the CPU manual. */
#define gsSQC1_(base,fq,ft,offset) .word (0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
|
|
kusano |
2b45e8 |
/* Public form of the quad store: wraps every argument in parentheses before
   it is pasted into the shift/or expression of gsSQC1_. */
#define gsSQC1(base,fq,ft,offset) gsSQC1_((base), (fq), (ft), (offset))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Kernel body. For each of the N elements the routine loads x from X and y
 * from Y, computes MADD(result, y, ALPHA, x) and stores the result back to
 * the position y was read from.
 * NOTE(review): MADD, LD, ST, SIZE, BASE_SHIFT, PREFETCHD, NOP, PROLOGUE and
 * EPILOGUE are defined outside this file (common.h). Presumably MADD is a
 * multiply-add giving y + ALPHA * x, i.e. an AXPY kernel - confirm.
 *
 * Inputs as used below:
 *   N          element count; N <= 0 returns immediately
 *   X, Y       vector pointers
 *   INCX, INCY strides in elements (shifted left by BASE_SHIFT to get bytes)
 *   ALPHA      scalar, read from $f15
 *
 * Code paths:
 *   both strides equal to one element, X and Y aligned -> .L11 / .L12 / .L13
 *   both strides equal to one element, X misaligned    -> .L30 / .L31 / .L32
 *   (both handle 16 elements per block, leftovers go to .L15 / .L16)
 *   any other stride                                   -> .L20 / .L22 / .L23
 *   (8 elements per block, leftovers go to .L25 / .L26)
 * "Aligned" here means bit 3 of the address is clear (andi with 8).
 */
PROLOGUE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Spill the FP registers that are reloaded before each return:
//$f20/$f22/$f24/$f26/$f28 (40 bytes of stack) when __64BIT__ is not defined,
//$f24..$f29 (48 bytes of stack) otherwise. These are among the registers
//aliased as t3 and b1..b8 and are overwritten by the loops below.
//NOTE(review): presumably the callee-saved FP set of each ABI - confirm.
#ifndef __64BIT__
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, -40
|
|
kusano |
2b45e8 |
sdc1 $f20, 0($sp)
|
|
kusano |
2b45e8 |
sdc1 $f22, 8($sp)
|
|
kusano |
2b45e8 |
sdc1 $f24, 16($sp)
|
|
kusano |
2b45e8 |
sdc1 $f26, 24($sp)
|
|
kusano |
2b45e8 |
sdc1 $f28, 32($sp)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, -48
|
|
kusano |
2b45e8 |
sdc1 $f24, 0($sp)
|
|
kusano |
2b45e8 |
sdc1 $f25, 8($sp)
|
|
kusano |
2b45e8 |
sdc1 $f26, 16($sp)
|
|
kusano |
2b45e8 |
sdc1 $f27, 24($sp)
|
|
kusano |
2b45e8 |
sdc1 $f28, 32($sp)
|
|
kusano |
2b45e8 |
sdc1 $f29, 40($sp)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//TEMP = size of one element in bytes. The byte strides computed right after
//the branches below are compared against it to detect the unit-stride case;
//anything else is sent to the general path at .L20.
li TEMP, SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez N, .L999
|
|
kusano |
2b45e8 |
dsll INCX, INCX, BASE_SHIFT
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bne INCX, TEMP, .L20
|
|
kusano |
2b45e8 |
dsll INCY, INCY, BASE_SHIFT
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bne INCY, TEMP, .L20
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Is the address of Y 16-byte aligned? (tests bit 3 of the pointer)
|
|
kusano |
2b45e8 |
andi TEMP, Y, 8
|
|
kusano |
2b45e8 |
beq TEMP, $0, .L10
|
|
kusano |
2b45e8 |
//Y is not aligned: process this single unaligned element on its own first.
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu X, X, SIZE
|
|
kusano |
2b45e8 |
daddiu Y, Y, SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
daddiu N, N, -1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t1, -1 * SIZE(Y)
|
|
kusano |
2b45e8 |
blez N, .L999
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Unit-stride path. I = number of full 16-element blocks (N >> 4); when there
//is none, control goes straight to the leftover handling at .L15.
.L10:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
dsra I, N, 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L15
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Y is aligned. Now the address of X has to be tested.
|
|
kusano |
2b45e8 |
//Is the address of X 16-byte aligned? (tests bit 3 of the pointer)
|
|
kusano |
2b45e8 |
andi TEMP, X, 8
|
|
kusano |
2b45e8 |
//X has bit 3 set: use the .L30 variant, which first consumes one X element
//with a scalar load.
//NOTE(review): no explicit instruction fills the slot after this branch. What
//follows is the .align padding or, if no padding is emitted, the first quad
//load of .L11 from X. Confirm this is harmless when the file is assembled
//with branch delay slots exposed.
bne TEMP, $0, .L30 ///
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L11:
|
|
kusano |
2b45e8 |
//X and Y are both aligned: preload 16 X values and the first 8 Y values.
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A2,A1,0)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A4,A3,1)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A6,A5,2)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A8,A7,3)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A10,A9,4)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A12,A11,5)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A14,A13,6)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A16,A15,7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,0)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,1)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,2)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,3)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L13
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Main loop of the aligned path, run only while another block follows (I > 0).
//Each pass finishes the block whose operands are already in a1..a16 and
//b1..b8: it stores 16 results with quad stores at offsets 0..7, fetches the
//second half of Y for this block (offsets 4..7), then reloads the registers
//for the next block (Y offsets 8..11, X offsets 8..15) before X and Y are
//advanced by 16 elements.
.L12:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a2
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 0)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a3
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a4
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 1)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,5)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD(PREFETCH_DISTANCE*SIZE(Y))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(Y))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a5
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a6
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 2)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a7
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a8
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 3)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7, 7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+8)*SIZE(Y))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+12)*SIZE(Y))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a9
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a10
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 4)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,8)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a11
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a12
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 5)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,9)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD(PREFETCH_DISTANCE*SIZE(X))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(X))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a13
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a14
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 6)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,10)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a15
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a16
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 7)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,11)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+8)*SIZE(X))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+12)*SIZE(X))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A2,A1,8)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A4,A3,9)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A6,A5,10)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A8,A7,11)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A10,A9,12)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A12,A11,13)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A14,A13,14)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A16,A15,15)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
daddiu Y, Y, 16 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu X, X, 16 * SIZE
|
|
kusano |
2b45e8 |
bgtz I, .L12
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Last 16-element block of the aligned path: same arithmetic and stores as
//.L12, but nothing beyond this block is loaded and no prefetches are issued.
.L13:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a2
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 0)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a3
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a4
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 1)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,5)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a5
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a6
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 2)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a7
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a8
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 3)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a9
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a10
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a11
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a12
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 5)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a13
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a14
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a15
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a16
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu X, X, 16 * SIZE
|
|
kusano |
2b45e8 |
daddiu Y, Y, 16 * SIZE
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Leftover elements of the unit-stride paths: I = N & 15. Nothing left means
//return through .L999.
.L15:
|
|
kusano |
2b45e8 |
andi I, N, 15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L999
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//One element per pass. The ST placed after the bgtz has to execute on every
//pass, so it relies on occupying the branch delay slot; Y has already been
//advanced by then, hence the -1 * SIZE displacement.
.L16:
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu X, X, SIZE
|
|
kusano |
2b45e8 |
daddiu Y, Y, SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bgtz I, .L16
|
|
kusano |
2b45e8 |
ST t1, -1 * SIZE(Y)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Return sequence for the unit-stride paths, reached by falling out of the
//.L16 loop. It repeats the register restore and stack release found at .L999.
#ifndef __64BIT__
|
|
kusano |
2b45e8 |
ldc1 $f20, 0($sp)
|
|
kusano |
2b45e8 |
ldc1 $f22, 8($sp)
|
|
kusano |
2b45e8 |
ldc1 $f24, 16($sp)
|
|
kusano |
2b45e8 |
ldc1 $f26, 24($sp)
|
|
kusano |
2b45e8 |
ldc1 $f28, 32($sp)
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, 40
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ldc1 $f24, 0($sp)
|
|
kusano |
2b45e8 |
ldc1 $f25, 8($sp)
|
|
kusano |
2b45e8 |
ldc1 $f26, 16($sp)
|
|
kusano |
2b45e8 |
ldc1 $f27, 24($sp)
|
|
kusano |
2b45e8 |
ldc1 $f28, 32($sp)
|
|
kusano |
2b45e8 |
ldc1 $f29, 40($sp)
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, 48
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
j $31
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.L30:
|
|
kusano |
2b45e8 |
//Y aligned, X not aligned, INCX == INCY == 1
|
|
kusano |
2b45e8 |
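//Unrolled by 16: one scalar load takes the first X element, so the quad loads that follow start one element later.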
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddiu X, X, SIZE
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A3,A2,0)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A5,A4,1)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A7,A6,2)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A9,A8,3)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A11,A10,4)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A13,A12,5)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A15,A14,6)
|
|
kusano |
2b45e8 |
LD a16, 14 * SIZE(X)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,0)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,1)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,2)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,3)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L32
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Main loop of the X-misaligned path. The Y side and the stores follow the
//same schedule as .L12. X was moved one element ahead on entry, so the next
//block is fetched with a scalar load at 15 * SIZE (a1), quad loads at offsets
//8..14 (a2..a15) and a scalar load at 30 * SIZE (a16).
.L31:
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a2
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 0)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a3
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a4
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 1)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,5)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD(PREFETCH_DISTANCE*SIZE(Y))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(Y))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a5
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a6
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 2)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a7
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a8
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 3)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+8)*SIZE(Y))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+12)*SIZE(Y))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a9
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a10
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 4)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,8)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a11
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a12
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 5)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,9)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD(PREFETCH_DISTANCE*SIZE(X))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+4)*SIZE(X))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a13
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a14
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 6)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,10)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a15
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a16
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 7)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,11)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+8)*SIZE(X))
|
|
kusano |
2b45e8 |
PREFETCHD((PREFETCH_DISTANCE+12)*SIZE(X))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 15 * SIZE(X)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A3,A2,8)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A5,A4,9)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A7,A6,10)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A9,A8,11)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A11,A10,12)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A13,A12,13)
|
|
kusano |
2b45e8 |
gsLQC1(X_BASE,A15,A14,14)
|
|
kusano |
2b45e8 |
LD a16, 30 * SIZE(X)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
daddiu Y, Y, 16 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu X, X, 16 * SIZE
|
|
kusano |
2b45e8 |
bgtz I, .L31
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
//Last block of the X-misaligned path: no more X loads, only the rest of this block of Y.
|
|
kusano |
2b45e8 |
.L32:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a2
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 0)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B2,B1,4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a3
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a4
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 1)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B4,B3,5)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a5
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a6
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 2)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B6,B5,6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a7
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a8
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 3)
|
|
kusano |
2b45e8 |
gsLQC1(Y_BASE,B8,B7,7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a9
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a10
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 4)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a11
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a12
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 5)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a13
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a14
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T2, T1, 6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a15
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a16
|
|
kusano |
2b45e8 |
gsSQC1(Y_BASE, T4, T3, 7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
daddiu X, X, 15 * SIZE
|
|
kusano |
2b45e8 |
daddiu Y, Y, 16 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//X advances by only 15 elements above because it was already moved one element ahead at .L30. Jump back to the leftover handling.
|
|
kusano |
2b45e8 |
b .L15
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//General strided path: INCX != 1 or INCY != 1
|
|
kusano |
2b45e8 |
//I = number of full 8-element blocks (N >> 3). YY keeps the starting Y as
//the store pointer, because Y itself is advanced while the next operands are
//preloaded. The first 8 x/y pairs are loaded here before the loop.
.L20:
|
|
kusano |
2b45e8 |
dsra I, N, 3
|
|
kusano |
2b45e8 |
move YY, Y
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L25
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a2, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b2, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a3, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b3, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a4, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b4, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a5, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b5, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a6, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b6, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a7, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b7, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
LD a8, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
LD b8, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L23
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Strided main loop. Each MADD consumes a preloaded x/y pair and the same
//registers are then reloaded from the next block through X and Y; results
//are written through YY. The daddu placed after the closing bgtz supplies
//the YY advance that follows the eighth store.
.L22:
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a2
|
|
kusano |
2b45e8 |
LD a2, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b2, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a3
|
|
kusano |
2b45e8 |
LD a3, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b3, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a4
|
|
kusano |
2b45e8 |
LD a4, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b4, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t1, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a5, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b5, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t2, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a6
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a6, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b6, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t3, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a7, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b7, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t4, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD a8, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LD b8, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t1, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
ST t2, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
ST t3, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
ST t4, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bgtz I, .L22
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Last full block of the strided path: compute and store the 8 preloaded
//pairs through YY without loading anything further.
.L23:
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
MADD t2, b2, ALPHA, a2
|
|
kusano |
2b45e8 |
MADD t3, b3, ALPHA, a3
|
|
kusano |
2b45e8 |
MADD t4, b4, ALPHA, a4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t1, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t1, b5, ALPHA, a5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t2, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t2, b6, ALPHA, a6
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t3, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t3, b7, ALPHA, a7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t4, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
MADD t4, b8, ALPHA, a8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t1, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
ST t2, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
ST t3, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
ST t4, 0 * SIZE(YY)
|
|
kusano |
2b45e8 |
daddu YY, YY, INCY
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Leftover elements of the strided path: I = N & 7. Nothing left means
//return through .L999.
.L25:
|
|
kusano |
2b45e8 |
andi I, N, 7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blez I, .L999
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//One element per pass, loaded and stored through Y. The daddu placed after
//the bgtz advances Y for the next pass.
.L26:
|
|
kusano |
2b45e8 |
LD a1, 0 * SIZE(X)
|
|
kusano |
2b45e8 |
LD b1, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
MADD t1, b1, ALPHA, a1
|
|
kusano |
2b45e8 |
daddu X, X, INCX
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
ST t1, 0 * SIZE(Y)
|
|
kusano |
2b45e8 |
daddiu I, I, -1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
bgtz I, .L26
|
|
kusano |
2b45e8 |
daddu Y, Y, INCY
|
|
kusano |
2b45e8 |
.align 5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//Common exit: reload the FP registers spilled on entry, release the stack
//area and return through $31.
.L999:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef __64BIT__
|
|
kusano |
2b45e8 |
ldc1 $f20, 0($sp)
|
|
kusano |
2b45e8 |
ldc1 $f22, 8($sp)
|
|
kusano |
2b45e8 |
ldc1 $f24, 16($sp)
|
|
kusano |
2b45e8 |
ldc1 $f26, 24($sp)
|
|
kusano |
2b45e8 |
ldc1 $f28, 32($sp)
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, 40
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ldc1 $f24, 0($sp)
|
|
kusano |
2b45e8 |
ldc1 $f25, 8($sp)
|
|
kusano |
2b45e8 |
ldc1 $f26, 16($sp)
|
|
kusano |
2b45e8 |
ldc1 $f27, 24($sp)
|
|
kusano |
2b45e8 |
ldc1 $f28, 32($sp)
|
|
kusano |
2b45e8 |
ldc1 $f29, 40($sp)
|
|
kusano |
2b45e8 |
daddiu $sp, $sp, 48
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
j $31
|
|
kusano |
2b45e8 |
NOP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
EPILOGUE
|