/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
/*
 * Preprocessor setup and register aliases for the rotation kernel below.
 *
 * ASSEMBLER is defined before the project headers are included; the headers
 * are expected to supply the LD/ST/MUL/ADD/SUB/SXADDQ/SIZE and
 * PROLOGUE/PROFCODE/EPILOGUE macros used by the code (they are not defined
 * in this file).
 */
#define ASSEMBLER
#include "common.h"
#include "version.h"

/* Integer registers.  N, X, INCX, Y and INCY are read by the code without  */
/* being written first, i.e. they are the incoming arguments.               */
#define N	$16
#define X	$17
#define INCX	$18
#define Y	$19
#define INCY	$20
/* Loop counter (written by the code before it is read).                    */
#define I	$21
/* Store cursors for the strided path.  $23/$24 are also used under their   */
/* raw names as scratch for the stride test at routine entry.               */
#define XX	$23
#define YY	$24

/* Rotation coefficients: C is copied from $f21 and S is loaded from        */
/* 0($sp) at routine entry.                                                 */
#define C	$f10
#define S	$f11

/* Look-ahead distance, in elements, of the $f31 prefetch loads in the      */
/* unrolled unit-stride loop.                                               */
#define PREFETCH_SIZE 80
/*
 * Routine entry.
 *
 * Applies, element by element,
 *     x[i] <- C * x[i] + S * y[i]
 *     y[i] <- C * y[i] - S * x[i]
 * (assuming the MUL/ADD/SUB macros from common.h are the floating-point
 * multiply/add/subtract their names suggest -- confirm against common.h).
 *
 * Inputs as used by the code: N ($16), X ($17), INCX ($18), Y ($19),
 * INCY ($20), C in $f21, S at 0($sp).  No stack frame is allocated
 * (.frame size 0, return address in $26).
 */
	PROLOGUE
	PROFCODE
	.frame	$sp, 0, $26, 0

#ifndef PROFILE
	.prologue	0
#else
	.prologue	1
#endif

	fmov	$f21, C			/* free $f21 for use as a temporary below */
	LD	S, 0($sp)		/* S is taken from the stack */

	cmpeq	INCX, 1, $23		/* $23 = (INCX == 1) */
	cmpeq	INCY, 1, $24		/* $24 = (INCY == 1) */
	ble	N, $L998		/* N <= 0: nothing to do */

	and	$23, $24, $23		/* both strides equal to 1? */
	beq	$23, $L50		/* no: take the general strided path */
/*
 * Unit-stride path: fill the software pipeline for the 8-way unrolled loop.
 *
 * I = N >> 3 is the number of full 8-element groups; with none, go straight
 * to the scalar remainder loop at $L15.
 *
 * State handed to $L12 / $L13 (offsets relative to the current X / Y):
 *   $f22 = C*x[0] + S*y[0]      $f24 = C*y[0] - S*x[0]     (ready to store)
 *   $f25..$f28 = C*x[1], S*y[1], C*y[1], S*x[1]            (not yet combined)
 *   $f16..$f19 = x[2], y[2], x[3], y[3]
 *   $f12, $f13 = x[4], y[4]     $f15 = y[5]
 *   I has already been decremented for the group in flight.
 */
	sra	N, 3, I
	ble	I, $L15

	LD	$f12, 0*SIZE(X)
	LD	$f13, 0*SIZE(Y)
	LD	$f14, 1*SIZE(X)
	LD	$f15, 1*SIZE(Y)

	LD	$f16, 2*SIZE(X)
	LD	$f17, 2*SIZE(Y)
	LD	$f18, 3*SIZE(X)
	LD	$f19, 3*SIZE(Y)

	MUL	C, $f12, $f21
	unop				/* no-op, instruction scheduling filler */
	MUL	S, $f13, $f22
	MUL	C, $f13, $f23

	LD	$f13, 4*SIZE(Y)		/* reload only after the last use of y[0] */
	MUL	S, $f12, $f24
	LD	$f12, 4*SIZE(X)		/* reload only after the last use of x[0] */
	MUL	C, $f14, $f25

	lda	I, -1(I)
	MUL	S, $f15, $f26
	ADD	$f21, $f22, $f22	/* new x[0] */
	MUL	C, $f15, $f27

	LD	$f15, 5*SIZE(Y)
	MUL	S, $f14, $f28
	SUB	$f23, $f24, $f24	/* new y[0] */
	ble	I, $L13			/* single group: drain without looping */
	.align 4
/*
 * $L12: steady-state body of the unit-stride loop, 8 elements per trip.
 *
 * Each trip stores the rotated x[0..7] / y[0..7] of the current group while
 * loading and starting the products for the next group, so the register
 * contents at the bottom match what the top expects again.  Instruction
 * order is scheduling-sensitive: every LD into $f12..$f19 is placed after
 * the last MUL that reads the previous value of that register.
 *
 * X and Y are advanced by 8 elements part-way through the trip; accesses
 * after that point use offsets relative to the new base (-1 is element 7 of
 * the group being finished, 4 and 5 are elements of the next group).
 */
$L12:
	MUL	C, $f16, $f21
	lds	$f31, (PREFETCH_SIZE) * SIZE(X)	/* result discarded ($f31): prefetch of X */
	unop
	LD	$f14, 5*SIZE(X)

	ST	$f22, 0*SIZE(X)
	MUL	S, $f17, $f22
	unop
	ADD	$f25, $f26, $f26

	MUL	C, $f17, $f23
	lds	$f31, (PREFETCH_SIZE) * SIZE(Y)	/* result discarded ($f31): prefetch of Y */
	unop
	LD	$f17, 6*SIZE(Y)

	ST	$f24, 0*SIZE(Y)
	MUL	S, $f16, $f24
	unop
	SUB	$f27, $f28, $f28

	MUL	C, $f18, $f25
	LD	$f16, 6*SIZE(X)
	unop
	unop

	ST	$f26, 1*SIZE(X)
	MUL	S, $f19, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f19, $f27
	unop
	unop
	LD	$f19, 7*SIZE(Y)

	ST	$f28, 1*SIZE(Y)
	MUL	S, $f18, $f28
	unop
	SUB	$f23, $f24, $f24

	MUL	C, $f12, $f21
	LD	$f18, 7*SIZE(X)
	unop
	unop

	ST	$f22, 2*SIZE(X)
	unop
	MUL	S, $f13, $f22
	ADD	$f25, $f26, $f26

	MUL	C, $f13, $f23
	LD	$f13, 8*SIZE(Y)		/* first element of the next group */
	unop
	unop

	ST	$f24, 2*SIZE(Y)
	MUL	S, $f12, $f24
	unop
	SUB	$f27, $f28, $f28

	MUL	C, $f14, $f25
	LD	$f12, 8*SIZE(X)		/* first element of the next group */
	unop
	unop

	ST	$f26, 3*SIZE(X)
	MUL	S, $f15, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f15, $f27
	LD	$f15, 9*SIZE(Y)
	unop
	unop

	ST	$f28, 3*SIZE(Y)
	MUL	S, $f14, $f28
	unop
	SUB	$f23, $f24, $f24

	MUL	C, $f16, $f21
	LD	$f14, 9*SIZE(X)
	unop
	unop

	ST	$f22, 4*SIZE(X)
	MUL	S, $f17, $f22
	unop
	ADD	$f25, $f26, $f26

	MUL	C, $f17, $f23
	LD	$f17, 10*SIZE(Y)
	unop
	unop

	ST	$f24, 4*SIZE(Y)
	MUL	S, $f16, $f24
	unop
	SUB	$f27, $f28, $f28

	MUL	C, $f18, $f25
	LD	$f16, 10*SIZE(X)
	unop
	unop

	ST	$f26, 5*SIZE(X)
	MUL	S, $f19, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f19, $f27
	LD	$f19, 11*SIZE(Y)
	unop
	unop

	ST	$f28, 5*SIZE(Y)
	MUL	S, $f18, $f28
	lda	I, -1(I)		/* one fewer group left after the one in flight */
	SUB	$f23, $f24, $f24

	MUL	C, $f12, $f21
	LD	$f18, 11*SIZE(X)
	unop
	unop

	ST	$f22, 6*SIZE(X)
	MUL	S, $f13, $f22
	unop
	ADD	$f25, $f26, $f26

	MUL	C, $f13, $f23
	LD	$f13, 12*SIZE(Y)
	lda	X, 8*SIZE(X)		/* X now points at the next group */
	unop

	ST	$f24, 6*SIZE(Y)
	MUL	S, $f12, $f24
	unop
	SUB	$f27, $f28, $f28

	MUL	C, $f14, $f25
	LD	$f12, 4*SIZE(X)		/* relative to the advanced X */
	lda	Y, 8*SIZE(Y)		/* Y now points at the next group */
	unop

	ST	$f26, -1*SIZE(X)	/* element 7 of the group just finished */
	MUL	S, $f15, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f15, $f27
	LD	$f15, 5*SIZE(Y)		/* relative to the advanced Y */
	unop
	unop

	ST	$f28, -1*SIZE(Y)	/* element 7 of the group just finished */
	MUL	S, $f14, $f28
	SUB	$f23, $f24, $f24
	bgt	I, $L12
	.align 4
/*
 * $L13: drain the software pipeline for the last full 8-element group.
 *
 * Same store/compute sequence as the loop body at $L12, but without loads
 * beyond element 7 of the current group and without prefetches.  Only
 * x[5], x[6], y[6], x[7] and y[7] still have to be loaded here; the other
 * operands are already in registers on entry.  X and Y are advanced by 8
 * elements at the end so the remainder code at $L15 continues from the
 * right place.
 */
$L13:
	MUL	C, $f16, $f21
	LD	$f14, 5*SIZE(X)
	unop
	unop

	ST	$f22, 0*SIZE(X)
	MUL	S, $f17, $f22
	unop
	ADD	$f25, $f26, $f26

	MUL	C, $f17, $f23
	unop
	unop
	LD	$f17, 6*SIZE(Y)

	ST	$f24, 0*SIZE(Y)
	MUL	S, $f16, $f24
	LD	$f16, 6*SIZE(X)
	SUB	$f27, $f28, $f28

	MUL	C, $f18, $f25
	unop
	unop
	unop

	ST	$f26, 1*SIZE(X)
	MUL	S, $f19, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f19, $f27
	unop
	unop
	LD	$f19, 7*SIZE(Y)

	ST	$f28, 1*SIZE(Y)
	MUL	S, $f18, $f28
	LD	$f18, 7*SIZE(X)
	SUB	$f23, $f24, $f24

	MUL	C, $f12, $f21
	unop
	unop
	unop

	ST	$f22, 2*SIZE(X)
	unop
	MUL	S, $f13, $f22
	ADD	$f25, $f26, $f26

	MUL	C, $f13, $f23
	unop
	unop
	unop

	ST	$f24, 2*SIZE(Y)
	MUL	S, $f12, $f24
	unop
	SUB	$f27, $f28, $f28

	MUL	C, $f14, $f25
	unop
	unop
	unop

	ST	$f26, 3*SIZE(X)
	MUL	S, $f15, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f15, $f27
	unop
	unop
	unop

	ST	$f28, 3*SIZE(Y)
	MUL	S, $f14, $f28
	unop
	SUB	$f23, $f24, $f24

	MUL	C, $f16, $f21
	unop
	unop
	unop

	ST	$f22, 4*SIZE(X)
	MUL	S, $f17, $f22
	unop
	ADD	$f25, $f26, $f26

	MUL	C, $f17, $f23
	unop
	unop
	unop

	ST	$f24, 4*SIZE(Y)
	MUL	S, $f16, $f24
	unop
	SUB	$f27, $f28, $f28

	MUL	C, $f18, $f25
	unop
	unop
	unop

	ST	$f26, 5*SIZE(X)
	MUL	S, $f19, $f26
	unop
	ADD	$f21, $f22, $f22

	MUL	C, $f19, $f27
	unop
	unop
	unop

	ST	$f28, 5*SIZE(Y)
	MUL	S, $f18, $f28
	unop
	SUB	$f23, $f24, $f24

	/* Nothing left to multiply: combine and store elements 6 and 7. */
	ST	$f22, 6*SIZE(X)
	ADD	$f25, $f26, $f26
	ST	$f24, 6*SIZE(Y)
	SUB	$f27, $f28, $f28

	ST	$f26, 7*SIZE(X)
	lda	X, 8*SIZE(X)		/* step past the finished group */
	ST	$f28, 7*SIZE(Y)
	lda	Y, 8*SIZE(Y)
	.align 4

/*
 * $L15 / $L16: unit-stride remainder.  Handles the N & 7 elements left
 * after the unrolled groups, one element per trip, then falls through to
 * the return sequence at $L998.
 */
$L15:
	and	N, 7, I			/* I = N mod 8 */
	ble	I, $L998
	.align 4

$L16:
	LD	$f12, 0*SIZE(X)
	LD	$f13, 0*SIZE(Y)

	MUL	C, $f12, $f21
	MUL	S, $f13, $f22
	MUL	C, $f13, $f23
	MUL	S, $f12, $f24

	ADD	$f21, $f22, $f25	/* C*x + S*y */
	SUB	$f23, $f24, $f26	/* C*y - S*x */
	lda	I, -1(I)

	ST	$f25, 0*SIZE(X)
	lda	X, 1 * SIZE(X)
	ST	$f26, 0*SIZE(Y)
	lda	Y, 1 * SIZE(Y)

	bgt	I, $L16
	.align 4

/* Return for the unit-stride path, and for N <= 0 at routine entry.       */
/* $0 is cleared before returning (presumably the integer return value --  */
/* confirm against the C prototype of this kernel).                        */
$L998:
	clr	$0
	ret
	.align 4
/*
 * $L50 / $L51: general strided path, 8 elements per trip in two batches
 * of four.
 *
 * X / Y are the load cursors and XX / YY the store cursors; both pairs
 * start at the same addresses and advance with SXADDQ by INCX / INCY
 * (SXADDQ comes from common.h; presumably a size-scaled add, i.e.
 * "ptr += inc * SIZE" -- confirm there).  Each batch loads four x/y pairs
 * through X / Y, then rotates and stores them through XX / YY, so after
 * every batch the two cursor pairs point at the same element again.
 */
$L50:
	mov	X, XX
	mov	Y, YY

	sra	N, 3, I			/* number of full 8-element groups */
	ble	I, $L55
	.align 4

$L51:
	/* Batch 1: load four x/y pairs. */
	LD	$f12, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f13, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	LD	$f14, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f15, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	LD	$f16, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f17, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	LD	$f18, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f19, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	/* Pair 1 */
	MUL	C, $f12, $f21
	MUL	S, $f13, $f22
	MUL	C, $f13, $f23
	MUL	S, $f12, $f24

	ADD	$f21, $f22, $f22	/* C*x + S*y */
	SUB	$f23, $f24, $f24	/* C*y - S*x */

	ST	$f22, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f24, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	/* Pair 2 */
	MUL	C, $f14, $f25
	MUL	S, $f15, $f26
	MUL	C, $f15, $f27
	MUL	S, $f14, $f28

	ADD	$f25, $f26, $f26
	SUB	$f27, $f28, $f28

	ST	$f26, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f28, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	/* Pair 3 */
	MUL	C, $f16, $f21
	MUL	S, $f17, $f22
	MUL	C, $f17, $f23
	MUL	S, $f16, $f24

	ADD	$f21, $f22, $f22
	SUB	$f23, $f24, $f24

	ST	$f22, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f24, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	/* Pair 4 */
	MUL	C, $f18, $f25
	MUL	S, $f19, $f26
	MUL	C, $f19, $f27
	MUL	S, $f18, $f28

	ADD	$f25, $f26, $f26
	SUB	$f27, $f28, $f28

	ST	$f26, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f28, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY


	/* Batch 2: same sequence for the next four x/y pairs. */
	LD	$f12, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f13, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	LD	$f14, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f15, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	LD	$f16, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f17, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	LD	$f18, 0*SIZE(X)
	SXADDQ	INCX, X, X
	LD	$f19, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	/* Pair 5 */
	MUL	C, $f12, $f21
	MUL	S, $f13, $f22
	MUL	C, $f13, $f23
	MUL	S, $f12, $f24

	ADD	$f21, $f22, $f22
	SUB	$f23, $f24, $f24

	ST	$f22, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f24, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	/* Pair 6 */
	MUL	C, $f14, $f25
	MUL	S, $f15, $f26
	MUL	C, $f15, $f27
	MUL	S, $f14, $f28

	ADD	$f25, $f26, $f26
	SUB	$f27, $f28, $f28

	ST	$f26, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f28, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	/* Pair 7 */
	MUL	C, $f16, $f21
	MUL	S, $f17, $f22
	MUL	C, $f17, $f23
	MUL	S, $f16, $f24

	ADD	$f21, $f22, $f22
	SUB	$f23, $f24, $f24

	ST	$f22, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f24, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	/* Pair 8 */
	MUL	C, $f18, $f25
	MUL	S, $f19, $f26
	MUL	C, $f19, $f27
	MUL	S, $f18, $f28

	ADD	$f25, $f26, $f26
	SUB	$f27, $f28, $f28

	ST	$f26, 0*SIZE(XX)
	SXADDQ	INCX, XX, XX
	ST	$f28, 0*SIZE(YY)
	SXADDQ	INCY, YY, YY

	lda	I, -1(I)
	bgt	I, $L51
	.align 4
/*
 * $L55 / $L56: strided remainder.  Handles the N & 7 elements left after
 * the 8-way strided loop, one element per trip.  Loads and stores both go
 * through X / Y here (the separate XX / YY store cursors are not needed
 * because each element is stored before the pointers advance).
 */
$L55:
	and	N, 7, I			/* I = N mod 8 */
	ble	I, $L999
	.align 4

$L56:
	LD	$f12, 0*SIZE(X)
	LD	$f13, 0*SIZE(Y)

	MUL	C, $f12, $f21
	MUL	S, $f13, $f22
	MUL	C, $f13, $f23
	MUL	S, $f12, $f24

	ADD	$f21, $f22, $f25	/* C*x + S*y */
	SUB	$f23, $f24, $f26	/* C*y - S*x */
	lda	I, -1(I)

	ST	$f25, 0*SIZE(X)
	SXADDQ	INCX, X, X
	ST	$f26, 0*SIZE(Y)
	SXADDQ	INCY, Y, Y

	bgt	I, $L56
	.align 4

/* Return for the strided path; $0 is cleared before returning. */
$L999:
	clr	$0
	ret
	EPILOGUE