/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define ASSEMBLER
#include "common.h"
#include "version.h"
/* Symbolic names for the integer registers this routine reads its operands from. */
/* Roles below are as used in this file: */
/*   N    - element count; the routine returns immediately when N <= 0 */
/*   X    - base address that is loaded from (source vector) */
/*   INCX - source stride; compared against 1 and used to step X */
/*   Y    - base address that is stored to (destination vector) */
/*   INCY - destination stride; compared against 1 and used to step Y */
/* NOTE(review): $16-$20 look like the incoming integer argument registers; */
/* confirm against the prototype the callers use. */
#define N $16
#define X $17
#define INCX $18
#define Y $19
#define INCY $20
/* Vector copy kernel: copies N elements from X to Y. */
/* */
/* When COMPLEX is defined each element is two consecutive SIZE-sized values */
/* (handled as a pair); otherwise each element is a single value. */
/* */
/* Two code paths: */
/*   - unit stride (INCX == 1 and INCY == 1): contiguous copy, 16 values per */
/*     unrolled iteration, followed by a one-element-at-a-time remainder loop; */
/*   - any other stride ($Sub): X and Y are stepped by INCX / INCY after every */
/*     element, again 16 values per unrolled iteration plus a remainder loop. */
/* */
/* Registers written: $0, $1, $4, $5, $f10-$f25, and the operand registers */
/* X and Y (plus INCX and INCY in the COMPLEX strided path). */
/* No result is placed in $0 before returning; at ret it still holds the */
/* stride test value computed at entry. */
/* */
/* PROLOGUE, PROFCODE, EPILOGUE, LD, ST, SIZE and SXADDQ are not defined in */
/* this file. NOTE(review): presumably they come from common.h; LD/ST are */
/* assumed to be the floating-point load/store for the element type and */
/* SXADDQ a scaled add that multiplies its first operand by SIZE - confirm. */
PROLOGUE
PROFCODE
.frame $sp, 0, $26, 0
#ifndef PROFILE
.prologue 0
#else
.prologue 1
#endif
/* $0 = (INCX == 1) */
cmpeq INCX, 1, $0
/* Nothing to copy when N <= 0. */
ble N, $End
/* $4 = number of full unrolled iterations: 16 real elements or 8 complex */
/* elements each, i.e. 16 values either way. */
#ifndef COMPLEX
sra N, 4, $4
#else
sra N, 3, $4
#endif
/* $1 = (INCY == 1); take the strided path unless both strides are 1. */
cmpeq INCY, 1, $1
and $0, $1, $0
beq $0, $Sub
/* ---- Unit-stride path ---- */
/* $5 = elements left over after the full unrolled iterations. */
#ifndef COMPLEX
and N, 15, $5
#else
and N, 7, $5
#endif
/* Fewer than one full block: go straight to the remainder loop. */
ble $4, $Remain
/* Preload the first block of 16 values into $f10-$f25. */
LD $f10, 0*SIZE(X)
LD $f11, 1*SIZE(X)
LD $f12, 2*SIZE(X)
LD $f13, 3*SIZE(X)
LD $f14, 4*SIZE(X)
LD $f15, 5*SIZE(X)
LD $f16, 6*SIZE(X)
LD $f17, 7*SIZE(X)
LD $f18, 8*SIZE(X)
LD $f19, 9*SIZE(X)
LD $f20, 10*SIZE(X)
LD $f21, 11*SIZE(X)
LD $f22, 12*SIZE(X)
LD $f23, 13*SIZE(X)
LD $f24, 14*SIZE(X)
LD $f25, 15*SIZE(X)
/* One block is now held in registers. X moves past it, Y does not move yet, */
/* so inside the loop X addresses the next block while Y addresses the */
/* block currently in registers. */
subq $4, 1, $4
lda X, 16*SIZE(X)
/* If that was the only full block, skip the loop and just store it. */
ble $4, $MainLoopEnd
.align 4
$MainLoop:
/* Each group of four stores writes values loaded on the previous pass */
/* (or by the preload); the four loads that follow refill the same */
/* registers from the next block. */
ST $f10, 0*SIZE(Y)
ST $f11, 1*SIZE(Y)
ST $f12, 2*SIZE(Y)
ST $f13, 3*SIZE(Y)
LD $f10, 0*SIZE(X)
LD $f11, 1*SIZE(X)
LD $f12, 2*SIZE(X)
LD $f13, 3*SIZE(X)
ST $f14, 4*SIZE(Y)
ST $f15, 5*SIZE(Y)
ST $f16, 6*SIZE(Y)
ST $f17, 7*SIZE(Y)
LD $f14, 4*SIZE(X)
LD $f15, 5*SIZE(X)
LD $f16, 6*SIZE(X)
LD $f17, 7*SIZE(X)
ST $f18, 8*SIZE(Y)
ST $f19, 9*SIZE(Y)
ST $f20, 10*SIZE(Y)
ST $f21, 11*SIZE(Y)
LD $f18, 8*SIZE(X)
LD $f19, 9*SIZE(X)
LD $f20, 10*SIZE(X)
LD $f21, 11*SIZE(X)
ST $f22, 12*SIZE(Y)
ST $f23, 13*SIZE(Y)
ST $f24, 14*SIZE(Y)
ST $f25, 15*SIZE(Y)
LD $f22, 12*SIZE(X)
LD $f23, 13*SIZE(X)
LD $f24, 14*SIZE(X)
LD $f25, 15*SIZE(X)
/* Count down the remaining blocks and advance both pointers by one block. */
subq $4, 1, $4
lda Y, 16*SIZE(Y)
lda X, 16*SIZE(X)
bgt $4, $MainLoop
.align 4
$MainLoopEnd:
/* Drain: store the last block that is still held in $f10-$f25. */
ST $f10, 0*SIZE(Y)
ST $f11, 1*SIZE(Y)
ST $f12, 2*SIZE(Y)
ST $f13, 3*SIZE(Y)
ST $f14, 4*SIZE(Y)
ST $f15, 5*SIZE(Y)
ST $f16, 6*SIZE(Y)
ST $f17, 7*SIZE(Y)
ST $f18, 8*SIZE(Y)
ST $f19, 9*SIZE(Y)
ST $f20, 10*SIZE(Y)
ST $f21, 11*SIZE(Y)
ST $f22, 12*SIZE(Y)
ST $f23, 13*SIZE(Y)
ST $f24, 14*SIZE(Y)
ST $f25, 15*SIZE(Y)
/* Y now catches up with X (X was advanced ahead of the loop). */
lda Y, 16*SIZE(Y)
.align 4
$Remain:
/* Done if there are no leftover elements. */
ble $5, $End
.align 4
$RemainLoop:
/* Copy one leftover element per iteration: one value for real data, */
/* two consecutive values for complex data. */
#ifndef COMPLEX
LD $f10, 0*SIZE(X)
lda X, 1*SIZE(X)
ST $f10, 0*SIZE(Y)
lda Y, 1*SIZE(Y)
#else
LD $f10, 0*SIZE(X)
LD $f11, 1*SIZE(X)
lda X, 2*SIZE(X)
ST $f10, 0*SIZE(Y)
ST $f11, 1*SIZE(Y)
lda Y, 2*SIZE(Y)
#endif
subq $5, 1, $5
bgt $5, $RemainLoop
.align 4
$End:
ret
.align 4
/* ---- Strided path: taken when INCX != 1 or INCY != 1 ---- */
/* $4 (full-block count) was already computed before the branch here. */
$Sub:
/* For complex data both strides are doubled, since each element spans */
/* two values. $5 = leftover element count, as in the unit-stride path. */
#ifdef COMPLEX
addq INCX, INCX, INCX
addq INCY, INCY, INCY
and N, 7, $5
#else
and N, 15, $5
#endif
/* Fewer than one full block: go straight to the strided remainder loop. */
ble $4, $SubRemain
.align 4
$SubMainLoop:
/* One block per iteration: first gather all 16 values from X into */
/* $f10-$f25, stepping X by INCX after each element, then scatter them */
/* to Y, stepping Y by INCY after each element. */
#ifndef COMPLEX
LD $f10, 0(X)
SXADDQ INCX, X, X
LD $f11, 0(X)
SXADDQ INCX, X, X
LD $f12, 0(X)
SXADDQ INCX, X, X
LD $f13, 0(X)
SXADDQ INCX, X, X
LD $f14, 0(X)
SXADDQ INCX, X, X
LD $f15, 0(X)
SXADDQ INCX, X, X
LD $f16, 0(X)
SXADDQ INCX, X, X
LD $f17, 0(X)
SXADDQ INCX, X, X
LD $f18, 0(X)
SXADDQ INCX, X, X
LD $f19, 0(X)
SXADDQ INCX, X, X
LD $f20, 0(X)
SXADDQ INCX, X, X
LD $f21, 0(X)
SXADDQ INCX, X, X
LD $f22, 0(X)
SXADDQ INCX, X, X
LD $f23, 0(X)
SXADDQ INCX, X, X
LD $f24, 0(X)
SXADDQ INCX, X, X
LD $f25, 0(X)
SXADDQ INCX, X, X
ST $f10, 0(Y)
SXADDQ INCY, Y, Y
ST $f11, 0(Y)
SXADDQ INCY, Y, Y
ST $f12, 0(Y)
SXADDQ INCY, Y, Y
ST $f13, 0(Y)
SXADDQ INCY, Y, Y
ST $f14, 0(Y)
SXADDQ INCY, Y, Y
ST $f15, 0(Y)
SXADDQ INCY, Y, Y
ST $f16, 0(Y)
SXADDQ INCY, Y, Y
ST $f17, 0(Y)
SXADDQ INCY, Y, Y
ST $f18, 0(Y)
SXADDQ INCY, Y, Y
ST $f19, 0(Y)
SXADDQ INCY, Y, Y
ST $f20, 0(Y)
SXADDQ INCY, Y, Y
ST $f21, 0(Y)
SXADDQ INCY, Y, Y
ST $f22, 0(Y)
SXADDQ INCY, Y, Y
ST $f23, 0(Y)
SXADDQ INCY, Y, Y
ST $f24, 0(Y)
SXADDQ INCY, Y, Y
ST $f25, 0(Y)
SXADDQ INCY, Y, Y
#else
/* Complex: each element is the pair at offsets 0 and SIZE, so a block */
/* is 8 elements held in 8 register pairs. */
LD $f10, 0(X)
LD $f11, SIZE(X)
SXADDQ INCX, X, X
LD $f12, 0(X)
LD $f13, SIZE(X)
SXADDQ INCX, X, X
LD $f14, 0(X)
LD $f15, SIZE(X)
SXADDQ INCX, X, X
LD $f16, 0(X)
LD $f17, SIZE(X)
SXADDQ INCX, X, X
LD $f18, 0(X)
LD $f19, SIZE(X)
SXADDQ INCX, X, X
LD $f20, 0(X)
LD $f21, SIZE(X)
SXADDQ INCX, X, X
LD $f22, 0(X)
LD $f23, SIZE(X)
SXADDQ INCX, X, X
LD $f24, 0(X)
LD $f25, SIZE(X)
SXADDQ INCX, X, X
ST $f10, 0(Y)
ST $f11, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f12, 0(Y)
ST $f13, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f14, 0(Y)
ST $f15, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f16, 0(Y)
ST $f17, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f18, 0(Y)
ST $f19, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f20, 0(Y)
ST $f21, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f22, 0(Y)
ST $f23, SIZE(Y)
SXADDQ INCY, Y, Y
ST $f24, 0(Y)
ST $f25, SIZE(Y)
SXADDQ INCY, Y, Y
#endif
/* Next block, while any full blocks remain. */
subq $4, 1, $4
bgt $4, $SubMainLoop
.align 4
$SubRemain:
/* Done if there are no leftover elements. */
ble $5, $SubEnd
.align 4
$SubRemainLoop:
/* Copy one leftover element per iteration, stepping X and Y by their strides. */
#ifndef COMPLEX
LD $f10, 0(X)
SXADDQ INCX, X, X
ST $f10, 0(Y)
SXADDQ INCY, Y, Y
#else
LD $f10, 0(X)
LD $f11, SIZE(X)
SXADDQ INCX, X, X
ST $f10, 0(Y)
ST $f11, SIZE(Y)
SXADDQ INCY, Y, Y
#endif
subq $5, 1, $5
bgt $5, $SubRemainLoop
.align 4
$SubEnd:
ret
EPILOGUE