/* Alpha assembly vector copy kernel: copies N elements from X to Y. */
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"
#include "version.h"

/*
 * Register aliases for the routine arguments.
 * NOTE(review): $16-$20 are presumably the first five integer argument
 * registers of the calling convention in use; confirm against the C
 * prototype this kernel is called through.
 */
/* N: element count; tested with ble and split into blocks with sra/and. */
#define N	$16
/* X: source address; every LD in this file reads through it. */
#define X	$17
/* INCX: source stride; compared against 1 and added to X via SXADDQ. */
#define INCX	$18
/* Y: destination address; every ST in this file writes through it. */
#define Y	$19
/* INCY: destination stride; compared against 1 and added to Y via SXADDQ. */
#define INCY	$20

	/*
	 * Vector copy: writes to Y the N elements read from X.
	 *
	 * Inputs (register aliases defined at the top of this file):
	 *   N         element count; the routine returns at once when N <= 0
	 *   X, INCX   source address and source stride
	 *   Y, INCY   destination address and destination stride
	 *
	 * With COMPLEX defined an element is two consecutive SIZE-sized
	 * values (offsets 0 and SIZE); otherwise it is a single value.
	 *
	 * Two code paths:
	 *   - INCX == 1 and INCY == 1: contiguous copy in blocks of 16
	 *     values, where the stores of one block are interleaved with
	 *     the loads of the next, followed by a one-element-at-a-time
	 *     remainder loop.
	 *   - anything else ($Sub): strided copy in blocks of 16 values,
	 *     each block fully loaded before it is stored, followed by a
	 *     one-element-at-a-time remainder loop.
	 *
	 * Registers written: $0, $1, $4, $5, $f10-$f25.  X and Y are
	 * advanced in place; INCX and INCY are doubled on the strided path
	 * when COMPLEX is defined.  No stack memory is touched by the code
	 * visible here.
	 *
	 * NOTE(review): PROLOGUE, PROFCODE, EPILOGUE, LD, ST, SIZE and
	 * SXADDQ are not defined in this file (presumably common.h).  The
	 * comments below assume LD/ST are a floating-point load/store of
	 * one SIZE-sized value and SXADDQ a, b, c computes c = b + a*SIZE;
	 * confirm against their definitions.
	 */
	PROLOGUE
	PROFCODE
	/* Zero-size frame based on $sp, return address in $26. */
	.frame	$sp, 0, $26, 0

	/* NOTE(review): the PROFILE build uses .prologue 1, presumably
	   because PROFCODE then needs $gp set up; confirm in common.h. */
#ifndef PROFILE
	.prologue 0
#else
	.prologue 1
#endif

	/* $0 = (INCX == 1) */
	cmpeq	INCX,  1, $0
	/* Nothing to copy when N <= 0. */
	ble	N, $End
	/* $4 = number of full 16-value blocks:
	   N / 16 real elements, or N / 8 complex elements. */
#ifndef COMPLEX
	sra	N,  4, $4
#else
	sra	N,  3, $4
#endif
	/* $1 = (INCY == 1) */
	cmpeq	INCY,  1, $1

	/* Take the strided path unless both strides are 1. */
	and	$0,  $1, $0
	beq	$0, $Sub
	/* $5 = elements left over after the full blocks. */
#ifndef COMPLEX
	and	N, 15, $5
#else
	and	N,  7, $5
#endif
	/* Fewer elements than one block: only the remainder loop runs. */
	ble	$4,  $Remain

	/* Preload the first block of 16 values into $f10-$f25. */
	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	LD	$f12,  2*SIZE(X)
	LD	$f13,  3*SIZE(X)
	LD	$f14,  4*SIZE(X)
	LD	$f15,  5*SIZE(X)
	LD	$f16,  6*SIZE(X)
	LD	$f17,  7*SIZE(X)

	LD	$f18,  8*SIZE(X)
	LD	$f19,  9*SIZE(X)
	LD	$f20, 10*SIZE(X)
	LD	$f21, 11*SIZE(X)
	LD	$f22, 12*SIZE(X)
	LD	$f23, 13*SIZE(X)
	LD	$f24, 14*SIZE(X)
	LD	$f25, 15*SIZE(X)

	/* One block is now held in registers: count it, step X past it,
	   and go straight to the final stores if it was the only block. */
	subq	$4, 1, $4
	lda	X, 16*SIZE(X)
	ble	$4, $MainLoopEnd
	.align 4

	/*
	 * Invariant at loop entry: $f10-$f25 hold a block not yet stored,
	 * Y points at its destination, X points at the next block to load,
	 * and $4 > 0 is the number of blocks still to load.
	 * Each group of four registers is stored and then immediately
	 * reloaded, so loads of the next block are issued between stores
	 * of the current one; results for overlapping X and Y depend on
	 * this exact ordering.
	 */
$MainLoop:
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	ST	$f12,  2*SIZE(Y)
	ST	$f13,  3*SIZE(Y)

	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	LD	$f12,  2*SIZE(X)
	LD	$f13,  3*SIZE(X)

	ST	$f14,  4*SIZE(Y)
	ST	$f15,  5*SIZE(Y)
	ST	$f16,  6*SIZE(Y)
	ST	$f17,  7*SIZE(Y)

	LD	$f14,  4*SIZE(X)
	LD	$f15,  5*SIZE(X)
	LD	$f16,  6*SIZE(X)
	LD	$f17,  7*SIZE(X)

	ST	$f18,  8*SIZE(Y)
	ST	$f19,  9*SIZE(Y)
	ST	$f20, 10*SIZE(Y)
	ST	$f21, 11*SIZE(Y)

	LD	$f18,  8*SIZE(X)
	LD	$f19,  9*SIZE(X)
	LD	$f20, 10*SIZE(X)
	LD	$f21, 11*SIZE(X)

	ST	$f22, 12*SIZE(Y)
	ST	$f23, 13*SIZE(Y)
	ST	$f24, 14*SIZE(Y)
	ST	$f25, 15*SIZE(Y)

	LD	$f22, 12*SIZE(X)
	LD	$f23, 13*SIZE(X)
	LD	$f24, 14*SIZE(X)
	LD	$f25, 15*SIZE(X)

	/* One more block loaded; advance both pointers by a block. */
	subq	$4, 1, $4
	lda	Y, 16*SIZE(Y)
	lda	X, 16*SIZE(X)
	bgt	$4, $MainLoop
	.align 4

	/* Store the last loaded block, which has no successor to load. */
$MainLoopEnd:
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	ST	$f12,  2*SIZE(Y)
	ST	$f13,  3*SIZE(Y)
	ST	$f14,  4*SIZE(Y)
	ST	$f15,  5*SIZE(Y)
	ST	$f16,  6*SIZE(Y)
	ST	$f17,  7*SIZE(Y)

	ST	$f18,  8*SIZE(Y)
	ST	$f19,  9*SIZE(Y)
	ST	$f20, 10*SIZE(Y)
	ST	$f21, 11*SIZE(Y)
	ST	$f22, 12*SIZE(Y)
	ST	$f23, 13*SIZE(Y)
	ST	$f24, 14*SIZE(Y)
	ST	$f25, 15*SIZE(Y)

	/* Y now points just past the last full block. */
	lda	Y, 16*SIZE(Y)
	.align 4

	/* Contiguous remainder: $5 leftover elements, one per iteration. */
$Remain:
	ble	$5, $End
	.align 4

$RemainLoop:
#ifndef COMPLEX
	LD	$f10,  0*SIZE(X)
	lda	X,   1*SIZE(X)
	ST	$f10,  0*SIZE(Y)
	lda	Y,   1*SIZE(Y)
#else
	/* A complex element is two consecutive values. */
	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	lda	X,   2*SIZE(X)
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	lda	Y,   2*SIZE(Y)
#endif
	subq	$5, 1, $5
	bgt	$5, $RemainLoop
	.align 4
	/* Exit for N <= 0 and for the contiguous path. */
$End:
	ret
	.align 4

	/*
	 * Strided path: reached when INCX != 1 or INCY != 1.
	 * $4 (full-block count) was computed before the branch here;
	 * $5 (leftover elements) is computed below.
	 */
$Sub:
#ifdef COMPLEX
	/* Double both strides: a complex element spans two values
	   (assuming SXADDQ scales its first operand by SIZE). */
	addq	INCX, INCX, INCX
	addq	INCY, INCY, INCY
	and	N,  7, $5
#else
	and	N, 15, $5
#endif
	ble	$4, $SubRemain
	.align 4

	/*
	 * One iteration copies 16 values (16 real or 8 complex elements).
	 * All loads of the block are issued before any of its stores;
	 * X and Y are stepped by their stride after every element.
	 */
$SubMainLoop:
#ifndef COMPLEX
	LD	$f10,  0(X)
	SXADDQ	INCX, X, X
	LD	$f11,  0(X)
	SXADDQ	INCX, X, X

	LD	$f12,  0(X)
	SXADDQ	INCX, X, X
	LD	$f13,  0(X)
	SXADDQ	INCX, X, X

	LD	$f14,  0(X)
	SXADDQ	INCX, X, X
	LD	$f15,  0(X)
	SXADDQ	INCX, X, X

	LD	$f16,  0(X)
	SXADDQ	INCX, X, X
	LD	$f17,  0(X)
	SXADDQ	INCX, X, X

	LD	$f18,  0(X)
	SXADDQ	INCX, X, X
	LD	$f19,  0(X)
	SXADDQ	INCX, X, X

	LD	$f20,  0(X)
	SXADDQ	INCX, X, X
	LD	$f21,  0(X)
	SXADDQ	INCX, X, X

	LD	$f22,  0(X)
	SXADDQ	INCX, X, X
	LD	$f23,  0(X)
	SXADDQ	INCX, X, X

	LD	$f24,  0(X)
	SXADDQ	INCX, X, X
	LD	$f25,  0(X)
	SXADDQ	INCX, X, X

	/* Store the 16 loaded elements in the same order. */
	ST	$f10,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f11,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f12,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f13,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f14,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f15,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f16,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f17,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f18,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f19,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f20,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f21,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f22,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f23,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f24,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f25,  0(Y)
	SXADDQ	INCY, Y, Y
#else
	/* Eight complex elements: two values each, at offsets 0 and SIZE. */
	LD	$f10,    0(X)
	LD	$f11, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f12,    0(X)
	LD	$f13, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f14,    0(X)
	LD	$f15, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f16,    0(X)
	LD	$f17, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f18,    0(X)
	LD	$f19, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f20,    0(X)
	LD	$f21, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f22,    0(X)
	LD	$f23, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f24,    0(X)
	LD	$f25, SIZE(X)
	SXADDQ	INCX, X, X

	/* Store the eight loaded elements in the same order. */
	ST	$f10,    0(Y)
	ST	$f11, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f12,    0(Y)
	ST	$f13, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f14,    0(Y)
	ST	$f15, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f16,    0(Y)
	ST	$f17, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f18,    0(Y)
	ST	$f19, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f20,    0(Y)
	ST	$f21, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f22,    0(Y)
	ST	$f23, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f24,    0(Y)
	ST	$f25, SIZE(Y)
	SXADDQ	INCY, Y, Y
#endif
	/* Next full block, if any remain. */
	subq	$4, 1, $4
	bgt	$4, $SubMainLoop
	.align 4

	/* Strided remainder: $5 leftover elements, one per iteration. */
$SubRemain:
	ble	$5, $SubEnd
	.align 4

 $SubRemainLoop:
#ifndef COMPLEX
	LD	$f10,  0(X)
	SXADDQ	INCX, X, X
	ST	$f10,  0(Y)
	SXADDQ	INCY, Y, Y
#else
	LD	$f10,    0(X)
	LD	$f11, SIZE(X)
	SXADDQ	INCX, X, X
	ST	$f10,    0(Y)
	ST	$f11, SIZE(Y)
	SXADDQ	INCY, Y, Y
#endif
	subq	$5, 1, $5
	bgt	$5, $SubRemainLoop
	.align 4

	/* Exit for the strided path. */
$SubEnd:
	ret
	EPILOGUE