Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/alpha/gemv_n.S

kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#define ASSEMBLER
kusano 2b45e8
#include "common.h"
kusano 2b45e8
#include "version.h"
kusano 2b45e8
kusano 2b45e8
/* Size in bytes of the stack frame allocated by the prologue.  The   */
/* prologue stores the eight registers $f2-$f9 into it, one quadword  */
/* each, at offsets 0..56.                                            */
#define STACKSIZE     64
kusano 2b45e8
/* Look-ahead distance, in elements (it is multiplied by SIZE), used  */
/* by the loads into $31 / $f31 inside the unrolled loops.  Those     */
/* loads discard their result; presumably they act as prefetch hints. */
#define PREFETCHSIZE  32
kusano 2b45e8
kusano 2b45e8
/* M: number of elements of y processed per column (inner loop count  */
/*    is derived from M >> 3, M & 4, M & 2 and the low bit of M).     */
#define M	$16
kusano 2b45e8
/* N: number of x elements / columns of A consumed (outer loop count  */
/*    is derived from N >> 2, N & 2 and the low bit of N).            */
#define N	$17
kusano 2b45e8
/* A: pointer to the current column group of the matrix; advanced by  */
/*    LDA per column handled.                                         */
#define A	$20
kusano 2b45e8
/* LDA: distance between consecutive columns of A.  Passed through    */
/*      SXADDQ before use; presumably that converts it to bytes.      */
#define	LDA	$21
kusano 2b45e8
kusano 2b45e8
/* X, INCX, Y, INCY and BUFFER are not register arguments: the        */
/* prologue loads them from the caller's stack at offsets 0, 8, 16,   */
/* 24 and 32 above the local frame.                                   */
/* X: pointer to the next x element; stepped by INCX after each load. */
#define X	$18
kusano 2b45e8
/* INCX: stride of x, passed through SXADDQ before use.               */
#define	INCX	$19
kusano 2b45e8
/* Y: base of the vector being accumulated into.  When INCY differs   */
/*    from SIZE this register is swapped with BUFFER, so it then      */
/*    refers to the zero-filled work buffer.                          */
#define Y	$22
kusano 2b45e8
/* INCY: stride of y, passed through SXADDQ before use; compared      */
/*       against SIZE to choose between direct and buffered update.   */
#define	INCY	$23
kusano 2b45e8
kusano 2b45e8
/* BUFFER: work buffer pointer on entry.  After the swap described    */
/*         above it holds the caller's original y pointer.            */
#define BUFFER	$24
kusano 2b45e8
kusano 2b45e8
/* I: inner loop counter (over elements of y).                        */
#define I	$25
kusano 2b45e8
/* J: outer loop counter (over groups of columns).                    */
#define J	$27
kusano 2b45e8
kusano 2b45e8
/* Y1: running pointer into the vector addressed by Y.                */
#define Y1	$4
kusano 2b45e8
kusano 2b45e8
/* A1..A4: running pointers into up to four columns, initialised to   */
/*         A, A + LDA, A + 2*LDA and A + 3*LDA respectively.          */
#define A1	$5
kusano 2b45e8
#define A2	$6
kusano 2b45e8
#define A3	$7
kusano 2b45e8
#define A4	$8
kusano 2b45e8
kusano 2b45e8
/* alpha: scalar multiplier; every x element is multiplied by it      */
/*        before being used.                                          */
#define	alpha	$f19
kusano 2b45e8
kusano 2b45e8
/* alpha1..alpha4: the current x elements, already multiplied by      */
/*                 alpha, one per column being processed.             */
#define	alpha1	$f0
kusano 2b45e8
#define	alpha2	$f1
kusano 2b45e8
#define	alpha3	$f10
kusano 2b45e8
#define	alpha4	$f11
kusano 2b45e8
kusano 2b45e8
/* y0..y7: values loaded from Y1, accumulated into, and stored back.  */
#define	y0	$f12
kusano 2b45e8
#define	y1	$f13
kusano 2b45e8
#define	y2	$f14
kusano 2b45e8
#define	y3	$f15
kusano 2b45e8
kusano 2b45e8
#define	y4	$f16
kusano 2b45e8
#define	y5	$f17
kusano 2b45e8
#define	y6	$f18
kusano 2b45e8
#define	y7	$f21
kusano 2b45e8
kusano 2b45e8
/* a0..a15: elements loaded from the columns of A.  Each is           */
/*          multiplied in place by the matching alphaN and then       */
/*          added into a y register.                                  */
#define	a0	$f22
kusano 2b45e8
#define	a1	$f23
kusano 2b45e8
#define	a2	$f24
kusano 2b45e8
#define	a3	$f25
kusano 2b45e8
#define	a4	$f26
kusano 2b45e8
#define	a5	$f27
kusano 2b45e8
#define	a6	$f28
kusano 2b45e8
#define	a7	$f29
kusano 2b45e8
kusano 2b45e8
/* a8..a15 live in $f2-$f9, which are exactly the registers the       */
/* prologue saves into the local stack frame before they are used.    */
#define	a8	$f2
kusano 2b45e8
#define	a9	$f3
kusano 2b45e8
#define	a10	$f4
kusano 2b45e8
#define	a11	$f5
kusano 2b45e8
#define	a12	$f6
kusano 2b45e8
#define	a13	$f7
kusano 2b45e8
#define	a14	$f8
kusano 2b45e8
#define	a15	$f9
kusano 2b45e8
kusano 2b45e8
	PROLOGUE
kusano 2b45e8
kusano 2b45e8
	lda	$sp,  -STACKSIZE($sp)
kusano 2b45e8
	ldq	X,       0 + STACKSIZE($sp)
kusano 2b45e8
	ldq	INCX,    8 + STACKSIZE($sp)
kusano 2b45e8
	ldq	Y,      16 + STACKSIZE($sp)
kusano 2b45e8
	ldq	INCY,   24 + STACKSIZE($sp)
kusano 2b45e8
	ldq	BUFFER, 32 + STACKSIZE($sp)
kusano 2b45e8
kusano 2b45e8
	stt	$f2,    0($sp)
kusano 2b45e8
	stt	$f3,    8($sp)
kusano 2b45e8
	stt	$f4,   16($sp)
kusano 2b45e8
	stt	$f5,   24($sp)
kusano 2b45e8
	stt	$f6,   32($sp)
kusano 2b45e8
	stt	$f7,   40($sp)
kusano 2b45e8
	stt	$f8,   48($sp)
kusano 2b45e8
	stt	$f9,   56($sp)
kusano 2b45e8
kusano 2b45e8
	PROFCODE
kusano 2b45e8
kusano 2b45e8
	cmple	M, 0, $0
kusano 2b45e8
	SXADDQ	INCX, 0, INCX
kusano 2b45e8
	cmple	N, 0, $1
kusano 2b45e8
	SXADDQ	INCY, 0, INCY
kusano 2b45e8
kusano 2b45e8
	or	$0, $1, $0
kusano 2b45e8
	bne	$0,  $L999
kusano 2b45e8
kusano 2b45e8
	SXADDQ	LDA,  0, LDA
kusano 2b45e8
kusano 2b45e8
	cmpeq	INCY, SIZE, $0
kusano 2b45e8
	bne	$0, $L10
kusano 2b45e8
kusano 2b45e8
	mov	BUFFER, Y1
kusano 2b45e8
kusano 2b45e8
	mov	Y, BUFFER
kusano 2b45e8
	mov	Y1, Y
kusano 2b45e8
kusano 2b45e8
	sra	M, 3, I
kusano 2b45e8
	ble	I, $L05
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L02:
kusano 2b45e8
	ST	$f31,  0 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  1 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  2 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  3 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  4 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  5 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  6 * SIZE(Y1)
kusano 2b45e8
	ST	$f31,  7 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	lda	Y1,    8 * SIZE(Y1)
kusano 2b45e8
	lda	I, -1(I)
kusano 2b45e8
	bgt	I, $L02
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L05:
kusano 2b45e8
	and	M, 7, I
kusano 2b45e8
	ble	I, $L10
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L06:
kusano 2b45e8
	ST	$f31,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, SIZE, Y1
kusano 2b45e8
kusano 2b45e8
	lda	I, -1(I)
kusano 2b45e8
	bgt	I, $L06
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L10:
kusano 2b45e8
	sra	N, 2, J
kusano 2b45e8
	ble	J,  $L20
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L11:
kusano 2b45e8
	LD	alpha1,  0 * SIZE(X)
kusano 2b45e8
	addq	X, INCX, X
kusano 2b45e8
	LD	alpha2,  0 * SIZE(X)
kusano 2b45e8
	addq	X, INCX, X
kusano 2b45e8
	LD	alpha3,  0 * SIZE(X)
kusano 2b45e8
	addq	X, INCX, X
kusano 2b45e8
	LD	alpha4,  0 * SIZE(X)
kusano 2b45e8
	addq	X, INCX, X
kusano 2b45e8
kusano 2b45e8
	MUL	alpha, alpha1, alpha1
kusano 2b45e8
	MUL	alpha, alpha2, alpha2
kusano 2b45e8
	MUL	alpha, alpha3, alpha3
kusano 2b45e8
	MUL	alpha, alpha4, alpha4
kusano 2b45e8
kusano 2b45e8
	mov	A, A1
kusano 2b45e8
	addq	A,  LDA, A2
kusano 2b45e8
	addq	A2, LDA, A3
kusano 2b45e8
	addq	A3, LDA, A4
kusano 2b45e8
	s4addq	LDA, A, A
kusano 2b45e8
kusano 2b45e8
	mov	Y, Y1
kusano 2b45e8
	ldl	$31, 4 * SIZE(X)
kusano 2b45e8
kusano 2b45e8
	sra	M,  3, I
kusano 2b45e8
	ble	I,  $L15
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  2 * SIZE(A1)
kusano 2b45e8
	LD	a3,  3 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	LD	a4,  0 * SIZE(A2)
kusano 2b45e8
	LD	a5,  1 * SIZE(A2)
kusano 2b45e8
	LD	a6,  2 * SIZE(A2)
kusano 2b45e8
	LD	a7,  3 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	LD	y2,  2 * SIZE(Y1)
kusano 2b45e8
	LD	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	LD	a8,  0 * SIZE(A3)
kusano 2b45e8
	LD	a9,  1 * SIZE(A3)
kusano 2b45e8
	LD	a10, 2 * SIZE(A3)
kusano 2b45e8
	LD	a11, 3 * SIZE(A3)
kusano 2b45e8
kusano 2b45e8
	LD	y4,  4 * SIZE(Y1)
kusano 2b45e8
	LD	y5,  5 * SIZE(Y1)
kusano 2b45e8
	LD	y6,  6 * SIZE(Y1)
kusano 2b45e8
	LD	y7,  7 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	a12, 0 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	a13, 1 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	a14, 2 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	a15, 3 * SIZE(A4)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	LD	a0,   4 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	LD	a1,   5 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	LD	a2,   6 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	LD	a3,   7 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	LD	a4,   4 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a8,  a8
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	LD	a5,   5 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a9,  a9
kusano 2b45e8
	lda	I,   -1(I)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a6, y2
kusano 2b45e8
	LD	a6,   6 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a10, a10
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a7, y3
kusano 2b45e8
	LD	a7,   7 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a11, a11
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a8,  y0
kusano 2b45e8
	LD	a8,   4 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a12, a12
kusano 2b45e8
	ble	I, $L13
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L12:
kusano 2b45e8
	ADD	y1, a9,  y1
kusano 2b45e8
	LD	a9,   5 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a13, a13
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a10, y2
kusano 2b45e8
	LD	a10,  6 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a14, a14
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a11, y3
kusano 2b45e8
	LD	a11,  7 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a15, a15
kusano 2b45e8
	lda	I,   -1(I)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a12, y0
kusano 2b45e8
	LD	a12,  4 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	lds	$f31, (PREFETCHSIZE + 0) * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a13, y1
kusano 2b45e8
	LD	a13,  5 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a14, y2
kusano 2b45e8
	LD	a14,  6 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a15, y3
kusano 2b45e8
	LD	a15,  7 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a0, y4
kusano 2b45e8
	ST	y0,   0 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	LD	a0,   8 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a1, y5
kusano 2b45e8
	ST	y1,   1 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	LD	a1,   9 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a2, y6
kusano 2b45e8
	ST	y2,   2 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	LD	a2,  10 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a3, y7
kusano 2b45e8
	ST	y3,   3 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
	LD	a3,  11 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a4, y4
kusano 2b45e8
	LD	a4,   8 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a8,  a8
kusano 2b45e8
	LD	y0,   8 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a5, y5
kusano 2b45e8
	LD	a5,   9 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a9,  a9
kusano 2b45e8
	LD	y1,   9 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a6, y6
kusano 2b45e8
	LD	a6,  10 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a10, a10
kusano 2b45e8
	LD	y2,  10 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a7, y7
kusano 2b45e8
	LD	a7,  11 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a11, a11
kusano 2b45e8
	LD	y3,  11 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a8,  y4
kusano 2b45e8
	LD	a8,   8 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a12, a12
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A3)
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a9,  y5
kusano 2b45e8
	LD	a9,   9 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a13, a13
kusano 2b45e8
	lda	A1,  8 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a10, y6
kusano 2b45e8
	LD	a10, 10 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a14, a14
kusano 2b45e8
	lda	A2,  8 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a11, y7
kusano 2b45e8
	LD	a11, 11 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a15, a15
kusano 2b45e8
	lda	Y1,  8 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a12, y4
kusano 2b45e8
	LD	a12,  8 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a13, y5
kusano 2b45e8
	LD	a13,  9 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	lda	A3,  8 * SIZE(A3)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a14, y6
kusano 2b45e8
	LD	a14, 10 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A4)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a15, y7
kusano 2b45e8
	LD	a15, 11 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	lda	A4,   8 * SIZE(A4)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	LD	a0,   4 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	ST	y4,  -4 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	LD	a1,   5 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	ST	y5,  -3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	LD	a2,   6 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	ST	y6,  -2 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	LD	a3,   7 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
	ST	y7,  -1 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	LD	a4,   4 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a8,  a8
kusano 2b45e8
	LD	y4,   4 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	LD	a5,   5 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a9,  a9
kusano 2b45e8
	LD	y5,   5 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a6, y2
kusano 2b45e8
	LD	a6,   6 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a10, a10
kusano 2b45e8
	LD	y6,   6 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a7, y3
kusano 2b45e8
	LD	a7,   7 * SIZE(A2)
kusano 2b45e8
	MUL	alpha3, a11, a11
kusano 2b45e8
	LD	y7,   7 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a8,  y0
kusano 2b45e8
	LD	a8,   4 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a12, a12
kusano 2b45e8
	bgt	I, $L12
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L13:
kusano 2b45e8
	ADD	y1, a9,  y1
kusano 2b45e8
	LD	a9,  5 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a13, a13
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a10, y2
kusano 2b45e8
	LD	a10, 6 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a14, a14
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a11, y3
kusano 2b45e8
	LD	a11, 7 * SIZE(A3)
kusano 2b45e8
	MUL	alpha4, a15, a15
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a12, y0
kusano 2b45e8
	LD	a12, 4 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a13, y1
kusano 2b45e8
	LD	a13, 5 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a14, y2
kusano 2b45e8
	LD	a14, 6 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a15, y3
kusano 2b45e8
	LD	a15, 7 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	ADD	y4, a0, y4
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	ADD	y5, a1, y5
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
kusano 2b45e8
	ST	y2,  2 * SIZE(Y1)
kusano 2b45e8
	ADD	y6, a2, y6
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
kusano 2b45e8
	ST	y3,  3 * SIZE(Y1)
kusano 2b45e8
	ADD	y7, a3, y7
kusano 2b45e8
	lda	Y1,  8 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a4, y4
kusano 2b45e8
	MUL	alpha3, a8,  a8
kusano 2b45e8
	ADD	y5, a5, y5
kusano 2b45e8
	MUL	alpha3, a9,  a9
kusano 2b45e8
	ADD	y6, a6, y6
kusano 2b45e8
	MUL	alpha3, a10, a10
kusano 2b45e8
	ADD	y7, a7, y7
kusano 2b45e8
	MUL	alpha3, a11, a11
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a8,  y4
kusano 2b45e8
	MUL	alpha4, a12, a12
kusano 2b45e8
	ADD	y5, a9,  y5
kusano 2b45e8
	MUL	alpha4, a13, a13
kusano 2b45e8
	ADD	y6, a10, y6
kusano 2b45e8
	MUL	alpha4, a14, a14
kusano 2b45e8
	ADD	y7, a11, y7
kusano 2b45e8
	MUL	alpha4, a15, a15
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a12, y4
kusano 2b45e8
	ADD	y5, a13, y5
kusano 2b45e8
	ADD	y6, a14, y6
kusano 2b45e8
	ADD	y7, a15, y7
kusano 2b45e8
kusano 2b45e8
	ST	y4, -4 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  8 * SIZE(A1)
kusano 2b45e8
	ST	y5, -3 * SIZE(Y1)
kusano 2b45e8
	lda	A2,  8 * SIZE(A2)
kusano 2b45e8
	ST	y6, -2 * SIZE(Y1)
kusano 2b45e8
	lda	A3,  8 * SIZE(A3)
kusano 2b45e8
	ST	y7, -1 * SIZE(Y1)
kusano 2b45e8
	lda	A4,  8 * SIZE(A4)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L15:
kusano 2b45e8
	and	M, 4, I
kusano 2b45e8
	ble	I, $L16
kusano 2b45e8
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	LD	y2,  2 * SIZE(Y1)
kusano 2b45e8
	LD	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  2 * SIZE(A1)
kusano 2b45e8
	LD	a3,  3 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	LD	a4,  0 * SIZE(A2)
kusano 2b45e8
	LD	a5,  1 * SIZE(A2)
kusano 2b45e8
	LD	a6,  2 * SIZE(A2)
kusano 2b45e8
	LD	a7,  3 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	LD	a8,  0 * SIZE(A3)
kusano 2b45e8
	LD	a9,  1 * SIZE(A3)
kusano 2b45e8
	LD	a10, 2 * SIZE(A3)
kusano 2b45e8
	LD	a11, 3 * SIZE(A3)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	a12, 0 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	a13, 1 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	a14, 2 * SIZE(A4)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	a15, 3 * SIZE(A4)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	MUL	alpha3, a8,  a8
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	MUL	alpha3, a9,  a9
kusano 2b45e8
	ADD	y2, a6, y2
kusano 2b45e8
	MUL	alpha3, a10, a10
kusano 2b45e8
	ADD	y3, a7, y3
kusano 2b45e8
	MUL	alpha3, a11, a11
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a8,  y0
kusano 2b45e8
	MUL	alpha4, a12, a12
kusano 2b45e8
	ADD	y1, a9,  y1
kusano 2b45e8
	MUL	alpha4, a13, a13
kusano 2b45e8
	ADD	y2, a10, y2
kusano 2b45e8
	MUL	alpha4, a14, a14
kusano 2b45e8
	ADD	y3, a11, y3
kusano 2b45e8
	MUL	alpha4, a15, a15
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a12, y0
kusano 2b45e8
	lda	Y1,  4 * SIZE(Y1)
kusano 2b45e8
	ADD	y1, a13, y1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a14, y2
kusano 2b45e8
	unop
kusano 2b45e8
	ADD	y3, a15, y3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ST	y0, -4 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  4 * SIZE(A1)
kusano 2b45e8
	ST	y1, -3 * SIZE(Y1)
kusano 2b45e8
	lda	A2,  4 * SIZE(A2)
kusano 2b45e8
	ST	y2, -2 * SIZE(Y1)
kusano 2b45e8
	lda	A3,  4 * SIZE(A3)
kusano 2b45e8
	ST	y3, -1 * SIZE(Y1)
kusano 2b45e8
	lda	A4,  4 * SIZE(A4)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L16:
kusano 2b45e8
	and	M, 2, I
kusano 2b45e8
	ble	I, $L17
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  0 * SIZE(A2)
kusano 2b45e8
	LD	a3,  1 * SIZE(A2)
kusano 2b45e8
	
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	LD	a4,  0 * SIZE(A3)
kusano 2b45e8
	MUL	alpha1, a0, a0
kusano 2b45e8
	LD	a5,  1 * SIZE(A3)
kusano 2b45e8
	MUL	alpha1, a1, a1
kusano 2b45e8
	LD	a6,  0 * SIZE(A4)
kusano 2b45e8
	MUL	alpha2, a2, a2
kusano 2b45e8
	LD	a7,  1 * SIZE(A4)
kusano 2b45e8
	MUL	alpha2, a3, a3
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	MUL	alpha3, a4, a4
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	MUL	alpha3, a5, a5
kusano 2b45e8
	ADD	y0, a2, y0
kusano 2b45e8
	MUL	alpha4, a6, a6
kusano 2b45e8
	ADD	y1, a3, y1
kusano 2b45e8
	MUL	alpha4, a7, a7
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	lda	A1,  2 * SIZE(A1)
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	lda	A2,  2 * SIZE(A2)
kusano 2b45e8
	ADD	y0, a6, y0
kusano 2b45e8
	lda	A3,  2 * SIZE(A3)
kusano 2b45e8
	ADD	y1, a7, y1
kusano 2b45e8
	lda	A4,  2 * SIZE(A4)
kusano 2b45e8
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	lda	Y1,  2 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L17:
kusano 2b45e8
	blbc	M, $L18
kusano 2b45e8
kusano 2b45e8
	LD	y0,   0 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	LD	a0,   0 * SIZE(A1)
kusano 2b45e8
	LD	a1,   0 * SIZE(A2)
kusano 2b45e8
	LD	a2,   0 * SIZE(A3)
kusano 2b45e8
	LD	a3,   0 * SIZE(A4)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0, a0
kusano 2b45e8
	MUL	alpha2, a1, a1
kusano 2b45e8
	MUL	alpha3, a2, a2
kusano 2b45e8
	MUL	alpha4, a3, a3
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	ADD	y0, a1, y0
kusano 2b45e8
	ADD	y0, a2, y0
kusano 2b45e8
	ADD	y0, a3, y0
kusano 2b45e8
kusano 2b45e8
	ST	y0,   0 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L18:
kusano 2b45e8
	lda	J, -1(J)
kusano 2b45e8
	bgt	J,  $L11
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L20:
kusano 2b45e8
	and	N, 2, J
kusano 2b45e8
	ble	J, $L30
kusano 2b45e8
kusano 2b45e8
	LD	alpha1,  0 * SIZE(X)
kusano 2b45e8
	addq	X, INCX, X
kusano 2b45e8
	LD	alpha2,  0 * SIZE(X)
kusano 2b45e8
	addq	X, INCX, X
kusano 2b45e8
kusano 2b45e8
	mov	A, A1
kusano 2b45e8
	MUL	alpha, alpha1, alpha1
kusano 2b45e8
	addq	A,  LDA, A2
kusano 2b45e8
	MUL	alpha, alpha2, alpha2
kusano 2b45e8
kusano 2b45e8
	addq	A2, LDA, A
kusano 2b45e8
	mov	Y, Y1
kusano 2b45e8
kusano 2b45e8
	sra	M,  3, I
kusano 2b45e8
	ble	I,  $L25
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  2 * SIZE(A1)
kusano 2b45e8
	LD	a3,  3 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	LD	a4,  0 * SIZE(A2)
kusano 2b45e8
	LD	a5,  1 * SIZE(A2)
kusano 2b45e8
	LD	a6,  2 * SIZE(A2)
kusano 2b45e8
	LD	a7,  3 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	LD	y2,  2 * SIZE(Y1)
kusano 2b45e8
	LD	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	y4,  4 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	y5,  5 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	y6,  6 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	y7,  7 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	LD	a0,  4 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	LD	a1,  5 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	LD	a2,  6 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	LD	a3,  7 * SIZE(A1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	LD	a4,  4 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	LD	a5,  5 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a6, y2
kusano 2b45e8
	LD	a6,  6 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a7, y3
kusano 2b45e8
	LD	a7,  7 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
kusano 2b45e8
	lda	I,   -1(I)
kusano 2b45e8
	ble	I, $L23
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L22:
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A1)
kusano 2b45e8
	lda	I,   -1(I)
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A2)
kusano 2b45e8
	lda	A2,  8 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a0, y4
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	LD	a0,  8 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a1, y5
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	LD	a1,  9 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a2, y6
kusano 2b45e8
	ST	y2,  2 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	LD	a2, 10 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a3, y7
kusano 2b45e8
	ST	y3,  3 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
	LD	a3, 11 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a4, y4
kusano 2b45e8
	LD	a4,  0 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	y0,  8 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a5, y5
kusano 2b45e8
	LD	a5,  1 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	y1,  9 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a6, y6
kusano 2b45e8
	LD	a6,  2 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	y2, 10 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a7, y7
kusano 2b45e8
	LD	a7,  3 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	y3, 11 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	ST	y4,  4 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	LD	a0, 12 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	ST	y5,  5 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	LD	a1, 13 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	ST	y6,  6 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	LD	a2, 14 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	ST	y7,  7 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
	LD	a3, 15 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	LD	a4,  4 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	y4, 12 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	LD	a5,  5 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	y5, 13 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a6, y2
kusano 2b45e8
	LD	a6,  6 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	y6, 14 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a7, y3
kusano 2b45e8
	LD	a7,  7 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	y7, 15 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	lds	$f31, (PREFETCHSIZE + 0) * SIZE(Y1)
kusano 2b45e8
	lda	A1,  8 * SIZE(A1)
kusano 2b45e8
	lda	Y1,  8 * SIZE(Y1) 
kusano 2b45e8
	bgt	I, $L22
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L23:
kusano 2b45e8
	ADD	y4, a0, y4
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a1, y5
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a2, y6
kusano 2b45e8
	ST	y2,  2 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a3, y7
kusano 2b45e8
	ST	y3,  3 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y4, a4, y4
kusano 2b45e8
	ADD	y5, a5, y5
kusano 2b45e8
	ADD	y6, a6, y6
kusano 2b45e8
	ADD	y7, a7, y7
kusano 2b45e8
kusano 2b45e8
	ST	y4,  4 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  8 * SIZE(A1)
kusano 2b45e8
	ST	y5,  5 * SIZE(Y1)
kusano 2b45e8
	lda	A2,  8 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	ST	y6,  6 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
	ST	y7,  7 * SIZE(Y1)
kusano 2b45e8
	lda	Y1,  8 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L25:
kusano 2b45e8
	and	M, 4, I
kusano 2b45e8
	ble	I, $L26
kusano 2b45e8
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	LD	y2,  2 * SIZE(Y1)
kusano 2b45e8
	LD	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  2 * SIZE(A1)
kusano 2b45e8
	LD	a3,  3 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	a4,  0 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	a5,  1 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	a6,  2 * SIZE(A2)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	a7,  3 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	MUL	alpha2, a4,  a4
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	MUL	alpha2, a5,  a5
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	MUL	alpha2, a6,  a6
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	MUL	alpha2, a7,  a7
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a4, y0
kusano 2b45e8
	lda	Y1,  4 * SIZE(Y1)
kusano 2b45e8
	ADD	y1, a5, y1
kusano 2b45e8
	unop
kusano 2b45e8
	ADD	y2, a6, y2
kusano 2b45e8
	unop
kusano 2b45e8
	ADD	y3, a7, y3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ST	y0, -4 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  4 * SIZE(A1)
kusano 2b45e8
	ST	y1, -3 * SIZE(Y1)
kusano 2b45e8
	lda	A2,  4 * SIZE(A2)
kusano 2b45e8
	ST	y2, -2 * SIZE(Y1)
kusano 2b45e8
	lda	A3,  4 * SIZE(A3)
kusano 2b45e8
	ST	y3, -1 * SIZE(Y1)
kusano 2b45e8
	lda	A4,  4 * SIZE(A4)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L26:
kusano 2b45e8
	and	M, 2, I
kusano 2b45e8
	ble	I, $L27
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  0 * SIZE(A2)
kusano 2b45e8
	LD	a3,  1 * SIZE(A2)
kusano 2b45e8
	
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0, a0
kusano 2b45e8
	MUL	alpha1, a1, a1
kusano 2b45e8
	MUL	alpha2, a2, a2
kusano 2b45e8
	MUL	alpha2, a3, a3
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	lda	A1,  2 * SIZE(A1)
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	lda	A2,  2 * SIZE(A2)
kusano 2b45e8
	ADD	y0, a2, y0
kusano 2b45e8
	unop
kusano 2b45e8
	ADD	y1, a3, y1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	lda	Y1,  2 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L27:
kusano 2b45e8
	blbc	M, $L30
kusano 2b45e8
kusano 2b45e8
	LD	y0,   0 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	LD	a0,   0 * SIZE(A1)
kusano 2b45e8
	LD	a1,   0 * SIZE(A2)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0, a0
kusano 2b45e8
	MUL	alpha2, a1, a1
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	ADD	y0, a1, y0
kusano 2b45e8
kusano 2b45e8
	ST	y0,   0 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L30: start of the single-stream update, run only when the low bit of N
 * is set (otherwise branch to $L990).
 *
 * Setup:
 *     alpha1 = alpha * X[0]
 *     A1 = A, Y1 = Y
 *     I  = M >> 3          (number of 8-row groups)
 * X and A are whatever the preceding code left in them -- presumably the
 * last x element and the last column of the matrix; confirm against the
 * column loop that precedes this excerpt.
 *
 * If there is at least one 8-row group, the first group is preloaded
 * (a0..a7 from A1, y0..y7 from Y1) and a0..a3 are pre-scaled so that the
 * loop at $L32 / drain at $L33 can start with the adds.
 */
$L30:
kusano 2b45e8
	blbc	N, $L990
kusano 2b45e8
kusano 2b45e8
	LD	alpha1,  0 * SIZE(X)
kusano 2b45e8
	mov	A, A1
kusano 2b45e8
	MUL	alpha, alpha1, alpha1
kusano 2b45e8
	mov	Y, Y1
kusano 2b45e8
kusano 2b45e8
/* I = M / 8; with no full group go straight to the 4/2/1-row tails. */
	sra	M,  3, I
kusano 2b45e8
	ble	I,  $L35
kusano 2b45e8
kusano 2b45e8
/* Preload eight A elements of the first group. */
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  2 * SIZE(A1)
kusano 2b45e8
	LD	a3,  3 * SIZE(A1)
kusano 2b45e8
	LD	a4,  4 * SIZE(A1)
kusano 2b45e8
	LD	a5,  5 * SIZE(A1)
kusano 2b45e8
	LD	a6,  6 * SIZE(A1)
kusano 2b45e8
	LD	a7,  7 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
/*
 * Preload eight accumulators.  y4..y7 are loaded again from the same
 * offsets at the top of $L32 / $L33.
 */
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	LD	y2,  2 * SIZE(Y1)
kusano 2b45e8
	LD	y3,  3 * SIZE(Y1)
kusano 2b45e8
	LD	y4,  4 * SIZE(Y1)
kusano 2b45e8
	LD	y5,  5 * SIZE(Y1)
kusano 2b45e8
	LD	y6,  6 * SIZE(Y1)
kusano 2b45e8
	LD	y7,  7 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
/* Pre-scale the first half of the group. */
	MUL	alpha1, a0,  a0
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
kusano 2b45e8
/*
 * One group is now in flight; if it was the only one, skip the loop and
 * finish it in the drain code at $L33.
 */
	lda	I,   -1(I)
kusano 2b45e8
	ble	I, $L33
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L32: steady-state loop of the single-stream update, eight rows per
 * iteration.
 *
 * On entry to each iteration:
 *     a0..a3  already multiplied by alpha1
 *     a4..a7  loaded but not yet scaled
 *     y0..y3  loaded from Y1[0..3]
 * Each iteration finishes the current group
 *     Y1[k] += alpha1 * A1[k],  k = 0..7
 * while loading the next group from offsets 8..15 of A1 and 8..11 of Y1,
 * and pre-scaling the next a0..a3.  A1 and Y1 then advance by 8 elements.
 * Loops while the decremented I is still positive; the last group is
 * completed by $L33, which does no look-ahead loads.
 *
 * The instruction order is the author's schedule; do not reorder without
 * re-checking the load/use distances.
 */
$L32:
kusano 2b45e8
/* Rows 0..3: add; reload y4..y7; scale a4..a7; fetch next a0..a3. */
	ADD	y0, a0, y0
kusano 2b45e8
	LD	y4,  4 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a4,  a4
kusano 2b45e8
	LD	a0,  8 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	LD	y5,  5 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a5,  a5
kusano 2b45e8
	LD	a1,  9 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	LD	y6,  6 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a6,  a6
kusano 2b45e8
	LD	a2, 10 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	LD	y7,  7 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a7,  a7
kusano 2b45e8
	LD	a3, 11 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
/* Store rows 0..3 before y0..y3 are reused for the next group. */
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	ST	y2,  2 * SIZE(Y1)
kusano 2b45e8
	ST	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
/* Rows 4..7: add; fetch next y0..y3; pre-scale next a0..a3; fetch next a4..a7. */
	ADD	y4, a4, y4
kusano 2b45e8
	LD	y0,  8 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	a4, 12 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y5, a5, y5
kusano 2b45e8
	LD	y1,  9 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	a5, 13 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y6, a6, y6
kusano 2b45e8
	LD	y2, 10 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	a6, 14 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	ADD	y7, a7, y7
kusano 2b45e8
	LD	y3, 11 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	a7, 15 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
/* Store rows 4..7, interleaved with the counter and pointer updates. */
	ST	y4,  4 * SIZE(Y1)
kusano 2b45e8
	lda	I,   -1(I)
kusano 2b45e8
	ST	y5,  5 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  8 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
/*
 * The ldl / lds below target the zero registers $31 / $f31, so the loaded
 * value is discarded; presumably they act as prefetch hints for the A and
 * y streams PREFETCHSIZE elements ahead (PREFETCHSIZE is defined outside
 * this excerpt).  Note A1 has already been advanced at this point while
 * Y1 has not.
 */
	ST	y6,  6 * SIZE(Y1)
kusano 2b45e8
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A1)
kusano 2b45e8
	ST	y7,  7 * SIZE(Y1)
kusano 2b45e8
	lds	$f31, (PREFETCHSIZE + 0) * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	lda	Y1,  8 * SIZE(Y1)
kusano 2b45e8
	bgt	I, $L32
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L33: drain of the 8-row pipeline.  Completes the last group that is
 * already in registers (a0..a3 scaled, a4..a7 unscaled, y0..y3 loaded):
 *     Y1[k] += alpha1 * A1[k],  k = 0..7
 * Unlike $L32 it loads nothing beyond the current group, so it does not
 * read past the last full group of A1 / Y1.  A1 and Y1 advance by 8
 * elements for the tail code at $L35.
 * The unops occupy the slots where $L32 has its look-ahead loads.
 */
$L33:
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	LD	y4,  4 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a4,  a4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	LD	y5,  5 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a5,  a5
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	LD	y6,  6 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a6,  a6
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
	LD	y7,  7 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a7,  a7
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
/* Finish rows 4..7 while rows 0..3 are being stored. */
	ADD	y4, a4, y4
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	ADD	y5, a5, y5
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	ADD	y6, a6, y6
kusano 2b45e8
	ST	y2,  2 * SIZE(Y1)
kusano 2b45e8
	ADD	y7, a7, y7
kusano 2b45e8
	ST	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ST	y4,  4 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
	ST	y5,  5 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
/* Last two stores, paired with the pointer advances past this group. */
	ST	y6,  6 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  8 * SIZE(A1)
kusano 2b45e8
	ST	y7,  7 * SIZE(Y1)
kusano 2b45e8
	lda	Y1,  8 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L35: four-row step of the single-stream update.
 * Executed only when bit 2 of M is set; otherwise control skips to $L36.
 * For k = 0..3:
 *     Y1[k] = Y1[k] + alpha1 * A1[k]
 * Afterwards A1 and Y1 have each advanced by four elements.
 *
 * Only A1 is read on this path, and nothing between here and the return
 * at $L999 reads A2, so A2 is left untouched.  An unop holds the issue
 * slot next to the second store, mirroring the unop beside the third.
 */
$L35:
kusano 2b45e8
	and	M, 4, I
kusano 2b45e8
	ble	I, $L36
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	LD	a2,  2 * SIZE(A1)
kusano 2b45e8
	LD	a3,  3 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
/* Scale each A element while the matching accumulator is being loaded. */
	MUL	alpha1, a0,  a0
kusano 2b45e8
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a1,  a1
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a2,  a2
kusano 2b45e8
	LD	y2,  2 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a3,  a3
kusano 2b45e8
	LD	y3,  3 * SIZE(Y1)
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
	ADD	y2, a2, y2
kusano 2b45e8
	ADD	y3, a3, y3
kusano 2b45e8
kusano 2b45e8
/* Store the four results; pointer bumps / unops share the store slots. */
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  4 * SIZE(A1)
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
	ST	y2,  2 * SIZE(Y1)
kusano 2b45e8
	unop
kusano 2b45e8
	ST	y3,  3 * SIZE(Y1)
kusano 2b45e8
	lda	Y1,  4 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L36: two-row step of the single-stream update.
 * Executed only when bit 1 of M is set; otherwise control skips to $L37.
 * For k = 0, 1:
 *     Y1[k] = Y1[k] + alpha1 * A1[k]
 * Afterwards A1 and Y1 have each advanced by two elements.
 */
$L36:
kusano 2b45e8
	and	M, 2, I
kusano 2b45e8
	ble	I, $L37
kusano 2b45e8
kusano 2b45e8
	LD	a0,  0 * SIZE(A1)
kusano 2b45e8
	LD	a1,  1 * SIZE(A1)
kusano 2b45e8
	
kusano 2b45e8
/* Accumulator loads are interleaved with the scaling multiplies. */
	LD	y0,  0 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a0, a0
kusano 2b45e8
	LD	y1,  1 * SIZE(Y1)
kusano 2b45e8
	MUL	alpha1, a1, a1
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	ADD	y1, a1, y1
kusano 2b45e8
kusano 2b45e8
	ST	y0,  0 * SIZE(Y1)
kusano 2b45e8
	lda	A1,  2 * SIZE(A1)
kusano 2b45e8
	ST	y1,  1 * SIZE(Y1)
kusano 2b45e8
	lda	Y1,  2 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L37: final single row of the single-stream update.
 * Skipped (branch to $L990) when the low bit of M is clear.
 *     Y1[0] = Y1[0] + alpha1 * A1[0]
 * No pointer is advanced: A1 and Y1 are not read again before $L990
 * reloads Y1.
 */
$L37:
kusano 2b45e8
	blbc	M, $L990
kusano 2b45e8
kusano 2b45e8
	LD	y0,   0 * SIZE(Y1)
kusano 2b45e8
	LD	a0,   0 * SIZE(A1)
kusano 2b45e8
kusano 2b45e8
	MUL	alpha1, a0, a0
kusano 2b45e8
kusano 2b45e8
	ADD	y0, a0, y0
kusano 2b45e8
	ST	y0,   0 * SIZE(Y1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L990: entry to the write-back pass.
 * If INCY equals SIZE (unit stride) there is nothing to do and control
 * goes straight to the exit at $L999.  Otherwise Y1 is set to BUFFER and
 * I to M >> 3, and the loops below perform, for each of the M elements,
 *     strided[i] = strided[i] + Y[i]
 * where "strided" is walked through BUFFER (read) and Y1 (write) in steps
 * of INCY, and Y is read contiguously.
 *
 * NOTE(review): the strided vector is addressed through BUFFER and the
 * contiguous one through Y, the reverse of what the names suggest --
 * presumably the two registers are swapped by setup code before this
 * excerpt when INCY != SIZE.  Confirm against the prologue.
 * INCY is compared with SIZE and added directly to pointers, so it is a
 * byte stride here.
 */
$L990:
kusano 2b45e8
	cmpeq	INCY, SIZE, $0
kusano 2b45e8
	bne	$0, $L999
kusano 2b45e8
kusano 2b45e8
/* Y1 = store cursor over the same strided vector BUFFER reads from. */
	mov	BUFFER, Y1
kusano 2b45e8
kusano 2b45e8
/* I = number of full 8-element groups; none -> remainder handling. */
	sra	M, 3, I
kusano 2b45e8
	ble	I, $L995
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L992: write-back loop, eight elements per iteration.
 *   a0..a7 <- eight elements read through BUFFER, stepping by INCY
 *   y0..y7 <- eight contiguous elements at Y
 *   a[k]   <- a[k] + y[k]
 *   a0..a7 -> stored through Y1, stepping by INCY
 * Y1 starts as a copy of BUFFER (see $L990), so each sum lands in the
 * location its a[k] was read from.  All eight strided loads are issued
 * before the first store.  Y advances by 8 elements per iteration; the
 * loop repeats while the decremented I is positive.
 */
$L992:
kusano 2b45e8
/* Strided loads, elements 0..3. */
	LD	a0,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
	LD	a1,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
	LD	a2,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
	LD	a3,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
kusano 2b45e8
/* Contiguous loads, elements 0..3. */
	LD	y0,  0 * SIZE(Y)
kusano 2b45e8
	LD	y1,  1 * SIZE(Y)
kusano 2b45e8
	LD	y2,  2 * SIZE(Y)
kusano 2b45e8
	LD	y3,  3 * SIZE(Y)
kusano 2b45e8
kusano 2b45e8
/* Strided loads, elements 4..7. */
	LD	a4,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
	LD	a5,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
	LD	a6,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
	LD	a7,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
kusano 2b45e8
/* Contiguous loads, elements 4..7. */
	LD	y4,  4 * SIZE(Y)
kusano 2b45e8
	LD	y5,  5 * SIZE(Y)
kusano 2b45e8
	LD	y6,  6 * SIZE(Y)
kusano 2b45e8
	LD	y7,  7 * SIZE(Y)
kusano 2b45e8
kusano 2b45e8
/* a[k] = a[k] + y[k] */
	ADD	a0, y0, a0
kusano 2b45e8
	ADD	a1, y1, a1
kusano 2b45e8
	ADD	a2, y2, a2
kusano 2b45e8
	ADD	a3, y3, a3
kusano 2b45e8
	ADD	a4, y4, a4
kusano 2b45e8
	ADD	a5, y5, a5
kusano 2b45e8
	ADD	a6, y6, a6
kusano 2b45e8
	ADD	a7, y7, a7
kusano 2b45e8
kusano 2b45e8
/* Strided stores through Y1. */
	ST	a0,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
	ST	a1,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
	ST	a2,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
	ST	a3,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
kusano 2b45e8
	ST	a4,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
	ST	a5,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
	ST	a6,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
	ST	a7,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
kusano 2b45e8
/* Next group: count down and step the contiguous cursor by 8 elements. */
	lda	I, -1(I)
kusano 2b45e8
	lda	Y,   8 * SIZE(Y)
kusano 2b45e8
	bgt	I, $L992
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L995: I = M & 7, the number of elements left after the 8-wide
 * write-back loop; if none remain, go to the exit at $L999.
 */
$L995:
kusano 2b45e8
	and	M, 7, I
kusano 2b45e8
	ble	I, $L999
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L996: write-back remainder loop, one element per iteration (I = M & 7
 * iterations).  Reads one element through BUFFER (stride INCY) and one
 * contiguous element at Y, adds them, and stores the sum through Y1
 * (stride INCY).  BUFFER and Y1 step by INCY, Y by one element.
 */
$L996:
kusano 2b45e8
	LD	a0,  0 * SIZE(BUFFER)
kusano 2b45e8
	addq	BUFFER, INCY, BUFFER
kusano 2b45e8
kusano 2b45e8
	LD	y0,  0 * SIZE(Y)
kusano 2b45e8
	lda	Y,   1 * SIZE(Y)
kusano 2b45e8
kusano 2b45e8
	ADD	a0, y0, a0
kusano 2b45e8
kusano 2b45e8
	ST	a0,  0 * SIZE(Y1)
kusano 2b45e8
	addq	Y1, INCY, Y1
kusano 2b45e8
kusano 2b45e8
	lda	I, -1(I)
kusano 2b45e8
	bgt	I, $L996
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
/*
 * $L999: common exit.  Reloads $f2..$f9 from the eight 8-byte slots at
 * 0..56($sp) (presumably saved there by the prologue, which is outside
 * this excerpt), releases STACKSIZE bytes of stack and returns.
 * EPILOGUE is a macro defined outside this excerpt.
 */
$L999:
kusano 2b45e8
	ldt	$f2,    0($sp)
kusano 2b45e8
	ldt	$f3,    8($sp)
kusano 2b45e8
	ldt	$f4,   16($sp)
kusano 2b45e8
	ldt	$f5,   24($sp)
kusano 2b45e8
	ldt	$f6,   32($sp)
kusano 2b45e8
	ldt	$f7,   40($sp)
kusano 2b45e8
	ldt	$f8,   48($sp)
kusano 2b45e8
	ldt	$f9,   56($sp)
kusano 2b45e8
kusano 2b45e8
/* Pop the frame. */
	lda	$sp,  STACKSIZE($sp)
kusano 2b45e8
	ret
kusano 2b45e8
	EPILOGUE