/* thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/alpha/zgemv_t.S */
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/* ASSEMBLER is defined before the project headers are pulled in. */
#define ASSEMBLER
#include "common.h"
#include "version.h"

/* Size in bytes of the stack frame reserved on entry: eight 8-byte slots,
   used to hold $f2-$f9 while the kernel runs. */
#define STACKSIZE     64
/* Look-ahead distance, in units of SIZE, used by the `ldl $31, ...`
   loads (loads whose destination is the zero register). */
#define PREFETCHSIZE  32

/*
 * Symbolic names for the registers used by this kernel.
 *
 * M, N and A are used as they stand on entry (this file never loads them).
 * LDA, X, INCX, Y, INCY and BUFFER are loaded from the caller's stack at
 * the start of the routine.
 */
#define M	$16
#define N	$17
#define A	$21
#define	LDA	$18

#define X	$19
#define	INCX	$20
#define Y	$22
#define	INCY	$23

#define BUFFER	$24

/* Loop counters: I counts elements within a column, J counts column passes. */
#define I	$25
#define J	$27

/* Working pointers: X1 walks x, Y1 is the y store pointer (and the write
   pointer while x is being packed), A1/A2 walk the current column(s). */
#define	X1	$3
#define Y1	$4
#define A1	$5
#define A2	$6

/* alpha; never written in this file, only read when y is updated. */
#define	alpha_r	$f19
#define	alpha_i	$f20

/* Accumulators for the running dot products. */
#define	s0	$f0
#define	s1	$f1
#define	s2	$f10
#define	s3	$f11

/* Product temporaries; each is folded into an accumulator by a later ADDn. */
#define	t0	$f12
#define	t1	$f13
#define	t2	$f14
#define	t3	$f15

/* Words of x currently in flight. */
#define	x0	$f16
#define	x1	$f17
#define	x2	$f18
#define	x3	$f21

/* Words of A currently in flight (also reused for y and as copy scratch). */
#define	a0	$f22
#define	a1	$f23
#define	a2	$f24
#define	a3	$f25
#define	a4	$f26
#define	a5	$f27
#define	a6	$f28
#define	a7	$f29

/* a8-a15 live in $f2-$f9, which this routine saves on entry and restores
   before returning. */
#define	a8	$f2
#define	a9	$f3
#define	a10	$f4
#define	a11	$f5
#define	a12	$f6
#define	a13	$f7
#define	a14	$f8
#define	a15	$f9

/*
 * ADD1..ADD4 select ADD or SUB for the four partial products of one complex
 * multiply x * a.  As the loops in this file use them (xr/xi and ar/ai are
 * the first/second SIZE-word of an element of x and A):
 *
 *   ADD1 folds xr*ar into the real sum
 *   ADD2 folds xr*ai into the imaginary sum
 *   ADD3 folds xi*ai into the real sum
 *   ADD4 folds xi*ar into the imaginary sum
 */
#if   !defined(CONJ) && !defined(XCONJ)
/* x * a:              re = xr*ar - xi*ai,  im =  xr*ai + xi*ar */
#define ADD1	ADD
#define ADD2	ADD
#define ADD3	SUB
#define ADD4	ADD
#elif !defined(CONJ) &&  defined(XCONJ)
/* conj(x) * a:        re = xr*ar + xi*ai,  im =  xr*ai - xi*ar */
#define ADD1	ADD
#define ADD2	ADD
#define ADD3	ADD
#define ADD4	SUB
#elif  defined(CONJ) && !defined(XCONJ)
/* x * conj(a):        re = xr*ar + xi*ai,  im = -xr*ai + xi*ar */
#define ADD1	ADD
#define ADD2	SUB
#define ADD3	ADD
#define ADD4	ADD
#else
/* conj(x) * conj(a):  re = xr*ar - xi*ai,  im = -xr*ai - xi*ar */
#define ADD1	ADD
#define ADD2	SUB
#define ADD3	SUB
#define ADD4	SUB
#endif

	/*
	 * Entry.  Sets up a STACKSIZE-byte frame, fetches the arguments that
	 * live on the caller's stack, saves $f2-$f9, rejects empty problems
	 * and converts the strides for use as pointer increments.
	 * PROLOGUE / PROFCODE are macros supplied by the included headers.
	 */
	PROLOGUE

	lda	$sp,  -STACKSIZE($sp)
	/* Stack arguments sit just above the frame reserved on the line above. */
	ldq	LDA,     0 + STACKSIZE($sp)
	ldq	X,       8 + STACKSIZE($sp)
	ldq	INCX,   16 + STACKSIZE($sp)
	ldq	Y,      24 + STACKSIZE($sp)
	ldq	INCY,   32 + STACKSIZE($sp)
	ldq	BUFFER, 40 + STACKSIZE($sp)

	/* $f2-$f9 are used as a8-a15; keep the caller's values in the frame
	   (reloaded at $L999). */
	stt	$f2,    0($sp)
	stt	$f3,    8($sp)
	stt	$f4,   16($sp)
	stt	$f5,   24($sp)
	stt	$f6,   32($sp)
	stt	$f7,   40($sp)
	stt	$f8,   48($sp)
	stt	$f9,   56($sp)

	PROFCODE

	/* $0 = (M <= 0) | (N <= 0); interleaved with shifting INCX / INCY
	   left by ZBASE_SHIFT so they can be added straight to pointers
	   (assumes ZBASE_SHIFT is log2 of the element size in bytes -
	   defined outside this file, TODO confirm). */
	cmple	M, 0, $0
	sll	INCX, ZBASE_SHIFT, INCX
	cmple	N, 0, $1
	sll	INCY, ZBASE_SHIFT, INCY

	or	$0, $1, $0
	bne	$0,  $L999		/* nothing to do for an empty problem */

	/* If the scaled INCX equals 2 * SIZE, the elements of x are already
	   back to back and the packing step is skipped.  LDA is scaled the
	   same way as the other strides on the way. */
	cmpeq	INCX, 2 * SIZE, $0
	mov	X, X1
	sll	LDA, ZBASE_SHIFT,LDA
	bne	$0, $L10

	/*
	 * x is strided: gather its M two-word elements into BUFFER so the
	 * compute loops can read them back to back.  X1 reads the original
	 * x (advancing by INCX), Y1 is borrowed as the write pointer, and X
	 * is redirected to BUFFER for the rest of the routine.
	 */
	sra	M, 2, I			/* I = M / 4 groups of four elements */
	mov	BUFFER, Y1
	mov	BUFFER, X
	ble	I, $L05
	.align 4

/* $L02: copy four elements per pass. */
$L02:
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(X1)
	lda	I, -1(I)

	LD	a0,  0 * SIZE(X1)
	LD	a1,  1 * SIZE(X1)
	addq	X1, INCX, X1
	LD	a2,  0 * SIZE(X1)
	LD	a3,  1 * SIZE(X1)
	addq	X1, INCX, X1

	ST	a0,  0 * SIZE(Y1)
	ST	a1,  1 * SIZE(Y1)
	ST	a2,  2 * SIZE(Y1)
	ST	a3,  3 * SIZE(Y1)

	LD	a4,  0 * SIZE(X1)
	LD	a5,  1 * SIZE(X1)
	addq	X1, INCX, X1
	LD	a6,  0 * SIZE(X1)
	LD	a7,  1 * SIZE(X1)
	addq	X1, INCX, X1

	ST	a4,  4 * SIZE(Y1)
	ST	a5,  5 * SIZE(Y1)
	ST	a6,  6 * SIZE(Y1)
	ST	a7,  7 * SIZE(Y1)

	lda	Y1,  8 * SIZE(Y1)
	bgt	I, $L02
	.align 4

/* $L05: the M & 3 elements left over after the groups of four. */
$L05:
	and	M, 3, I
	ble	I, $L10
	.align 4

/* $L06: copy one element per pass. */
$L06:
	LD	a0,  0 * SIZE(X1)
	LD	a1,  1 * SIZE(X1)
	addq	X1, INCX, X1

	ST	a0,  0 * SIZE(Y1)
	ST	a1,  1 * SIZE(Y1)
	lda	Y1,  2 * SIZE(Y1)

	lda	I, -1(I)
	bgt	I, $L06
	.align 4

/*
 * $L10: start of the computation.  Y1 becomes the store pointer for y
 * (Y itself is the read pointer, see $L18), the product temporaries are
 * cleared so the first ADDn of a loop folds in zero, and J = N / 2 is the
 * number of two-column passes.
 */
$L10:
	mov	Y, Y1
	fclr	t0
	unop
	fclr	t1

	sra	N, 1, J
	fclr	t2
	fclr	t3
	ble	J,  $L20		/* fewer than two columns */
	.align 4

/*
 * $L11: set up one two-column pass.  A1 and A2 point at two columns that
 * are LDA apart, A is advanced past both for the next pass, X1 restarts at
 * the beginning of x and the four accumulators are cleared.
 */
$L11:
	mov	A, A1
	fclr	s0
	addq	A,  LDA, A2
	fclr	s1

	addq	A2, LDA, A
	unop
	mov	X, X1
	lds	$f31, 3 * SIZE(Y)	/* result discarded ($f31); touches y ahead of $L18 */

	sra	M,  2, I		/* I = M / 4 groups of four elements */
	fclr	s2
	fclr	s3
	ble	I,  $L15

	/* Preload the first four elements of both columns ... */
	LD	a0,   0 * SIZE(A1)
	LD	a1,   1 * SIZE(A1)
	LD	a2,   0 * SIZE(A2)
	LD	a3,   1 * SIZE(A2)
	LD	a4,   2 * SIZE(A1)
	LD	a5,   3 * SIZE(A1)
	LD	a6,   2 * SIZE(A2)
	LD	a7,   3 * SIZE(A2)

	LD	a8,   4 * SIZE(A1)
	LD	a9,   5 * SIZE(A1)
	LD	a10,  4 * SIZE(A2)
	LD	a11,  5 * SIZE(A2)
	LD	a12,  6 * SIZE(A1)
	LD	a13,  7 * SIZE(A1)
	LD	a14,  6 * SIZE(A2)
	LD	a15,  7 * SIZE(A2)

	/* ... and the first three words of x (the fourth is loaded in the loop). */
	LD	x0,   0 * SIZE(X1)
	LD	x1,   1 * SIZE(X1)
	LD	x2,   2 * SIZE(X1)

	/* I - 1 groups go through $L12; the last group is handled by $L13,
	   which does not load past the end of the data. */
	lda	I,   -1(I)
	ble	I, $L13
	.align 4

/*
 * $L12: main loop for two columns, four elements of x per pass.
 *
 * Register roles:
 *   x0/x2 hold the first word and x1/x3 the second word of successive
 *   elements of x;
 *   a0,a1 / a4,a5 / a8,a9 / a12,a13 are elements read through A1 and
 *   a2,a3 / a6,a7 / a10,a11 / a14,a15 the matching elements read through A2;
 *   s0,s1 accumulate for the A1 column, s2,s3 for the A2 column.
 *
 * The code is software-pipelined: each ADDn folds in the product an earlier
 * MUL left in tN, and the MUL right after it refills tN.  Operands are
 * reloaded for the next pass as soon as their last use has issued, so when
 * the loop falls through the registers are already primed for $L13.
 * `unop` is a no-op used as padding between the arithmetic instructions.
 */
$L12:
	/* x element 0, first word, against a0..a3 */
	ADD3	s0, t0,  s0
	unop
	MUL	x0, a0,  t0
	LD	x3,   3 * SIZE(X1)

	ADD4	s1, t1,  s1
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A1)
	MUL	x0, a1,  t1
	unop

	ADD3	s2, t2,  s2
	unop
	MUL	x0, a2,  t2
	unop

	ADD4	s3, t3,  s3
	unop
	MUL	x0, a3,  t3
	LD	x0,   4 * SIZE(X1)

	/* x element 0, second word; a0..a3 are reloaded for the next pass */
	ADD1	s0, t0,  s0
	unop
	MUL	x1, a1,  t0
	LD	a1,   9 * SIZE(A1)

	ADD2	s1, t1,  s1
	unop
	MUL	x1, a0,  t1
	LD	a0,   8 * SIZE(A1)

	ADD1	s2, t2,  s2
	unop
	MUL	x1, a3,  t2
	LD	a3,   9 * SIZE(A2)

	ADD2	s3, t3,  s3
	unop
	MUL	x1, a2,  t3
	LD	a2,   8 * SIZE(A2)

	/* x element 1, first word, against a4..a7 */
	ADD3	s0, t0,  s0
	unop
	MUL	x2, a4,  t0
	LD	x1,   5 * SIZE(X1)

	ADD4	s1, t1,  s1
	MUL	x2, a5,  t1
	ADD3	s2, t2,  s2
	MUL	x2, a6,  t2

	ADD4	s3, t3,  s3
	unop
	MUL	x2, a7,  t3
	LD	x2,   6 * SIZE(X1)

	/* x element 1, second word; a4..a7 are reloaded for the next pass */
	ADD1	s0, t0,  s0
	unop
	MUL	x3, a5,  t0
	LD	a5,  11 * SIZE(A1)

	ADD2	s1, t1,  s1
	unop
	MUL	x3, a4,  t1
	LD	a4,  10 * SIZE(A1)

	ADD1	s2, t2,  s2
	unop
	MUL	x3, a7,  t2
	LD	a7,  11 * SIZE(A2)

	ADD2	s3, t3,  s3
	unop
	MUL	x3, a6,  t3
	LD	a6,  10 * SIZE(A2)

	/* x element 2, first word, against a8..a11 */
	ADD3	s0, t0,  s0
	unop
	MUL	x0, a8,  t0
	LD	x3,   7 * SIZE(X1)

	ADD4	s1, t1,  s1
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A2)
	MUL	x0, a9,  t1
	unop

	ADD3	s2, t2,  s2
	lda	I,   -1(I)
	MUL	x0, a10, t2
	unop

	ADD4	s3, t3,  s3
	unop
	MUL	x0, a11, t3
	LD	x0,   8 * SIZE(X1)

	/* x element 2, second word; a8..a11 are reloaded for the next pass.
	   A1 is advanced here, so later A1 offsets are relative to the new
	   position; A2 is still the old pointer until it is bumped below. */
	ADD1	s0, t0,  s0
	unop
	MUL	x1, a9,  t0
	LD	a9,  13 * SIZE(A1)

	ADD2	s1, t1,  s1
	unop
	MUL	x1, a8,  t1
	LD	a8,  12 * SIZE(A1)

	ADD1	s2, t2,  s2
	lda	A1,   8 * SIZE(A1)
	MUL	x1, a11, t2
	LD	a11, 13 * SIZE(A2)

	ADD2	s3, t3,  s3
	unop
	MUL	x1, a10, t3
	LD	a10, 12 * SIZE(A2)

	/* x element 3, first word, against a12..a15 */
	ADD3	s0, t0,  s0
	unop
	MUL	x2, a12, t0
	LD	x1,   9 * SIZE(X1)

	ADD4	s1, t1,  s1
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(X1)
	MUL	x2, a13, t1
	lda	A2,   8 * SIZE(A2)

	ADD3	s2, t2,  s2
	unop
	MUL	x2, a14, t2
	unop

	ADD4	s3, t3,  s3
	unop
	MUL	x2, a15, t3
	LD	x2,  10 * SIZE(X1)

	/* x element 3, second word; a12..a15 are reloaded through the already
	   advanced A1 / A2, and X1 is advanced for the next pass. */
	ADD1	s0, t0,  s0
	unop
	MUL	x3, a13, t0
	LD	a13,  7 * SIZE(A1)

	ADD2	s1, t1,  s1
	lda	X1,   8 * SIZE(X1)
	MUL	x3, a12, t1
	LD	a12,  6 * SIZE(A1)

	ADD1	s2, t2,  s2
	unop
	MUL	x3, a15, t2
	LD	a15,  7 * SIZE(A2)

	ADD2	s3, t3,  s3
	MUL	x3, a14, t3
	LD	a14,  6 * SIZE(A2)
	bgt	I, $L12
	.align 4

/*
 * $L13: last group of four elements for the two-column pass.  Same
 * arithmetic as one pass of $L12 on the operands already held in a0..a15
 * and x0..x2, but no element of A is reloaded and only the remaining words
 * of x for this group are fetched.  A1, A2 and X1 are still advanced by
 * four elements so that $L15 continues from the right place.  The four
 * products issued last remain in t0..t3 for the next ADDn to fold in.
 */
$L13:
	/* x element 0 */
	ADD3	s0, t0,  s0
	unop
	MUL	x0, a0,  t0
	LD	x3,   3 * SIZE(X1)

	ADD4	s1, t1,  s1
	MUL	x0, a1,  t1
	ADD3	s2, t2,  s2
	MUL	x0, a2,  t2

	ADD4	s3, t3,  s3
	unop
	MUL	x0, a3,  t3
	LD	x0,   4 * SIZE(X1)

	ADD1	s0, t0,  s0
	MUL	x1, a1,  t0
	ADD2	s1, t1,  s1
	MUL	x1, a0,  t1

	ADD1	s2, t2,  s2
	unop
	MUL	x1, a3,  t2
	unop

	ADD2	s3, t3,  s3
	lda	A1,   8 * SIZE(A1)
	MUL	x1, a2,  t3
	LD	x1,   5 * SIZE(X1)

	/* x element 1 */
	ADD3	s0, t0,  s0
	MUL	x2, a4,  t0
	ADD4	s1, t1,  s1
	MUL	x2, a5,  t1

	ADD3	s2, t2,  s2
	unop
	MUL	x2, a6,  t2
	unop

	ADD4	s3, t3,  s3
	lda	A2,   8 * SIZE(A2)
	MUL	x2, a7,  t3
	LD	x2,   6 * SIZE(X1)

	ADD1	s0, t0,  s0
	MUL	x3, a5,  t0
	ADD2	s1, t1,  s1
	MUL	x3, a4,  t1

	ADD1	s2, t2,  s2
	unop
	MUL	x3, a7,  t2
	lda	X1,   8 * SIZE(X1)

	ADD2	s3, t3,  s3
	unop
	MUL	x3, a6,  t3
	LD	x3,  -1 * SIZE(X1)	/* last word of this group; X1 was already advanced */

	/* x element 2 */
	ADD3	s0, t0,  s0
	MUL	x0, a8,  t0
	ADD4	s1, t1,  s1
	MUL	x0, a9,  t1

	ADD3	s2, t2,  s2
	MUL	x0, a10, t2
	ADD4	s3, t3,  s3
	MUL	x0, a11, t3

	ADD1	s0, t0,  s0
	MUL	x1, a9,  t0
	ADD2	s1, t1,  s1
	MUL	x1, a8,  t1

	ADD1	s2, t2,  s2
	MUL	x1, a11, t2
	ADD2	s3, t3,  s3
	MUL	x1, a10, t3

	/* x element 3 */
	ADD3	s0, t0,  s0
	MUL	x2, a12, t0
	ADD4	s1, t1,  s1
	MUL	x2, a13, t1

	ADD3	s2, t2,  s2
	MUL	x2, a14, t2
	ADD4	s3, t3,  s3
	MUL	x2, a15, t3

	ADD1	s0, t0,  s0
	MUL	x3, a13, t0
	ADD2	s1, t1,  s1
	MUL	x3, a12, t1

	ADD1	s2, t2,  s2
	MUL	x3, a15, t2
	ADD2	s3, t3,  s3
	MUL	x3, a14, t3
	.align 4

/*
 * $L15: the M & 3 elements left after the groups of four, still for two
 * columns at once.  One element of each column and the first word of x are
 * preloaded; I - 1 elements run through $L16 and the last one through $L17.
 */
$L15:
	and	M, 3, I
	ble	I,  $L18

	LD	a0,   0 * SIZE(A1)
	LD	a1,   1 * SIZE(A1)
	LD	a2,   0 * SIZE(A2)
	LD	a3,   1 * SIZE(A2)

	LD	x0,   0 * SIZE(X1)

	lda	I,   -1(I)
	ble	I, $L17
	.align 4

/*
 * $L16: one element per pass.  x0/x1 are the two words of the x element,
 * a0,a1 the element from A1 and a2,a3 the element from A2.  Each operand
 * is reloaded with the following element once its last use has issued;
 * loads placed after a pointer bump use offsets relative to the new pointer.
 */
$L16:
	ADD3	s0, t0, s0
	lda	I,   -1(I)
	MUL	x0, a0, t0
	LD	x1,   1 * SIZE(X1)

	ADD4	s1, t1, s1
	MUL	x0, a1, t1
	ADD3	s2, t2, s2
	MUL	x0, a2, t2

	ADD4	s3, t3, s3
	unop
	MUL	x0, a3, t3
	LD	x0,   2 * SIZE(X1)

	ADD1	s0, t0, s0
	lda	A2,   2 * SIZE(A2)
	MUL	x1, a1, t0
	LD	a1,   3 * SIZE(A1)

	ADD2	s1, t1, s1
	lda	X1,   2 * SIZE(X1)
	MUL	x1, a0, t1
	LD	a0,   2 * SIZE(A1)

	ADD1	s2, t2, s2
	lda	A1,   2 * SIZE(A1)
	MUL	x1, a3, t2
	LD	a3,   1 * SIZE(A2)

	ADD2	s3, t3, s3
	MUL	x1, a2, t3
	LD	a2,   0 * SIZE(A2)
	bgt	I, $L16
	.align 4

/*
 * $L17: final element; same arithmetic as $L16 without reloading anything
 * except the second word of x.  The four products issued last stay in
 * t0..t3 and are folded in at $L18.
 */
$L17:
	ADD3	s0, t0, s0
	unop
	MUL	x0, a0, t0
	LD	x1,   1 * SIZE(X1)

	ADD4	s1, t1, s1
	unop
	MUL	x0, a1, t1
	unop

	ADD3	s2, t2, s2
	MUL	x0, a2, t2
	ADD4	s3, t3, s3
	MUL	x0, a3, t3

	ADD1	s0, t0, s0
	MUL	x1, a1, t0
	ADD2	s1, t1, s1
	MUL	x1, a0, t1

	ADD1	s2, t2, s2
	MUL	x1, a3, t2
	ADD2	s3, t3, s3
	MUL	x1, a2, t3
	.align 4

/*
 * $L18: finish a two-column pass.  Two consecutive elements of y are read
 * through Y (advancing by INCY), updated as
 *
 *     y_re += alpha_r * s_re - alpha_i * s_im
 *     y_im += alpha_r * s_im + alpha_i * s_re
 *
 * with (s0,s1) for the first and (s2,s3) for the second element, and
 * written back through Y1 (also advancing by INCY).  t0..t3 are cleared
 * again before looping to $L11 for the next pair of columns.
 */
$L18:
	LD	a0,    0 * SIZE(Y)
	unop
	LD	a1,    1 * SIZE(Y)
	addq	Y, INCY, Y

	LD	a2,    0 * SIZE(Y)
	unop
	LD	a3,    1 * SIZE(Y)
	addq	Y, INCY, Y

	/* Fold in the products still pending in t0..t3 (zero if no element
	   loop ran since they were last cleared). */
	ADD3	s0, t0, s0
	ADD4	s1, t1, s1
	ADD3	s2, t2, s2
	ADD4	s3, t3, s3

	MUL	alpha_r, s0, t0
	MUL	alpha_r, s1, t1
	MUL	alpha_r, s2, t2
	MUL	alpha_r, s3, t3

	/* Add the alpha_r terms while the alpha_i terms are being formed. */
	ADD	a0, t0, a0
	MUL	alpha_i, s1, t0
	ADD	a1, t1, a1
	MUL	alpha_i, s0, t1
	ADD	a2, t2, a2
	MUL	alpha_i, s3, t2
	ADD	a3, t3, a3
	MUL	alpha_i, s2, t3

	SUB	a0, t0, a0
	ADD	a1, t1, a1
	SUB	a2, t2, a2
	ADD	a3, t3, a3

	ST	a0,    0 * SIZE(Y1)
	fclr	t0
	ST	a1,    1 * SIZE(Y1)
	addq	Y1, INCY, Y1

	ST	a2,    0 * SIZE(Y1)
	fclr	t1
	ST	a3,    1 * SIZE(Y1)
	addq	Y1, INCY, Y1

	fclr	t2
	lda	J, -1(J)
	fclr	t3
	bgt	J,  $L11
	.align 4

/*
 * $L20: handle the last column when N is odd (the low bit of N is tested;
 * an even N goes straight to the exit).  Only A1 is used here.  All four
 * accumulators are cleared because the single-column loops spread the sums
 * over s0..s3 and combine them at $L28.
 */
$L20:
	blbc	N,  $L999

	mov	A, A1
	fclr	s0
	fclr	s1
	mov	X, X1

	sra	M,  2, I		/* I = M / 4 groups of four elements */
	fclr	s2
	fclr	s3
	ble	I,  $L25

	/* Preload the first four elements of the column ... */
	LD	a0,   0 * SIZE(A1)
	LD	a1,   1 * SIZE(A1)
	LD	a4,   2 * SIZE(A1)
	LD	a5,   3 * SIZE(A1)
	LD	a8,   4 * SIZE(A1)
	LD	a9,   5 * SIZE(A1)
	LD	a12,  6 * SIZE(A1)
	LD	a13,  7 * SIZE(A1)

	/* ... and the first three words of x. */
	LD	x0,   0 * SIZE(X1)
	LD	x1,   1 * SIZE(X1)
	LD	x2,   2 * SIZE(X1)

	/* I - 1 groups go through $L22; the last group is handled by $L23. */
	lda	I,   -1(I)
	ble	I, $L23
	.align 4

/*
 * $L22: main loop for the single remaining column, four elements of x per
 * pass.  Only t0 and t1 are used as product temporaries; the sums are
 * split over four accumulators:
 *   s0 <- ADD3 terms,  s2 <- ADD1 terms   (both built from t0)
 *   s1 <- ADD4 terms,  s3 <- ADD2 terms   (both built from t1)
 * and are merged at $L28.  As in the two-column loop, every ADDn folds in
 * the product of an earlier MUL and operands are reloaded for the next
 * pass as soon as they are free.
 */
$L22:
	/* x element 0 against a0,a1 */
	ADD3	s0, t0,  s0
	ldl	$31, (PREFETCHSIZE + 0) * SIZE(A1)
	MUL	x0, a0,  t0
	LD	x3,   3 * SIZE(X1)

	ADD4	s1, t1,  s1
	unop
	MUL	x0, a1,  t1
	LD	x0,   4 * SIZE(X1)

	ADD1	s2, t0,  s2
	lda	I,   -1(I)
	MUL	x1, a1,  t0
	LD	a1,   9 * SIZE(A1)

	ADD2	s3, t1,  s3
	unop
	MUL	x1, a0,  t1
	LD	a0,   8 * SIZE(A1)

	/* x element 1 against a4,a5 */
	ADD3	s0, t0,  s0
	unop
	MUL	x2, a4,  t0
	LD	x1,   5 * SIZE(X1)

	ADD4	s1, t1,  s1
	unop
	MUL	x2, a5,  t1
	LD	x2,   6 * SIZE(X1)

	ADD1	s2, t0,  s2
	unop
	MUL	x3, a5,  t0
	LD	a5,  11 * SIZE(A1)

	ADD2	s3, t1,  s3
	unop
	MUL	x3, a4,  t1
	LD	a4,  10 * SIZE(A1)

	/* x element 2 against a8,a9 */
	ADD3	s0, t0,  s0
	unop
	MUL	x0, a8,  t0
	LD	x3,   7 * SIZE(X1)

	ADD4	s1, t1,  s1
	unop
	MUL	x0, a9,  t1
	LD	x0,   8 * SIZE(X1)

	ADD1	s2, t0,  s2
	unop
	MUL	x1, a9,  t0
	LD	a9,  13 * SIZE(A1)

	ADD2	s3, t1,  s3
	unop
	MUL	x1, a8,  t1
	LD	a8,  12 * SIZE(A1)

	/* x element 3 against a12,a13; A1 and X1 are advanced here, so the
	   reloads of a12,a13 use offsets relative to the new A1. */
	ADD3	s0, t0,  s0
	unop
	MUL	x2, a12, t0
	LD	x1,   9 * SIZE(X1)

	ADD4	s1, t1,  s1
	lda	A1,   8 * SIZE(A1)
	MUL	x2, a13, t1
	LD	x2,  10 * SIZE(X1)

	ADD1	s2, t0,  s2
	lda	X1,   8 * SIZE(X1)
	MUL	x3, a13, t0
	LD	a13,  7 * SIZE(A1)

	ADD2	s3, t1,  s3
	MUL	x3, a12, t1
	LD	a12,  6 * SIZE(A1)
	bgt	I, $L22
	.align 4

/*
 * $L23: last group of four elements for the single remaining column.
 * Same arithmetic as one pass of $L22 on the operands already in
 * registers; nothing is reloaded from A and only the remaining words of x
 * for this group are fetched.  A1 and X1 are still advanced by four
 * elements so that $L25 continues from the right place.  The two products
 * issued last stay in t0 / t1 for the next ADDn to fold in.
 */
$L23:
	/* x element 0 */
	ADD3	s0, t0,  s0
	unop
	MUL	x0, a0,  t0
	LD	x3,   3 * SIZE(X1)

	ADD4	s1, t1,  s1
	unop
	MUL	x0, a1,  t1
	LD	x0,   4 * SIZE(X1)

	ADD1	s2, t0,  s2
	unop
	MUL	x1, a1,  t0
	lda	A1,   8 * SIZE(A1)

	ADD2	s3, t1,  s3
	unop
	MUL	x1, a0,  t1
	LD	x1,   5 * SIZE(X1)

	/* x element 1 */
	ADD3	s0, t0,  s0
	unop
	MUL	x2, a4,  t0
	unop

	ADD4	s1, t1,  s1
	unop
	MUL	x2, a5,  t1
	LD	x2,   6 * SIZE(X1)

	ADD1	s2, t0,  s2
	unop
	MUL	x3, a5,  t0
	lda	X1,   8 * SIZE(X1)

	ADD2	s3, t1,  s3
	unop
	MUL	x3, a4,  t1
	LD	x3,  -1 * SIZE(X1)	/* last word of this group; X1 was already advanced */

	/* x element 2 */
	ADD3	s0, t0,  s0
	MUL	x0, a8,  t0
	ADD4	s1, t1,  s1
	MUL	x0, a9,  t1

	ADD1	s2, t0,  s2
	MUL	x1, a9,  t0
	ADD2	s3, t1,  s3
	MUL	x1, a8,  t1

	/* x element 3 */
	ADD3	s0, t0,  s0
	MUL	x2, a12, t0
	ADD4	s1, t1,  s1
	MUL	x2, a13, t1

	ADD1	s2, t0,  s2
	MUL	x3, a13, t0
	ADD2	s3, t1,  s3
	MUL	x3, a12, t1
	.align 4

/*
 * $L25: the M & 3 elements left after the groups of four, for the single
 * remaining column.  One element of A and the first word of x are
 * preloaded; I - 1 elements run through $L26 and the last one through $L27.
 */
$L25:
	and	M, 3, I
	ble	I,  $L28

	LD	a0,   0 * SIZE(A1)
	LD	a1,   1 * SIZE(A1)

	LD	x0,   0 * SIZE(X1)

	lda	I,   -1(I)
	ble	I, $L27
	.align 4

/*
 * $L26: one element per pass, accumulating into s0 / s1 only.  A1 is
 * advanced at the top, so the reloads of a0,a1 at the bottom use offsets
 * 0 and 1 from the new pointer.
 */
$L26:
	ADD3	s0, t0, s0
	lda	A1,   2 * SIZE(A1)
	MUL	x0, a0, t0
	LD	x1,   1 * SIZE(X1)

	ADD4	s1, t1, s1
	lda	I,   -1(I)
	MUL	x0, a1, t1
	LD	x0,   2 * SIZE(X1)

	ADD1	s0, t0, s0
	lda	X1,   2 * SIZE(X1)
	MUL	x1, a1, t0
	LD	a1,   1 * SIZE(A1)

	ADD2	s1, t1, s1
	MUL	x1, a0, t1
	LD	a0,   0 * SIZE(A1)
	bgt	I, $L26
	.align 4

/*
 * $L27: final element; same arithmetic as $L26 without reloading anything
 * except the second word of x.  The two products issued last stay in
 * t0 / t1 and are folded in at $L28.
 */
$L27:
	ADD3	s0, t0, s0
	unop
	MUL	x0, a0, t0
	LD	x1,   1 * SIZE(X1)

	ADD4	s1, t1, s1
	unop
	MUL	x0, a1, t1
	unop

	ADD1	s0, t0, s0
	MUL	x1, a1, t0
	ADD2	s1, t1, s1
	MUL	x1, a0, t1
	.align 4

/*
 * $L28: finish the single remaining column.  The pending products are
 * folded in, the split accumulators are merged (s0 += s2, s1 += s3) and
 * the last element of y is updated as
 *
 *     y_re += alpha_r * s0 - alpha_i * s1
 *     y_im += alpha_r * s1 + alpha_i * s0
 *
 * It is read through Y and written through Y1.
 */
$L28:
	LD	a0,    0 * SIZE(Y)
	LD	a1,    1 * SIZE(Y)

	/* t2 / t3 are not written by the single-column loops; they still hold
	   the zero they were last cleared to, so the s2 / s3 lines add nothing. */
	ADD3	s0, t0, s0
	ADD4	s1, t1, s1
	ADD3	s2, t2, s2
	ADD4	s3, t3, s3

	ADD	s0, s2, s0
	ADD	s1, s3, s1

	MUL	alpha_r, s0, t0
	MUL	alpha_r, s1, t1

	ADD	a0, t0, a0
	MUL	alpha_i, s1, t0
	ADD	a1, t1, a1
	MUL	alpha_i, s0, t1

	SUB	a0, t0, a0
	ADD	a1, t1, a1

	ST	a0,    0 * SIZE(Y1)
	ST	a1,    1 * SIZE(Y1)
	.align 4

/*
 * $L999: common exit.  Reloads $f2-$f9 from the slots they were stored to
 * on entry, releases the STACKSIZE-byte frame and returns.  Also reached
 * directly when M <= 0 or N <= 0, and when N is even.
 */
$L999:
	ldt	$f2,    0($sp)
	ldt	$f3,    8($sp)
	ldt	$f4,   16($sp)
	ldt	$f5,   24($sp)
	ldt	$f6,   32($sp)
	ldt	$f7,   40($sp)
	ldt	$f8,   48($sp)
	ldt	$f9,   56($sp)

	lda	$sp,  STACKSIZE($sp)
	ret
	EPILOGUE