/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/* Build-time configuration for the complex AXPY kernel below.        */
/* (Blame-view residue lines that preceded every line were removed;   */
/* they were neither comments nor valid assembler input.)             */
#define ASSEMBLER
#include "common.h"
#include "version.h"

/* Look-ahead distance, in units of SIZE, of the prefetch loads       */
/* issued at the top of $MainLoop.                                    */
#define PREFETCHSIZE 40

/* ADD1 combines the two products that form the real part of          */
/* alpha*x, ADD2 the two that form the imaginary part.  Defining      */
/* CONJ swaps the two operations.                                     */
#ifndef CONJ
#define ADD1	SUB
#define	ADD2	ADD
#else
#define ADD1	ADD
#define ADD2	SUB
#endif


/*
 * Complex AXPY kernel: for each of the n complex elements,
 *     y[i] += alpha * x[i]
 * where each element is a (real, imaginary) pair of SIZE-wide values.
 * With ADD1 = SUB, ADD2 = ADD (no CONJ), and assuming SUB a,b,c gives
 * c = a - b, the per-element arithmetic is
 *     re = alpha_r * x_r - alpha_i * x_i
 *     im = alpha_i * x_r + alpha_r * x_i
 * CONJ swaps the two operations.
 *
 * Inputs as consumed by this code:
 *   $16         n, the element count (n <= 0 returns immediately)
 *   $f19, $f20  alpha_r, alpha_i      -> kept in $f29, $f30
 *   $21         x pointer             -> kept in $18
 *    0($sp)     x stride (32-bit)     -> $19
 *    8($sp)     y pointer             -> $20
 *   16($sp)     y stride (32-bit)     -> $21
 *
 * Derived values:
 *   $4 = n >> 2   number of 4-element groups
 *   $5 = n & 3    leftover elements
 *   $1 = 1 when both strides equal 1, else 0
 *
 * $f2..$f8 are spilled to a 64-byte stack frame because they are used
 * as scratch; $End and $SubEnd restore them.  Integer setup is
 * interleaved with the spills, so the instruction order below is
 * deliberate and must be kept.
 */
	PROLOGUE
	PROFCODE
	/* Frame size matches the 64 bytes allocated by "lda $sp, -64($sp)"
	   below (it previously said 16). */
	.frame	$sp, 64, $26, 0

	/* Stack arguments are read before $sp is moved. */
	ldl	$19,  0($sp)
	fmov	$f19, $f29
	ldq	$20,  8($sp)
	fmov	$f20, $f30

	/* Save the x pointer before $21 is reused for the y stride. */
	mov	$21, $18
	ldl	$21, 16($sp)
	lda	$sp, -64($sp)
	nop

	stt	$f2,   0($sp)
	cmpeq	$19, 1, $1		/* x stride == 1 ? */
	stt	$f3,   8($sp)
	cmpeq	$21, 1, $2		/* y stride == 1 ? */

	stt	$f4,  16($sp)
	and	$16, 3, $5		/* $5 = n & 3 */
	stt	$f5,  24($sp)
	stt	$f6,  32($sp)

	stt	$f7,  40($sp)
	stt	$f8,  48($sp)
#ifndef PROFILE
	.prologue 0
#else
	.prologue 1
#endif

	and	$1, $2, $1		/* both strides are 1 */
	ble	$16, $End		/* nothing to do for n <= 0 */
	sra	$16, 2, $4		/* $4 = n / 4 */
	beq	$1, $Sub		/* general-stride path */

/*
 * Unit-stride path: preload the first group of four complex elements.
 *   x0..x3 -> ($f0,$f1) ($f2,$f3) ($f4,$f5) ($f6,$f7)
 *   y0..y3 -> ($f8,$f28) ($f10,$f11) ($f12,$f13) ($f14,$f15)
 * $4 is decremented first, so $MainLoop runs once per group after the
 * first and $MainLoopEnd finishes the last preloaded group.
 */
	ble	$4,  $Remain		/* fewer than four elements */
	subq	$4,  1,  $4

	LD	$f0,  0*SIZE($18)
	LD	$f1,  1*SIZE($18)
	LD	$f2,  2*SIZE($18)
	LD	$f3,  3*SIZE($18)
	LD	$f4,  4*SIZE($18)
	LD	$f5,  5*SIZE($18)
	LD	$f6,  6*SIZE($18)
	LD	$f7,  7*SIZE($18)

	LD	$f8,  0*SIZE($20)
	LD	$f28, 1*SIZE($20)
	LD	$f10, 2*SIZE($20)
	LD	$f11, 3*SIZE($20)
	LD	$f12, 4*SIZE($20)
	LD	$f13, 5*SIZE($20)
	LD	$f14, 6*SIZE($20)
	LD	$f15, 7*SIZE($20)

	/* x now points at the next unread group; y is advanced later,
	   inside the loop or in $MainLoopEnd. */
	addq	$18, 8*SIZE, $18
	ble	$4, $MainLoopEnd	/* exactly one group */
	.align 4

/*
 * Unit-stride main loop, software pipelined: each pass finishes the
 * group already held in registers (x in $f0-$f7, y in $f8, $f28,
 * $f10-$f15) while loading the next group into the same registers.
 * Products go to $f20-$f27 and results to $f16-$f19.
 * $18 already points at the next x group; $20 is advanced in mid-pass,
 * so new y values are loaded at 0..7 and the finished group is stored
 * at -8..-1.
 */
$MainLoop:
	/* Loads into the zero registers discard their result; they are
	   presumably prefetch hints for y and x. */
	ldt	$f31, PREFETCHSIZE * SIZE($20)
	ldl	$31,  PREFETCHSIZE * SIZE($18)

	/* Element 0 products; x0 is reloaded once it has been consumed. */
	MUL	$f29, $f0,  $f20
	LD	$f31, 9*SIZE($18)
	MUL	$f30, $f1,  $f21
	unop

	MUL	$f30, $f0,  $f22
	LD	$f0,  0*SIZE($18)
	MUL	$f29, $f1,  $f23
	LD	$f1,  1*SIZE($18)

	/* Element 1 products. */
	MUL	$f29, $f2,  $f24
	unop
	MUL	$f30, $f3,  $f25
	nop

	MUL	$f30, $f2,  $f26
	LD	$f2,  2*SIZE($18)
	MUL	$f29, $f3,  $f27
	LD	$f3,  3*SIZE($18)

	/* Combine elements 0 and 1 while starting element 2. */
	ADD1	$f20, $f21, $f16
	MUL	$f29, $f4,  $f20
	ADD2	$f22, $f23, $f17
	MUL	$f30, $f5,  $f21

	ADD1	$f24, $f25, $f18
	unop
	MUL	$f30, $f4,  $f22
	LD	$f4,  4*SIZE($18)

	ADD2	$f26, $f27, $f19
	addq	$20, 8*SIZE, $20	/* y offsets below are relative to the next group */
	MUL	$f29, $f5,  $f23
	LD	$f5,  5*SIZE($18)

	/* Add y0 and y1, reload them, and start element 3. */
	ADD	$f16, $f8,  $f16
	LD	$f8,  0*SIZE($20)
	MUL	$f29, $f6,  $f24
	unop

	ADD	$f17, $f28, $f17
	LD	$f28, 1*SIZE($20)
	MUL	$f30, $f7,  $f25
	unop

	ADD	$f18, $f10, $f18
	LD	$f10, 2*SIZE($20)
	MUL	$f30, $f6,  $f26
	LD	$f6,  6*SIZE($18)

	ADD	$f19, $f11, $f19
	LD	$f11, 3*SIZE($20)
	MUL	$f29, $f7,  $f27
	LD	$f7,  7*SIZE($18)

	/* Store elements 0 and 1; combine elements 2 and 3. */
	ST	$f16,-8*SIZE($20)
	ADD1	$f20, $f21, $f16
	ST	$f17,-7*SIZE($20)
	ADD2	$f22, $f23, $f17

	ST	$f18,-6*SIZE($20)
	ADD1	$f24, $f25, $f18
	ST	$f19,-5*SIZE($20)
	ADD2	$f26, $f27, $f19

	/* Add y2 and y3, then reload them for the next pass. */
	ADD	$f16, $f12, $f16
	LD	$f12, 4*SIZE($20)
	ADD	$f17, $f13, $f17
	LD	$f13, 5*SIZE($20)
	ADD	$f18, $f14, $f18
	LD	$f14, 6*SIZE($20)
	ADD	$f19, $f15, $f19
	LD	$f15, 7*SIZE($20)

	/* Store elements 2 and 3, advance x, count down. */
	ST	$f16,-4*SIZE($20)
	addq	$18, 8*SIZE, $18
	ST	$f17,-3*SIZE($20)
	subq	$4, 1, $4

	ST	$f18,-2*SIZE($20)
	nop
	ST	$f19,-1*SIZE($20)
	bgt	$4, $MainLoop
	.align 4

/*
 * Unit-stride drain: finish the last group of four elements already
 * held in registers.  Same arithmetic as $MainLoop but with no
 * further loads; results are stored at y offsets 0..7 and y is then
 * advanced past the group.
 */
$MainLoopEnd:
	MUL	$f29, $f0,  $f20
	MUL	$f30, $f1,  $f21
	MUL	$f30, $f0,  $f22
	MUL	$f29, $f1,  $f23

	MUL	$f29, $f2,  $f24
	MUL	$f30, $f3,  $f25
	MUL	$f30, $f2,  $f26
	MUL	$f29, $f3,  $f27

	ADD1	$f20, $f21, $f16
	MUL	$f29, $f4,  $f20
	ADD2	$f22, $f23, $f17
	MUL	$f30, $f5,  $f21

	ADD1	$f24, $f25, $f18
	MUL	$f30, $f4,  $f22
	ADD2	$f26, $f27, $f19
	MUL	$f29, $f5,  $f23

	ADD	$f16, $f8,  $f16
	MUL	$f29, $f6,  $f24
	ADD	$f17, $f28, $f17
	MUL	$f30, $f7,  $f25

	ADD	$f18, $f10, $f18
	MUL	$f30, $f6,  $f26
	ADD	$f19, $f11, $f19
	MUL	$f29, $f7,  $f27

	ST	$f16, 0*SIZE($20)
	ADD1	$f20, $f21, $f16
	ST	$f17, 1*SIZE($20)
	ADD2	$f22, $f23, $f17

	ST	$f18, 2*SIZE($20)
	ADD1	$f24, $f25, $f18
	ST	$f19, 3*SIZE($20)
	ADD2	$f26, $f27, $f19

	ADD	$f16, $f12, $f16
	ADD	$f17, $f13, $f17
	ADD	$f18, $f14, $f18
	ADD	$f19, $f15, $f19

	ST	$f16, 4*SIZE($20)
	ST	$f17, 5*SIZE($20)
	ST	$f18, 6*SIZE($20)
	ST	$f19, 7*SIZE($20)

	unop
	addq	$20, 8*SIZE, $20	/* y now points at the leftover elements */
	unop
	ble	$5,  $End		/* no leftover elements */
	.align 4

/*
 * Unit-stride remainder: handles the $5 = n & 3 leftover elements one
 * at a time, using the same pipelining scheme as the main loop.
 * One element is preloaded (x in $f0,$f1; y in $f8,$f28), $RemainLoop
 * runs $5 - 1 times, and $RemainLoopEnd finishes the last element.
 */
$Remain:
	subq	$5,  1,  $6		/* $6 = loop passes after the preload */
	ble	$5,  $End
	LD	$f0,  0*SIZE($18)
	LD	$f1,  1*SIZE($18)

	LD	$f8,  0*SIZE($20)
	LD	$f28, 1*SIZE($20)
	addq	$18, 2*SIZE, $18
	ble	$6, $RemainLoopEnd	/* exactly one leftover element */
	.align 4

/* Finish the current element while loading the next one.  y is
   advanced early, so the next y is read at 0..1 and the current
   result is stored at -2..-1. */
$RemainLoop:
	MUL	$f29, $f0,  $f20
	subq	$6, 1, $6
	MUL	$f30, $f1,  $f21
	addq	$20, 2*SIZE, $20

	MUL	$f30, $f0,  $f22
	LD	$f0,  0*SIZE($18)
	MUL	$f29, $f1,  $f23
	LD	$f1,  1*SIZE($18)

	ADD1	$f20, $f21, $f16
	ADD2	$f22, $f23, $f17
	ADD	$f16, $f8,  $f16
	LD	$f8,  0*SIZE($20)
	ADD	$f17, $f28, $f17
	LD	$f28, 1*SIZE($20)

	ST	$f16,-2*SIZE($20)
	addq	$18, 2*SIZE, $18
	ST	$f17,-1*SIZE($20)
	bgt	$6, $RemainLoop
	.align 4

/* Last leftover element: compute and store, no further loads.
   Falls through into $End. */
$RemainLoopEnd:
	MUL	$f29, $f0,  $f20
	MUL	$f30, $f1,  $f21
	MUL	$f30, $f0,  $f22
	MUL	$f29, $f1,  $f23

	ADD1	$f20, $f21, $f16
	ADD2	$f22, $f23, $f17
	ADD	$f16, $f8,  $f16
	ADD	$f17, $f28, $f17

	ST	$f16, 0*SIZE($20)
	nop
	ST	$f17, 1*SIZE($20)
	nop
	.align 4

/* Exit for the unit-stride path and for n <= 0: restore the FP
   registers spilled in the prologue, release the 64-byte frame and
   return. */
$End:
	ldt	$f2,   0($sp)
	ldt	$f3,   8($sp)
	ldt	$f4,  16($sp)
	ldt	$f5,  24($sp)
	ldt	$f6,  32($sp)
	ldt	$f7,  40($sp)
	ldt	$f8,  48($sp)
	lda	$sp,  64($sp)
	ret
	.align 4

/*
 * General-stride path (taken when either stride differs from 1).
 * Both strides are doubled so that one SXADDQ step moves a pointer by
 * one complex element (presumably SXADDQ s,p,d gives d = p + s*SIZE;
 * the macro is defined in common.h).
 * Preloads the first group of four elements, as the unit-stride path
 * does.  $24 is a read pointer for y that runs ahead of the write
 * pointer $20.
 *
 * NOTE(review): $22 is computed here but is not read anywhere else in
 * this routine.
 */
$Sub:
	SXSUBL	$16,  SIZE, $22
	addq	$22,  $22,  $22		/* complex: two values per element */
	.align 4

	addq	$19, $19, $19		/* complex: x stride in values */
	addq	$21, $21, $21		/* complex: y stride in values */

	ble	$4, $SubRemain		/* fewer than four elements */
	LD	$f0,  0*SIZE($18)
	LD	$f1,  1*SIZE($18)
	SXADDQ	$19, $18, $18

	LD	$f2,  0*SIZE($18)
	LD	$f3,  1*SIZE($18)
	SXADDQ	$19, $18, $18

	LD	$f4,  0*SIZE($18)
	LD	$f5,  1*SIZE($18)
	SXADDQ	$19, $18, $18

	LD	$f6,  0*SIZE($18)
	LD	$f7,  1*SIZE($18)
	SXADDQ	$19, $18, $18

	/* y0 is read through $20; $24 then walks y1..y3 and ends on the
	   first element of the next group. */
	LD	$f8,  0*SIZE($20)
	LD	$f28, 1*SIZE($20)
	SXADDQ	$21, $20, $24

	LD	$f10, 0*SIZE($24)
	LD	$f11, 1*SIZE($24)
	SXADDQ	$21, $24, $24

	LD	$f12, 0*SIZE($24)
	LD	$f13, 1*SIZE($24)
	SXADDQ	$21, $24, $24

	LD	$f14, 0*SIZE($24)
	LD	$f15, 1*SIZE($24)
	SXADDQ	$21, $24, $24

	subq	$4,  1,  $4
	ble	$4, $SubMainLoopEnd	/* exactly one group */
	.align 4

/*
 * General-stride main loop, software pipelined like $MainLoop: each
 * pass finishes the four elements held in registers while loading the
 * next four.  Pointers per pass (four steps each):
 *   $18  x read pointer
 *   $24  y read pointer (next group)
 *   $20  y write pointer (current group)
 * The unop/nop instructions are scheduling padding; the instruction
 * order is deliberate.
 */
$SubMainLoop:
	/* Element 0 products; reload x0. */
	MUL	$f29, $f0,  $f20
	unop
	MUL	$f30, $f1,  $f21
	unop

	MUL	$f30, $f0,  $f22
	LD	$f0,  0*SIZE($18)
	MUL	$f29, $f1,  $f23
	LD	$f1,  1*SIZE($18)

	/* Element 1 products; reload x1. */
	MUL	$f29, $f2,  $f24
	SXADDQ	$19, $18, $18
	MUL	$f30, $f3,  $f25
	unop

	MUL	$f30, $f2,  $f26
	LD	$f2,  0*SIZE($18)
	MUL	$f29, $f3,  $f27
	LD	$f3,  1*SIZE($18)

	/* Combine elements 0 and 1; start element 2; reload x2. */
	ADD1	$f20, $f21, $f16
	SXADDQ	$19, $18, $18
	MUL	$f29, $f4,  $f20
	unop

	ADD2	$f22, $f23, $f17
	unop
	MUL	$f30, $f5,  $f21
	unop

	ADD1	$f24, $f25, $f18
	unop
	MUL	$f30, $f4,  $f22
	LD	$f4,  0*SIZE($18)

	ADD2	$f26, $f27, $f19
	unop
	MUL	$f29, $f5,  $f23
	LD	$f5,  1*SIZE($18)

	/* Add y0 and y1, reload them through $24; start element 3. */
	ADD	$f16, $f8,  $f16
	LD	$f8,  0*SIZE($24)
	MUL	$f29, $f6,  $f24
	SXADDQ	$19, $18, $18

	ADD	$f17, $f28, $f17
	LD	$f28, 1*SIZE($24)
	MUL	$f30, $f7,  $f25
	SXADDQ	$21, $24, $24

	ADD	$f18, $f10, $f18
	LD	$f10, 0*SIZE($24)
	MUL	$f30, $f6,  $f26
	LD	$f6,  0*SIZE($18)

	ADD	$f19, $f11, $f19
	LD	$f11, 1*SIZE($24)
	MUL	$f29, $f7,  $f27
	LD	$f7,  1*SIZE($18)

	/* Store elements 0 and 1; combine elements 2 and 3. */
	ST	$f16, 0*SIZE($20)
	SXADDQ	$19, $18, $18
	ADD1	$f20, $f21, $f16
	unop

	ST	$f17, 1*SIZE($20)
	SXADDQ	$21, $20, $20
	ADD2	$f22, $f23, $f17
	unop

	ST	$f18, 0*SIZE($20)
	SXADDQ	$21, $24, $24
	ADD1	$f24, $f25, $f18
	unop

	ST	$f19, 1*SIZE($20)
	unop
	ADD2	$f26, $f27, $f19
	SXADDQ	$21, $20, $20

	/* Add y2 and y3, then reload them for the next pass. */
	ADD	$f16, $f12, $f16
	unop
	LD	$f12, 0*SIZE($24)
	unop

	ADD	$f17, $f13, $f17
	unop
	LD	$f13, 1*SIZE($24)
	SXADDQ	$21, $24, $24

	ADD	$f18, $f14, $f18
	subq	$4, 1, $4
	LD	$f14, 0*SIZE($24)
	unop

	ADD	$f19, $f15, $f19
	unop
	LD	$f15, 1*SIZE($24)
	SXADDQ	$21, $24, $24

	/* Store elements 2 and 3. */
	ST	$f16, 0*SIZE($20)
	ST	$f17, 1*SIZE($20)
	SXADDQ	$21, $20, $20
	unop

	ST	$f18, 0*SIZE($20)
	ST	$f19, 1*SIZE($20)
	SXADDQ	$21, $20, $20
	bgt	$4, $SubMainLoop
	.align 4

/*
 * General-stride drain: finish the last group of four elements held
 * in registers.  No further loads; each result pair is stored through
 * $20, which is stepped by the y stride after every element.
 */
$SubMainLoopEnd:
	MUL	$f29, $f0,  $f20
	MUL	$f30, $f1,  $f21
	MUL	$f30, $f0,  $f22
	MUL	$f29, $f1,  $f23

	MUL	$f29, $f2,  $f24
	MUL	$f30, $f3,  $f25
	MUL	$f30, $f2,  $f26
	MUL	$f29, $f3,  $f27

	ADD1	$f20, $f21, $f16
	MUL	$f29, $f4,  $f20
	ADD2	$f22, $f23, $f17
	MUL	$f30, $f5,  $f21

	ADD1	$f24, $f25, $f18
	MUL	$f30, $f4,  $f22
	ADD2	$f26, $f27, $f19
	MUL	$f29, $f5,  $f23

	ADD	$f16, $f8,  $f16
	MUL	$f29, $f6,  $f24
	ADD	$f17, $f28, $f17
	MUL	$f30, $f7,  $f25

	ADD	$f18, $f10, $f18
	MUL	$f30, $f6,  $f26
	ADD	$f19, $f11, $f19
	MUL	$f29, $f7,  $f27

	/* Element 0 */
	ST	$f16, 0*SIZE($20)
	ADD1	$f20, $f21, $f16
	ST	$f17, 1*SIZE($20)
	ADD2	$f22, $f23, $f17

	/* Element 1 */
	SXADDQ	$21, $20, $20
	nop
	ST	$f18, 0*SIZE($20)
	ADD1	$f24, $f25, $f18

	ST	$f19, 1*SIZE($20)
	ADD2	$f26, $f27, $f19
	SXADDQ	$21, $20, $20
	ADD	$f16, $f12, $f16

	ADD	$f17, $f13, $f17
	ADD	$f18, $f14, $f18
	ADD	$f19, $f15, $f19

	/* Element 2 */
	ST	$f16, 0*SIZE($20)
	ST	$f17, 1*SIZE($20)
	SXADDQ	$21, $20, $20

	/* Element 3; $20 ends on the first leftover element. */
	ST	$f18, 0*SIZE($20)
	ST	$f19, 1*SIZE($20)
	SXADDQ	$21, $20, $20
	ble	$5,  $SubEnd		/* no leftover elements */
	.align 4

/*
 * General-stride remainder: handles the $5 = n & 3 leftover elements
 * one at a time.  One element is preloaded (x in $f0,$f1; y in
 * $f8,$f28), $SubRemainLoop runs $5 - 1 times, and $SubRemainLoopEnd
 * finishes the last element.  $24 is re-seeded here as the y read
 * pointer, one element ahead of the write pointer $20.
 */
$SubRemain:
	subq	$5,  1,  $6		/* $6 = loop passes after the preload */
	ble	$5,  $SubEnd
	LD	$f0,  0*SIZE($18)
	LD	$f1,  1*SIZE($18)

	LD	$f8,  0*SIZE($20)
	LD	$f28, 1*SIZE($20)
	SXADDQ	$19, $18, $18
	SXADDQ	$21, $20, $24
	ble	$6, $SubRemainLoopEnd	/* exactly one leftover element */
	.align 4

/* Finish the current element while loading the next x (via $18) and
   the next y (via $24); the result is stored through $20. */
$SubRemainLoop:
	MUL	$f29, $f0,  $f20
	MUL	$f30, $f1,  $f21
	MUL	$f30, $f0,  $f22
	LD	$f0,  0*SIZE($18)

	MUL	$f29, $f1,  $f23
	LD	$f1,  1*SIZE($18)
	ADD1	$f20, $f21, $f16
	SXADDQ	$19, $18, $18

	ADD2	$f22, $f23, $f17
	nop
	ADD	$f16, $f8,  $f16
	LD	$f8,  0*SIZE($24)

	ADD	$f17, $f28, $f17
	LD	$f28, 1*SIZE($24)
	SXADDQ	$21, $24, $24
	subq	$6, 1, $6

	ST	$f16, 0*SIZE($20)
	ST	$f17, 1*SIZE($20)
	SXADDQ	$21, $20, $20
	bgt	$6, $SubRemainLoop
	.align 4

/* Last leftover element: compute and store, no further loads.
   Falls through into $SubEnd. */
$SubRemainLoopEnd:
	MUL	$f29, $f0,  $f20
	MUL	$f30, $f1,  $f21
	MUL	$f30, $f0,  $f22
	MUL	$f29, $f1,  $f23

	ADD1	$f20, $f21, $f16
	ADD2	$f22, $f23, $f17
	ADD	$f16, $f8,  $f16
	ADD	$f17, $f28, $f17

	ST	$f16, 0*SIZE($20)
	nop
	ST	$f17, 1*SIZE($20)
	nop
	.align 4

/* Exit for the general-stride path: restore the FP registers spilled
   in the prologue, release the 64-byte frame and return. */
$SubEnd:
	ldt	$f2,   0($sp)
	ldt	$f3,   8($sp)
	ldt	$f4,  16($sp)
	ldt	$f5,  24($sp)
	ldt	$f6,  32($sp)
	ldt	$f7,  40($sp)
	ldt	$f8,  48($sp)
	lda	$sp,  64($sp)
	ret
	EPILOGUE