Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/mips64/ztrsm_kernel_LT.S

kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#define ASSEMBLER
kusano 2b45e8
#include "common.h"
kusano 2b45e8
kusano 2b45e8
/* Integer register aliases used throughout this kernel.                */
/* M, N, K, A, B and C are read before anything in this file writes     */
/* them, so they are inputs supplied by the caller.                      */
/* NOTE(review): presumably M/N/K are the problem dimensions and A/B/C  */
/* the matrix pointers -- confirm against the calling interface.        */
#define M	$4
kusano 2b45e8
#define	N	$5
kusano 2b45e8
#define	K	$6
kusano 2b45e8
#define A	$9
kusano 2b45e8
#define B	$10
kusano 2b45e8
#define C	$11
kusano 2b45e8
/* LDC is loaded from the caller's stack area (128 + 0($sp) after the   */
/* frame is allocated) and then shifted left by ZBASE_SHIFT.            */
#define LDC	$8
kusano 2b45e8
kusano 2b45e8
/* AO / BO: working pointers that walk the A and B data inside the      */
/* inner loops (advanced with daddiu as elements are consumed).         */
#define AO	$12
kusano 2b45e8
#define BO	$13
kusano 2b45e8
kusano 2b45e8
/* Loop counters: J starts at N >> 2, I starts at M, and L counts the   */
/* iterations of the innermost multiply-accumulate loops.               */
#define I	$2
kusano 2b45e8
#define J	$3
kusano 2b45e8
#define L	$7
kusano 2b45e8
kusano 2b45e8
/* CO1..CO4: store pointers into C; CO2, CO3 and CO4 are each derived   */
/* from the previous pointer plus LDC.  $16 and $17 (CO3, CO4) are      */
/* saved on the stack by the prologue.                                  */
#define CO1	$14
kusano 2b45e8
#define CO2	$15
kusano 2b45e8
#define CO3	$16
kusano 2b45e8
#define CO4	$17
kusano 2b45e8
kusano 2b45e8
/* $18..$21 are also saved on the stack by the prologue.                */
/* OFFSET is loaded from 128 + 8($sp); KK is (re)initialised from       */
/* OFFSET in a way that depends on the LN/LT/RN/RT variant; TEMP is a   */
/* scratch register for address arithmetic; AORIG holds the base of A  */
/* in the LN/RT variants, where AO is recomputed from it.               */
#define OFFSET	$18
kusano 2b45e8
#define KK	$19
kusano 2b45e8
#define TEMP	$20
kusano 2b45e8
#define AORIG	$21
kusano 2b45e8
kusano 2b45e8
/* Floating-point register aliases.                                     */
/* a1..a4 receive values loaded through AO in the accumulation loops    */
/* and double as temporaries in the solve step.  $f26/$f27 (a3, a4)     */
/* are saved on the stack by the prologue.                              */
#define a1	$f0
kusano 2b45e8
#define a2	$f1
kusano 2b45e8
#define a3	$f26
kusano 2b45e8
#define a4	$f27
kusano 2b45e8
kusano 2b45e8
/* b1..b8 receive values loaded through B/BO in the accumulation loops; */
/* the solve step reloads them from BO or AO depending on the variant.  */
#define b1	$f2
kusano 2b45e8
#define b2	$f3
kusano 2b45e8
#define b3	$f4
kusano 2b45e8
#define b4	$f5
kusano 2b45e8
#define b5	$f6
kusano 2b45e8
#define b6	$f7
kusano 2b45e8
#define b7	$f8
kusano 2b45e8
#define b8	$f9
kusano 2b45e8
kusano 2b45e8
/* a5 is a second name for b8, so both refer to $f9 and cannot hold     */
/* independent values at the same time.                                 */
#define a5	b8
kusano 2b45e8
kusano 2b45e8
/* c11..c82: sixteen accumulators.  After the accumulation loops they   */
/* are folded in pairs at .L18 (c11 += c22, c12 += c21, c31 += c42,     */
/* c32 += c41, c51 += c62, c52 += c61, c71 += c82, c72 += c81), leaving */
/* c11/c12, c31/c32, c51/c52 and c71/c72 as the values that are solved  */
/* and stored.  $f24/$f25 (c81, c82) are always saved by the prologue;  */
/* $f20..$f23 (c61..c72) are saved only when __64BIT__ is not defined.  */
#define c11	$f10
kusano 2b45e8
#define c12	$f11
kusano 2b45e8
#define c21	$f12
kusano 2b45e8
#define c22	$f13
kusano 2b45e8
#define c31	$f14
kusano 2b45e8
#define c32	$f15
kusano 2b45e8
#define c41	$f16
kusano 2b45e8
#define c42	$f17
kusano 2b45e8
#define c51	$f18
kusano 2b45e8
#define c52	$f19
kusano 2b45e8
#define c61	$f20
kusano 2b45e8
#define c62	$f21
kusano 2b45e8
#define c71	$f22
kusano 2b45e8
#define c72	$f23
kusano 2b45e8
#define c81	$f24
kusano 2b45e8
#define c82	$f25
kusano 2b45e8
kusano 2b45e8
/* Select the fused multiply-add flavour behind each MADDn name.        */
/* MADD1..MADD4 are used by the multiply-accumulate loops; MADD5..MADD8 */
/* are used by the solve step that follows .L18.  The MADD, MSUB and    */
/* NMSUB macros themselves are not defined in this file.                */
/* NOTE(review): presumably they come from common.h and CONJ selects    */
/* the conjugated variant of the kernel -- confirm.                     */
#ifndef CONJ
kusano 2b45e8
#define MADD1	  MADD
kusano 2b45e8
#define MADD2	  MADD
kusano 2b45e8
#define MADD3	  MADD
kusano 2b45e8
#define MADD4	  NMSUB
kusano 2b45e8
#define MADD5	  MSUB
kusano 2b45e8
#define MADD6	  MADD
kusano 2b45e8
#define MADD7	  NMSUB
kusano 2b45e8
#define MADD8	  MADD
kusano 2b45e8
#else
kusano 2b45e8
/* With CONJ defined, the accumulation signs depend on the variant:     */
/* LN/LT make MADD2 the NMSUB term, every other variant makes MADD3     */
/* the NMSUB term.                                                      */
#if defined(LN) || defined(LT)
kusano 2b45e8
#define MADD1	  MADD
kusano 2b45e8
#define MADD2	  NMSUB
kusano 2b45e8
#define MADD3	  MADD
kusano 2b45e8
#define MADD4	  MADD
kusano 2b45e8
#else
kusano 2b45e8
#define MADD1	  MADD
kusano 2b45e8
#define MADD2	  MADD
kusano 2b45e8
#define MADD3	  NMSUB
kusano 2b45e8
#define MADD4	  MADD
kusano 2b45e8
#endif
kusano 2b45e8
/* The solve-step selectors are the same for every CONJ variant.        */
#define MADD5	  MADD
kusano 2b45e8
#define MADD6	  MSUB
kusano 2b45e8
#define MADD7	  MADD
kusano 2b45e8
#define MADD8	  NMSUB
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	PROLOGUE
kusano 2b45e8
	
kusano 2b45e8
	daddiu	$sp, $sp, -128
kusano 2b45e8
kusano 2b45e8
	SDARG	$16,   0($sp)
kusano 2b45e8
	SDARG	$17,   8($sp)
kusano 2b45e8
	SDARG	$18,  16($sp)
kusano 2b45e8
	SDARG	$19,  24($sp)
kusano 2b45e8
	SDARG	$20,  32($sp)
kusano 2b45e8
	SDARG	$21,  40($sp)
kusano 2b45e8
kusano 2b45e8
	sdc1	$f24, 48($sp)
kusano 2b45e8
	sdc1	$f25, 56($sp)
kusano 2b45e8
	sdc1	$f26, 64($sp)
kusano 2b45e8
	sdc1	$f27, 72($sp)
kusano 2b45e8
kusano 2b45e8
#ifndef __64BIT__
kusano 2b45e8
	sdc1	$f20, 88($sp)
kusano 2b45e8
	sdc1	$f21, 96($sp)
kusano 2b45e8
	sdc1	$f22,104($sp)
kusano 2b45e8
	sdc1	$f23,112($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LDARG	LDC,    128 + 0($sp)
kusano 2b45e8
	LDARG	OFFSET, 128 + 8($sp)
kusano 2b45e8
kusano 2b45e8
	dsll	LDC, LDC, ZBASE_SHIFT
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	mult	M, K
kusano 2b45e8
	mflo	TEMP
kusano 2b45e8
kusano 2b45e8
	dsll	TEMP, TEMP, ZBASE_SHIFT
kusano 2b45e8
	daddu	A, A, TEMP
kusano 2b45e8
kusano 2b45e8
	dsll	TEMP, M, ZBASE_SHIFT
kusano 2b45e8
	daddu	C, C, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RN
kusano 2b45e8
	neg	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	mult	N, K
kusano 2b45e8
	mflo	TEMP
kusano 2b45e8
kusano 2b45e8
	dsll	TEMP, TEMP, ZBASE_SHIFT
kusano 2b45e8
	daddu	B, B, TEMP
kusano 2b45e8
kusano 2b45e8
	mult	N, LDC
kusano 2b45e8
	mflo	TEMP
kusano 2b45e8
	daddu	C, C, TEMP
kusano 2b45e8
kusano 2b45e8
	dsubu	KK, N, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsra	J,  N, 2
kusano 2b45e8
	blez	J, .L20
kusano 2b45e8
	nop
kusano 2b45e8
kusano 2b45e8
.L10:
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	dsll	TEMP, K, 2 + ZBASE_SHIFT
kusano 2b45e8
	dsubu	B, B, TEMP
kusano 2b45e8
kusano 2b45e8
	dsll	TEMP, LDC, 2
kusano 2b45e8
	dsubu	C, C, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	move	CO1, C
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	daddu	CO2, C,   LDC
kusano 2b45e8
	daddu	CO3, CO2, LDC
kusano 2b45e8
	daddiu	J, J, -1
kusano 2b45e8
	daddu	CO4, CO3, LDC
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	MOV	c51, c11
kusano 2b45e8
	move	I,  M
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddu	KK, M, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LT
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(RT)
kusano 2b45e8
	move	AORIG, A
kusano 2b45e8
#else
kusano 2b45e8
	move	AO, A
kusano 2b45e8
#endif
kusano 2b45e8
#ifndef RT
kusano 2b45e8
	daddu	C,  CO4, LDC
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	blez	I, .L19
kusano 2b45e8
	MOV	c61, c11
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L11:
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
kusano 2b45e8
	dsra	L,  KK, 2
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	MOV	c52, c11
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	MOV	c62, c11
kusano 2b45e8
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	MOV	c72, c11
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
	MOV	c82, c11
kusano 2b45e8
kusano 2b45e8
	blez	L, .L15
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	dsll	TEMP,   K,  ZBASE_SHIFT
kusano 2b45e8
	dsubu	AORIG, AORIG, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    KK, ZBASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 2 + ZBASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AORIG, L
kusano 2b45e8
	daddu	BO, B,     TEMP
kusano 2b45e8
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MOV	c52, c11
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	MOV	c62, c11
kusano 2b45e8
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	MOV	c72, c11
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MOV	c82, c11
kusano 2b45e8
kusano 2b45e8
	blez	L, .L15
kusano 2b45e8
	NOP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD1	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L13
kusano 2b45e8
	MADD3	c41, c41, a1, b4
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L12:
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a1, b5
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c61, c61, a1, b2
kusano 2b45e8
	LD	a4,  2 * SIZE(AO)
kusano 2b45e8
	MADD1	c71, c71, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a4, b7
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c61, c61, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 28 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 17 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 18 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 19 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a3, b1
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a3, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 32 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 21 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 22 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 23 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a3, b5
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c61, c61, a3, b2
kusano 2b45e8
	LD	a4,  6 * SIZE(AO)
kusano 2b45e8
	MADD1	c71, c71, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a3, b4
kusano 2b45e8
	LD	a3, 12 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 36 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 25 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 26 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 27 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a4, b4
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 40 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 29 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 30 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 31 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a4, b7
kusano 2b45e8
	daddiu	BO, BO, 32 * SIZE
kusano 2b45e8
	MADD3	c61, c61, a4, b2
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	MADD1	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	bgtz	L, .L12
kusano 2b45e8
	MADD3	c41, c41, a1, b4
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L13:
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a1, b5
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c61, c61, a1, b2
kusano 2b45e8
	LD	a4,  2 * SIZE(AO)
kusano 2b45e8
	MADD1	c71, c71, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a4, b7
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c61, c61, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 28 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 17 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 18 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 19 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a3, b1
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a3, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 32 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 21 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 22 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 23 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a3, b5
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c61, c61, a3, b2
kusano 2b45e8
	LD	a4,  6 * SIZE(AO)
kusano 2b45e8
	MADD1	c71, c71, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a3, b4
kusano 2b45e8
	LD	a3, 12 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 36 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 25 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 26 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 27 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 40 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 29 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 30 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 31 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a4, b7
kusano 2b45e8
	daddiu	BO, BO, 32 * SIZE
kusano 2b45e8
	MADD3	c61, c61, a4, b2
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	MADD1	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L15:
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	andi	L, KK,  3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L, TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	blez	L, .L18
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L16:
kusano 2b45e8
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a1, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD1	c51, c51, a1, b5
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD3	c61, c61, a1, b2
kusano 2b45e8
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
	MADD1	c71, c71, a1, b3
kusano 2b45e8
	daddiu	BO, BO,  8 * SIZE
kusano 2b45e8
	MADD3	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c52, c52, a2, b5
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	MADD4	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD2	c72, c72, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD4	c82, c82, a2, b4
kusano 2b45e8
	bgtz	L, .L16
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
.L18:
kusano 2b45e8
 	ADD	c11, c11, c22
kusano 2b45e8
	ADD	c12, c12, c21
kusano 2b45e8
	ADD	c31, c31, c42
kusano 2b45e8
	ADD	c32, c32, c41
kusano 2b45e8
kusano 2b45e8
	ADD	c51, c51, c62
kusano 2b45e8
	ADD	c52, c52, c61
kusano 2b45e8
	ADD	c71, c71, c82
kusano 2b45e8
	ADD	c72, c72, c81
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(RT)
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddiu	TEMP, KK, -1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, -4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, ZBASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 2 + ZBASE_SHIFT
kusano 2b45e8
	daddu	AO, AORIG, L
kusano 2b45e8
	daddu	BO, B,     TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  5 * SIZE(BO)
kusano 2b45e8
	LD	b7,  6 * SIZE(BO)
kusano 2b45e8
	LD	b8,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	SUB	c11, b1, c11
kusano 2b45e8
	SUB	c12, b2, c12
kusano 2b45e8
	SUB	c31, b3, c31
kusano 2b45e8
	SUB	c32, b4, c32
kusano 2b45e8
	SUB	c51, b5, c51
kusano 2b45e8
	SUB	c52, b6, c52
kusano 2b45e8
 	SUB	c71, b7, c71
kusano 2b45e8
	SUB	c72, b8, c72
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
	LD	b1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b2,  1 * SIZE(AO)
kusano 2b45e8
	LD	b3,  2 * SIZE(AO)
kusano 2b45e8
	LD	b4,  3 * SIZE(AO)
kusano 2b45e8
	LD	b5,  4 * SIZE(AO)
kusano 2b45e8
	LD	b6,  5 * SIZE(AO)
kusano 2b45e8
	LD	b7,  6 * SIZE(AO)
kusano 2b45e8
	LD	b8,  7 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	SUB	c11, b1, c11
kusano 2b45e8
	SUB	c12, b2, c12
kusano 2b45e8
 	SUB	c31, b3, c31
kusano 2b45e8
	SUB	c32, b4, c32
kusano 2b45e8
	SUB	c51, b5, c51
kusano 2b45e8
	SUB	c52, b6, c52
kusano 2b45e8
	SUB	c71, b7, c71
kusano 2b45e8
	SUB	c72, b8, c72
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	LD	b1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c12
kusano 2b45e8
	MUL	a2, b2, c11
kusano 2b45e8
	MUL	a3, b2, c32
kusano 2b45e8
	MUL	a4, b2, c31
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b1, c11
kusano 2b45e8
	MADD6	c12, a2, b1, c12
kusano 2b45e8
	MADD5	c31, a3, b1, c31
kusano 2b45e8
	MADD6	c32, a4, b1, c32
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c52
kusano 2b45e8
	MUL	a2, b2, c51
kusano 2b45e8
	MUL	a3, b2, c72
kusano 2b45e8
	MUL	a4, b2, c71
kusano 2b45e8
kusano 2b45e8
	MADD5	c51, a1, b1, c51
kusano 2b45e8
	MADD6	c52, a2, b1, c52
kusano 2b45e8
	MADD5	c71, a3, b1, c71
kusano 2b45e8
	MADD6	c72, a4, b1, c72
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RN
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  5 * SIZE(BO)
kusano 2b45e8
	LD	b7,  6 * SIZE(BO)
kusano 2b45e8
	LD	b8,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c12
kusano 2b45e8
	MUL	a2, b2, c11
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b1, c11
kusano 2b45e8
	MADD6	c12, a2, b1, c12
kusano 2b45e8
kusano 2b45e8
	NMSUB	c31, c31, b3, c11
kusano 2b45e8
	MADD7	c32, c32, b4, c11
kusano 2b45e8
	NMSUB	c51, c51, b5, c11
kusano 2b45e8
	MADD7	c52, c52, b6, c11
kusano 2b45e8
	NMSUB	c71, c71, b7, c11
kusano 2b45e8
	MADD7	c72, c72, b8, c11
kusano 2b45e8
kusano 2b45e8
	MADD8	c31, c31, b4, c12
kusano 2b45e8
	NMSUB	c32, c32, b3, c12
kusano 2b45e8
	MADD8	c51, c51, b6, c12
kusano 2b45e8
	NMSUB	c52, c52, b5, c12
kusano 2b45e8
	MADD8	c71, c71, b8, c12
kusano 2b45e8
	NMSUB	c72, c72, b7, c12
kusano 2b45e8
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
	LD	b5, 12 * SIZE(BO)
kusano 2b45e8
	LD	b6, 13 * SIZE(BO)
kusano 2b45e8
	LD	b7, 14 * SIZE(BO)
kusano 2b45e8
	LD	b8, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b4, c32
kusano 2b45e8
	MUL	a2, b4, c31
kusano 2b45e8
kusano 2b45e8
	MADD5	c31, a1, b3, c31
kusano 2b45e8
	MADD6	c32, a2, b3, c32
kusano 2b45e8
kusano 2b45e8
	NMSUB	c51, c51, b5, c31
kusano 2b45e8
	MADD7	c52, c52, b6, c31
kusano 2b45e8
	NMSUB	c71, c71, b7, c31
kusano 2b45e8
	MADD7	c72, c72, b8, c31
kusano 2b45e8
kusano 2b45e8
	MADD8	c51, c51, b6, c32
kusano 2b45e8
	NMSUB	c52, c52, b5, c32
kusano 2b45e8
	MADD8	c71, c71, b8, c32
kusano 2b45e8
	NMSUB	c72, c72, b7, c32
kusano 2b45e8
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	LD	b6, 21 * SIZE(BO)
kusano 2b45e8
	LD	b7, 22 * SIZE(BO)
kusano 2b45e8
	LD	b8, 23 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b6, c52
kusano 2b45e8
	MUL	a2, b6, c51
kusano 2b45e8
kusano 2b45e8
	MADD5	c51, a1, b5, c51
kusano 2b45e8
	MADD6	c52, a2, b5, c52
kusano 2b45e8
kusano 2b45e8
	NMSUB	c71, c71, b7, c51
kusano 2b45e8
	MADD7	c72, c72, b8, c51
kusano 2b45e8
kusano 2b45e8
	MADD8	c71, c71, b8, c52
kusano 2b45e8
	NMSUB	c72, c72, b7, c52
kusano 2b45e8
kusano 2b45e8
	LD	b7, 30 * SIZE(BO)
kusano 2b45e8
	LD	b8, 31 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b8, c72
kusano 2b45e8
	MUL	a2, b8, c71
kusano 2b45e8
kusano 2b45e8
	MADD5	c71, a1, b7, c71
kusano 2b45e8
	MADD6	c72, a2, b7, c72
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	LD	b1, 30 * SIZE(BO)
kusano 2b45e8
	LD	b2, 31 * SIZE(BO)
kusano 2b45e8
	LD	b3, 28 * SIZE(BO)
kusano 2b45e8
	LD	b4, 29 * SIZE(BO)
kusano 2b45e8
	LD	b5, 26 * SIZE(BO)
kusano 2b45e8
	LD	b6, 27 * SIZE(BO)
kusano 2b45e8
	LD	b7, 24 * SIZE(BO)
kusano 2b45e8
	LD	b8, 25 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c72
kusano 2b45e8
	MUL	a2, b2, c71
kusano 2b45e8
kusano 2b45e8
	MADD5	c71, a1, b1, c71
kusano 2b45e8
	MADD6	c72, a2, b1, c72
kusano 2b45e8
kusano 2b45e8
	NMSUB	c51, c51, b3, c71
kusano 2b45e8
	MADD7	c52, c52, b4, c71
kusano 2b45e8
	NMSUB	c31, c31, b5, c71
kusano 2b45e8
	MADD7	c32, c32, b6, c71
kusano 2b45e8
	NMSUB	c11, c11, b7, c71
kusano 2b45e8
	MADD7	c12, c12, b8, c71
kusano 2b45e8
kusano 2b45e8
	MADD8	c51, c51, b4, c72
kusano 2b45e8
	NMSUB	c52, c52, b3, c72
kusano 2b45e8
	MADD8	c31, c31, b6, c72
kusano 2b45e8
	NMSUB	c32, c32, b5, c72
kusano 2b45e8
	MADD8	c11, c11, b8, c72
kusano 2b45e8
	NMSUB	c12, c12, b7, c72
kusano 2b45e8
kusano 2b45e8
	LD	b3, 20 * SIZE(BO)
kusano 2b45e8
	LD	b4, 21 * SIZE(BO)
kusano 2b45e8
	LD	b5, 18 * SIZE(BO)
kusano 2b45e8
	LD	b6, 19 * SIZE(BO)
kusano 2b45e8
	LD	b7, 16 * SIZE(BO)
kusano 2b45e8
	LD	b8, 17 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b4, c52
kusano 2b45e8
	MUL	a2, b4, c51
kusano 2b45e8
kusano 2b45e8
	MADD5	c51, a1, b3, c51
kusano 2b45e8
	MADD6	c52, a2, b3, c52
kusano 2b45e8
kusano 2b45e8
	NMSUB	c31, c31, b5, c51
kusano 2b45e8
	MADD7	c32, c32, b6, c51
kusano 2b45e8
	NMSUB	c11, c11, b7, c51
kusano 2b45e8
	MADD7	c12, c12, b8, c51
kusano 2b45e8
kusano 2b45e8
	MADD8	c31, c31, b6, c52
kusano 2b45e8
	NMSUB	c32, c32, b5, c52
kusano 2b45e8
	MADD8	c11, c11, b8, c52
kusano 2b45e8
	NMSUB	c12, c12, b7, c52
kusano 2b45e8
kusano 2b45e8
	LD	b5, 10 * SIZE(BO)
kusano 2b45e8
	LD	b6, 11 * SIZE(BO)
kusano 2b45e8
	LD	b7,  8 * SIZE(BO)
kusano 2b45e8
	LD	b8,  9 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b6, c32
kusano 2b45e8
	MUL	a2, b6, c31
kusano 2b45e8
kusano 2b45e8
	MADD5	c31, a1, b5, c31
kusano 2b45e8
	MADD6	c32, a2, b5, c32
kusano 2b45e8
kusano 2b45e8
	NMSUB	c11, c11, b7, c31
kusano 2b45e8
	MADD7	c12, c12, b8, c31
kusano 2b45e8
kusano 2b45e8
	MADD8	c11, c11, b8, c32
kusano 2b45e8
	NMSUB	c12, c12, b7, c32
kusano 2b45e8
kusano 2b45e8
	LD	b7,  0 * SIZE(BO)
kusano 2b45e8
	LD	b8,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b8, c12
kusano 2b45e8
	MUL	a2, b8, c11
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b7, c11
kusano 2b45e8
	MADD6	c12, a2, b7, c12
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	ST	c11,  0 * SIZE(BO)
kusano 2b45e8
	ST	c12,  1 * SIZE(BO)
kusano 2b45e8
	ST	c31,  2 * SIZE(BO)
kusano 2b45e8
	ST	c32,  3 * SIZE(BO)
kusano 2b45e8
	ST	c51,  4 * SIZE(BO)
kusano 2b45e8
	ST	c52,  5 * SIZE(BO)
kusano 2b45e8
	ST	c71,  6 * SIZE(BO)
kusano 2b45e8
	ST	c72,  7 * SIZE(BO)
kusano 2b45e8
#else
kusano 2b45e8
	ST	c11,  0 * SIZE(AO)
kusano 2b45e8
	ST	c12,  1 * SIZE(AO)
kusano 2b45e8
	ST	c31,  2 * SIZE(AO)
kusano 2b45e8
	ST	c32,  3 * SIZE(AO)
kusano 2b45e8
	ST	c51,  4 * SIZE(AO)
kusano 2b45e8
	ST	c52,  5 * SIZE(AO)
kusano 2b45e8
	ST	c71,  6 * SIZE(AO)
kusano 2b45e8
	ST	c72,  7 * SIZE(AO)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddiu	CO1,CO1, -2 * SIZE
kusano 2b45e8
	daddiu	CO2,CO2, -2 * SIZE
kusano 2b45e8
	daddiu	CO3,CO3, -2 * SIZE
kusano 2b45e8
	daddiu	CO4,CO4, -2 * SIZE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c12,  1 * SIZE(CO1)
kusano 2b45e8
	ST	c31,  0 * SIZE(CO2)
kusano 2b45e8
	ST	c32,  1 * SIZE(CO2)
kusano 2b45e8
	ST	c51,  0 * SIZE(CO3)
kusano 2b45e8
	ST	c52,  1 * SIZE(CO3)
kusano 2b45e8
	ST	c71,  0 * SIZE(CO4)
kusano 2b45e8
	ST	c72,  1 * SIZE(CO4)
kusano 2b45e8
kusano 2b45e8
#ifndef LN
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
	daddiu	CO3,CO3, 2 * SIZE
kusano 2b45e8
	daddiu	CO4,CO4, 2 * SIZE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	dsll	TEMP, K, ZBASE_SHIFT
kusano 2b45e8
	daddu	AORIG, AORIG, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
	dsll	L,    TEMP, ZBASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 2 + ZBASE_SHIFT
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LT
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddiu	KK, KK, -1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	MOV	c51, c11
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L11
kusano 2b45e8
	MOV	c61, c11
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L19:
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	dsll	TEMP, K, 2 + ZBASE_SHIFT
kusano 2b45e8
	daddu	B, B, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	move	B,  BO
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RN
kusano 2b45e8
	daddiu	KK, KK,  4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	daddiu	KK, KK, -4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgtz	J, .L10
kusano 2b45e8
	NOP
kusano 2b45e8
	.align 3
kusano 2b45e8
	
kusano 2b45e8
/* .L20: setup for the pass taken when bit 1 of N is set.  Two C lines      */
/* (CO1, CO2, spaced LDC apart) are processed together; skipped to .L30     */
/* when (N & 2) == 0.                                                       */
.L20:
kusano 2b45e8
	andi	J,  N, 2
kusano 2b45e8
	blez	J, .L30
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	/* RT walks backwards: step B back by K << (1 + ZBASE_SHIFT) and C back by 2 * LDC */
	dsll	TEMP, K, 1 + ZBASE_SHIFT
kusano 2b45e8
	dsubu	B, B, TEMP
kusano 2b45e8
kusano 2b45e8
	dsll	TEMP, LDC, 1
kusano 2b45e8
	dsubu	C, C, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* c11 <- contents of integer register $0; .L21 copies it into the other accumulators */
	MTC	$0,  c11
kusano 2b45e8
kusano 2b45e8
	/* LDC is added directly to the pointer here, i.e. it is used as a byte stride */
	move	CO1, C
kusano 2b45e8
	daddu	CO2, C,   LDC
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddu	KK, M, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LT
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(RT)
kusano 2b45e8
	/* LN/RT recompute AO from AORIG for every row (see .L21) */
	move	AORIG, A
kusano 2b45e8
#else
kusano 2b45e8
	move	AO, A
kusano 2b45e8
#endif
kusano 2b45e8
#ifndef RT
kusano 2b45e8
	/* forward variants: C now points just past the two lines handled in this pass */
	daddu	C,  CO2, LDC
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* I counts the M rows, one per trip through .L21 */
	move	I,  M
kusano 2b45e8
	blez	I, .L29
kusano 2b45e8
	NOP
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L21: start of one row of the two-line pass.  Copies c11 (set by MTC     */
/* from $0 before entry and again at the end of .L28) into the other seven  */
/* accumulators, preloads the first A/B operands, and sets L to the         */
/* unrolled trip count (count >> 2).  Loads and MOVs are interleaved.       */
.L21:
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	/* LT/RN: accumulate over the first KK steps, reading B from its start */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	dsra	L,  KK, 2
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
kusano 2b45e8
	NOP
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
	blez	L, .L25
kusano 2b45e8
	/* sits in the slot after the branch, so BO = B on both paths */
	/* (relies on branch-delay-slot execution)                    */
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	/* LN: step AORIG back by K << ZBASE_SHIFT before using it */
	dsll	TEMP,   K,  ZBASE_SHIFT
kusano 2b45e8
	dsubu	AORIG, AORIG, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* LN/RT: start KK steps into both panels (B holds two values per step, */
	/* hence the extra shift) and accumulate over the K - KK steps after it */
	dsll	L,    KK, ZBASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 1 + ZBASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AORIG, L
kusano 2b45e8
	daddu	BO, B,     TEMP
kusano 2b45e8
kusano 2b45e8
	/* TEMP = K - KK stays live: .L25 reads it for the remainder count */
	dsubu	TEMP, K, KK
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
kusano 2b45e8
	blez	L, .L25
kusano 2b45e8
	/* last accumulator clear, placed in the slot after the branch */
	MOV	c42, c11
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/* .L22: main accumulation loop of the two-line pass, unrolled four steps.  */
/* One trip consumes 8 * SIZE of AO and 16 * SIZE of BO and decrements L.   */
/* a1/a3 carry the even-offset A values and a2 the odd-offset ones          */
/* (presumably real and imaginary parts -- TODO confirm against the packing */
/* routine).  Accumulators: c11/c21/c12/c22 use B offsets 0,1 of a step,    */
/* c31/c41/c32/c42 use offsets 2,3.  MADD1..MADD4 are macros defined        */
/* outside this excerpt (presumably multiply-add forms whose signs depend   */
/* on the conjugation variant).  Loads for later steps are interleaved      */
/* with the arithmetic, so statement order matters.                         */
.L22:
kusano 2b45e8
	/* step 1: A offsets 0,1 with B offsets 0..3 */
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD1	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a1, b4
kusano 2b45e8
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	/* step 2: A offsets 2,3 with B offsets 4..7 (b5 holds offset 4) */
	MADD1	c11, c11, a1, b5
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b5
kusano 2b45e8
	LD	b5, 12 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	/* step 3: A offsets 4,5 (a3 was preloaded) with B offsets 8..11 */
	MADD1	c11, c11, a3, b1
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD1	c31, c31, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a3, b4
kusano 2b45e8
	LD	a3,  6 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	/* step 4: A offsets 6,7 with B offsets 12..15 (b5 holds offset 12) */
	MADD1	c11, c11, a3, b5
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a3, b2
kusano 2b45e8
	/* AO is bumped here, mid-step: the a3 load below is relative to the new AO */
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	MADD1	c31, c31, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD3	c41, c41, a3, b4
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	/* offsets 17..20 of the old BO are offsets 1..4 after the bump below: */
	/* these loads prime b2..b5 for the next trip (or for .L26)            */
	MADD2	c12, c12, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 17 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 18 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 19 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L22
kusano 2b45e8
	/* BO bump sits in the slot after the branch and runs on every trip */
	daddiu	BO, BO, 16 * SIZE
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L25: L = (step count) & 3, the steps left over after the 4x unrolled    */
/* loop.  The count is KK for LT/RN and TEMP (= K - KK, set in .L21) for    */
/* LN/RT.  Skips the remainder loop when nothing is left.                   */
.L25:
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	andi	L, KK,  3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L, TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	blez	L, .L28
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/* .L26: remainder loop of the two-line pass, one step per trip.  Consumes  */
/* 2 * SIZE of AO and 4 * SIZE of BO and reloads a1 and b1..b4 for the      */
/* following step.                                                          */
.L26:
kusano 2b45e8
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD1	c31, c31, a1, b3
kusano 2b45e8
	/* BO is bumped early, so the b1..b4 reloads below use offsets 0..3 of the new BO */
	daddiu	BO, BO,  4 * SIZE
kusano 2b45e8
	MADD3	c41, c41, a1, b4
kusano 2b45e8
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD2	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD4	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L26
kusano 2b45e8
	/* AO bump sits in the slot after the branch and runs on every trip */
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
kusano 2b45e8
/* .L28: finish one row of the two-line pass.  Folds the partial            */
/* accumulators into (c11, c12) and (c31, c32), subtracts them from the     */
/* stored right-hand side, applies the triangular-solve step for the        */
/* selected variant, writes the result back to the packed buffer and to C,  */
/* then advances the pointers and loops over I.                             */
/* MADD5..MADD8 and NMSUB are macros defined outside this excerpt; the      */
/* sign conventions are presumably chosen per conjugation variant --        */
/* TODO confirm.                                                            */
.L28:
kusano 2b45e8
	/* merge the cross-term accumulators; presumably (c11, c12) and (c31, c32) */
	/* become the real/imaginary pairs for the two lines                       */
 	ADD	c11, c11, c22
kusano 2b45e8
	ADD	c12, c12, c21
kusano 2b45e8
	ADD	c31, c31, c42
kusano 2b45e8
	ADD	c32, c32, c41
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(RT)
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddiu	TEMP, KK, -1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, -2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* LN/RT: re-point AO/BO at the entries being solved (KK-1 for LN, KK-2 for RT) */
	dsll	L,    TEMP, ZBASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 1 + ZBASE_SHIFT
kusano 2b45e8
	daddu	AO, AORIG, L
kusano 2b45e8
	daddu	BO, B,     TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	/* left-side variants: right-hand side is read from the packed B buffer */
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	SUB	c11, b1, c11
kusano 2b45e8
	SUB	c12, b2, c12
kusano 2b45e8
	SUB	c31, b3, c31
kusano 2b45e8
	SUB	c32, b4, c32
kusano 2b45e8
#else
kusano 2b45e8
	/* right-side variants: right-hand side is read from the packed A buffer */
	LD	b1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b2,  1 * SIZE(AO)
kusano 2b45e8
	LD	b3,  2 * SIZE(AO)
kusano 2b45e8
	LD	b4,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	SUB	c11, b1, c11
kusano 2b45e8
	SUB	c12, b2, c12
kusano 2b45e8
 	SUB	c31, b3, c31
kusano 2b45e8
	SUB	c32, b4, c32
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	/* scale both lines by the two-value entry at AO (presumably the */
	/* pre-inverted diagonal element -- TODO confirm with the packer) */
	LD	b1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c12
kusano 2b45e8
	MUL	a2, b2, c11
kusano 2b45e8
	MUL	a3, b2, c32
kusano 2b45e8
	MUL	a4, b2, c31
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b1, c11
kusano 2b45e8
	MADD6	c12, a2, b1, c12
kusano 2b45e8
	MADD5	c31, a3, b1, c31
kusano 2b45e8
	MADD6	c32, a4, b1, c32
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RN
kusano 2b45e8
	/* RN: solve line 1 with the entry at BO offsets 0,1, eliminate it from */
	/* line 2 using offsets 2,3, then solve line 2 with offsets 6,7         */
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c12
kusano 2b45e8
	MUL	a2, b2, c11
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b1, c11
kusano 2b45e8
	MADD6	c12, a2, b1, c12
kusano 2b45e8
kusano 2b45e8
	NMSUB	c31, c31, b3, c11
kusano 2b45e8
	MADD7	c32, c32, b4, c11
kusano 2b45e8
kusano 2b45e8
	MADD8	c31, c31, b4, c12
kusano 2b45e8
	NMSUB	c32, c32, b3, c12
kusano 2b45e8
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b4, c32
kusano 2b45e8
	MUL	a2, b4, c31
kusano 2b45e8
kusano 2b45e8
	MADD5	c31, a1, b3, c31
kusano 2b45e8
	MADD6	c32, a2, b3, c32
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	/* RT: mirror image of RN -- solve line 2 first (offsets 6,7), eliminate */
	/* it from line 1 using offsets 4,5, then solve line 1 with offsets 0,1  */
	LD	b5,  6 * SIZE(BO)
kusano 2b45e8
	LD	b6,  7 * SIZE(BO)
kusano 2b45e8
	LD	b7,  4 * SIZE(BO)
kusano 2b45e8
	LD	b8,  5 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b6, c32
kusano 2b45e8
	MUL	a2, b6, c31
kusano 2b45e8
kusano 2b45e8
	MADD5	c31, a1, b5, c31
kusano 2b45e8
	MADD6	c32, a2, b5, c32
kusano 2b45e8
kusano 2b45e8
	NMSUB	c11, c11, b7, c31
kusano 2b45e8
	MADD7	c12, c12, b8, c31
kusano 2b45e8
kusano 2b45e8
	MADD8	c11, c11, b8, c32
kusano 2b45e8
	NMSUB	c12, c12, b7, c32
kusano 2b45e8
kusano 2b45e8
	LD	b7,  0 * SIZE(BO)
kusano 2b45e8
	LD	b8,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b8, c12
kusano 2b45e8
	MUL	a2, b8, c11
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b7, c11
kusano 2b45e8
	MADD6	c12, a2, b7, c12
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* write the solved values back over the right-hand side they were read from */
#if defined(LN) || defined(LT)
kusano 2b45e8
	ST	c11,  0 * SIZE(BO)
kusano 2b45e8
	ST	c12,  1 * SIZE(BO)
kusano 2b45e8
	ST	c31,  2 * SIZE(BO)
kusano 2b45e8
	ST	c32,  3 * SIZE(BO)
kusano 2b45e8
#else
kusano 2b45e8
	ST	c11,  0 * SIZE(AO)
kusano 2b45e8
	ST	c12,  1 * SIZE(AO)
kusano 2b45e8
	ST	c31,  2 * SIZE(AO)
kusano 2b45e8
	ST	c32,  3 * SIZE(AO)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	/* LN: pre-decrement the C pointers before the store (no post-increment below) */
	daddiu	CO1,CO1, -2 * SIZE
kusano 2b45e8
	daddiu	CO2,CO2, -2 * SIZE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* and store the same values into the two C lines */
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c12,  1 * SIZE(CO1)
kusano 2b45e8
	ST	c31,  0 * SIZE(CO2)
kusano 2b45e8
	ST	c32,  1 * SIZE(CO2)
kusano 2b45e8
kusano 2b45e8
#ifndef LN
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* reload c11 from $0 so that .L21 can copy it into the other accumulators */
	MTC	$0,  c11
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	/* RT: AORIG += K << ZBASE_SHIFT, one packed A row forward */
	dsll	TEMP, K, ZBASE_SHIFT
kusano 2b45e8
	daddu	AORIG, AORIG, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	/* LT/RN: skip the K - KK steps that were not accumulated in both panels */
	dsubu	TEMP, K, KK
kusano 2b45e8
	dsll	L,    TEMP, ZBASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 1 + ZBASE_SHIFT
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LT
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddiu	KK, KK, -1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* next row */
	daddiu	I, I, -1
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L21
kusano 2b45e8
	NOP
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L29: end of the two-line pass.  Moves B past the panel just used and    */
/* steps KK by 2 for the right-side variants, then falls through to .L30.   */
.L29:
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	/* LN: B += K << (1 + ZBASE_SHIFT); presumably 2 packed lines of K elements */
	dsll	TEMP, K, 1 + ZBASE_SHIFT
kusano 2b45e8
	daddu	B, B, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	/* LT/RN: continue from wherever the running pointer BO stopped */
	move	B,  BO
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RN
kusano 2b45e8
	daddiu	KK, KK,  2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	daddiu	KK, KK, -2
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L30: setup for the pass taken when bit 0 of N is set.  A single C line  */
/* (CO1) is processed; jumps to the epilogue at .L999 when (N & 1) == 0.    */
.L30:
kusano 2b45e8
	andi	J,  N, 1
kusano 2b45e8
	blez	J, .L999
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	/* RT walks backwards: step B back by K << ZBASE_SHIFT and C back by LDC */
	dsll	TEMP, K, ZBASE_SHIFT
kusano 2b45e8
	dsubu	B, B, TEMP
kusano 2b45e8
kusano 2b45e8
	dsubu	C, C, LDC
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* c11 <- contents of integer register $0; .L31 copies it into the other accumulators */
	MTC	$0,  c11
kusano 2b45e8
kusano 2b45e8
	move	CO1, C
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddu	KK, M, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LT
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(RT)
kusano 2b45e8
	/* LN/RT recompute AO from AORIG for every row (see .L31) */
	move	AORIG, A
kusano 2b45e8
#else
kusano 2b45e8
	move	AO, A
kusano 2b45e8
#endif
kusano 2b45e8
#ifndef RT
kusano 2b45e8
	/* forward variants: C now points just past the line handled in this pass */
	daddu	C,  CO1, LDC
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* I counts the M rows, one per trip through .L31 */
	move	I,  M
kusano 2b45e8
	blez	I, .L39
kusano 2b45e8
	NOP
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L31: start of one row of the single-line pass.  Copies c11 (set by MTC  */
/* from $0 before entry and again at the end of .L38) into the other        */
/* accumulators, preloads the first A/B operands, and sets L to the         */
/* unrolled trip count (count >> 2).  Only c11, c12, c21 and c22 are read   */
/* by the loops that follow in this pass.                                   */
.L31:
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	/* LT/RN: accumulate over the first KK steps, reading B from its start */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	dsra	L,  KK, 2
kusano 2b45e8
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b3,  4 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	NOP
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
	blez	L, .L35
kusano 2b45e8
	/* sits in the slot after the branch, so BO = B on both paths */
	/* (relies on branch-delay-slot execution)                    */
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	/* LN: step AORIG back by K << ZBASE_SHIFT before using it */
	dsll	TEMP,   K,  ZBASE_SHIFT
kusano 2b45e8
	dsubu	AORIG, AORIG, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
	/* LN/RT: both panels hold one value pair per step here, so the same */
	/* byte offset (KK << ZBASE_SHIFT) applies to AO and BO              */
	dsll	TEMP, KK, ZBASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AORIG, TEMP
kusano 2b45e8
	daddu	BO, B,     TEMP
kusano 2b45e8
kusano 2b45e8
	/* TEMP = K - KK stays live: .L35 reads it for the remainder count */
	dsubu	TEMP, K, KK
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	dsra	L, TEMP, 2
kusano 2b45e8
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b3,  4 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	blez	L, .L35
kusano 2b45e8
	/* last accumulator clear, placed in the slot after the branch */
	MOV	c42, c11
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/* .L32: main accumulation loop of the single-line pass, unrolled four      */
/* steps.  One trip consumes 8 * SIZE of both AO and BO and decrements L.   */
/* Every multiply-add is paired with a load for a later step, so statement  */
/* order matters.  MADD1..MADD4 are macros defined outside this excerpt.    */
.L32:
kusano 2b45e8
	/* step 1: A offsets 0,1 with B offsets 0,1 */
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	/* step 2: A offsets 2,3 with B offsets 2,3 (held in b1, b4) */
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD3	c21, c21, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b4
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	/* step 3: A offsets 4,5 with B offsets 4,5 (a3, b3 were preloaded) */
	MADD1	c11, c11, a3, b3
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
	MADD3	c21, c21, a3, b2
kusano 2b45e8
	LD	a3,  6 * SIZE(AO)
kusano 2b45e8
	MADD2	c12, c12, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	/* step 4: A offsets 6,7 with B offsets 6,7; the loads at offsets 8, 9 */
	/* and 12 prime a1/a2/a3 and b1/b2/b3 for the next trip                */
	MADD1	c11, c11, a3, b3
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD3	c21, c21, a3, b4
kusano 2b45e8
	LD	a3, 12 * SIZE(AO)
kusano 2b45e8
	MADD2	c12, c12, a2, b3
kusano 2b45e8
	LD	b3, 12 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b4
kusano 2b45e8
	LD	a2,  9 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L32
kusano 2b45e8
	/* BO bump sits in the slot after the branch and runs on every trip */
	daddiu	BO, BO,  8 * SIZE
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L35: L = (step count) & 3, the steps left over after the 4x unrolled    */
/* loop.  The count is KK for LT/RN and TEMP (= K - KK, set in .L31) for    */
/* LN/RT.  Skips the remainder loop when nothing is left.                   */
.L35:
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	andi	L, KK,  3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L, TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	blez	L, .L38
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/* .L36: remainder loop of the single-line pass, one step per trip.         */
/* Consumes 2 * SIZE of both AO and BO and reloads a1, a2, b1, b2 for the   */
/* following step.                                                          */
.L36:
kusano 2b45e8
	MADD1	c11, c11, a1, b1
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD3	c21, c21, a1, b2
kusano 2b45e8
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
	MADD2	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
	MADD4	c22, c22, a2, b2
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	/* b2 is loaded before BO is bumped, so offset 3 here is offset 1 afterwards */
	LD	b2,  3 * SIZE(BO)
kusano 2b45e8
	daddiu	BO, BO,  2 * SIZE
kusano 2b45e8
	bgtz	L, .L36
kusano 2b45e8
	/* AO bump sits in the slot after the branch and runs on every trip */
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
kusano 2b45e8
/* .L38: finish one row of the single-line pass.  Folds the partial         */
/* accumulators into (c11, c12), subtracts them from the stored right-hand  */
/* side, scales by the two-value entry of the triangular factor, writes     */
/* the result back to the packed buffer and to C, then advances the         */
/* pointers and loops over I.  MADD5/MADD6 are macros defined outside this  */
/* excerpt.                                                                 */
.L38:
kusano 2b45e8
	/* merge the cross-term accumulators; presumably (c11, c12) is the real/imaginary pair */
 	ADD	c11, c11, c22
kusano 2b45e8
	ADD	c12, c12, c21
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(RT)
kusano 2b45e8
	/* LN/RT: re-point AO/BO at entry KK - 1 of each panel */
	daddiu	TEMP, KK, -1
kusano 2b45e8
kusano 2b45e8
	dsll	TEMP, TEMP, ZBASE_SHIFT
kusano 2b45e8
	daddu	AO, AORIG, TEMP
kusano 2b45e8
	daddu	BO, B,     TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	/* left-side variants: right-hand side is read from the packed B buffer */
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	SUB	c11, b1, c11
kusano 2b45e8
	SUB	c12, b2, c12
kusano 2b45e8
#else
kusano 2b45e8
	/* right-side variants: right-hand side is read from the packed A buffer */
	LD	b1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	SUB	c11, b1, c11
kusano 2b45e8
	SUB	c12, b2, c12
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LN) || defined(LT)
kusano 2b45e8
	/* scale by the two-value entry at AO (presumably the pre-inverted */
	/* diagonal element -- TODO confirm with the packer)               */
	LD	b1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c12
kusano 2b45e8
	MUL	a2, b2, c11
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b1, c11
kusano 2b45e8
	MADD6	c12, a2, b1, c12
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(RN) || defined(RT)
kusano 2b45e8
	/* same scaling, with the factor entry taken from BO instead */
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MUL	a1, b2, c12
kusano 2b45e8
	MUL	a2, b2, c11
kusano 2b45e8
kusano 2b45e8
	MADD5	c11, a1, b1, c11
kusano 2b45e8
	MADD6	c12, a2, b1, c12
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* write the solved values back over the right-hand side they were read from */
#if defined(LN) || defined(LT)
kusano 2b45e8
	ST	c11,  0 * SIZE(BO)
kusano 2b45e8
	ST	c12,  1 * SIZE(BO)
kusano 2b45e8
#else
kusano 2b45e8
	ST	c11,  0 * SIZE(AO)
kusano 2b45e8
	ST	c12,  1 * SIZE(AO)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	/* LN: pre-decrement the C pointer before the store (no post-increment below) */
	daddiu	CO1,CO1, -2 * SIZE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* and store the same values into C */
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c12,  1 * SIZE(CO1)
kusano 2b45e8
kusano 2b45e8
#ifndef LN
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* reload c11 from $0 so that .L31 can copy it into the other accumulators */
	MTC	$0,  c11
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	/* RT: AORIG += K << ZBASE_SHIFT, one packed A row forward */
	dsll	TEMP, K, ZBASE_SHIFT
kusano 2b45e8
	daddu	AORIG, AORIG, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	/* LT/RN: skip the K - KK steps that were not accumulated in both panels */
	dsubu	TEMP, K, KK
kusano 2b45e8
	dsll	TEMP, TEMP, ZBASE_SHIFT
kusano 2b45e8
	daddu	AO, AO, TEMP
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LT
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	daddiu	KK, KK, -1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* next row */
	daddiu	I, I, -1
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L31
kusano 2b45e8
	NOP
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L39: end of the single-line pass.  Moves B past the panel just used and */
/* steps KK by 1 for the right-side variants, then falls through to the     */
/* epilogue at .L999.                                                       */
.L39:
kusano 2b45e8
#ifdef LN
kusano 2b45e8
	/* LN: B += K << ZBASE_SHIFT; presumably 1 packed line of K elements */
	dsll	TEMP, K, ZBASE_SHIFT
kusano 2b45e8
	daddu	B, B, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(LT) || defined(RN)
kusano 2b45e8
	/* LT/RN: continue from wherever the running pointer BO stopped */
	move	B,  BO
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RN
kusano 2b45e8
	daddiu	KK, KK,  1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef RT
kusano 2b45e8
	daddiu	KK, KK, -1
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/* .L999: common exit.  Reloads integer registers $16..$21 and FP registers */
/* $f24..$f27 (plus $f20..$f23 when __64BIT__ is not defined) from the      */
/* stack frame, releases the 128-byte frame and returns through $31.        */
/* The matching saves are presumably in the prologue, which lies outside    */
/* this excerpt -- TODO confirm the offsets against it.                     */
.L999:
kusano 2b45e8
	LDARG	$16,   0($sp)
kusano 2b45e8
	LDARG	$17,   8($sp)
kusano 2b45e8
	LDARG	$18,  16($sp)
kusano 2b45e8
	LDARG	$19,  24($sp)
kusano 2b45e8
	LDARG	$20,  32($sp)
kusano 2b45e8
	LDARG	$21,  40($sp)
kusano 2b45e8
kusano 2b45e8
	ldc1	$f24, 48($sp)
kusano 2b45e8
	ldc1	$f25, 56($sp)
kusano 2b45e8
	ldc1	$f26, 64($sp)
kusano 2b45e8
	ldc1	$f27, 72($sp)
kusano 2b45e8
kusano 2b45e8
#ifndef __64BIT__
kusano 2b45e8
	/* note: this group starts at offset 88; offset 80 is not read here */
	ldc1	$f20, 88($sp)
kusano 2b45e8
	ldc1	$f21, 96($sp)
kusano 2b45e8
	ldc1	$f22,104($sp)
kusano 2b45e8
	ldc1	$f23,112($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	j	$31
kusano 2b45e8
	/* frame release sits in the slot after the jump, so it executes before */
	/* control reaches the caller (branch delay slot)                       */
	daddiu	$sp, $sp, 128
kusano 2b45e8
kusano 2b45e8
	EPILOGUE