Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/mips64/gemm_kernel.S

kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#define ASSEMBLER
kusano 2b45e8
#include "common.h"
kusano 2b45e8
kusano 2b45e8
/* Integer register aliases used by the kernel body.                   */
/*                                                                   */
/* M, N, K, A, B, C and LDC are read by the visible code without     */
/* being written first, so they are presumably the incoming          */
/* arguments -- TODO confirm against the caller.                     */
/*   M, N, K : extents. J is initialised to N >> 3, I to M >> 1      */
/*             (then M & 1 for the tail), and in the non-TRMM build  */
/*             L to K >> 2 (then K & 3 for the tail).                */
/*   A, B    : base addresses that AO and BO are initialised from.   */
/*   C       : base address that CO1..CO8 are derived from.          */
/*   LDC     : distance between consecutive COn pointers; shifted    */
/*             left by BASE_SHIFT once in the prologue.              */
#define M	$4
kusano 2b45e8
#define	N	$5
kusano 2b45e8
#define	K	$6
kusano 2b45e8
#define A	$8
kusano 2b45e8
#define B	$9
kusano 2b45e8
#define C	$10
kusano 2b45e8
#define LDC	$11
kusano 2b45e8
kusano 2b45e8
/* AO, BO: running read pointers (AO starts from A, BO from B) that  */
/* the inner loops advance as operands are consumed.                 */
#define AO	$12
kusano 2b45e8
#define BO	$13
kusano 2b45e8
kusano 2b45e8
/* Loop counters. I counts row steps, J counts 8-wide column blocks, */
/* L counts inner (K-direction) iterations. In the TRMM build L is   */
/* also used as scratch for a byte offset added to AO.               */
#define I	$2
kusano 2b45e8
#define J	$3
kusano 2b45e8
#define L	$7
kusano 2b45e8
kusano 2b45e8
/* NOTE(review): PREFETCHSIZE is not referenced in the visible part  */
/* of this file; the `pref` instructions seen here use literal       */
/* offsets. Confirm whether it is used further down before removing. */
#define PREFETCHSIZE (4 * 10)
kusano 2b45e8
	
kusano 2b45e8
/* CO1..CO8: eight output pointers into C. At .L10, CO1 = C and each */
/* following COn = CO(n-1) + LDC; C itself is then advanced to       */
/* CO8 + LDC. Each pointer moves forward by 2 * SIZE per pass of the */
/* 2-wide I loop. $16-$21 (CO3..CO8) are spilled to the stack frame  */
/* in the prologue.                                                  */
#define CO1	$14
kusano 2b45e8
#define CO2	$15
kusano 2b45e8
#define CO3	$16
kusano 2b45e8
#define CO4	$17
kusano 2b45e8
#define CO5	$18
kusano 2b45e8
#define CO6	$19
kusano 2b45e8
#define CO7	$20
kusano 2b45e8
#define CO8	$21
kusano 2b45e8
kusano 2b45e8
/* BB: set to B + (K << (2 + BASE_SHIFT)) at .L10. In the visible    */
/* code it is only used as the address of `pref 0` prefetches and is */
/* bumped by 16 * SIZE per I iteration. $22 is spilled in the        */
/* prologue.                                                         */
#define BB	$22
kusano 2b45e8
kusano 2b45e8
/* Extra integer registers used only when building the TRMM variant. */
/* $23-$25 are spilled to the frame in the prologue under the same   */
/* TRMMKERNEL guard.                                                 */
/*   OFFSET : loaded from 160($sp) right after the 160-byte frame is */
/*            allocated, i.e. the first slot above this frame        */
/*            (presumably a stack-passed argument -- TODO confirm).  */
/*   KK     : running offset derived from OFFSET: negated OFFSET at  */
/*            entry when LEFT is undefined, copied from OFFSET at    */
/*            .L10 when LEFT is defined, then incremented by 2 per   */
/*            2-wide I step when LEFT is defined.                    */
/*   TEMP   : scratch for the inner trip count and for byte offsets  */
/*            added to AO / BO.                                      */
#if defined(TRMMKERNEL)
kusano 2b45e8
#define OFFSET	$23
kusano 2b45e8
#define KK	$24
kusano 2b45e8
#define TEMP	$25
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Floating-point operand registers.                                 */
/*   a1..a4 : values loaded from the AO stream.                      */
/*   b1..b7 : values loaded from the BO stream.                      */
/* $f27/$f28 (a3, a4) are spilled to the frame in the prologue.      */
/* In the non-TRMM write-back (.L18 / .L28) the raw names $f0..$f7   */
/* are reused to hold the existing C values, so a1, a2 and b1..b6    */
/* do not survive that section.                                      */
#define a1	$f0
kusano 2b45e8
#define a2	$f1
kusano 2b45e8
#define a3	$f27
kusano 2b45e8
#define a4	$f28
kusano 2b45e8
kusano 2b45e8
#define b1	$f2
kusano 2b45e8
#define b2	$f3
kusano 2b45e8
#define b3	$f4
kusano 2b45e8
#define b4	$f5
kusano 2b45e8
#define b5	$f6
kusano 2b45e8
#define b6	$f7
kusano 2b45e8
#define b7	$f8
kusano 2b45e8
#define b8	$f9
kusano 2b45e8
kusano 2b45e8
/* a5 shares $f9 with b8. Neither name is used in the visible part   */
/* of the file -- NOTE(review): check later sections before relying  */
/* on both at once.                                                  */
#define a5	b8
kusano 2b45e8
kusano 2b45e8
/* Floating-point accumulators, named cXY:                           */
/*   X (1..8) selects the output pointer COX the value is stored to; */
/*   Y (1..2) selects the first or second element at that pointer.   */
/* The M & 1 tail (.L20 onward) uses only the cX1 registers.         */
/* The numbering skips $f15 (c31 = $f14, c32 = $f16) because ALPHA   */
/* occupies $f15. $f24..$f26 are spilled in the prologue, and        */
/* $f20..$f23 are spilled as well when __64BIT__ is not defined.     */
#define c11	$f10
kusano 2b45e8
#define c12	$f11
kusano 2b45e8
#define c21	$f12
kusano 2b45e8
#define c22	$f13
kusano 2b45e8
#define c31	$f14
kusano 2b45e8
#define c32	$f16
kusano 2b45e8
#define c41	$f17
kusano 2b45e8
#define c42	$f18
kusano 2b45e8
#define c51	$f19
kusano 2b45e8
#define c52	$f20
kusano 2b45e8
#define c61	$f21
kusano 2b45e8
#define c62	$f22
kusano 2b45e8
#define c71	$f23
kusano 2b45e8
#define c72	$f24
kusano 2b45e8
#define c81	$f25
kusano 2b45e8
#define c82	$f26
kusano 2b45e8
kusano 2b45e8
/* ALPHA: scale factor applied to every accumulator before it is     */
/* stored. The non-TRMM path uses MADD with the loaded C value,      */
/* ALPHA and the accumulator (presumably C + ALPHA * acc; the MADD   */
/* macro is defined outside this file -- confirm operand order).     */
/* The TRMM path uses MUL acc, ALPHA, acc. ALPHA is never written in */
/* the visible code, so it is presumably set by the caller.          */
#define ALPHA	$f15
kusano 2b45e8
kusano 2b45e8
	PROLOGUE
kusano 2b45e8
	
kusano 2b45e8
	daddiu	$sp, $sp, -160
kusano 2b45e8
kusano 2b45e8
	SDARG	$16,   0($sp)
kusano 2b45e8
	SDARG	$17,   8($sp)
kusano 2b45e8
	SDARG	$18,  16($sp)
kusano 2b45e8
	SDARG	$19,  24($sp)
kusano 2b45e8
	SDARG	$20,  32($sp)
kusano 2b45e8
	SDARG	$21,  40($sp)
kusano 2b45e8
	SDARG	$22,  48($sp)
kusano 2b45e8
kusano 2b45e8
	sdc1	$f24, 56($sp)
kusano 2b45e8
	sdc1	$f25, 64($sp)
kusano 2b45e8
	sdc1	$f26, 72($sp)
kusano 2b45e8
	sdc1	$f27, 80($sp)
kusano 2b45e8
	sdc1	$f28, 88($sp)
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
	SDARG	$23,  96($sp)
kusano 2b45e8
	SDARG	$24, 104($sp)
kusano 2b45e8
	SDARG	$25, 112($sp)
kusano 2b45e8
kusano 2b45e8
	LDARG	OFFSET, 160($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef __64BIT__
kusano 2b45e8
	sdc1	$f20,120($sp)
kusano 2b45e8
	sdc1	$f21,128($sp)
kusano 2b45e8
	sdc1	$f22,136($sp)
kusano 2b45e8
	sdc1	$f23,144($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	LDC, LDC, BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	neg	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsra	J,  N, 3
kusano 2b45e8
	blez	J, .L30
kusano 2b45e8
	nop
kusano 2b45e8
kusano 2b45e8
.L10:
kusano 2b45e8
	move	CO1, C
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	daddu	CO2, C,   LDC
kusano 2b45e8
	move	AO, A
kusano 2b45e8
	daddu	CO3, CO2, LDC
kusano 2b45e8
	daddiu	J, J, -1
kusano 2b45e8
	daddu	CO4, CO3, LDC
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	daddu	CO5, CO4, LDC
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	daddu	CO6, CO5, LDC
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	daddu	CO7, CO6, LDC
kusano 2b45e8
	MOV	c51, c11
kusano 2b45e8
	daddu	CO8, CO7, LDC
kusano 2b45e8
	dsra	I,  M, 1
kusano 2b45e8
	daddu	C,   CO8, LDC
kusano 2b45e8
kusano 2b45e8
	dsll	BB, K, 2 + BASE_SHIFT
kusano 2b45e8
	daddu	BB, B, BB
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL) &&  defined(LEFT)
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	blez	I, .L20
kusano 2b45e8
	MOV	c61, c11
kusano 2b45e8
kusano 2b45e8
.L11:
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
	dsll	L,    KK, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 3 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MOV	c52, c11
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	MOV	c62, c11
kusano 2b45e8
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	MOV	c72, c11
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MOV	c82, c11
kusano 2b45e8
kusano 2b45e8
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 8
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
kusano 2b45e8
	blez	L, .L15
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	pref	1, 3 * SIZE(CO1)
kusano 2b45e8
	pref	1, 3 * SIZE(CO2)
kusano 2b45e8
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	MOV	c52, c11
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	MOV	c62, c11
kusano 2b45e8
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	MOV	c72, c11
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
	MOV	c82, c11
kusano 2b45e8
kusano 2b45e8
	blez	L, .L15
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	blez	L, .L13
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	pref	1, 2 * SIZE(CO3)
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L12:
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a1, b5
kusano 2b45e8
	LD	a4,  2 * SIZE(AO)
kusano 2b45e8
	MADD	c61, c61, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c71, c71, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a4, b7
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c61, c61, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 28 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 17 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 18 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 19 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a3, b1
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a3, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 32 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 21 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 22 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 23 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a3, b5
kusano 2b45e8
	LD	a4,  6 * SIZE(AO)
kusano 2b45e8
	MADD	c61, c61, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c71, c71, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a3, b4
kusano 2b45e8
	LD	a3, 12 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 36 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 25 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 26 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 27 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a4, b4
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 40 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 29 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 30 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 31 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a4, b7
kusano 2b45e8
	daddiu	BO, BO, 32 * SIZE
kusano 2b45e8
	MADD	c61, c61, a4, b2
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	MADD	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	bgtz	L, .L12
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	NOP
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L13:
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a1, b5
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c61, c61, a1, b2
kusano 2b45e8
	LD	a4,  2 * SIZE(AO)
kusano 2b45e8
	MADD	c71, c71, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a4, b3
kusano 2b45e8
	pref	1, 3 * SIZE(CO4)
kusano 2b45e8
	MADD	c41, c41, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a4, b7
kusano 2b45e8
	pref	1, 3 * SIZE(CO5)
kusano 2b45e8
	MADD	c61, c61, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c71, c71, a4, b3
kusano 2b45e8
	pref	1, 3 * SIZE(CO6)
kusano 2b45e8
	MADD	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 28 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 17 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 18 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 19 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a3, b1
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a3, b3
kusano 2b45e8
	pref	1, 3 * SIZE(CO7)
kusano 2b45e8
	MADD	c41, c41, a3, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 32 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 21 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 22 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 23 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a3, b5
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c61, c61, a3, b2
kusano 2b45e8
	LD	a4,  6 * SIZE(AO)
kusano 2b45e8
	MADD	c71, c71, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a3, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b5
kusano 2b45e8
	LD	b5, 36 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2, 25 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3, 26 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4, 27 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a4, b6
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a4, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 40 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 29 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 30 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 31 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a4, b7
kusano 2b45e8
	daddiu	BO, BO, 32 * SIZE
kusano 2b45e8
	MADD	c61, c61, a4, b2
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	MADD	c71, c71, a4, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c81, c81, a4, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L15:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L18
kusano 2b45e8
	pref	1, 3 * SIZE(CO8)
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L16:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a1, b5
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD	c61, c61, a1, b2
kusano 2b45e8
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
	MADD	c71, c71, a1, b3
kusano 2b45e8
	daddiu	BO, BO,  8 * SIZE
kusano 2b45e8
	MADD	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c52, c52, a2, b5
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	MADD	c62, c62, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c72, c72, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c82, c82, a2, b4
kusano 2b45e8
	bgtz	L, .L16
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
.L18:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	daddiu	CO3,CO3, 2 * SIZE
kusano 2b45e8
	LD	$f1, 1 * SIZE(CO1)
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	LD	$f2, 0 * SIZE(CO2)
kusano 2b45e8
	daddiu	CO4,CO4, 2 * SIZE
kusano 2b45e8
	LD	$f3, 1 * SIZE(CO2)
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	LD	$f4, -2 * SIZE(CO3)
kusano 2b45e8
	daddiu	CO5,CO5, 2 * SIZE
kusano 2b45e8
	LD	$f5, -1 * SIZE(CO3)
kusano 2b45e8
	daddiu	CO6,CO6, 2 * SIZE
kusano 2b45e8
	LD	$f6, -2 * SIZE(CO4)
kusano 2b45e8
	daddiu	CO7,CO7, 2 * SIZE
kusano 2b45e8
	LD	$f7, -1 * SIZE(CO4)
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	LD	$f0,-2 * SIZE(CO5)
kusano 2b45e8
	MADD	c12, $f1, ALPHA, c12
kusano 2b45e8
	LD	$f1,-1 * SIZE(CO5)
kusano 2b45e8
	MADD	c21, $f2, ALPHA, c21
kusano 2b45e8
	LD	$f2,-2 * SIZE(CO6)
kusano 2b45e8
	MADD	c22, $f3, ALPHA, c22
kusano 2b45e8
	LD	$f3,-1 * SIZE(CO6)
kusano 2b45e8
kusano 2b45e8
	MADD	c31, $f4, ALPHA, c31
kusano 2b45e8
	LD	$f4,-2 * SIZE(CO7)
kusano 2b45e8
	MADD	c32, $f5, ALPHA, c32
kusano 2b45e8
	LD	$f5,-1 * SIZE(CO7)
kusano 2b45e8
	MADD	c41, $f6, ALPHA, c41
kusano 2b45e8
	LD	$f6, 0 * SIZE(CO8)
kusano 2b45e8
	MADD	c42, $f7, ALPHA, c42
kusano 2b45e8
	LD	$f7, 1 * SIZE(CO8)
kusano 2b45e8
kusano 2b45e8
	pref	0, 0 * SIZE(BB)
kusano 2b45e8
	pref	0, 8 * SIZE(BB)
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
	daddiu	CO8,CO8, 2 * SIZE
kusano 2b45e8
	ST	c21, -2 * SIZE(CO2)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	ST	c22, -1 * SIZE(CO2)
kusano 2b45e8
	daddiu	BB, BB, 16 * SIZE
kusano 2b45e8
kusano 2b45e8
	MADD	c51, $f0, ALPHA, c51
kusano 2b45e8
	ST	c31, -2 * SIZE(CO3)
kusano 2b45e8
	MADD	c52, $f1, ALPHA, c52
kusano 2b45e8
	ST	c32, -1 * SIZE(CO3)
kusano 2b45e8
	MADD	c61, $f2, ALPHA, c61
kusano 2b45e8
	ST	c41, -2 * SIZE(CO4)
kusano 2b45e8
	MADD	c62, $f3, ALPHA, c62
kusano 2b45e8
	ST	c42, -1 * SIZE(CO4)
kusano 2b45e8
kusano 2b45e8
	MADD	c71, $f4, ALPHA, c71
kusano 2b45e8
	ST	c51, -2 * SIZE(CO5)
kusano 2b45e8
	MADD	c72, $f5, ALPHA, c72
kusano 2b45e8
	ST	c52, -1 * SIZE(CO5)
kusano 2b45e8
	MADD	c81, $f6, ALPHA, c81
kusano 2b45e8
	ST	c61, -2 * SIZE(CO6)
kusano 2b45e8
	MADD	c82, $f7, ALPHA, c82
kusano 2b45e8
	ST	c62, -1 * SIZE(CO6)
kusano 2b45e8
kusano 2b45e8
	ST	c71, -2 * SIZE(CO7)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	ST	c72, -1 * SIZE(CO7)
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
kusano 2b45e8
	ST	c81, -2 * SIZE(CO8)
kusano 2b45e8
	MOV	c51, c11
kusano 2b45e8
	ST	c82, -1 * SIZE(CO8)
kusano 2b45e8
	bgtz	I, .L11
kusano 2b45e8
	MOV	c61, c11
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	CO4,CO4, 2 * SIZE
kusano 2b45e8
	daddiu	CO5,CO5, 2 * SIZE
kusano 2b45e8
	daddiu	CO6,CO6, 2 * SIZE
kusano 2b45e8
	daddiu	CO7,CO7, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	pref	0, 0 * SIZE(BB)
kusano 2b45e8
	pref	0, 8 * SIZE(BB)
kusano 2b45e8
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	MUL	c12, ALPHA, c12
kusano 2b45e8
	MTC	$0,  a1
kusano 2b45e8
	MUL	c21, ALPHA, c21
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
	MUL	c22, ALPHA, c22
kusano 2b45e8
	daddiu	CO3,CO3, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	MUL	c31, ALPHA, c31
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
	MUL	c32, ALPHA, c32
kusano 2b45e8
	ST	c21, -2 * SIZE(CO2)
kusano 2b45e8
	MUL	c41, ALPHA, c41
kusano 2b45e8
	ST	c22, -1 * SIZE(CO2)
kusano 2b45e8
	MUL	c42, ALPHA, c42
kusano 2b45e8
kusano 2b45e8
	ST	c31, -2 * SIZE(CO3)
kusano 2b45e8
	MUL	c51, ALPHA, c51
kusano 2b45e8
	ST	c32, -1 * SIZE(CO3)
kusano 2b45e8
	MUL	c52, ALPHA, c52
kusano 2b45e8
	ST	c41, -2 * SIZE(CO4)
kusano 2b45e8
	MUL	c61, ALPHA, c61
kusano 2b45e8
	ST	c42, -1 * SIZE(CO4)
kusano 2b45e8
	MUL	c62, ALPHA, c62
kusano 2b45e8
kusano 2b45e8
	ST	c51, -2 * SIZE(CO5)
kusano 2b45e8
	MUL	c71, ALPHA, c71
kusano 2b45e8
	ST	c52, -1 * SIZE(CO5)
kusano 2b45e8
	MUL	c72, ALPHA, c72
kusano 2b45e8
	ST	c61, -2 * SIZE(CO6)
kusano 2b45e8
	MUL	c81, ALPHA, c81
kusano 2b45e8
	ST	c62, -1 * SIZE(CO6)
kusano 2b45e8
	MUL	c82, ALPHA, c82
kusano 2b45e8
kusano 2b45e8
	ST	c71, -2 * SIZE(CO7)
kusano 2b45e8
	MOV	c11, a1
kusano 2b45e8
	ST	c72, -1 * SIZE(CO7)
kusano 2b45e8
	MOV	c21, a1
kusano 2b45e8
kusano 2b45e8
	daddiu	CO8,CO8, 2 * SIZE
kusano 2b45e8
	daddiu	BB, BB, 16 * SIZE
kusano 2b45e8
kusano 2b45e8
	ST	c81, -2 * SIZE(CO8)
kusano 2b45e8
	MOV	c31, a1
kusano 2b45e8
	ST	c82, -1 * SIZE(CO8)
kusano 2b45e8
	MOV	c41, a1
kusano 2b45e8
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
	MOV	c51, a1
kusano 2b45e8
kusano 2b45e8
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 3 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	KK, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L11
kusano 2b45e8
	MOV	c61, a1
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L20:
kusano 2b45e8
	andi	I,  M, 1
kusano 2b45e8
	MOV	c61, c11
kusano 2b45e8
	blez	I, .L29
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
	dsll	L,    KK, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 3 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 8
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
kusano 2b45e8
	blez	L, .L25
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
#else
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	blez	L, .L25
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L22:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a1, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD	c61, c61, a1, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c71, c71, a1, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD	c81, c81, a1, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	LD	a1,  4 * SIZE(AO)
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a2, b7
kusano 2b45e8
	LD	b7, 28 * SIZE(BO)
kusano 2b45e8
	MADD	c61, c61, a2, b2
kusano 2b45e8
	LD	b2, 17 * SIZE(BO)
kusano 2b45e8
	MADD	c71, c71, a2, b3
kusano 2b45e8
	LD	b3, 18 * SIZE(BO)
kusano 2b45e8
	MADD	c81, c81, a2, b4
kusano 2b45e8
	LD	b4, 19 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	daddiu	AO, AO,  4 * SIZE
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a3, b1
kusano 2b45e8
	LD	b1, 32 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a3, b2
kusano 2b45e8
	LD	b2, 21 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a3, b3
kusano 2b45e8
	LD	b3, 22 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a3, b4
kusano 2b45e8
	LD	b4, 23 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a3, b5
kusano 2b45e8
	LD	b5, 36 * SIZE(BO)
kusano 2b45e8
	MADD	c61, c61, a3, b2
kusano 2b45e8
	LD	b2, 25 * SIZE(BO)
kusano 2b45e8
	MADD	c71, c71, a3, b3
kusano 2b45e8
	LD	b3, 26 * SIZE(BO)
kusano 2b45e8
	MADD	c81, c81, a3, b4
kusano 2b45e8
	LD	b4, 27 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	daddiu	BO, BO, 32 * SIZE
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a4, b6
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a4, b2
kusano 2b45e8
	LD	b2, -3 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a4, b3
kusano 2b45e8
	LD	b3, -2 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a4, b4
kusano 2b45e8
	LD	b4, -1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a4, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD	c61, c61, a4, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c71, c71, a4, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c81, c81, a4, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	bgtz	L, .L22
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
.L25:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L28
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L26:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MOV	a2, a2
kusano 2b45e8
	daddiu	AO, AO,  1 * SIZE
kusano 2b45e8
	daddiu	BO, BO,  8 * SIZE
kusano 2b45e8
kusano 2b45e8
	MADD	c51, c51, a1, b5
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	MADD	c61, c61, a1, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c71, c71, a1, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c81, c81, a1, b4
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L26
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/*
 * .L28: write back the 1x8 tile held in c11..c81 to element 0 of
 * CO1..CO8.
 *  - non-TRMM build: the current C values are loaded into $f0..$f7 and
 *    combined via MADD c, $fN, ALPHA, c (presumably C + ALPHA * c).
 *  - TRMM build: C is not read; MUL c, ALPHA, c (presumably ALPHA * c)
 *    is stored, then AO/BO/KK are adjusted for the next tile.
 * MADD/MUL/LD/ST are macros defined outside this chunk.
 */
.L28:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	LD	$f1, 0 * SIZE(CO2)
kusano 2b45e8
	LD	$f2, 0 * SIZE(CO3)
kusano 2b45e8
	LD	$f3, 0 * SIZE(CO4)
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	LD	$f4, 0 * SIZE(CO5)
kusano 2b45e8
	MADD	c21, $f1, ALPHA, c21
kusano 2b45e8
	LD	$f5, 0 * SIZE(CO6)
kusano 2b45e8
	MADD	c31, $f2, ALPHA, c31
kusano 2b45e8
	LD	$f6, 0 * SIZE(CO7)
kusano 2b45e8
	MADD	c41, $f3, ALPHA, c41
kusano 2b45e8
	LD	$f7, 0 * SIZE(CO8)
kusano 2b45e8
	MADD	c51, $f4, ALPHA, c51
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	MADD	c61, $f5, ALPHA, c61
kusano 2b45e8
	ST	c21,  0 * SIZE(CO2)
kusano 2b45e8
	MADD	c71, $f6, ALPHA, c71
kusano 2b45e8
	ST	c31,  0 * SIZE(CO3)
kusano 2b45e8
	MADD	c81, $f7, ALPHA, c81
kusano 2b45e8
	ST	c41,  0 * SIZE(CO4)
kusano 2b45e8
	ST	c51,  0 * SIZE(CO5)
kusano 2b45e8
	ST	c61,  0 * SIZE(CO6)
kusano 2b45e8
	ST	c71,  0 * SIZE(CO7)
kusano 2b45e8
	ST	c81,  0 * SIZE(CO8)
kusano 2b45e8
#else
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
	MUL	c21, ALPHA, c21
kusano 2b45e8
	MUL	c31, ALPHA, c31
kusano 2b45e8
	MUL	c41, ALPHA, c41
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	MUL	c51, ALPHA, c51
kusano 2b45e8
	ST	c21,  0 * SIZE(CO2)
kusano 2b45e8
	MUL	c61, ALPHA, c61
kusano 2b45e8
	ST	c31,  0 * SIZE(CO3)
kusano 2b45e8
	MUL	c71, ALPHA, c71
kusano 2b45e8
	ST	c41,  0 * SIZE(CO4)
kusano 2b45e8
	MUL	c81, ALPHA, c81
kusano 2b45e8
kusano 2b45e8
	ST	c51,  0 * SIZE(CO5)
kusano 2b45e8
	ST	c61,  0 * SIZE(CO6)
kusano 2b45e8
	ST	c71,  0 * SIZE(CO7)
kusano 2b45e8
	ST	c81,  0 * SIZE(CO8)
kusano 2b45e8
kusano 2b45e8
/* TRMM pointer fix-up: TEMP = K - KK - (1 if LEFT, else 8); AO is then
   advanced by TEMP << (0 + BASE_SHIFT) and BO by TEMP << (3 + BASE_SHIFT),
   i.e. by TEMP k steps of a 1-wide A panel and an 8-wide B panel. */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 3 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* LEFT build: KK grows by the number of tile rows just handled (1). */
#ifdef LEFT
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L29: end of one 8-column pass.  In the TRMM non-LEFT build KK is
 * advanced by 8.  While J > 0 control returns to .L10 (outside this
 * chunk; J is updated there or earlier, not here).  The move B, BO that
 * follows the branch sits in its delay slot (assumes noreorder mode), so
 * B is set to BO on both the taken and the fall-through path.
 */
.L29:
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	daddiu	KK, KK, 8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgtz	J, .L10
kusano 2b45e8
	move	B, BO
kusano 2b45e8
	.align 3
kusano 2b45e8
	
kusano 2b45e8
/*
 * .L30: set up the pass taken when bit 2 of N is set (J = N & 4),
 * otherwise skip to .L50.  CO1..CO4 are set to C, C + LDC, C + 2*LDC,
 * C + 3*LDC and C is advanced past them.  c11 is cleared with
 * MTC $0, c11 (presumably a move from the integer zero register; MTC/MOV
 * are macros defined outside this chunk) and copied into c21/c31/c41.
 * I = M >> 1 counts 2-row tiles; if it is zero control goes to .L40.
 * Instructions directly after a branch act as delay slots (assumes
 * noreorder mode): AO = A and c41 = c11 execute on both paths.
 */
.L30:
kusano 2b45e8
	andi	J,  N, 4
kusano 2b45e8
	blez	J, .L50
kusano 2b45e8
	move	AO, A
kusano 2b45e8
kusano 2b45e8
	move	CO1, C
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	daddu	CO2, C,   LDC
kusano 2b45e8
	daddu	CO3, CO2, LDC
kusano 2b45e8
	daddu	CO4, CO3, LDC
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	daddu	C,   CO4, LDC
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
kusano 2b45e8
/* TRMM LEFT build: restart KK from OFFSET for this column pass. */
#if defined(TRMMKERNEL) &&  defined(LEFT)
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsra	I,  M, 1
kusano 2b45e8
	blez	I, .L40
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
kusano 2b45e8
/*
 * .L31: prologue for one 2x4 tile (two A values by four B values per k
 * step).  Preloads a1 = AO[0], a3 = AO[4], b1..b4 = B[0..3], b5 = B[4],
 * b6 = B[8], b7 = B[12] for the 4x-unrolled loop at .L32, and copies
 * c11 (already zero on entry) into c12/c22/c32/c42.
 *  - TRMM build: BO (and, in two of the LEFT/TRANSA combinations, AO)
 *    is positioned using KK; TEMP receives the k count for this tile
 *    and L = TEMP >> 2.
 *  - non-TRMM build: L = K >> 2; BO = B is done in the delay slot of the
 *    final blez so it takes effect on both paths (assumes noreorder).
 */
.L31:
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
/* Skip KK k steps: 2 elements per step in A, 4 per step in B. */
	dsll	L,    KK, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 2 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k count for this tile: K - KK, or KK plus the tile dimension
   (2 rows when LEFT, 4 columns otherwise). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 4
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
	blez	L, .L35
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	MOV	c32, c11
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	MOV	c42, c11
kusano 2b45e8
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	blez	L, .L35
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/*
 * .L32: main loop for the 2x4 tile, unrolled over four k steps per pass
 * (AO += 8 * SIZE, BO += 16 * SIZE).  cX1 accumulates products with the
 * first A value of a step, cX2 with the second; X selects the B column.
 * The first B value of each step lives in b1/b5/b6/b7 (preloaded one
 * pass ahead), the other three are cycled through b2..b4.  MADD d, c,
 * x, y presumably computes d = c + x * y (macro defined outside this
 * chunk).  Loads are interleaved with the MADDs, so statement order
 * matters.
 */
.L32:
kusano 2b45e8
/* k step 0: AO[0], AO[1] times BO[0..3]. */
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k step 1: AO[2], AO[3] times BO[4..7]. */
	MADD	c11, c11, a1, b5
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k step 2: AO[4] (held in a3), AO[5] times BO[8..11]. */
	MADD	c11, c11, a3, b6
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a3, b2
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c31, c31, a3, b3
kusano 2b45e8
	NOP
kusano 2b45e8
	MADD	c41, c41, a3, b4
kusano 2b45e8
	LD	a3,  6 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k step 3: AO[6], AO[7] times BO[12..15]; AO and BO are advanced in the
   middle, so later offsets are relative to the next pass. */
	MADD	c11, c11, a3, b7
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a3, b2
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	MADD	c31, c31, a3, b3
kusano 2b45e8
	daddiu	BO, BO, 16 * SIZE
kusano 2b45e8
	MADD	c41, c41, a3, b4
kusano 2b45e8
	LD	a3,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L32
kusano 2b45e8
/* Delay slot (assumes noreorder mode): refill b4 for the next pass. */
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L35/.L36: leftover-k loop for the 2x4 tile.  L = (K, or TEMP in the
 * TRMM build) & 3 passes; each pass consumes two A values and four B
 * values.  a1 and b1..b4 are expected to be loaded already on entry.
 * MADD d, c, x, y presumably computes d = c + x * y (macro defined
 * outside this chunk).
 */
.L35:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L38
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L36:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
/* AO was already advanced, so this is the first A value of the next step. */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  4 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c32, c32, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c42, c42, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L36
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass,
   including the last one. */
	daddiu	BO, BO,  4 * SIZE
kusano 2b45e8
kusano 2b45e8
/*
 * .L38: write back the 2x4 tile.  cX1/cX2 go to elements 0 and 1 of
 * column pointer CO<X>.  The CO pointers are advanced by 2 * SIZE early,
 * which is why later accesses use offsets -2 and -1.
 *  - non-TRMM build: C is loaded into $f0..$f7 and combined through
 *    MADD c, $fN, ALPHA, c (presumably C + ALPHA * c).
 *  - TRMM build: MUL c, ALPHA, c (presumably ALPHA * c) is stored and
 *    AO/BO/KK are adjusted for the next tile.
 * In both builds c11/c21/c31 are re-zeroed and I is decremented; the
 * closing bgtz returns to .L31 with c41 re-zeroed in its delay slot
 * (assumes noreorder mode).  MADD/MUL/MTC/MOV/LD/ST are macros defined
 * outside this chunk.
 */
.L38:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	daddiu	CO3,CO3, 2 * SIZE
kusano 2b45e8
	LD	$f1, 1 * SIZE(CO1)
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	LD	$f2, 0 * SIZE(CO2)
kusano 2b45e8
	daddiu	CO4,CO4, 2 * SIZE
kusano 2b45e8
	LD	$f3, 1 * SIZE(CO2)
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	LD	$f4, -2 * SIZE(CO3)
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	LD	$f5, -1 * SIZE(CO3)
kusano 2b45e8
	MADD	c12, $f1, ALPHA, c12
kusano 2b45e8
	LD	$f6, -2 * SIZE(CO4)
kusano 2b45e8
	MADD	c21, $f2, ALPHA, c21
kusano 2b45e8
	LD	$f7, -1 * SIZE(CO4)
kusano 2b45e8
	MADD	c22, $f3, ALPHA, c22
kusano 2b45e8
kusano 2b45e8
	MADD	c31, $f4, ALPHA, c31
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	MADD	c32, $f5, ALPHA, c32
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
	MADD	c41, $f6, ALPHA, c41
kusano 2b45e8
	ST	c21, -2 * SIZE(CO2)
kusano 2b45e8
	MADD	c42, $f7, ALPHA, c42
kusano 2b45e8
	ST	c22, -1 * SIZE(CO2)
kusano 2b45e8
kusano 2b45e8
/* Remaining stores are interleaved with clearing the accumulators for
   the next tile; c11 is cleared only after it has been stored. */
	ST	c31, -2 * SIZE(CO3)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	ST	c32, -1 * SIZE(CO3)
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
	ST	c41, -2 * SIZE(CO4)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	ST	c42, -1 * SIZE(CO4)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
#else
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
	daddiu	CO3,CO3, 2 * SIZE
kusano 2b45e8
	MUL	c12, ALPHA, c12
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	MUL	c21, ALPHA, c21
kusano 2b45e8
	daddiu	CO4,CO4, 2 * SIZE
kusano 2b45e8
	MUL	c22, ALPHA, c22
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	MUL	c31, ALPHA, c31
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
	MUL	c32, ALPHA, c32
kusano 2b45e8
	ST	c21, -2 * SIZE(CO2)
kusano 2b45e8
	MUL	c41, ALPHA, c41
kusano 2b45e8
	ST	c22, -1 * SIZE(CO2)
kusano 2b45e8
	MUL	c42, ALPHA, c42
kusano 2b45e8
kusano 2b45e8
	ST	c31, -2 * SIZE(CO3)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	ST	c32, -1 * SIZE(CO3)
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
	ST	c41, -2 * SIZE(CO4)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	ST	c42, -1 * SIZE(CO4)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
kusano 2b45e8
/* TRMM pointer fix-up: TEMP = K - KK - (2 if LEFT, else 4); AO advances
   by TEMP << (1 + BASE_SHIFT), BO by TEMP << (2 + BASE_SHIFT). */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 2 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	KK, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L31
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L40: handle the odd row of the 4-column pass (I = M & 1); skip to
 * .L49 when M is even.  Prologue for a 1x4 tile: preloads a1 = AO[0],
 * a2 = AO[1], b1..b4 = B[0..3], b5 = B[4], b6 = B[8], b7 = B[12] for
 * the 4x-unrolled loop at .L42.  c11..c41 are expected to be zero on
 * entry; c61/c71/c81 are also cleared here although the visible 1x4
 * code (.L42-.L48) does not read them.
 * The instruction after each branch is its delay slot (assumes noreorder
 * mode): MOV c61, c11 and, in the non-TRMM build, BO = B run on both
 * paths.
 */
.L40:
kusano 2b45e8
	andi	I,  M, 1
kusano 2b45e8
	blez	I, .L49
kusano 2b45e8
	MOV	c61, c11
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
/* Skip KK k steps: 1 element per step in A, 4 per step in B. */
	dsll	L,    KK, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 2 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k count for this tile: K - KK, or KK plus the tile dimension
   (1 row when LEFT, 4 columns otherwise). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 4
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
kusano 2b45e8
	blez	L, .L45
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MOV	c71, c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c81, c11
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
kusano 2b45e8
	blez	L, .L45
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/*
 * .L42: main loop for the 1x4 tile, unrolled over four k steps per pass
 * (AO += 4 * SIZE, BO += 16 * SIZE).  Each step multiplies one A value
 * by four B values into c11..c41.  The first B value of each step is
 * kept in b1/b5/b6/b7 (refilled one pass ahead); the other three cycle
 * through b2..b4.  MADD d, c, x, y presumably computes d = c + x * y
 * (macro defined outside this chunk).
 */
.L42:
kusano 2b45e8
/* k step 0: AO[0] (in a1) times BO[0..3]. */
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	b1, 16 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* a1 is refilled for the next pass already here. */
	LD	a1,  4 * SIZE(AO)
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
/* k step 1: AO[1] (in a2) times BO[4..7]. */
	MADD	c11, c11, a2, b5
kusano 2b45e8
	LD	b5, 20 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a2, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a2, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a2, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	LD	a2,  2 * SIZE(AO)
kusano 2b45e8
	daddiu	AO, AO,  4 * SIZE
kusano 2b45e8
kusano 2b45e8
/* k step 2: old AO[2] times BO[8..11]. */
	MADD	c11, c11, a2, b6
kusano 2b45e8
	LD	b6, 24 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a2, b2
kusano 2b45e8
	LD	b2, 13 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a2, b3
kusano 2b45e8
	LD	b3, 14 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a2, b4
kusano 2b45e8
	LD	b4, 15 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* AO has been advanced, so offset -1 is the fourth A value of this pass. */
	LD	a2, -1 * SIZE(AO)
kusano 2b45e8
	daddiu	BO, BO, 16 * SIZE
kusano 2b45e8
kusano 2b45e8
/* k step 3: old AO[3] times old BO[12..15]; loads now target the next pass. */
	MADD	c11, c11, a2, b7
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a2, b2
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a2, b3
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a2, b4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L42
kusano 2b45e8
/* Delay slot (assumes noreorder mode): refill a2 for the next pass. */
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L45/.L46: leftover-k loop for the 1x4 tile.  L = (K, or TEMP in the
 * TRMM build) & 3 passes; each pass consumes one A value and four B
 * values (AO += 1 * SIZE, BO += 4 * SIZE).  a1 and b1..b4 are expected
 * to be loaded on entry.  MADD d, c, x, y presumably computes
 * d = c + x * y (macro defined outside this chunk).
 */
.L45:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L48
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L46:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	b1,  4 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a1, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a1, b4
kusano 2b45e8
	LD	a1,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
	daddiu	AO, AO,  1 * SIZE
kusano 2b45e8
/* Register moved onto itself; looks like a scheduling filler - TODO confirm. */
	MOV	a2, a2
kusano 2b45e8
	bgtz	L, .L46
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  4 * SIZE
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
/*
 * .L48: write back the 1x4 tile in c11..c41 to element 0 of CO1..CO4.
 *  - non-TRMM build: C is loaded into $f0..$f3 and combined through
 *    MADD c, $fN, ALPHA, c (presumably C + ALPHA * c).
 *  - TRMM build: MUL c, ALPHA, c (presumably ALPHA * c) is stored and
 *    AO/BO/KK are adjusted.
 * MADD/MUL/LD/ST are macros defined outside this chunk.
 */
.L48:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	LD	$f1, 0 * SIZE(CO2)
kusano 2b45e8
	LD	$f2, 0 * SIZE(CO3)
kusano 2b45e8
	LD	$f3, 0 * SIZE(CO4)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	MADD	c21, $f1, ALPHA, c21
kusano 2b45e8
	MADD	c31, $f2, ALPHA, c31
kusano 2b45e8
	MADD	c41, $f3, ALPHA, c41
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c21,  0 * SIZE(CO2)
kusano 2b45e8
	ST	c31,  0 * SIZE(CO3)
kusano 2b45e8
	ST	c41,  0 * SIZE(CO4)
kusano 2b45e8
#else
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
	MUL	c21, ALPHA, c21
kusano 2b45e8
	MUL	c31, ALPHA, c31
kusano 2b45e8
	MUL	c41, ALPHA, c41
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c21,  0 * SIZE(CO2)
kusano 2b45e8
	ST	c31,  0 * SIZE(CO3)
kusano 2b45e8
	ST	c41,  0 * SIZE(CO4)
kusano 2b45e8
kusano 2b45e8
/* TRMM pointer fix-up: TEMP = K - KK - (1 if LEFT, else 4); AO advances
   by TEMP << (0 + BASE_SHIFT), BO by TEMP << (2 + BASE_SHIFT). */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 2 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L49: end of the 4-column pass.  In the TRMM non-LEFT build KK is
 * advanced by 4; B is then set to BO, the position reached in the
 * B panel.
 */
.L49:
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	daddiu	KK, KK, 4
kusano 2b45e8
#endif
kusano 2b45e8
	move	B, BO
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L50: set up the pass taken when bit 1 of N is set (J = N & 2),
 * otherwise skip to .L70.  CO1 = C and CO2 = C + LDC address the two
 * output columns.  I = M >> 1 counts 2-row tiles; if it is zero control
 * goes to .L60.  The instruction after each branch is its delay slot
 * (assumes noreorder mode): AO = A and C = CO2 + LDC run on both paths.
 */
.L50:
kusano 2b45e8
	andi	J,  N, 2
kusano 2b45e8
	blez	J, .L70
kusano 2b45e8
kusano 2b45e8
	move	AO, A
kusano 2b45e8
	move	CO1, C
kusano 2b45e8
	daddu	CO2, C,   LDC
kusano 2b45e8
kusano 2b45e8
/* TRMM LEFT build: restart KK from OFFSET for this column pass. */
#if defined(TRMMKERNEL) &&  defined(LEFT)
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsra	I,  M, 1
kusano 2b45e8
	blez	I, .L60
kusano 2b45e8
	daddu	C,   CO2, LDC
kusano 2b45e8
kusano 2b45e8
/*
 * .L51: prologue for one 2x2 tile.  Clears c11/c21/c12/c22 (MTC $0
 * presumably moves the integer zero register; MTC/MOV are macros defined
 * outside this chunk) and preloads a1 = AO[0], a2 = AO[1], a5 = AO[4],
 * b1..b3 = B[0..2], b5 = B[4] for the loop at .L52.  b6 and b7 are
 * loaded as well but are not read by the visible 2x2 code (.L52-.L58).
 *  - TRMM build: BO/AO are positioned using KK, TEMP is the k count and
 *    L = TEMP >> 2.
 *  - non-TRMM build: L = K >> 2; BO = B executes in the delay slot of
 *    the final blez (assumes noreorder mode).
 */
.L51:
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
/* Skip KK k steps: 2 elements per step in both A and B. */
	dsll	L,    KK, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 1 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a5,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k count for this tile.  The LEFT and non-LEFT arms below are
   identical (KK + 2) because the tile is 2 rows by 2 columns. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
	blez	L, .L55
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a5,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	blez	L, .L55
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/*
 * .L52: main loop for the 2x2 tile, unrolled over four k steps per pass
 * (AO += 8 * SIZE, BO += 8 * SIZE).  c11/c12 accumulate the two A values
 * of a step against the first B value, c21/c22 against the second.
 * Operands for later steps and for the next pass are loaded between the
 * MADDs, so statement order matters.  MADD d, c, x, y presumably
 * computes d = c + x * y (macro defined outside this chunk).
 */
.L52:
kusano 2b45e8
/* k step 0: AO[0], AO[1] times BO[0], BO[1]. */
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k step 1: AO[2], AO[3] times BO[2], BO[3]. */
	MADD	c11, c11, a3, b3
kusano 2b45e8
	LD	a1,  8 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a3, b4
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c12, c12, a4, b3
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	MADD	c22, c22, a4, b4
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k step 2: AO[4] (in a5), AO[5] times BO[4] (in b5), BO[5]. */
	MADD	c11, c11, a5, b5
kusano 2b45e8
	LD	a3,  6 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a5, b2
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
	MADD	c12, c12, a2, b5
kusano 2b45e8
	LD	a4,  7 * SIZE(AO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b5, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k step 3: AO[6], AO[7] times BO[6], BO[7]; the loads refill a5, b2,
   a2 and b3 for the next pass. */
	MADD	c11, c11, a3, b3
kusano 2b45e8
	LD	a5, 12 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a3, b4
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c12, c12, a4, b3
kusano 2b45e8
	LD	a2,  9 * SIZE(AO)
kusano 2b45e8
	MADD	c22, c22, a4, b4
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	bgtz	L, .L52
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  8 * SIZE
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L55/.L56: leftover-k loop for the 2x2 tile.  L = (K, or TEMP in the
 * TRMM build) & 3 passes; each pass consumes two A values and two B
 * values (AO += 2 * SIZE, BO += 2 * SIZE).  a1, b1 and b2 are expected
 * to be loaded on entry.  MADD d, c, x, y presumably computes
 * d = c + x * y (macro defined outside this chunk).
 */
.L55:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L58
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L56:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
/* First A value of the next k step. */
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c22, c22, a2, b2
kusano 2b45e8
	LD	b2,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
	bgtz	L, .L56
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  2 * SIZE
kusano 2b45e8
kusano 2b45e8
/*
 * .L58: write back the 2x2 tile.  c11/c12 go to elements 0 and 1 behind
 * CO1, c21/c22 to elements 0 and 1 behind CO2; both pointers are
 * advanced by 2 * SIZE first, hence the -2 / -1 store offsets.  I is
 * decremented and control returns to .L51 while I > 0.
 *  - non-TRMM build: C is loaded into $f0..$f3 and combined through
 *    MADD c, $fN, ALPHA, c (presumably C + ALPHA * c); the last store
 *    sits in the delay slot of the loop branch (assumes noreorder mode).
 *  - TRMM build: MUL c, ALPHA, c (presumably ALPHA * c) is stored and
 *    AO/BO/KK are adjusted before the loop branch.
 * MADD/MUL/LD/ST/NOP are macros defined outside this chunk.
 */
.L58:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
	LD	$f1, 1 * SIZE(CO1)
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	LD	$f2, 0 * SIZE(CO2)
kusano 2b45e8
	NOP
kusano 2b45e8
	LD	$f3, 1 * SIZE(CO2)
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	MADD	c12, $f1, ALPHA, c12
kusano 2b45e8
	MADD	c21, $f2, ALPHA, c21
kusano 2b45e8
	MADD	c22, $f3, ALPHA, c22
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
	ST	c21, -2 * SIZE(CO2)
kusano 2b45e8
	NOP
kusano 2b45e8
	bgtz	I, .L51
kusano 2b45e8
	ST	c22, -1 * SIZE(CO2)
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
	daddiu	CO2,CO2, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
	MUL	c12, ALPHA, c12
kusano 2b45e8
	MUL	c21, ALPHA, c21
kusano 2b45e8
	MUL	c22, ALPHA, c22
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
	ST	c21, -2 * SIZE(CO2)
kusano 2b45e8
	ST	c22, -1 * SIZE(CO2)
kusano 2b45e8
kusano 2b45e8
/* TRMM pointer fix-up: TEMP = K - KK - 2 (both arms are identical for
   this 2x2 tile); AO and BO each advance by TEMP << (1 + BASE_SHIFT). */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 1 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	KK, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L51
kusano 2b45e8
	NOP
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L60: handle the odd row of the 2-column pass (I = M & 1); skip to
 * .L69 when M is even.  Prologue for a 1x2 tile: clears c11/c21/c31/c41
 * (MTC $0 presumably moves the integer zero register; MTC/MOV are
 * macros defined outside this chunk) and preloads a1..a4 = AO[0..3] and
 * b1..b4 = B[0..3] for the loop at .L62.  b5, b6 and b7 are loaded too
 * but are not read by the visible 1x2 code (.L62-.L68).
 *  - TRMM build: BO/AO are positioned using KK, TEMP is the k count and
 *    L = TEMP >> 2.
 *  - non-TRMM build: L = K >> 2; BO = B executes in the delay slot of
 *    the final blez (assumes noreorder mode).
 */
.L60:
kusano 2b45e8
	andi	I,  M, 1
kusano 2b45e8
	blez	I, .L69
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
/* Skip KK k steps: 1 element per step in A, 2 per step in B. */
	dsll	L,    KK, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 1 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k count for this tile: K - KK, or KK plus the tile dimension
   (1 row when LEFT, 2 columns otherwise). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
	blez	L, .L65
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	MOV	c31, c11
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	MOV	c41, c11
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	blez	L, .L65
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/*
 * .L62: main loop for the 1x2 tile, unrolled over four k steps per pass
 * (AO += 4 * SIZE, BO += 8 * SIZE).  Two accumulator pairs are used:
 * k steps 0 and 2 add into c11/c21, k steps 1 and 3 add into c31/c41;
 * the pairs are summed later at .L68.  MADD d, c, x, y presumably
 * computes d = c + x * y (macro defined outside this chunk).
 */
.L62:
kusano 2b45e8
/* k steps 0 and 1: AO[0] times BO[0..1], AO[1] times BO[2..3]. */
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	b1,  4 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b2,  5 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a2, b3
kusano 2b45e8
	LD	b3,  6 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a2, b4
kusano 2b45e8
	LD	b4,  7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* a1/a2 are refilled for the next pass. */
	LD	a1,  4 * SIZE(AO)
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* k steps 2 and 3: AO[2] times BO[4..5], AO[3] times BO[6..7]. */
	MADD	c11, c11, a3, b1
kusano 2b45e8
	LD	b1,  8 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a3, b2
kusano 2b45e8
	LD	b2,  9 * SIZE(BO)
kusano 2b45e8
	MADD	c31, c31, a4, b3
kusano 2b45e8
	LD	b3, 10 * SIZE(BO)
kusano 2b45e8
	MADD	c41, c41, a4, b4
kusano 2b45e8
	LD	b4, 11 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	LD	a3,  6 * SIZE(AO)
kusano 2b45e8
	LD	a4,  7 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	daddiu	AO, AO,  4 * SIZE
kusano 2b45e8
kusano 2b45e8
	bgtz	L, .L62
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  8 * SIZE
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L65/.L66: leftover-k loop for the 1x2 tile.  L = (K, or TEMP in the
 * TRMM build) & 3 passes; each pass consumes one A value and two B
 * values (AO += 1 * SIZE, BO += 2 * SIZE) and adds into c11/c21 only.
 * a1, b1 and b2 are expected to be loaded on entry.  MADD d, c, x, y
 * presumably computes d = c + x * y (macro defined outside this chunk).
 */
.L65:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L68
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L66:
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
	MADD	c21, c21, a1, b2
kusano 2b45e8
	LD	b2,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	LD	a1,  1 * SIZE(AO)
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
kusano 2b45e8
	daddiu	AO, AO,  1 * SIZE
kusano 2b45e8
	bgtz	L, .L66
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  2 * SIZE
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
/*
 * .L68: write back the 1x2 tile.  The partial sums kept in c31/c41 by
 * the unrolled loop are first folded into c11/c21 (ADD), then:
 *  - non-TRMM build: C is loaded into $f0/$f1 and combined through
 *    MADD c, $fN, ALPHA, c (presumably C + ALPHA * c).
 *  - TRMM build: MUL c, ALPHA, c (presumably ALPHA * c) is stored and
 *    AO/BO/KK are adjusted.
 * Results go to element 0 of CO1 and CO2.  ADD/MADD/MUL/LD/ST are macros
 * defined outside this chunk.
 */
.L68:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	LD	$f1, 0 * SIZE(CO2)
kusano 2b45e8
kusano 2b45e8
	ADD	c11, c11, c31
kusano 2b45e8
	ADD	c21, c21, c41
kusano 2b45e8
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	MADD	c21, $f1, ALPHA, c21
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c21,  0 * SIZE(CO2)
kusano 2b45e8
#else
kusano 2b45e8
	ADD	c11, c11, c31
kusano 2b45e8
	ADD	c21, c21, c41
kusano 2b45e8
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
	MUL	c21, ALPHA, c21
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
	ST	c21,  0 * SIZE(CO2)
kusano 2b45e8
kusano 2b45e8
/* TRMM pointer fix-up: TEMP = K - KK - (1 if LEFT, else 2); AO advances
   by TEMP << (0 + BASE_SHIFT), BO by TEMP << (1 + BASE_SHIFT). */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsll	L,    TEMP, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 1 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L69: end of the 2-column pass.  In the TRMM non-LEFT build KK is
 * advanced by 2; B is then set to BO, the position reached in the
 * B panel.
 */
.L69:
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	daddiu	KK, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
	move	B, BO
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L70: set up the pass taken when bit 0 of N is set (J = N & 1),
 * otherwise jump to .L999 (outside this chunk).  CO1 = C addresses the
 * single output column.  I = M >> 1 counts 2-row tiles; if it is zero
 * control goes to .L80 (outside this chunk).  The instruction after each
 * branch is its delay slot (assumes noreorder mode): AO = A and
 * C = CO1 + LDC run on both paths.
 */
.L70:
kusano 2b45e8
	andi	J,  N, 1
kusano 2b45e8
	blez	J, .L999
kusano 2b45e8
kusano 2b45e8
	move	AO, A
kusano 2b45e8
	move	CO1, C
kusano 2b45e8
kusano 2b45e8
/* TRMM LEFT build: restart KK from OFFSET for this column pass. */
#if defined(TRMMKERNEL) &&  defined(LEFT)
kusano 2b45e8
	move	KK, OFFSET
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	dsra	I,  M, 1
kusano 2b45e8
	blez	I, .L80
kusano 2b45e8
	daddu	C,   CO1, LDC
kusano 2b45e8
kusano 2b45e8
/*
 * .L71: prologue for one 2x1 tile.  Clears c11/c21/c12/c22 (MTC $0
 * presumably moves the integer zero register; MTC/MOV are macros defined
 * outside this chunk) and computes the unrolled trip count L.
 * The a1/a2/a5 and b1..b7 preloads mirror the other tile prologues, but
 * the visible loops (.L72, .L76) reload a1, a2 and b1 at the top of
 * every pass, so none of the preloaded values is consumed there.
 * NOTE(review): whether code after this chunk relies on them cannot be
 * told from here.
 *  - TRMM build: BO/AO are positioned using KK, TEMP is the k count and
 *    L = TEMP >> 2.
 *  - non-TRMM build: L = K >> 2; BO = B executes in the delay slot of
 *    the final blez (assumes noreorder mode).
 */
.L71:
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
/* Skip KK k steps: 2 elements per step in A, 1 per step in B. */
	dsll	L,    KK, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 0 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a5,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/* k count for this tile: K - KK, or KK plus the tile dimension
   (2 rows when LEFT, 1 column otherwise). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
	dsra	L,  TEMP, 2
kusano 2b45e8
	blez	L, .L75
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a5,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	MOV	c12, c11
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	MOV	c22, c11
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	blez	L, .L75
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/*
 * .L72: main loop for the 2x1 tile, unrolled over four k steps per pass
 * (AO += 8 * SIZE, BO += 4 * SIZE).  Unlike the wider tiles this loop is
 * not software-pipelined: every k step loads its own two A values and
 * one B value and then adds the products into c11 and c12.
 * MADD d, c, x, y presumably computes d = c + x * y (macro defined
 * outside this chunk).
 */
.L72:
kusano 2b45e8
/* k step 0 */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
kusano 2b45e8
/* k step 1 */
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
	LD	b1,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
kusano 2b45e8
/* k step 2 */
	LD	a1,  4 * SIZE(AO)
kusano 2b45e8
	LD	a2,  5 * SIZE(AO)
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
kusano 2b45e8
/* k step 3 */
	LD	a1,  6 * SIZE(AO)
kusano 2b45e8
	LD	a2,  7 * SIZE(AO)
kusano 2b45e8
	LD	b1,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	daddiu	AO, AO,  8 * SIZE
kusano 2b45e8
	bgtz	L, .L72
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  4 * SIZE
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L75/.L76: leftover-k loop for the 2x1 tile.  L = (K, or TEMP in the
 * TRMM build) & 3 passes; each pass loads two A values and one B value,
 * adds the products into c11/c12, and advances AO by 2 * SIZE and BO by
 * 1 * SIZE.  MADD d, c, x, y presumably computes d = c + x * y (macro
 * defined outside this chunk).
 */
.L75:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L78
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
.L76:
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
	MADD	c12, c12, a2, b1
kusano 2b45e8
kusano 2b45e8
	daddiu	L, L, -1
kusano 2b45e8
	daddiu	AO, AO,  2 * SIZE
kusano 2b45e8
	bgtz	L, .L76
kusano 2b45e8
/* Delay slot (assumes noreorder mode): BO advances on every pass. */
	daddiu	BO, BO,  1 * SIZE
kusano 2b45e8
kusano 2b45e8
/*
 * .L78: write back the two-element tile and loop to the next tile (.L71)
 * while I > 0.
 *   - non-TRMM build: reads the two existing C values, combines them with
 *     ALPHA and the accumulators, and stores the result.
 *   - TRMM build: stores ALPHA * accumulator without reading C, then
 *     adjusts AO/BO/KK for the next tile.
 * c21/c22 are added into c11/c12 first; the loops at .L72/.L76 do not
 * modify c21/c22.
 */
.L78:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	/* Load current C[0], C[1]; the integer updates are interleaved with
	   the loads. */
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
	LD	$f1, 1 * SIZE(CO1)
kusano 2b45e8
	/* CO1 is advanced here, so the stores below use negative offsets. */
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	ADD	c11, c11, c21
kusano 2b45e8
	ADD	c12, c12, c22
kusano 2b45e8
kusano 2b45e8
	/* Presumably c = C_old + ALPHA * c (MADD macro defined elsewhere). */
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
	MADD	c12, $f1, ALPHA, c12
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	bgtz	I, .L71
kusano 2b45e8
	/* Second store follows the branch; presumably its delay slot, so it
	   runs whether or not the branch is taken. */
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
#else
kusano 2b45e8
	ADD	c11, c11, c21
kusano 2b45e8
	daddiu	I, I, -1
kusano 2b45e8
	ADD	c12, c12, c22
kusano 2b45e8
	daddiu	CO1,CO1, 2 * SIZE
kusano 2b45e8
kusano 2b45e8
	/* TRMM: C is overwritten, not accumulated into. */
	MUL	c11, ALPHA, c11
kusano 2b45e8
	MUL	c12, ALPHA, c12
kusano 2b45e8
kusano 2b45e8
	ST	c11, -2 * SIZE(CO1)
kusano 2b45e8
	ST	c12, -1 * SIZE(CO1)
kusano 2b45e8
kusano 2b45e8
#if ( defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	/* TEMP = K - KK - (2 if LEFT else 1): k-steps to skip over. */
	dsubu	TEMP, K, KK
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	daddiu	TEMP, TEMP, -2
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, TEMP, -1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* Scale to byte offsets: A has 2 elements per k-step, B has 1.
	   NOTE(review): assumes BASE_SHIFT == log2(SIZE); defined elsewhere. */
	dsll	L,    TEMP, 1 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, TEMP, 0 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, BO, TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	/* Two more elements handled along M: bump KK by the tile height. */
	daddiu	KK, KK, 2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgtz	I, .L71
kusano 2b45e8
	NOP
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L80: setup for the single leftover element along M (taken when
 * M & 1 is non-zero; otherwise skip to .L89).
 * Zeroes the accumulators c11/c21, positions AO/BO, and computes
 * L = number of 4-way unrolled passes for the loop at .L82.  If L <= 0,
 * control goes directly to the remainder handling at .L85.
 * NOTE(review): a2-a4 and b2-b7 are loaded here but the code from .L82
 * through .L88 only reads a1 and b1 (which it reloads itself).  These
 * loads touch up to 12 * SIZE beyond the B pointer -- confirm the packed
 * buffers are large enough for that.
 */
.L80:
kusano 2b45e8
	andi	I,  M, 1
kusano 2b45e8
	blez	I, .L89
kusano 2b45e8
	NOP
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	move	BO,  B
kusano 2b45e8
#else
kusano 2b45e8
	/* Skip the first KK k-steps: one element each in A and in B.
	   L and TEMP hold the same byte offset. */
	dsll	L,    KK, 0 + BASE_SHIFT
kusano 2b45e8
	dsll	TEMP, KK, 0 + BASE_SHIFT
kusano 2b45e8
kusano 2b45e8
	daddu	AO, AO, L
kusano 2b45e8
	daddu	BO, B,  TEMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* Preloads interleaved with accumulator clearing: c11 = 0 (moved from
	   integer register $0), c21 = c11. */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	LD	b5,  4 * SIZE(BO)
kusano 2b45e8
	LD	b6,  8 * SIZE(BO)
kusano 2b45e8
	LD	b7, 12 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	/* TEMP = effective k length for this tile.  Both the LEFT and the
	   non-LEFT fallback use KK + 1 (tile is one element in each case). */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
kusano 2b45e8
	dsubu	TEMP, K, KK
kusano 2b45e8
#elif defined(LEFT)
kusano 2b45e8
	daddiu	TEMP, KK, 1
kusano 2b45e8
#else
kusano 2b45e8
	daddiu	TEMP, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
	/* L = TEMP / 4 unrolled passes. */
	dsra	L,  TEMP, 2
kusano 2b45e8
	blez	L, .L85
kusano 2b45e8
	NOP
kusano 2b45e8
#else
kusano 2b45e8
	/* Non-TRMM build: same preloads, but B is read directly and the
	   full K is used. */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	MTC	$0,  c11
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	MOV	c21, c11
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	LD	b5,  4 * SIZE(B)
kusano 2b45e8
	LD	b6,  8 * SIZE(B)
kusano 2b45e8
	LD	b7, 12 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
	dsra	L,  K, 2
kusano 2b45e8
	blez	L, .L85
kusano 2b45e8
	/* BO = B follows the branch; presumably its delay slot, so BO is set
	   on both the taken and fall-through paths. */
	move	BO,  B
kusano 2b45e8
#endif
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/*
 * .L82: main k-loop for the single-element tile, unrolled four k-steps
 * per pass (L = passes remaining).  Products alternate between two
 * accumulators: even k-steps go to c11, odd k-steps to c21.  The two are
 * summed later at .L88.
 * NOTE(review): MADD is a macro defined outside this chunk; presumably
 * "dst = addend + x * y".
 */
.L82:
kusano 2b45e8
	/* k-step 0 -> c11 */
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
kusano 2b45e8
	/* k-step 1 -> c21 */
	LD	a1,  1 * SIZE(AO)
kusano 2b45e8
	LD	b1,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c21, c21, a1, b1
kusano 2b45e8
kusano 2b45e8
	/* k-step 2 -> c11 */
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
kusano 2b45e8
	/* k-step 3 -> c21 */
	LD	a1,  3 * SIZE(AO)
kusano 2b45e8
	LD	b1,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c21, c21, a1, b1
kusano 2b45e8
kusano 2b45e8
	/* One pass done: consume 4 A elements and 4 B elements. */
	daddiu	L, L, -1
kusano 2b45e8
	daddiu	AO, AO,  4 * SIZE
kusano 2b45e8
	bgtz	L, .L82
kusano 2b45e8
	/* Placed after the branch; presumably executes in its delay slot. */
	daddiu	BO, BO,  4 * SIZE
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L85: remainder of the k dimension for the single-element tile.
 * L = (K or, for TRMMKERNEL, TEMP) & 3; if zero, skip to the write-back
 * at .L88.
 */
.L85:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	andi	L,  K, 3
kusano 2b45e8
#else
kusano 2b45e8
	andi	L,  TEMP, 3
kusano 2b45e8
#endif
kusano 2b45e8
	NOP
kusano 2b45e8
	blez	L, .L88
kusano 2b45e8
	NOP
kusano 2b45e8
	.align	3
kusano 2b45e8
kusano 2b45e8
/* .L86: one k-step per iteration, accumulating into c11 only. */
.L86:
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	MADD	c11, c11, a1, b1
kusano 2b45e8
kusano 2b45e8
	/* Advance A and B by one element each. */
	daddiu	L, L, -1
kusano 2b45e8
	daddiu	AO, AO,  1 * SIZE
kusano 2b45e8
	bgtz	L, .L86
kusano 2b45e8
	/* Placed after the branch; presumably executes in its delay slot. */
	daddiu	BO, BO,  1 * SIZE
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
/*
 * .L88: write back the single-element tile.
 * The two partial sums kept by the loop at .L82 (c11 and c21) are added
 * together first.  CO1 is not advanced here.
 */
.L88:
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	/* Non-TRMM: read the existing C value and combine it with ALPHA and
	   the sum (presumably C_old + ALPHA * sum; MADD defined elsewhere). */
	LD	$f0, 0 * SIZE(CO1)
kusano 2b45e8
kusano 2b45e8
	ADD	c11, c11, c21
kusano 2b45e8
	MADD	c11, $f0, ALPHA, c11
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
#else
kusano 2b45e8
	/* TRMM: C is overwritten with ALPHA times the sum; C is not read. */
	ADD	c11, c11, c21
kusano 2b45e8
	MUL	c11, ALPHA, c11
kusano 2b45e8
kusano 2b45e8
	ST	c11,  0 * SIZE(CO1)
kusano 2b45e8
#endif
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
/*
 * .L89: bookkeeping after the last single-column pass.
 * For a TRMM build without LEFT, KK is bumped by one (one column done).
 * B is then set to BO, i.e. to wherever the B read pointer ended up.
 */
.L89:
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	daddiu	KK, KK, 1
kusano 2b45e8
#endif
kusano 2b45e8
	move	B, BO
kusano 2b45e8
	.align 3
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
/*
 * .L999: common exit.  Reloads registers from the 160-byte stack frame
 * and returns to the address in $31.
 * Frame layout as read here:
 *     0 ..  48   $16-$22
 *    56 ..  88   $f24-$f28
 *    96 .. 112   $23-$25        (TRMMKERNEL builds only)
 *   120 .. 144   $f20-$f23      (only when __64BIT__ is not defined)
 * NOTE(review): the matching stores are in the prologue, which is outside
 * this chunk -- confirm the offsets agree.
 */
.L999:
kusano 2b45e8
	LDARG	$16,   0($sp)
kusano 2b45e8
	LDARG	$17,   8($sp)
kusano 2b45e8
	LDARG	$18,  16($sp)
kusano 2b45e8
	LDARG	$19,  24($sp)
kusano 2b45e8
	LDARG	$20,  32($sp)
kusano 2b45e8
	LDARG	$21,  40($sp)
kusano 2b45e8
	LDARG	$22,  48($sp)
kusano 2b45e8
kusano 2b45e8
	ldc1	$f24, 56($sp)
kusano 2b45e8
	ldc1	$f25, 64($sp)
kusano 2b45e8
	ldc1	$f26, 72($sp)
kusano 2b45e8
	ldc1	$f27, 80($sp)
kusano 2b45e8
	ldc1	$f28, 88($sp)
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL)
kusano 2b45e8
	LDARG	$23,  96($sp)
kusano 2b45e8
	LDARG	$24, 104($sp)
kusano 2b45e8
	LDARG	$25, 112($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef __64BIT__
kusano 2b45e8
	ldc1	$f20,120($sp)
kusano 2b45e8
	ldc1	$f21,128($sp)
kusano 2b45e8
	ldc1	$f22,136($sp)
kusano 2b45e8
	ldc1	$f23,144($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	j	$31
kusano 2b45e8
	/* Frame release follows the jump; presumably its delay slot, so the
	   stack pointer is restored before control reaches the caller. */
	daddiu	$sp, $sp, 160
kusano 2b45e8
kusano 2b45e8
	EPILOGUE