Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/alpha/zgemm_kernel_2x2.S

kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#define ASSEMBLER
kusano 2b45e8
#include "common.h"
kusano 2b45e8
#include "version.h"
kusano 2b45e8
kusano 2b45e8
/* Target-CPU selection.  Exactly which of EV4 / EV5 / EV6 is defined is  */
/* decided outside this file; the build stops here if none of them is.   */
#if !defined(EV4) && !defined(EV5) && !defined(EV6)
kusano 2b45e8
#error "Architecture is not specified."
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* PREFETCHSIZE is multiplied by SIZE at its use sites to form the        */
/* displacement of the loads into $31 issued from AO and BO in the main   */
/* loop ($L12).  UNOP fills otherwise empty issue slots in that loop: on  */
/* EV6 it expands to a real `unop`, on EV5 and EV4 it expands to nothing. */
#ifdef EV6
kusano 2b45e8
#define PREFETCHSIZE 56
kusano 2b45e8
#define UNOP unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef EV5
kusano 2b45e8
#define PREFETCHSIZE 48
kusano 2b45e8
#define UNOP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* EV4 defines no PREFETCHSIZE; every use of it in this file is wrapped   */
/* in `#ifndef EV4`.                                                      */
#ifdef EV4
kusano 2b45e8
#define UNOP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	/* Assembler modes for the whole kernel.  `noat` is needed because   */
	/* this file uses $at itself (AO is defined as $at, and the PROFILE  */
	/* prologue also loads through $at).  `noreorder` keeps the          */
	/* hand-scheduled instruction order as written.                      */
	.set	noat
kusano 2b45e8
	.set	noreorder
kusano 2b45e8
	/* NOTE(review): the ev6 instruction set is selected unconditionally, */
	/* even when EV4 or EV5 is the configured target -- confirm intended. */
	.arch ev6
kusano 2b45e8
kusano 2b45e8
.text
kusano 2b45e8
	.align	5
kusano 2b45e8
	/* CNAME is not defined in this file; presumably the build supplies   */
	/* the exported symbol name -- verify against common.h.               */
	.globl	CNAME
kusano 2b45e8
	.ent	CNAME
kusano 2b45e8
kusano 2b45e8
/* Stack frame size in bytes: eight 8-byte save slots for $f2..$f9 at    */
/* 0..56($sp), followed by ALPHA_R at 64($sp) and ALPHA_I at 72($sp).     */
#define STACKSIZE 80
kusano 2b45e8
kusano 2b45e8
/* Arguments.  M, N, K and A are used directly from their registers; B,  */
/* C and LDC are loaded by the prologue from the caller's stack at        */
/* 0/8/16 + STACKSIZE($sp).  LDC is then shifted left by ZBASE_SHIFT.     */
/* The routine branches to $L999 when M, N or K is <= 0.                  */
#define M	$16
kusano 2b45e8
#define N	$17
kusano 2b45e8
#define K	$18
kusano 2b45e8
#define A	$21
kusano 2b45e8
#define	B	$22
kusano 2b45e8
#define C	$20
kusano 2b45e8
#define	LDC	$23
kusano 2b45e8
kusano 2b45e8
/* Output pointers for the two columns handled per outer iteration:      */
/* C1 = C and C2 = C + LDC at $L01.                                       */
#define C1	$19
kusano 2b45e8
#define C2	$24
kusano 2b45e8
kusano 2b45e8
/* Running pointers and loop counters.  AO starts from A and BO from B;  */
/* J counts N >> 1 outer iterations, I counts M >> 1 inner iterations,    */
/* and L counts down the K dimension two steps per pass of the unrolled   */
/* loops.  AO lives in $at, which is why `.set noat` is in effect.        */
#define AO	$at
kusano 2b45e8
#define	BO	$5
kusano 2b45e8
#define I	$6
kusano 2b45e8
#define J	$7
kusano 2b45e8
#define L	$8
kusano 2b45e8
kusano 2b45e8
/* Operands loaded from AO.  a4 is $f19, one of the two registers the    */
/* prologue stores to ALPHA_R / ALPHA_I before any operand load.          */
#define a1	$f16
kusano 2b45e8
#define a2	$f17
kusano 2b45e8
#define a3	$f18
kusano 2b45e8
#define a4	$f19
kusano 2b45e8
kusano 2b45e8
/* Operands loaded from B / BO.  b1 is $f20, the other register spilled  */
/* to the ALPHA slots by the prologue.                                    */
#define b1	$f20
kusano 2b45e8
#define b2	$f21
kusano 2b45e8
#define b3	$f22
kusano 2b45e8
#define b4	$f23
kusano 2b45e8
kusano 2b45e8
/* Product temporaries: each MUL writes one of these and a later         */
/* ADD1..ADD4 folds it into an accumulator.                               */
#define t1	$f24
kusano 2b45e8
#define t2	$f25
kusano 2b45e8
#define t3	$f26
kusano 2b45e8
#define t4	$f27
kusano 2b45e8
kusano 2b45e8
/* Extra operand registers.  a6 shares $f30 with alpha_r and b5 shares   */
/* $f29 with alpha_i (below).  In the visible code alpha_r / alpha_i are  */
/* reloaded from the stack only after the unrolled loops ($L15/$L18 and   */
/* $L25/$L28), so the two roles do not overlap there.                     */
#define a5	$f28
kusano 2b45e8
#define a6	$f30
kusano 2b45e8
#define b5	$f29
kusano 2b45e8
kusano 2b45e8
#define alpha_i	$f29
kusano 2b45e8
#define alpha_r	$f30
kusano 2b45e8
kusano 2b45e8
/* Accumulators c01..c16 occupy $f0..$f15.  c03..c10 are $f2..$f9, the   */
/* registers the prologue saves to the stack frame.                       */
#define c01	$f0
kusano 2b45e8
#define c02	$f1
kusano 2b45e8
#define c03	$f2
kusano 2b45e8
#define c04	$f3
kusano 2b45e8
kusano 2b45e8
#define c05	$f4
kusano 2b45e8
#define c06	$f5
kusano 2b45e8
#define c07	$f6
kusano 2b45e8
#define c08	$f7
kusano 2b45e8
kusano 2b45e8
#define c09	$f8
kusano 2b45e8
#define c10	$f9
kusano 2b45e8
#define c11	$f10
kusano 2b45e8
#define c12	$f11
kusano 2b45e8
kusano 2b45e8
#define c13	$f12
kusano 2b45e8
#define c14	$f13
kusano 2b45e8
#define c15	$f14
kusano 2b45e8
#define c16	$f15
kusano 2b45e8
kusano 2b45e8
/* Integer scratch.  $0..$2 are also used raw by the argument check in   */
/* the prologue, before KK is first written.  KK and OFFSET are only      */
/* touched under TRMMKERNEL.  BB is a pointer derived from K and B at     */
/* $L01 and read through loads into $31 at $L11 on non-EV4 targets.       */
#define TMP1	$0
kusano 2b45e8
#define TMP2	$1
kusano 2b45e8
#define KK	$2
kusano 2b45e8
#define	BB	$3
kusano 2b45e8
#define OFFSET  $4
kusano 2b45e8
kusano 2b45e8
/* Stack slots holding the incoming $f19 / $f20 values (see prologue).   */
#define ALPHA_R	64($sp)
kusano 2b45e8
#define ALPHA_I	72($sp)
kusano 2b45e8
kusano 2b45e8
/* Sign selection for the four partial-product accumulations.  With      */
/* a[0], a[1] the values at offsets 0 and 1 of an A element and b[0],     */
/* b[1] those of a B element, the kernel loops use:                       */
/*     ADD1 for a[0]*b[0]      ADD2 for a[1]*b[1]                         */
/*     ADD3 for a[1]*b[0]      ADD4 for a[0]*b[1]                         */
/* Offsets 0 / 1 are presumably the real / imaginary parts -- TODO        */
/* confirm.  Each variant group below picks ADD or SUB for every one of   */
/* them; the variant macros (NN, RN, NR, ...) and ADD / SUB themselves    */
/* are not defined in this file.                                          */
#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
kusano 2b45e8
#define ADD1	  ADD
kusano 2b45e8
#define ADD2	  SUB
kusano 2b45e8
#define ADD3	  ADD
kusano 2b45e8
#define ADD4	  ADD
kusano 2b45e8
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
kusano 2b45e8
#define ADD1	  ADD
kusano 2b45e8
#define ADD2	  ADD
kusano 2b45e8
#define ADD3	  SUB
kusano 2b45e8
#define ADD4	  ADD
kusano 2b45e8
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
kusano 2b45e8
#define ADD1	  ADD
kusano 2b45e8
#define ADD2	  ADD
kusano 2b45e8
#define ADD3	  ADD
kusano 2b45e8
#define ADD4	  SUB
kusano 2b45e8
/* Fallback when none of the variants listed above is defined.           */
#else
kusano 2b45e8
#define ADD1	  ADD
kusano 2b45e8
#define ADD2	  SUB
kusano 2b45e8
#define ADD3	  SUB
kusano 2b45e8
#define ADD4	  SUB
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
CNAME:
kusano 2b45e8
	.frame	$sp, STACKSIZE, $26, 0
kusano 2b45e8
kusano 2b45e8
#ifdef PROFILE
kusano 2b45e8
	ldgp	$gp, 0($27)
kusano 2b45e8
	lda	$at, _mcount
kusano 2b45e8
	jsr	$at, ($at), _mcount
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef PROFILE
kusano 2b45e8
	.prologue 0
kusano 2b45e8
#else
kusano 2b45e8
	.prologue 1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	lda	$sp, -STACKSIZE($sp)
kusano 2b45e8
kusano 2b45e8
	ldq	B,        0 + STACKSIZE($sp)
kusano 2b45e8
	ldq	C,        8 + STACKSIZE($sp)
kusano 2b45e8
	ldq	LDC,     16 + STACKSIZE($sp)
kusano 2b45e8
#ifdef TRMMKERNEL
kusano 2b45e8
	ldq	OFFSET,  24 + STACKSIZE($sp)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	sll	LDC, ZBASE_SHIFT, LDC
kusano 2b45e8
kusano 2b45e8
	stt	$f2,   0($sp)
kusano 2b45e8
	stt	$f3,   8($sp)
kusano 2b45e8
	stt	$f4,  16($sp)
kusano 2b45e8
	stt	$f5,  24($sp)
kusano 2b45e8
	stt	$f6,  32($sp)
kusano 2b45e8
	stt	$f7,  40($sp)
kusano 2b45e8
	stt	$f8,  48($sp)
kusano 2b45e8
	stt	$f9,  56($sp)
kusano 2b45e8
	stt	$f19, ALPHA_R
kusano 2b45e8
	stt	$f20, ALPHA_I
kusano 2b45e8
kusano 2b45e8
	cmple	M, 0, $0
kusano 2b45e8
	cmple	N, 0, $1
kusano 2b45e8
	cmple	K, 0, $2
kusano 2b45e8
kusano 2b45e8
	or	$0, $1, $0
kusano 2b45e8
	or	$0, $2, $0
kusano 2b45e8
	bne	$0, $L999
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	subq	$31, OFFSET, KK
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	sra	N, 1, J
kusano 2b45e8
	ble	J, $L30
kusano 2b45e8
	.align 4
kusano 2b45e8
	
kusano 2b45e8
$L01:
kusano 2b45e8
	mov	C,  C1
kusano 2b45e8
	addq	C,  LDC, C2
kusano 2b45e8
	mov	A, AO
kusano 2b45e8
	s4addq	K, 0, BB
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL) &&  defined(LEFT)
kusano 2b45e8
	mov	OFFSET, KK
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	SXADDQ	BB, B, BB
kusano 2b45e8
	addq	C2, LDC, C
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	sra	M,  1, I
kusano 2b45e8
	fclr	t1
kusano 2b45e8
	fclr	t2
kusano 2b45e8
	fclr	t3
kusano 2b45e8
	fclr	t4
kusano 2b45e8
kusano 2b45e8
	fclr	c01
kusano 2b45e8
	fclr	c05
kusano 2b45e8
kusano 2b45e8
	ble	I, $L20
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L11:
kusano 2b45e8
#ifndef EV4
kusano 2b45e8
	ldl	$31,   0 * SIZE(BB)
kusano 2b45e8
	ldl	$31,   8 * SIZE(BB)
kusano 2b45e8
	unop
kusano 2b45e8
	lda	BB,   16 * SIZE(BB)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if !defined(TRMMKERNEL) || \
kusano 2b45e8
	(defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
	(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
kusano 2b45e8
#ifdef TRMMKERNEL
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	addq	KK, 2, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	addq	KK, 2, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
 	fclr	c09
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	c13
kusano 2b45e8
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	c02
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	fclr	c10
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	fclr	c14
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	fclr	c03
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	fclr	c07
kusano 2b45e8
kusano 2b45e8
	lda	BO,  4 * SIZE(B)
kusano 2b45e8
	fclr	c11
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
	fclr	c15
kusano 2b45e8
kusano 2b45e8
 	lds	$f31,  4 * SIZE(C1)
kusano 2b45e8
	fclr	c04
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	lda	L,        -2(K)
kusano 2b45e8
#else
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
#endif
kusano 2b45e8
	fclr	c08
kusano 2b45e8
kusano 2b45e8
	lds	$f31,  4 * SIZE(C2)
kusano 2b45e8
	fclr	c12
kusano 2b45e8
	fclr	c16
kusano 2b45e8
	ble	L, $L15
kusano 2b45e8
#else
kusano 2b45e8
	sll	KK, ZBASE_SHIFT + 1, TMP1
kusano 2b45e8
	addq	AO, TMP1, AO
kusano 2b45e8
	addq	B,  TMP1, BO
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
 	fclr	c09
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	c13
kusano 2b45e8
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	c02
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	fclr	c10
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	fclr	c14
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	fclr	c03
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	fclr	c07
kusano 2b45e8
kusano 2b45e8
	lda	BO,  4 * SIZE(BO)
kusano 2b45e8
	fclr	c11
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
	fclr	c15
kusano 2b45e8
kusano 2b45e8
 	lds	$f31,  4 * SIZE(C1)
kusano 2b45e8
	fclr	c04
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
	fclr	c08
kusano 2b45e8
kusano 2b45e8
	lds	$f31,  4 * SIZE(C2)
kusano 2b45e8
	fclr	c12
kusano 2b45e8
	fclr	c16
kusano 2b45e8
	ble	L, $L15
kusano 2b45e8
#endif
kusano 2b45e8
	.align	5
kusano 2b45e8
kusano 2b45e8
$L12:
kusano 2b45e8
/*  1 */
kusano 2b45e8
	ADD1	c11,  t1, c11
kusano 2b45e8
#ifndef EV4
kusano 2b45e8
	ldq	$31,  PREFETCHSIZE * SIZE(AO)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
	MUL	b1, a1, t1
kusano 2b45e8
#ifndef EV4
kusano 2b45e8
	ldl	$31,  PREFETCHSIZE * SIZE(BO)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD3	c12,  t2, c12
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b1, a2, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c16,  t3, c16
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a2, t3
kusano 2b45e8
	LD	a5,   0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c15, t4, c15
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a1, t4
kusano 2b45e8
	LD	b5,   0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/*  2 */
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	UNOP
kusano 2b45e8
	MUL	b1, a3, t1
kusano 2b45e8
	UNOP
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	UNOP
kusano 2b45e8
	MUL	b1, a4, t2
kusano 2b45e8
	UNOP
kusano 2b45e8
kusano 2b45e8
	ADD2	c06,  t3, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a4, t3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t4, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a1, t4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
/*  3 */
kusano 2b45e8
	ADD1	c03, t1, c03
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a1, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c04, t2, c04
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a2, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c08,  t3, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a2, t3
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t4, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a3, t4
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/*  4 */
kusano 2b45e8
	ADD1	c09,  t1, c09
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a3, t1
kusano 2b45e8
	LD	a6,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c10,  t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a4, t2
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t3, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a4, t3
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c07,  t4, c07
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a3, t4
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/*  5 */
kusano 2b45e8
	ADD1	c11,  t1, c11
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b5,  a5,  t1
kusano 2b45e8
	LD	a1,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c12,  t2, c12
kusano 2b45e8
	lda	L,        -2(L)
kusano 2b45e8
	MUL	b5,  a2, t2
kusano 2b45e8
	LD	b1,  4 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c16,  t3, c16
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a2, t3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD4	c15, t4, c15
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a5,  t4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
/*  6 */
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b5,  a6, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b5,  a4, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c06,  t3, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a4, t3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t4, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a5,  t4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
/*  7 */
kusano 2b45e8
	ADD1	c03, t1, c03
kusano 2b45e8
	lda	AO,    8 * SIZE(AO)
kusano 2b45e8
	MUL	b3, a5,  t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c04, t2, c04
kusano 2b45e8
	lda	BO,    8 * SIZE(BO)
kusano 2b45e8
	MUL	b3, a2, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c08,  t3, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a2, t3
kusano 2b45e8
	LD	a2, -3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t4, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a6, t4
kusano 2b45e8
	LD	b2, -3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
/*  8 */
kusano 2b45e8
	ADD1	c09,  t1, c09
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a6, t1
kusano 2b45e8
	LD	a3, -2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c10,  t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a4, t2
kusano 2b45e8
	LD	b3, -2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t3, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a4, t3
kusano 2b45e8
	LD	a4, -1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c07,  t4, c07
kusano 2b45e8
	MUL	b4, a6, t4
kusano 2b45e8
	LD	b4, -1 * SIZE(BO)
kusano 2b45e8
	bgt	L,  $L12
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L15:
kusano 2b45e8
	ADD1	c11,  t1, c11
kusano 2b45e8
	ldt	alpha_r, ALPHA_R
kusano 2b45e8
	MUL	b1, a1, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	blbs	K, $L18
kusano 2b45e8
#else
kusano 2b45e8
	blbs	TMP1, $L18
kusano 2b45e8
#endif
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
	ADD3	c12,  t2, c12
kusano 2b45e8
	MUL	b1, a2, t2
kusano 2b45e8
	ADD2	c16,  t3, c16
kusano 2b45e8
	MUL	b2, a2, t3
kusano 2b45e8
kusano 2b45e8
	ADD4	c15, t4, c15
kusano 2b45e8
	MUL	b2, a1, t4
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	MUL	b1, a3, t1
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b1, a4, t2
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06,  t3, c06
kusano 2b45e8
	MUL	b2, a4, t3
kusano 2b45e8
	ADD4	c05, t4, c05
kusano 2b45e8
	MUL	b4, a1, t4
kusano 2b45e8
kusano 2b45e8
	ADD1	c03, t1, c03
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a1, t1
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c04, t2, c04
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a2, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c08,  t3, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a2, t3
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t4, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a3, t4
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c09,  t1, c09
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a3, t1
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c10,  t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a4, t2
kusano 2b45e8
 	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t3, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a4, t3
kusano 2b45e8
	LD	a4, -1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c07,  t4, c07
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a3, t4
kusano 2b45e8
	LD	a3, -2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c11,  t1, c11
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MUL	b1, a1, t1
kusano 2b45e8
	lda	BO,  4 * SIZE(BO)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L18:
kusano 2b45e8
	ADD3	c12,  t2, c12
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b1, a2, t2
kusano 2b45e8
	ldt	alpha_i, ALPHA_I
kusano 2b45e8
kusano 2b45e8
	ADD2	c16,  t3, c16
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a2, t3
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	a5, 0 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD4	c15, t4, c15
kusano 2b45e8
	MUL	b2, a1, t4
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	MUL	b1, a3, t1
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b1, a4, t2
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	b1, 1 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD2	c06,  t3, c06
kusano 2b45e8
	MUL	b2, a4, t3
kusano 2b45e8
	ADD4	c05, t4, c05
kusano 2b45e8
	MUL	b4, a1, t4
kusano 2b45e8
kusano 2b45e8
	ADD1	c03, t1, c03
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a1, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	a1, 2 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD3	c04, t2, c04
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a2, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c08,  t3, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a2, t3
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	a2, 3 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t4, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b2, a3, t4
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	b2, 0 * SIZE(C2)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD1	c09,  t1, c09
kusano 2b45e8
	lda	I,        -1(I)
kusano 2b45e8
	MUL	b3, a3, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c10,  t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b3, a4, t2
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	b3, 1 * SIZE(C2)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t3, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a4, t3
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
  	LD	a4, 2 * SIZE(C2)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD4	c07,  t4, c07
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	b4, a3, t4
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	a3, 3 * SIZE(C2)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD1	c11,  t1, c11
kusano 2b45e8
	ADD3	c12,  t2, c12
kusano 2b45e8
	ADD2	c16,  t3, c16
kusano 2b45e8
	ADD4	c15,  t4, c15
kusano 2b45e8
kusano 2b45e8
	ADD	c01, c06, c01
kusano 2b45e8
	ADD	c02, c05, c02
kusano 2b45e8
	ADD	c03, c08, c03
kusano 2b45e8
	ADD	c04, c07, c04
kusano 2b45e8
kusano 2b45e8
	ADD	c09, c14, c09
kusano 2b45e8
	MUL	  alpha_r, c01, t1
kusano 2b45e8
	ADD	c10, c13, c10
kusano 2b45e8
	MUL	  alpha_r, c02, t2
kusano 2b45e8
kusano 2b45e8
	ADD	c11, c16, c11
kusano 2b45e8
	MUL	  alpha_r, c03, t3
kusano 2b45e8
	ADD	c12, c15, c12
kusano 2b45e8
	MUL	  alpha_r, c04, t4
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	ADD	  a5,  t1,  a5
kusano 2b45e8
	MUL	  alpha_i, c02, t1
kusano 2b45e8
	ADD	  b1,  t2,  b1
kusano 2b45e8
	MUL	  alpha_i, c01, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  a1,  t3,  a1
kusano 2b45e8
	MUL	  alpha_i, c04, t3
kusano 2b45e8
	ADD	  a2,  t4,  a2
kusano 2b45e8
	MUL	  alpha_i, c03, t4
kusano 2b45e8
#else
kusano 2b45e8
	ADD	  $f31,  t1,  a5
kusano 2b45e8
	MUL	  alpha_i, c02, t1
kusano 2b45e8
	ADD	  $f31,  t2,  b1
kusano 2b45e8
	MUL	  alpha_i, c01, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  $f31,  t3,  a1
kusano 2b45e8
	MUL	  alpha_i, c04, t3
kusano 2b45e8
	ADD	  $f31,  t4,  a2
kusano 2b45e8
	MUL	  alpha_i, c03, t4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	SUB	  a5,  t1,  a5
kusano 2b45e8
	MUL	  alpha_r, c09, t1
kusano 2b45e8
	ADD	  b1,  t2,  b1
kusano 2b45e8
	MUL	  alpha_r, c10, t2
kusano 2b45e8
kusano 2b45e8
	SUB	  a1,  t3,  a1
kusano 2b45e8
	MUL	  alpha_r, c11, t3
kusano 2b45e8
	ADD	  a2,  t4,  a2
kusano 2b45e8
	MUL	  alpha_r, c12, t4
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	ADD	  b2,  t1,  b2
kusano 2b45e8
	MUL	  alpha_i, c10, t1
kusano 2b45e8
	ADD	  b3,  t2,  b3
kusano 2b45e8
	MUL	  alpha_i, c09, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  a4,  t3,  a4
kusano 2b45e8
	MUL	  alpha_i, c12, t3
kusano 2b45e8
	ADD	  a3,  t4,  a3
kusano 2b45e8
	MUL	  alpha_i, c11, t4
kusano 2b45e8
#else
kusano 2b45e8
	ADD	  $f31,  t1,  b2
kusano 2b45e8
	MUL	  alpha_i, c10, t1
kusano 2b45e8
	ADD	  $f31,  t2,  b3
kusano 2b45e8
	MUL	  alpha_i, c09, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  $f31,  t3,  a4
kusano 2b45e8
	MUL	  alpha_i, c12, t3
kusano 2b45e8
	ADD	  $f31,  t4,  a3
kusano 2b45e8
	MUL	  alpha_i, c11, t4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	SUB	  b2,  t1,  b2
kusano 2b45e8
	ST	a5,  0 * SIZE(C1)
kusano 2b45e8
	fclr	t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	  b3,  t2,  b3
kusano 2b45e8
	ST	b1,  1 * SIZE(C1)
kusano 2b45e8
	fclr	t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	SUB	  a4,  t3,  a4
kusano 2b45e8
	ST	a1,  2 * SIZE(C1)
kusano 2b45e8
	fclr	t3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD	  a3,  t4,  a3
kusano 2b45e8
	ST	a2,  3 * SIZE(C1)
kusano 2b45e8
	fclr	t4
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ST	b2,  0 * SIZE(C2)
kusano 2b45e8
	fclr	c01
kusano 2b45e8
 	ST	b3,  1 * SIZE(C2)
kusano 2b45e8
	fclr	c05
kusano 2b45e8
kusano 2b45e8
	ST	a4,  2 * SIZE(C2)
kusano 2b45e8
	lda	C1,  4 * SIZE(C1)
kusano 2b45e8
	ST	a3,  3 * SIZE(C2)
kusano 2b45e8
	lda	C2,  4 * SIZE(C2)
kusano 2b45e8
kusano 2b45e8
#if (defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	subq	TMP1, 2, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	subq	TMP1, 2, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
	sll	TMP1, ZBASE_SHIFT + 1, TMP1
kusano 2b45e8
	addq	AO, TMP1, AO
kusano 2b45e8
	addq	BO, TMP1, BO
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(TRMMKERNEL) && defined(LEFT)
kusano 2b45e8
	addq	KK, 2, KK
kusano 2b45e8
#endif
kusano 2b45e8
	bgt	I, $L11
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L20:
kusano 2b45e8
	and	M,  1, I
kusano 2b45e8
	ble	I, $L29
kusano 2b45e8
kusano 2b45e8
#if !defined(TRMMKERNEL) || \
kusano 2b45e8
	(defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
	(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
kusano 2b45e8
#ifdef TRMMKERNEL
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	addq	KK, 1, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	addq	KK, 2, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
 	fclr	c09
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	c13
kusano 2b45e8
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	c02
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	fclr	c10
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	fclr	c14
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
	lda	AO,  2 * SIZE(AO)
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	lda	BO,  4 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	lda	L,        -2(K)
kusano 2b45e8
#else
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
#endif
kusano 2b45e8
	ble	L, $L25
kusano 2b45e8
#else
kusano 2b45e8
	sll	KK, ZBASE_SHIFT + 0, TMP1
kusano 2b45e8
	addq	AO, TMP1, AO
kusano 2b45e8
	sll	KK, ZBASE_SHIFT + 1, TMP1
kusano 2b45e8
	addq	B,  TMP1, BO
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
 	fclr	c09
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	c13
kusano 2b45e8
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	c02
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	fclr	c10
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	fclr	c14
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
	lda	AO,  2 * SIZE(AO)
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	lda	BO,  4 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
	ble	L, $L25
kusano 2b45e8
#endif
kusano 2b45e8
	.align	5
kusano 2b45e8
kusano 2b45e8
$L22:
kusano 2b45e8
	ADD1	c09, t1, c09
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c10, t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t3, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t3
kusano 2b45e8
	lda	BO,    8 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t4, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t4
kusano 2b45e8
	LD	b2, -7 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b3, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b3, t2
kusano 2b45e8
	LD	b3, -6 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b4, t3
kusano 2b45e8
	LD	a1,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	MUL	a2, b4, t4
kusano 2b45e8
	LD	b5, -5 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c09, t1, c09
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b1, t1
kusano 2b45e8
	LD	a2,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c10, t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b1, t2
kusano 2b45e8
	LD	b1, -4 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t3, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b2, t3
kusano 2b45e8
	lda	AO,    4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t4, c14
kusano 2b45e8
	MUL	a4, b2, t4
kusano 2b45e8
	LD	b2, -3 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	lda	L,        -2(L)
kusano 2b45e8
	MUL	a3, b3, t1
kusano 2b45e8
	LD	b4, -1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b3, t2
kusano 2b45e8
	LD	b3, -2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b5, t3
kusano 2b45e8
	LD	a3,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	MUL	a4, b5, t4
kusano 2b45e8
	LD	a4,  1 * SIZE(AO)
kusano 2b45e8
	bgt	L,  $L22
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L25:
kusano 2b45e8
	ADD1	c09, t1, c09
kusano 2b45e8
	ldt	alpha_r, ALPHA_R
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	blbs	K, $L28
kusano 2b45e8
#else
kusano 2b45e8
	blbs	TMP1, $L28
kusano 2b45e8
#endif
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
	ADD3	c10, t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t3, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t4, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t4
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b3, t1
kusano 2b45e8
	lda	AO,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b3, t2
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b4, t3
kusano 2b45e8
	LD	a1, -2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b4, t4
kusano 2b45e8
	LD	a2, -1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c09, t1, c09
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
	lda	BO,  4 * SIZE(BO)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L28:
/*
 * Tail of the K loop for the tile that stores one (re, im) pair to C1 and
 * one to C2.  It retires the products still pending in t1..t4, folds the
 * partial sums together, scales by alpha and stores the result.
 * ADD1..ADD4 are macros defined outside this chunk, so the sign each one
 * applies is not visible here.  The notes below assume ADD, SUB and MUL
 * expand to the plain floating-point add, subtract and multiply.
 * unop is a no-op, presumably kept for instruction-slot padding.
 */
kusano 2b45e8
	ADD3	c10, t2, c10
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	ldt	alpha_i, ALPHA_I
kusano 2b45e8
kusano 2b45e8
	ADD4	c13, t3, c13
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t3
kusano 2b45e8
/* GEMM build: the current C values are loaded in between the arithmetic;
   TRMM build: C is not read and a no-op fills the slot instead. */
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c03, 0 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD2	c14, t4, c14
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t4
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c04, 1 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b3, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c11, 0 * SIZE(C2)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b3, t2
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c12, 1 * SIZE(C2)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	MUL	a1, b4, t3
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	MUL	a2, b4, t4
kusano 2b45e8
kusano 2b45e8
/* Retire the last four products; no further MULs are issued on A or B. */
	ADD1	c09, t1, c09
kusano 2b45e8
	ADD3	c10, t2, c10
kusano 2b45e8
	ADD4	c13, t3, c13
kusano 2b45e8
	ADD2	c14, t4, c14
kusano 2b45e8
kusano 2b45e8
/* Fold the paired partial sums: c01 += c06, c02 += c05 (stored to C1),
   c09 += c14, c10 += c13 (stored to C2). */
	ADD	c01, c06, c01
kusano 2b45e8
	ADD	c02, c05, c02
kusano 2b45e8
	ADD	c09, c14, c09
kusano 2b45e8
	ADD	c10, c13, c10
kusano 2b45e8
kusano 2b45e8
/* t1..t4 = alpha_r * (c01, c02, c09, c10). */
	MUL	  alpha_r, c01, t1
kusano 2b45e8
	MUL	  alpha_r, c02, t2
kusano 2b45e8
	MUL	  alpha_r, c09, t3
kusano 2b45e8
	MUL	  alpha_r, c10, t4
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
/* GEMM: add the alpha_r terms onto the loaded C values, then reuse t1..t4
   for the alpha_i cross terms. */
	ADD	  c03,  t1,  c03
kusano 2b45e8
	MUL	  alpha_i, c02, t1
kusano 2b45e8
	ADD	  c04,  t2,  c04
kusano 2b45e8
	MUL	  alpha_i, c01, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  c11,  t3,  c11
kusano 2b45e8
	MUL	  alpha_i, c10, t3
kusano 2b45e8
	ADD	  c12,  t4,  c12
kusano 2b45e8
	MUL	  alpha_i, c09, t4
kusano 2b45e8
#else
kusano 2b45e8
/* TRMM: $f31 is the Alpha floating-point register that reads as zero, so
   these ADDs start the outputs from the alpha_r terms alone. */
	ADD	  $f31,  t1,  c03
kusano 2b45e8
	MUL	  alpha_i, c02, t1
kusano 2b45e8
	ADD	  $f31,  t2,  c04
kusano 2b45e8
	MUL	  alpha_i, c01, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  $f31,  t3,  c11
kusano 2b45e8
	MUL	  alpha_i, c10, t3
kusano 2b45e8
	ADD	  $f31,  t4,  c12
kusano 2b45e8
	MUL	  alpha_i, c09, t4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Cross terms: element 0 -= alpha_i * (element-1 sum),
   element 1 += alpha_i * (element-0 sum), for both C1 and C2. */
	SUB	  c03,  t1,  c03
kusano 2b45e8
	ADD	  c04,  t2,  c04
kusano 2b45e8
	SUB	  c11,  t3,  c11
kusano 2b45e8
	ADD	  c12,  t4,  c12
kusano 2b45e8
kusano 2b45e8
	ST	c03,  0 * SIZE(C1)
kusano 2b45e8
	ST	c04,  1 * SIZE(C1)
kusano 2b45e8
	ST	c11,  0 * SIZE(C2)
kusano 2b45e8
 	ST	c12,  1 * SIZE(C2)
kusano 2b45e8
kusano 2b45e8
/* TRMM only: TMP1 = K - KK - (1 if LEFT, else 2), then
   AO += TMP1 << ZBASE_SHIFT and BO += TMP1 << (ZBASE_SHIFT + 1).
   Presumably this steps both panel pointers over the entries this tile
   did not read -- confirm against the TRMM setup outside this chunk. */
#if (defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	subq	TMP1, 1, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	subq	TMP1, 2, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
	sll	TMP1, ZBASE_SHIFT + 0, TMP2
kusano 2b45e8
	addq	AO, TMP2, AO
kusano 2b45e8
	sll	TMP1, ZBASE_SHIFT + 1, TMP2
kusano 2b45e8
	addq	BO, TMP2, BO
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* TRMM with LEFT: this tile advances KK by 1. */
#if defined(TRMMKERNEL) && defined(LEFT)
kusano 2b45e8
	addq	KK, 1, KK
kusano 2b45e8
#endif
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L29:
/*
 * End of one pass of the J loop: B takes over the position BO has reached,
 * J is decremented, and control returns to $L01 (above this chunk) while
 * J is still positive.  In the TRMM build without LEFT, KK advances by 2
 * per pass; otherwise a no-op fills that slot.
 */
kusano 2b45e8
 	mov	BO, B
kusano 2b45e8
	lda	J,        -1(J)
kusano 2b45e8
#if defined(TRMMKERNEL) && !defined(LEFT)
kusano 2b45e8
	addq	KK, 2, KK
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
	bgt	J, $L01
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L30:
/*
 * Remainder handling for N: J = N & 1.  When the low bit is clear there is
 * nothing left and control goes straight to the epilogue at $L999.
 * Otherwise C1 is set to C, AO restarts at A, and I = M >> 1 counts the
 * passes through $L41; with no such pass, control skips to $L50.
 */
kusano 2b45e8
	and	N, 1, J
kusano 2b45e8
	ble	J, $L999
kusano 2b45e8
kusano 2b45e8
	mov	C,  C1
kusano 2b45e8
	mov	A, AO
kusano 2b45e8
kusano 2b45e8
/* TRMM with LEFT: KK restarts from OFFSET for this column. */
#if defined(TRMMKERNEL) &&  defined(LEFT)
kusano 2b45e8
	mov	OFFSET, KK
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	sra	M,  1, I
kusano 2b45e8
	ble	I, $L50
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L41:
/*
 * Per-tile setup for the tile that $L48 stores as four consecutive values
 * at C1.  Both preprocessor branches preload a1..a4 from AO and b1..b4
 * from the B panel, clear t1..t4 and the accumulators c01..c08, advance
 * AO by 4 * SIZE and BO by 2 * SIZE, and set L to (depth - 2).  When L is
 * not positive the main loop at $L42 is skipped in favour of $L45.
 *
 * First branch (GEMM, or TRMM with LEFT+TRANSA or !LEFT+!TRANSA): the
 * panels are read from AO and B as they stand; for TRMM the depth is
 * TMP1 = KK + 2 (LEFT) or KK + 1 (otherwise), for GEMM it is K.
 */
kusano 2b45e8
#if !defined(TRMMKERNEL) || \
kusano 2b45e8
	(defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
	(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
kusano 2b45e8
#ifdef TRMMKERNEL
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	addq	KK, 2, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	addq	KK, 1, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	fclr	t1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	t2
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	t3
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	t4
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	fclr	c01
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	fclr	c05
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
 	fclr	c02
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
/* BO starts 2 * SIZE past B even though four B values were preloaded;
   the loop at $L42 addresses its next loads relative to this BO. */
	lda	BO,  2 * SIZE(B)
kusano 2b45e8
	fclr	c03
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
	fclr	c07
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	lda	L,        -2(K)
kusano 2b45e8
#else
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
#endif
kusano 2b45e8
	fclr	c04
kusano 2b45e8
	fclr	c08
kusano 2b45e8
	ble	L, $L45
kusano 2b45e8
#else
kusano 2b45e8
/* Remaining TRMM variants: first offset into the panels by KK entries
   (AO by KK << (ZBASE_SHIFT + 1), BO = B + (KK << ZBASE_SHIFT)) and use
   TMP1 = K - KK as the depth. */
	sll	KK, ZBASE_SHIFT + 1, TMP1
kusano 2b45e8
	addq	AO, TMP1, AO
kusano 2b45e8
	sll	KK, ZBASE_SHIFT + 0, TMP1
kusano 2b45e8
	addq	B,  TMP1, BO
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	fclr	t1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	t2
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	t3
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	t4
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	fclr	c01
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	fclr	c05
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
 	fclr	c02
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	lda	BO,  2 * SIZE(BO)
kusano 2b45e8
	fclr	c03
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
	fclr	c07
kusano 2b45e8
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
	fclr	c04
kusano 2b45e8
	fclr	c08
kusano 2b45e8
	ble	L, $L45
kusano 2b45e8
#endif
kusano 2b45e8
	.align	5
kusano 2b45e8
kusano 2b45e8
$L42:
/*
 * Main K loop for the tile set up at $L41.  Each pass consumes two depth
 * steps: L drops by 2, AO advances by 8 * SIZE and BO by 4 * SIZE.
 * Every ADDn retires the product left in t1..t4 by an earlier MUL, so the
 * accumulate for one product overlaps the multiply for the next; loads
 * for upcoming operands are interleaved as each register becomes free.
 * The instruction order is part of that schedule -- do not reorder.
 * ADD1..ADD4 are macros defined outside this chunk.
 *
 * First depth step: a1..a4 against b1 and then b2.
 */
kusano 2b45e8
	ADD4	c05, t1, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t2, c06
kusano 2b45e8
	lda	L,   -2(L)
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD4	c07, t3, c07
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b1, t3
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD2	c08, t4, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b1, t4
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t1
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	lda	BO,  4 * SIZE(BO)
kusano 2b45e8
	MUL	a2, b2, t2
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c03, t3, c03
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b2, t3
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* The fourth A value of the second step goes into a5, not a4, because a4
   is still the source operand of the MUL just above this load. */
	ADD3	c04, t4, c04
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b2, t4
kusano 2b45e8
	LD	a5,  3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* Second depth step: a1, a2, a3, a5 against b3 and then b4. */
	ADD4	c05, t1, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b3, t1
kusano 2b45e8
	LD	b2, -1 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t2, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b3, t2
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD4	c07, t3, c07
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b3, t3
kusano 2b45e8
	lda	AO,  8 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c08, t4, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a5, b3, t4
kusano 2b45e8
	LD	b3,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b4, t1
kusano 2b45e8
	LD	a1, -4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b4, t2
kusano 2b45e8
	LD	a2, -3 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c03, t3, c03
kusano 2b45e8
	LD	a4, -1 * SIZE(AO)
kusano 2b45e8
	MUL	a3, b4, t3
kusano 2b45e8
	LD	a3, -2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* Repeat while L is still positive after the decrement above. */
	ADD3	c04, t4, c04
kusano 2b45e8
	MUL	a5, b4, t4
kusano 2b45e8
	LD	b4,  1 * SIZE(BO)
kusano 2b45e8
	bgt	L,  $L42
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L45:
/*
 * Depth remainder for the tile set up at $L41.  alpha_r is loaded here
 * while the arithmetic continues.  blbs tests the low bit of the depth
 * (K for GEMM, TMP1 for TRMM): when it is set, control jumps to the final
 * step at $L48; when it is clear, the code below performs one more depth
 * step (a1..a4 against b1 and b2) and then falls through into $L48.
 */
kusano 2b45e8
	ADD4	c05, t1, c05
kusano 2b45e8
	ldt	alpha_r, ALPHA_R
kusano 2b45e8
	MUL	b1, a1, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	blbs	K, $L48
kusano 2b45e8
#else
kusano 2b45e8
	blbs	TMP1, $L48
kusano 2b45e8
#endif
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t2, c06
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	ADD4	c07, t3, c07
kusano 2b45e8
	MUL	a3, b1, t3
kusano 2b45e8
kusano 2b45e8
	ADD2	c08, t4, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b1, t4
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t1
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t2
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD1	c03, t3, c03
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b2, t3
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c04, t4, c04
kusano 2b45e8
	MUL	a4, b2, t4
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* Start the first product of the final step so that $L48 is entered in
   the same state as on the direct branch from above. */
	ADD4	c05, t1, c05
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
	lda	BO,  2 * SIZE(BO)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L48:
/*
 * Final depth step and write-back for the tile set up at $L41.  The last
 * products are accumulated, the partial sums are folded, the result is
 * scaled by alpha and four consecutive values are stored at C1.  I is
 * decremented along the way and the code loops to $L41 while it stays
 * positive.  ADD1..ADD4 are macros defined outside this chunk; the notes
 * below assume ADD, SUB and MUL expand to the plain floating-point add,
 * subtract and multiply.
 */
kusano 2b45e8
	ADD2	c06, t2, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	ldt	alpha_i, ALPHA_I
kusano 2b45e8
kusano 2b45e8
	ADD4	c07, t3, c07
kusano 2b45e8
	lda	I,        -1(I)
kusano 2b45e8
	MUL	a3, b1, t3
kusano 2b45e8
/* GEMM build: current C values are loaded into c09..c12 in between the
   arithmetic; TRMM build: C is not read and a no-op fills the slot. */
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c09, 0 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD2	c08, t4, c08
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b1, t4
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c10, 1 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c11, 2 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t2
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c12, 3 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD1	c03, t3, c03
kusano 2b45e8
	MUL	a3, b2, t3
kusano 2b45e8
	ADD3	c04, t4, c04
kusano 2b45e8
	MUL	a4, b2, t4
kusano 2b45e8
kusano 2b45e8
/* Retire the last four products. */
	ADD4	c05, t1, c05
kusano 2b45e8
	ADD2	c06, t2, c06
kusano 2b45e8
	ADD4	c07, t3, c07
kusano 2b45e8
	ADD2	c08, t4, c08
kusano 2b45e8
kusano 2b45e8
/* Fold the paired partial sums: c01 += c06, c02 += c05, c03 += c08,
   c04 += c07. */
	ADD	c01, c06, c01
kusano 2b45e8
	ADD	c02, c05, c02
kusano 2b45e8
	ADD	c03, c08, c03
kusano 2b45e8
	ADD	c04, c07, c04
kusano 2b45e8
kusano 2b45e8
/* t1..t4 = alpha_r * (c01, c02, c03, c04). */
	MUL	  alpha_r, c01, t1
kusano 2b45e8
	MUL	  alpha_r, c02, t2
kusano 2b45e8
	MUL	  alpha_r, c03, t3
kusano 2b45e8
	MUL	  alpha_r, c04, t4
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
/* GEMM: add the alpha_r terms onto the loaded C values, then reuse t1..t4
   for the alpha_i cross terms. */
	ADD	  c09,  t1,  c09
kusano 2b45e8
	MUL	  alpha_i, c02, t1
kusano 2b45e8
	ADD	  c10,  t2,  c10
kusano 2b45e8
	MUL	  alpha_i, c01, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  c11,  t3,  c11
kusano 2b45e8
	MUL	  alpha_i, c04, t3
kusano 2b45e8
	ADD	  c12,  t4,  c12
kusano 2b45e8
	MUL	  alpha_i, c03, t4
kusano 2b45e8
#else
kusano 2b45e8
/* TRMM: $f31 is the Alpha floating-point register that reads as zero, so
   these ADDs start the outputs from the alpha_r terms alone. */
	ADD	  $f31,  t1,  c09
kusano 2b45e8
	MUL	  alpha_i, c02, t1
kusano 2b45e8
	ADD	  $f31,  t2,  c10
kusano 2b45e8
	MUL	  alpha_i, c01, t2
kusano 2b45e8
kusano 2b45e8
	ADD	  $f31,  t3,  c11
kusano 2b45e8
	MUL	  alpha_i, c04, t3
kusano 2b45e8
	ADD	  $f31,  t4,  c12
kusano 2b45e8
	MUL	  alpha_i, c03, t4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Cross terms: even slots -= alpha_i * (odd-slot sum),
   odd slots += alpha_i * (even-slot sum). */
	SUB	  c09,  t1,  c09
kusano 2b45e8
	ADD	  c10,  t2,  c10
kusano 2b45e8
	SUB	  c11,  t3,  c11
kusano 2b45e8
	ADD	  c12,  t4,  c12
kusano 2b45e8
kusano 2b45e8
	ST	c09,  0 * SIZE(C1)
kusano 2b45e8
	ST	c10,  1 * SIZE(C1)
kusano 2b45e8
	ST	c11,  2 * SIZE(C1)
kusano 2b45e8
	ST	c12,  3 * SIZE(C1)
kusano 2b45e8
kusano 2b45e8
/* Move the output pointer past the four values just stored. */
	lda	C1,   4 * SIZE(C1)
kusano 2b45e8
kusano 2b45e8
/* TRMM only: TMP1 = K - KK - (2 if LEFT, else 1), then
   AO += TMP1 << (ZBASE_SHIFT + 1) and BO += TMP1 << ZBASE_SHIFT.
   Presumably this steps both panel pointers over the entries this tile
   did not read -- confirm against the TRMM setup outside this chunk. */
#if (defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
    (defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	subq	TMP1, 2, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	subq	TMP1, 1, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
	sll	TMP1, ZBASE_SHIFT + 1, TMP2
kusano 2b45e8
	addq	AO, TMP2, AO
kusano 2b45e8
	sll	TMP1, ZBASE_SHIFT + 0, TMP2
kusano 2b45e8
	addq	BO, TMP2, BO
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* TRMM with LEFT: this tile advances KK by 2. */
#if defined(TRMMKERNEL) && defined(LEFT)
kusano 2b45e8
	addq	KK, 2, KK
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	bgt	I, $L41
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L50:
/*
 * Remainder handling for M: I = M & 1.  When the low bit is clear,
 * control goes to the epilogue at $L999.  Otherwise this sets up the last
 * tile, which $L58 stores as two values at C1.  Both preprocessor
 * branches preload a1..a4 and b1..b4, clear t1..t4 and the accumulators
 * c01, c02, c05, c06, advance AO and BO by 2 * SIZE each, and set L to
 * (depth - 2).  When L is not positive the loop at $L52 is skipped in
 * favour of $L55.
 */
kusano 2b45e8
	and	M,  1, I
kusano 2b45e8
	ble	I, $L999
kusano 2b45e8
kusano 2b45e8
#if !defined(TRMMKERNEL) || \
kusano 2b45e8
	(defined(TRMMKERNEL) &&  defined(LEFT) &&  defined(TRANSA)) || \
kusano 2b45e8
	(defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA))
kusano 2b45e8
kusano 2b45e8
/* TRMM depth for this tile is KK + 1 whether or not LEFT is defined; the
   two arms below are identical. */
#ifdef TRMMKERNEL
kusano 2b45e8
#ifdef LEFT
kusano 2b45e8
	addq	KK, 1, TMP1
kusano 2b45e8
#else
kusano 2b45e8
	addq	KK, 1, TMP1
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	fclr	t1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	t2
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	t3
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	t4
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(B)
kusano 2b45e8
	fclr	c01
kusano 2b45e8
	LD	b2,  1 * SIZE(B)
kusano 2b45e8
	fclr	c05
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(B)
kusano 2b45e8
 	fclr	c02
kusano 2b45e8
	LD	b4,  3 * SIZE(B)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	lda	AO,  2 * SIZE(AO)
kusano 2b45e8
	lda	BO,  2 * SIZE(B)
kusano 2b45e8
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	lda	L,        -2(K)
kusano 2b45e8
#else
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
#endif
kusano 2b45e8
	ble	L, $L55
kusano 2b45e8
#else
kusano 2b45e8
/* Remaining TRMM variants: offset AO and B by the same KK << ZBASE_SHIFT
   bytes and use TMP1 = K - KK as the depth. */
	sll	KK, ZBASE_SHIFT + 0, TMP1
kusano 2b45e8
	addq	AO, TMP1, AO
kusano 2b45e8
	addq	B,  TMP1, BO
kusano 2b45e8
	subq	K, KK, TMP1
kusano 2b45e8
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
	fclr	t1
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
	fclr	t2
kusano 2b45e8
	LD	a3,  2 * SIZE(AO)
kusano 2b45e8
	fclr	t3
kusano 2b45e8
	LD	a4,  3 * SIZE(AO)
kusano 2b45e8
	fclr	t4
kusano 2b45e8
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
	fclr	c01
kusano 2b45e8
	LD	b2,  1 * SIZE(BO)
kusano 2b45e8
	fclr	c05
kusano 2b45e8
kusano 2b45e8
	LD	b3,  2 * SIZE(BO)
kusano 2b45e8
 	fclr	c02
kusano 2b45e8
	LD	b4,  3 * SIZE(BO)
kusano 2b45e8
	fclr	c06
kusano 2b45e8
kusano 2b45e8
	lda	AO,  2 * SIZE(AO)
kusano 2b45e8
	lda	BO,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	lda	L,        -2(TMP1)
kusano 2b45e8
	ble	L, $L55
kusano 2b45e8
#endif
kusano 2b45e8
	.align	5
kusano 2b45e8
kusano 2b45e8
$L52:
/*
 * K loop for the tile set up at $L50.  Each pass consumes two depth
 * steps: L drops by 2 and AO and BO each advance by 4 * SIZE.  The first
 * step multiplies a1, a2 by b1, b2; the second multiplies a3, a4 by
 * b3, b4.  Every ADDn retires the product left in t1..t4 by an earlier
 * MUL, and operand reloads are interleaved as registers become free, so
 * the instruction order must be kept.  ADD1..ADD4 are macros defined
 * outside this chunk.
 */
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	lda	AO,  4 * SIZE(AO)
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	LD	b1,  2 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	lda	L,        -2(L)
kusano 2b45e8
	MUL	a1, b2, t3
kusano 2b45e8
	LD	a1, -2 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t4
kusano 2b45e8
	LD	a2, -1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* Second depth step: a3, a4 against b3 and then b4. */
	ADD1	c01, t1, c01
kusano 2b45e8
	LD	b2,  3 * SIZE(BO)
kusano 2b45e8
	MUL	a3, b3, t1
kusano 2b45e8
	lda	BO,    4 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a4, b3, t2
kusano 2b45e8
	LD	b3,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a3, b4, t3
kusano 2b45e8
	LD	a3,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	MUL	a4, b4, t4
kusano 2b45e8
	LD	b4,  1 * SIZE(BO)
kusano 2b45e8
	unop
kusano 2b45e8
kusano 2b45e8
/* Reload a4 for the next pass and repeat while L is still positive. */
	LD	a4,  1 * SIZE(AO)
kusano 2b45e8
	unop
kusano 2b45e8
	unop
kusano 2b45e8
	bgt	L,  $L52
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L55:
/*
 * Depth remainder for the tile set up at $L50.  alpha_r is loaded here
 * while the arithmetic continues.  blbs tests the low bit of the depth
 * (K for GEMM, TMP1 for TRMM): when it is set, control jumps to the final
 * step at $L58; when it is clear, the code below performs one more depth
 * step (a1, a2 against b1, b2) and then falls through into $L58.
 */
kusano 2b45e8
	ADD1	c01, t1, c01
kusano 2b45e8
	ldt	alpha_r, ALPHA_R
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	blbs	K, $L58
kusano 2b45e8
#else
kusano 2b45e8
	blbs	TMP1, $L58
kusano 2b45e8
#endif
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	LD	b1,  0 * SIZE(BO)
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	lda	BO,  2 * SIZE(BO)
kusano 2b45e8
	MUL	a1, b2, t3
kusano 2b45e8
	LD	a1,  0 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t4
kusano 2b45e8
	LD	a2,  1 * SIZE(AO)
kusano 2b45e8
kusano 2b45e8
/* Start the first product of the final step so that $L58 is entered in
   the same state as on the direct branch from above. */
	ADD1	c01, t1, c01
kusano 2b45e8
	LD	b2, -1 * SIZE(BO)
kusano 2b45e8
	MUL	a1, b1, t1
kusano 2b45e8
	lda	AO,  2 * SIZE(AO)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L58:
/*
 * Final depth step and write-back for the tile set up at $L50.  The last
 * products are accumulated, the partial sums are folded, the result is
 * scaled by alpha and two values are stored at C1.  Control then falls
 * through into the epilogue at $L999.  ADD1..ADD4 are macros defined
 * outside this chunk; the notes below assume ADD, SUB and MUL expand to
 * the plain floating-point add, subtract and multiply.
 */
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b1, t2
kusano 2b45e8
	ldt	alpha_i, ALPHA_I
kusano 2b45e8
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a1, b2, t3
kusano 2b45e8
/* GEMM build: current C values are loaded into c03, c04 in between the
   arithmetic; TRMM build: C is not read and a no-op fills the slot. */
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c03, 0 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
	unop
kusano 2b45e8
	MUL	a2, b2, t4
kusano 2b45e8
#ifndef TRMMKERNEL
kusano 2b45e8
	LD	c04, 1 * SIZE(C1)
kusano 2b45e8
#else
kusano 2b45e8
	unop
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Retire the last four products. */
	ADD1	c01, t1, c01
kusano 2b45e8
	ADD3	c02, t2, c02
kusano 2b45e8
	ADD4	c05, t3, c05
kusano 2b45e8
	ADD2	c06, t4, c06
kusano 2b45e8
kusano 2b45e8
/* Fold the paired partial sums: c01 += c06, c02 += c05. */
	ADD	c01, c06, c01
kusano 2b45e8
	ADD	c02, c05, c02
kusano 2b45e8
kusano 2b45e8
/* t1 = alpha_r * c01, t2 = alpha_r * c02,
   t3 = alpha_i * c02, t4 = alpha_i * c01. */
	MUL	  alpha_r, c01, t1
kusano 2b45e8
	MUL	  alpha_r, c02, t2
kusano 2b45e8
	MUL	  alpha_i, c02, t3
kusano 2b45e8
	MUL	  alpha_i, c01, t4
kusano 2b45e8
kusano 2b45e8
/* GEMM adds the alpha_r terms onto the loaded C values; TRMM adds them to
   $f31, the Alpha floating-point register that reads as zero. */
#ifndef TRMMKERNEL
kusano 2b45e8
	ADD	  c03,  t1,  c03
kusano 2b45e8
	ADD	  c04,  t2,  c04
kusano 2b45e8
#else
kusano 2b45e8
	ADD	  $f31,  t1,  c03
kusano 2b45e8
	ADD	  $f31,  t2,  c04
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Cross terms: c03 -= alpha_i * c02, c04 += alpha_i * c01. */
	SUB	  c03,  t3,  c03
kusano 2b45e8
	ADD	  c04,  t4,  c04
kusano 2b45e8
kusano 2b45e8
	ST	c03,  0 * SIZE(C1)
kusano 2b45e8
	ST	c04,  1 * SIZE(C1)
kusano 2b45e8
	.align 4
kusano 2b45e8
kusano 2b45e8
$L999:
/*
 * Common exit path.  Reloads $f2..$f9 from the eight 8-byte slots at the
 * bottom of the stack frame (presumably saved there by the prologue,
 * which is outside this chunk), clears $0, releases the STACKSIZE-byte
 * frame and returns.  $0 is presumably the integer return-value register
 * under the Alpha calling standard, making the result 0 -- confirm.
 */
kusano 2b45e8
	ldt	$f2,   0($sp)
kusano 2b45e8
	ldt	$f3,   8($sp)
kusano 2b45e8
	ldt	$f4,  16($sp)
kusano 2b45e8
	ldt	$f5,  24($sp)
kusano 2b45e8
	ldt	$f6,  32($sp)
kusano 2b45e8
	ldt	$f7,  40($sp)
kusano 2b45e8
	ldt	$f8,  48($sp)
kusano 2b45e8
	ldt	$f9,  56($sp)
kusano 2b45e8
	clr	$0
kusano 2b45e8
	lda	$sp, STACKSIZE($sp)
kusano 2b45e8
	ret
kusano 2b45e8
	.ident	VERSION
kusano 2b45e8
	.end	CNAME