Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/kernel/ia64/gemm_tcopy.S

kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
/* Defined before including common.h; presumably selects the assembler-side */
/* definitions of that header (SIZE, BASE_SHIFT, LDFD, ...) - TODO confirm. */
#define ASSEMBLER
kusano 2b45e8
#include "common.h"
kusano 2b45e8
kusano 2b45e8
/* Prefetch look-ahead, in elements (multiplied by SIZE where used):        */
/* PREA starts PREFETCHSIZE elements past the A read pointer and PREB       */
/* WPREFETCHSIZE elements past the B write pointer.                         */
#define PREFETCHSIZE   24
kusano 2b45e8
#define WPREFETCHSIZE  32
kusano 2b45e8
kusano 2b45e8
/* LD loads one element of A, ST stores one element to B.  Both branches    */
/* of this conditional currently expand to the same macros; the split only  */
/* leaves room for an XDOUBLE-specific choice.                              */
#ifndef XDOUBLE
kusano 2b45e8
#define LD	LDFD
kusano 2b45e8
#define ST	STFD_NTA
kusano 2b45e8
#else
kusano 2b45e8
#define LD	LDFD
kusano 2b45e8
#define ST	STFD_NTA
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Prefetch pointers: PREA walks the source (lfetch.nt1, stepped by LDA),   */
/* PREB walks the destination (lfetch.excl.nt1).                            */
#define PREA	r2
kusano 2b45e8
#define PREB	r3
kusano 2b45e8
kusano 2b45e8
/* A1/A2: the two source read pointers used by the LD instructions.         */
/* B1/B2: the two destination write pointers used by the ST instructions;   */
/*        B2 is always set 4 elements past B1 (or past BO2/BO3/BO4).        */
/* I:     first holds the value loaded into ar.lc for the pipelined loop,   */
/*        then a copy of II that is counted down on the load side.          */
/* J:     M >> 3, the number of 8-row panels still to be processed.         */
#define A1	r14
kusano 2b45e8
#define A2	r15
kusano 2b45e8
#define B1	r16
kusano 2b45e8
#define B2	r17
kusano 2b45e8
#define I	r18
kusano 2b45e8
#define J	r19
kusano 2b45e8
kusano 2b45e8
/* Destination pointers for the leftover columns of N:                      */
/*   BO2 = B + M * (N & -8) elements  (used when bit 2 of N is set)         */
/*   BO3 = B + M * (N & -4) elements  (used when bit 1 of N is set)         */
/*   BO4 = B + M * (N & -2) elements  (used when bit 0 of N is set)         */
#define BO2	r20
kusano 2b45e8
#define BO3	r21
kusano 2b45e8
#define BO4	r22
kusano 2b45e8
kusano 2b45e8
/* LDB:    M << (BASE_SHIFT + 3), the byte distance between consecutive     */
/*         8-column groups in B (assumes BASE_SHIFT is log2 of the element  */
/*         size - TODO confirm against common.h).                           */
/* II:     N >> 3, counted down on the store side of the pipelined loop.    */
/* TEMP1-3: scratch registers holding pointer post-increment amounts.       */
/* LCOUNT/SCOUNT: load-side / store-side pass counters; each is reset to 0  */
/*         when it reaches 4 (8-row loop) or 2 (4-row loop).                */
#define LDB	r23
kusano 2b45e8
#define II	r24
kusano 2b45e8
#define TEMP1	r25
kusano 2b45e8
#define TEMP2	r26
kusano 2b45e8
#define TEMP3	r27
kusano 2b45e8
#define LCOUNT	r28
kusano 2b45e8
#define SCOUNT	r29
kusano 2b45e8
kusano 2b45e8
/* Copies of ar.lc and pr taken on entry, before the routine overwrites the */
/* loop counter and the rotating predicates.                                */
#define ARLC	r30
kusano 2b45e8
#define PR	r31
kusano 2b45e8
kusano 2b45e8
/* 16 * SIZE - 8 * LDA (LDA in bytes): added to PREA once every four passes */
/* of the 8-row loop.  Note that r8 is also used as a plain scratch         */
/* register (N & -8) early in the entry code, before MLDA8 is computed.     */
#define MLDA8	r8
kusano 2b45e8
kusano 2b45e8
/* Arguments as received in r32-r36: row count M, column count N, source    */
/* pointer A, leading dimension LDA (converted from elements to bytes on    */
/* entry via BASE_SHIFT) and destination pointer B.  A and B are advanced   */
/* in place as panels are consumed.                                         */
#define M	r32
kusano 2b45e8
#define N	r33
kusano 2b45e8
#define A	r34
kusano 2b45e8
#define LDA	r35
kusano 2b45e8
#define B	r36
kusano 2b45e8
kusano 2b45e8
	PROLOGUE
kusano 2b45e8
	.prologue
kusano 2b45e8
	PROFCODE
kusano 2b45e8
kusano 2b45e8
	.body
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	setf.sig f32 = M
kusano 2b45e8
	and	r8  = -8, N
kusano 2b45e8
	mov	ARLC  = ar.lc
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	setf.sig f33  = r8
kusano 2b45e8
	and	r9  = -4, N
kusano 2b45e8
	mov	PR = pr
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	setf.sig f34  = r9
kusano 2b45e8
	and	r10 = -2, N
kusano 2b45e8
	shladd	LDA = LDA, BASE_SHIFT, r0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	setf.sig f35 = r10
kusano 2b45e8
	shladd	MLDA8 = LDA, 3, r0
kusano 2b45e8
	shl	LDB = M, BASE_SHIFT + 3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mfi
kusano 2b45e8
	sub	MLDA8 = r0, MLDA8
kusano 2b45e8
	xmpy.l	f33  = f32, f33
kusano 2b45e8
	shr	J = M, 3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mfi
kusano 2b45e8
	xmpy.l	f34  = f32, f34
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmf
kusano 2b45e8
	getf.sig BO2 = f33
kusano 2b45e8
	adds	MLDA8 = 16 * SIZE, MLDA8
kusano 2b45e8
	xmpy.l	f35 = f32, f35
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	getf.sig BO3 = f34
kusano 2b45e8
	getf.sig BO4 = f35
kusano 2b45e8
	nop	 __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	shladd	BO2 = BO2, BASE_SHIFT, B
kusano 2b45e8
	shladd	BO3 = BO3, BASE_SHIFT, B
kusano 2b45e8
	shladd	BO4 = BO4, BASE_SHIFT, B
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	cmp.eq	p6, p0 = 0, J
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	(p6)	br.cond.dpnt .L100
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
.L11:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	I = 8, N
kusano 2b45e8
	mov	A1 = A
kusano 2b45e8
	mov	pr.rot = 0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	A2 = 4 * SIZE, A
kusano 2b45e8
	shladd	A = LDA, 3, A
kusano 2b45e8
	shr	II = N, 3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	B1 =  B
kusano 2b45e8
	cmp.eq	p16, p0 = r0, r0
kusano 2b45e8
	mov	ar.ec  = 3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	B2 =  4 * SIZE, B
kusano 2b45e8
	adds	B  = 64 * SIZE, B
kusano 2b45e8
	shr	I = I, 4
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	cmp.eq	p8, p0 = 0, I
kusano 2b45e8
	shladd	I = I, 2, r0
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	LCOUNT = 0
kusano 2b45e8
	mov	SCOUNT = 0
kusano 2b45e8
	adds	I = -1, I
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds  PREA =  PREFETCHSIZE * SIZE, A1
kusano 2b45e8
	adds  PREB = WPREFETCHSIZE * SIZE, B1
kusano 2b45e8
	mov	ar.lc = I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	adds	J = -1, J
kusano 2b45e8
	mov	I = II
kusano 2b45e8
	(p8)	br.cond.dpnt .L20
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
.L12:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f34,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f46,  1 * SIZE
kusano 2b45e8
	(p18) cmp.ne.unc p13, p0 = 1, II
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) lfetch.nt1 [PREA], LDA
kusano 2b45e8
	(p16) lfetch.excl.nt1 [PREB], LDB
kusano 2b45e8
	(p16) cmp.ne.unc p12, p0 = 1, I
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f37,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f49,  1 * SIZE
kusano 2b45e8
	(p18) adds	SCOUNT = 1, SCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f44  = [A2], SIZE
kusano 2b45e8
	(p16) adds	LCOUNT = 1, LCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f40,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f52,  1 * SIZE
kusano 2b45e8
	(p16) cmp.eq.unc p14, p0 = 4, LCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f35  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f47  = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f43,  5 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f55,  5 * SIZE
kusano 2b45e8
	(p18) cmp.eq.unc p15, p0 = 4, SCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f38  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f50  = [A2], SIZE
kusano 2b45e8
	(p12) mov TEMP1 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f82,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f94,  1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f41  = [A1], TEMP1
kusano 2b45e8
	(p16) LD	f53  = [A2], TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f85,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f97,  1 * SIZE
kusano 2b45e8
	mov	TEMP2 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f56  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f68  = [A2], SIZE
kusano 2b45e8
	shladd TEMP3 = LDA, 3, r0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f88,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f100, 1 * SIZE
kusano 2b45e8
	(p13) adds TEMP2 = - 11 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f59  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f71  = [A2], SIZE
kusano 2b45e8
	(p12) adds  TEMP1 = - 11 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f91
kusano 2b45e8
	(p18) ST	[B2] = f103
kusano 2b45e8
	(p18) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f62  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f74  = [A2], SIZE
kusano 2b45e8
	(p18) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f58,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f70,  1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f65  = [A1], TEMP1
kusano 2b45e8
	(p12) LD	f77  = [A2], TEMP1
kusano 2b45e8
	sub TEMP3 = LDA, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f61,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f73,  1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) lfetch.nt1 [PREA], LDA
kusano 2b45e8
	(p16) lfetch.excl.nt1 [PREB]
kusano 2b45e8
	adds TEMP3 = 5 * SIZE, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f64,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f76,  1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f80  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f92  = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f67,  5 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f79,  5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f83  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f95  = [A2], SIZE
kusano 2b45e8
	(p14) mov TEMP1 = TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f106,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f118,  1 * SIZE
kusano 2b45e8
	mov	TEMP2 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f86  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f98  = [A2], SIZE
kusano 2b45e8
	(p12) mov TEMP1	= 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f109,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f121,  1 * SIZE
kusano 2b45e8
	sub	TEMP2 = TEMP2, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f89  = [A1], TEMP1
kusano 2b45e8
	(p16) LD	f101 = [A2], TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f112,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f124,  1 * SIZE
kusano 2b45e8
	(p15) adds TEMP2 = -59 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f104 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f116 = [A2], SIZE
kusano 2b45e8
	(p14)	add PREA = PREA, MLDA8
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f115
kusano 2b45e8
	(p13) ST	[B2] = f127
kusano 2b45e8
	(p13) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f107 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f119 = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -11 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f110 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f122 = [A2], SIZE
kusano 2b45e8
	(p14) mov TEMP1 = TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p14) mov	LCOUNT = 0
kusano 2b45e8
	(p15) mov	SCOUNT = 0
kusano 2b45e8
	adds  PREB = WPREFETCHSIZE * SIZE, B1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f113 = [A1], TEMP1
kusano 2b45e8
	(p12) LD	f125 = [A2], TEMP1
kusano 2b45e8
	(p13) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	(p14) adds	I = -2, I
kusano 2b45e8
	(p15) adds	II = -2, II
kusano 2b45e8
	br.ctop.sptk .L12
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
.L20:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	and	TEMP3 = 7, N
kusano 2b45e8
	tbit.nz p7, p0 = N, 2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f36  = [A2], SIZE
kusano 2b45e8
	cmp.eq	p6, p0 = 0, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f33  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f37  = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f34  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f38  = [A2], SIZE
kusano 2b45e8
	add	TEMP1 = TEMP1, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f35  = [A1], TEMP1
kusano 2b45e8
	(p7) LD	f39  = [A2], TEMP1
kusano 2b45e8
	(p6) cmp.ne.unc	p10, p0 = 0, J
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmb
kusano 2b45e8
	(p7) LD	f40  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f44  = [A2], SIZE
kusano 2b45e8
	(p10) br.cond.dptk .L11
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f41  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f45  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f42  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f46  = [A2], SIZE
kusano 2b45e8
	tbit.nz p8, p0 = N, 1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f43  = [A1], TEMP1
kusano 2b45e8
	(p7) LD	f47  = [A2], TEMP1
kusano 2b45e8
	adds	B2 = 4 * SIZE, BO2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f32,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f36,  1 * SIZE
kusano 2b45e8
	tbit.nz p9, p0 = N, 0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f48  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f52  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f33,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f37,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f49  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f53  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f34,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f38,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f50  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f54  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f35,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f39,  5 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f51  = [A1], TEMP1
kusano 2b45e8
	(p7) LD	f55  = [A2], TEMP1
kusano 2b45e8
	mov	TEMP1 = -1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f40,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f44,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f56  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f60  = [A2], SIZE
kusano 2b45e8
	shladd	TEMP1 = LDA, 3, TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f41,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f45,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f57  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f61  = [A2], SIZE
kusano 2b45e8
	sub	TEMP1 = 0, TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f42,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f46,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f58  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f62  = [A2], SIZE
kusano 2b45e8
	shladd	TEMP1 = LDA, 1, TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f43,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f47,  5 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f59  = [A1], TEMP1
kusano 2b45e8
	(p7) LD	f63  = [A2], TEMP1
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f48,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f52,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	adds	TEMP1 = -1 * SIZE, LDA
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f49,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f53,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f64  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f66  = [A2], SIZE
kusano 2b45e8
	add	TEMP1 = TEMP1, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f50,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f54,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f65  = [A1], TEMP1
kusano 2b45e8
	(p8) LD	f67  = [A2], TEMP1
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f51,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f55,  5 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f68  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f70  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f56,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f60,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f69  = [A1], TEMP1
kusano 2b45e8
	(p8) LD	f71  = [A2], TEMP1
kusano 2b45e8
	mov	TEMP3 = -1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f57,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f61,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f72  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f74  = [A2], SIZE
kusano 2b45e8
	shladd	TEMP3 = LDA, 3, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f58,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f62,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f73  = [A1], TEMP1
kusano 2b45e8
	(p8) LD	f75  = [A2], TEMP1
kusano 2b45e8
	sub	TEMP3 = 0, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f59,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f63
kusano 2b45e8
	adds	B2 = 4 * SIZE, BO3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f76  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f78  = [A2], SIZE
kusano 2b45e8
	shladd	TEMP3 = LDA, 1, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f64,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f68,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f77  = [A1], TEMP3
kusano 2b45e8
	(p8) LD	f79  = [A2], TEMP3
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f65,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f69,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	shladd	TEMP3 = LDA, 1, r0
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f66,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f70,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) LD	f80  = [A1], TEMP3
kusano 2b45e8
	(p9) LD	f81  = [A2], TEMP3
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f67,  5 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f71,  5 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) LD	f82  = [A1], TEMP3
kusano 2b45e8
	(p9) LD	f83  = [A2], TEMP3
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f72,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f76,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) LD	f84  = [A1], TEMP3
kusano 2b45e8
	(p9) LD	f85  = [A2], TEMP3
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f73,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f77,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) LD	f86  = [A1]
kusano 2b45e8
	(p9) LD	f87  = [A2]
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f74,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f78,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f75,  5 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f79
kusano 2b45e8
	adds	B2 = 4 * SIZE, BO4
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f80,  1 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f84,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f81,  1 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f85,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f82,  1 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f86,  1 * SIZE
kusano 2b45e8
	cmp.ne	p8, p0 = 0, J
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmb
kusano 2b45e8
	(p9) ST	[BO4] = f83,  5 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f87,  5 * SIZE
kusano 2b45e8
	(p8)	br.cond.dptk .L11
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
.L100:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	A1 = A
kusano 2b45e8
	add	I = 8, N
kusano 2b45e8
	mov	pr.rot = 0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	A2 = 4 * SIZE, A
kusano 2b45e8
	tbit.z p6, p0 = M, 2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	B1 =  B
kusano 2b45e8
	adds	B2 =  4 * SIZE, B
kusano 2b45e8
	mov	ar.ec  = 3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	cmp.eq	p16, p0 = r0, r0
kusano 2b45e8
	shr	I = I, 4
kusano 2b45e8
	(p6)	br.cond.dpnt .L200
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	cmp.eq	p8, p0 = 0, I
kusano 2b45e8
	shladd	I = I, 1, r0
kusano 2b45e8
	shladd	A = LDA, 2, A
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	B  = 32 * SIZE, B
kusano 2b45e8
	adds	I = -1, I
kusano 2b45e8
	shr	II = N, 3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	LCOUNT = 0
kusano 2b45e8
	mov	SCOUNT = 0
kusano 2b45e8
	mov	ar.lc = I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	mov	I = II
kusano 2b45e8
	(p8)	br.cond.dpnt .L120
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
.L112:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f34,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f46,  1 * SIZE
kusano 2b45e8
	(p16) cmp.ne.unc p12, p0 = 1, I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f44  = [A2], SIZE
kusano 2b45e8
	(p18) cmp.ne.unc p13, p0 = 1, II
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f37,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f49,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f35  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f47  = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f40,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f52,  1 * SIZE
kusano 2b45e8
	shladd TEMP3 = LDA, 2, r0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f38  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f50  = [A2], SIZE
kusano 2b45e8
	(p12) mov TEMP1 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f43,  5 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f55,  5 * SIZE
kusano 2b45e8
	(p16) adds	LCOUNT = 1, LCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f41  = [A1], TEMP1
kusano 2b45e8
	(p16) LD	f53  = [A2], TEMP1
kusano 2b45e8
	(p18) adds	SCOUNT = 1, SCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f82,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f94,  1 * SIZE
kusano 2b45e8
	(p16) cmp.eq.unc p14, p0 = 2, LCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f56  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f68  = [A2], SIZE
kusano 2b45e8
	(p18) cmp.eq.unc p15, p0 = 2, SCOUNT
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f85,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f97,  1 * SIZE
kusano 2b45e8
	mov	TEMP2 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f59  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f71  = [A2], SIZE
kusano 2b45e8
	sub TEMP3 = LDA, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f88,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f100, 1 * SIZE
kusano 2b45e8
	(p13) adds TEMP2 = - 11 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f62  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f74  = [A2], SIZE
kusano 2b45e8
	(p12) adds  TEMP1 = - 11 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f91
kusano 2b45e8
	(p18) ST	[B2] = f103
kusano 2b45e8
	(p18) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f65  = [A1], TEMP1
kusano 2b45e8
	(p12) LD	f77  = [A2], TEMP1
kusano 2b45e8
	(p18) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f58,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f70,  1 * SIZE
kusano 2b45e8
	adds TEMP3 = 5 * SIZE, TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f80  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f92  = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f61,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f73,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f83  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f95  = [A2], SIZE
kusano 2b45e8
	(p14) mov  TEMP1 = TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f64,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f76,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f86  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f98  = [A2], SIZE
kusano 2b45e8
	(p12) mov TEMP1	= 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f67,  5 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f79,  5 * SIZE
kusano 2b45e8
	(p14) mov	LCOUNT = 0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f89  = [A1], TEMP1
kusano 2b45e8
	(p16) LD	f101 = [A2], TEMP1
kusano 2b45e8
	(p15) mov	SCOUNT = 0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f106,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f118,  1 * SIZE
kusano 2b45e8
	mov	TEMP2 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f104 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f116 = [A2], SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f109,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f121,  1 * SIZE
kusano 2b45e8
	sub	TEMP2 = TEMP2, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f107 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f119 = [A2], SIZE
kusano 2b45e8
	adds	TEMP1 = -11 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f112,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f124,  1 * SIZE
kusano 2b45e8
	(p15) adds TEMP2 = -27 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f110 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f122 = [A2], SIZE
kusano 2b45e8
	(p14) mov TEMP1 = TEMP3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f115
kusano 2b45e8
	(p13) ST	[B2] = f127
kusano 2b45e8
	(p13) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f113 = [A1], TEMP1
kusano 2b45e8
	(p12) LD	f125 = [A2], TEMP1
kusano 2b45e8
	(p13) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmb
kusano 2b45e8
	(p14) adds	I = -2, I
kusano 2b45e8
	(p15) adds	II = -2, II
kusano 2b45e8
	br.ctop.sptk .L112
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L120: leftover-column handling for the four-line pass.
 * Reached by fall-through from the .L112 loop above.
 *
 * Works on four LDA-strided lines of A starting at A1
 * (A1, A1 + LDA, A1 + 2 * LDA, A1 + 3 * LDA) and tests bits 2, 1 and 0
 * of N:
 *   p7 (N bit 2): 4 elements per line -> 16 elements stored through BO2
 *   p8 (N bit 1): 2 elements per line ->  8 elements stored through BO3
 *   p9 (N bit 0): 1 element  per line ->  4 elements stored through BO4
 * All loads are issued first, then all stores.  Every load and store is
 * predicated, so a clear bit simply skips that part.
 *
 * LD, ST, SIZE, BO2, BO3 and BO4 are defined outside this chunk;
 * presumably SIZE is the element size in bytes and BO2/BO3/BO4 are the
 * output cursors for the 4-, 2- and 1-column remainders (TODO confirm).
 */
.L120:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	tbit.nz p7, p0 = N, 2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p7 part: lines 0 and 1, four consecutive elements each. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f36  = [A2], SIZE
kusano 2b45e8
	tbit.nz p8, p0 = N, 1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f33  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f37  = [A2], SIZE
kusano 2b45e8
	/* TEMP1 is built in two steps: 2 * LDA - 3 * SIZE. */
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f34  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f38  = [A2], SIZE
kusano 2b45e8
	add	TEMP1 = TEMP1, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* The fourth load steps both pointers two lines down and back to */
	/* the first of the four columns. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f35  = [A1], TEMP1
kusano 2b45e8
	(p7) LD	f39  = [A2], TEMP1
kusano 2b45e8
	tbit.nz p9, p0 = N, 0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p7 part: lines 2 and 3. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f40  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f44  = [A2], SIZE
kusano 2b45e8
	/* TEMP2 is built in three steps: -(2 * LDA - SIZE). */
	mov	TEMP2 = -1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f41  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f45  = [A2], SIZE
kusano 2b45e8
	shladd	TEMP2 = LDA, 1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f42  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f46  = [A2], SIZE
kusano 2b45e8
	sub	TEMP2 = 0, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* A1 returns to line 0, four elements past where it started. */
	/* A2 is not post-incremented; it is recomputed from A1 below. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f43  = [A1], TEMP2
kusano 2b45e8
	(p7) LD	f47  = [A2]
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p8 part: two elements per line.  TEMP1 becomes 2 * LDA - SIZE */
	/* and TEMP2 becomes -(2 * LDA - SIZE). */
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	adds	TEMP1 = -1 * SIZE, LDA
kusano 2b45e8
	mov	TEMP2 = -1 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f48  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f50  = [A2], SIZE
kusano 2b45e8
	add	TEMP1 = TEMP1, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f49  = [A1], TEMP1
kusano 2b45e8
	(p8) LD	f51  = [A2], TEMP1
kusano 2b45e8
	shladd	TEMP2 = LDA, 1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f52  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f54  = [A2], SIZE
kusano 2b45e8
	sub	TEMP2 = r0, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Both pointers go back up two lines, two elements further on. */
	{ .mmi
kusano 2b45e8
	(p8) LD	f53  = [A1], TEMP2
kusano 2b45e8
	(p8) LD	f55  = [A2], TEMP2
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	/* B2 is the second store cursor, four elements ahead of BO2. */
	adds	B2 = 4 * SIZE, BO2
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p9 part: one element per line.  The loads read A1/A2 before the */
	/* shladd in the same instruction group moves them two lines down. */
	{ .mmi
kusano 2b45e8
	(p9) LD	f56  = [A1]
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	(p9) shladd	A1 = LDA, 1, A1
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) LD	f57  = [A2]
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	(p9) shladd	A2 = LDA, 1, A2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores for the p7 part start here, overlapped with the last two */
	/* p9 loads.  BO2 takes f32-f35 then f40-f43; B2 takes f36-f39 then */
	/* f44-f47.  The post-increments 1,1,1,5 skip the other cursor's */
	/* four slots, so the 16 values land contiguously. */
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f32,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f36,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) LD	f58  = [A1]
kusano 2b45e8
	(p9) LD	f59  = [A2]
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f33,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f37,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f34,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f38,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f35,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f39,  5 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f40,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f44,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f41,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f45,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f42,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f46,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* When p7 is set BO2 ends 16 elements past its starting value. */
	/* B2 is re-aimed at BO3 + 4 in the same group; the store still */
	/* uses the old B2. */
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f43,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f47
kusano 2b45e8
	adds	B2 = 4 * SIZE, BO3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p8 part: BO3 takes f48-f51 (lines 0 and 1), B2 takes f52-f55 */
	/* (lines 2 and 3); when p8 is set BO3 ends 8 elements further on. */
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f48,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f52,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f49,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f53,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f50,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f54,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f51,  5 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f55
kusano 2b45e8
	adds	B2 = 2 * SIZE, BO4
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p9 part: BO4 takes f56, f57 and B2 takes f58, f59; when p9 is */
	/* set BO4 ends 4 elements further on. */
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f56,  1 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f58,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f57,  3 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f59
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L200: setup for the two-line pass.
 *
 * If bit 1 of M is clear the whole pass is skipped (branch to .L300).
 * Otherwise:
 *   A1 = A, A2 = A + 4 * SIZE     (two load cursors on the same line)
 *   B1 = B, B2 = B + 4 * SIZE     (two store cursors)
 *   A += 2 * LDA, B += 16 * SIZE  (advanced for whatever pass follows)
 *   pr.rot = 0, p16 = 1, ar.ec = 3  (software-pipeline state for .L212)
 *   ar.lc = ((N + 8) >> 4) - 1    (loop count for br.ctop)
 *   I = II = N >> 3               (counters tested inside .L212)
 * If (N + 8) >> 4 is zero the loop is bypassed and control goes
 * straight to the remainder code at .L220.
 */
.L200:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	I = 8, N
kusano 2b45e8
	mov	A1 = A
kusano 2b45e8
	mov	pr.rot = 0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	A2 = 4 * SIZE, A
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	/* p6 = bit 1 of M is zero. */
	tbit.z p6, p0 = M, 1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	B1 =  B
kusano 2b45e8
	/* r0 == r0 always holds, so this sets p16 to 1. */
	cmp.eq	p16, p0 = r0, r0
kusano 2b45e8
	mov	ar.ec  = 3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	adds	B2 =  4 * SIZE, B
kusano 2b45e8
	shr	I = I, 4
kusano 2b45e8
	(p6)	br.cond.dpnt .L300
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	shladd	A = LDA, 1, A
kusano 2b45e8
	adds	B  = 16 * SIZE, B
kusano 2b45e8
	shr	II = N, 3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	/* The compare sees I before the decrement in the same group. */
	cmp.eq	p8, p0 = 0, I
kusano 2b45e8
	adds	I = -1, I
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	mov	ar.lc = I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	/* I is reused as a second copy of the N >> 3 counter. */
	mov	I = II
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	(p8)	br.cond.dpnt .L220
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L212: software-pipelined copy loop of the two-line pass
 * (br.ctop, with ar.lc / ar.ec / p16 set up at .L200).
 *
 * Stage p16 issues the loads and stage p18 issues the stores.  A value
 * loaded into fN is stored from fN+2 (e.g. f32 -> f34, f80 -> f82),
 * i.e. two register rotations later.
 *
 * One iteration handles up to 16 columns of two lines:
 *   - columns 0-7 always (loads under p16, stores under p18);
 *   - columns 8-15 only when the counter is not 1:
 *       p12 = load stage active  and I  != 1  (guards the extra loads)
 *       p13 = store stage active and II != 1  (guards the extra stores)
 *     I and II both start at N >> 3 and drop by 2 per iteration, I in
 *     the load stage and II in the store stage.
 *
 * A1 reads columns 0-3 (then 8-11), A2 reads columns 4-7 (then 12-15),
 * first on the upper line and then one LDA further on.  TEMP1 is
 * rewritten several times per iteration to hold the post-increment
 * applied by the last load of each group of four; TEMP2 plays the same
 * role for the B1/B2 adjustments.  Several of those writes sit in the
 * same instruction group as a load that still uses the old value, so
 * the instruction order must not be changed.
 */
.L212:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f34,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f46,  1 * SIZE
kusano 2b45e8
	/* .unc: p12 is cleared whenever p16 is off. */
	(p16) cmp.ne.unc p12, p0 = 1, I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f44  = [A2], SIZE
kusano 2b45e8
	/* .unc: p13 is cleared whenever p18 is off. */
	(p18) cmp.ne.unc p13, p0 = 1, II
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f37,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f49,  1 * SIZE
kusano 2b45e8
	/* Default step after the 4th load: same columns, one LDA on. */
	adds	TEMP1 = -3 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f35  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f47  = [A2], SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f40,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f52,  1 * SIZE
kusano 2b45e8
	/* With p12 the step is instead to columns 8-11 / 12-15. */
	(p12) mov TEMP1 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f38  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f50  = [A2], SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* The 5 * SIZE post-increment skips the four slots written through */
	/* the other store cursor. */
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f43,  5 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f55,  5 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f41  = [A1], TEMP1
kusano 2b45e8
	(p16) LD	f53  = [A2], TEMP1
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores: second line of columns 0-7.  Loads: first line of */
	/* columns 8-15 (p12 only). */
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f82,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f94,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f56  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f68  = [A2], SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f85,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f97,  1 * SIZE
kusano 2b45e8
	/* Default B1/B2 adjustment after this 16-element block. */
	mov	TEMP2 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f59  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f71  = [A2], SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f88,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f100, 1 * SIZE
kusano 2b45e8
	/* With p13 the cursors move LDB past the start of this block. */
	(p13) adds TEMP2 = - 11 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f62  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f74  = [A2], SIZE
kusano 2b45e8
	/* From column 11 / 15 back to column 0 / 4, one LDA further on. */
	(p12) adds  TEMP1 = - 11 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f91
kusano 2b45e8
	(p18) ST	[B2] = f103
kusano 2b45e8
	(p18) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f65  = [A1], TEMP1
kusano 2b45e8
	(p12) LD	f77  = [A2], TEMP1
kusano 2b45e8
	(p18) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores: first line of columns 8-15 (p13 only).  Loads: second */
	/* line of columns 0-7. */
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f58,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f70,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f80  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f92  = [A2], SIZE
kusano 2b45e8
	/* TEMP1 becomes 5 * SIZE - LDA over this and the next bundle: */
	/* back up one line and on to the next group of columns. */
	sub TEMP1 = r0, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f61,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f73,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f83  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f95  = [A2], SIZE
kusano 2b45e8
	(p16) adds TEMP1 = 5 * SIZE, TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f64,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f76,  1 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f86  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f98  = [A2], SIZE
kusano 2b45e8
	/* With p12 stay on this line and move on to columns 8-15. */
	(p12) mov TEMP1	= 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f67,  5 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f79,  5 * SIZE
kusano 2b45e8
	nop   __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	/* The loads use the TEMP1 value from the previous group; the adds */
	/* in this bundle only takes effect after the stop. */
	{ .mmi
kusano 2b45e8
	(p16) LD	f89  = [A1], TEMP1
kusano 2b45e8
	(p16) LD	f101 = [A2], TEMP1
kusano 2b45e8
	adds	TEMP1 = -11 * SIZE, LDA
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores: second line of columns 8-15 (p13 only).  Loads: second */
	/* line of columns 8-15 (p12 only). */
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f106,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f118,  1 * SIZE
kusano 2b45e8
	mov	TEMP2 = 5 * SIZE
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f104 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f116 = [A2], SIZE
kusano 2b45e8
	/* While p16 is on, the next three p16 writes rebuild TEMP1 as */
	/* 5 * SIZE - LDA (2 * LDA, then LDA - 2 * LDA, then + 5 * SIZE). */
	(p16) shladd TEMP1 = LDA, 1, r0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f109,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f121,  1 * SIZE
kusano 2b45e8
	sub	TEMP2 = TEMP2, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f107 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f119 = [A2], SIZE
kusano 2b45e8
	(p16) sub TEMP1 = LDA, TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f112,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f124,  1 * SIZE
kusano 2b45e8
	/* While the store stage is on, TEMP2 = LDB - 11 * SIZE. */
	(p18) adds TEMP2 = -11 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f110 = [A1], SIZE
kusano 2b45e8
	(p12) LD	f122 = [A2], SIZE
kusano 2b45e8
	(p16) adds TEMP1 = 5 * SIZE, TEMP1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f115
kusano 2b45e8
	(p13) ST	[B2] = f127
kusano 2b45e8
	(p13) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f113 = [A1], TEMP1
kusano 2b45e8
	(p12) LD	f125 = [A2], TEMP1
kusano 2b45e8
	(p13) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Each counter drops by 2 in its own stage: an iteration covers up */
	/* to two groups of 8 columns. */
	{ .mmb
kusano 2b45e8
	(p16) adds	I = -2, I
kusano 2b45e8
	(p18) adds	II = -2, II
kusano 2b45e8
	br.ctop.sptk .L212
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L220: leftover-column handling for the two-line pass.
 * Reached by fall-through from .L212, or directly from .L200 when the
 * loop count is zero.
 *
 * Two lines are read, through A1 and A2 = A1 + LDA, according to bits
 * 2, 1 and 0 of N:
 *   p7 (N bit 2): 4 elements per line -> 8 elements through BO2
 *   p8 (N bit 1): 2 elements per line -> 4 elements through BO3
 *   p9 (N bit 0): 1 element  per line -> 2 elements through BO4
 * In each part A1 is post-incremented along its line and A2 is then
 * recomputed as A1 + LDA.  B2 is a second store cursor placed one
 * line's worth of elements ahead of the BOn cursor in use.
 *
 * BO2, BO3 and BO4 are defined outside this chunk; presumably they are
 * the output cursors for the 4-, 2- and 1-column remainders
 * (TODO confirm).
 */
.L220:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	tbit.nz p7, p0 = N, 2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p7 part: four elements from each of the two lines. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f36  = [A2], SIZE
kusano 2b45e8
	tbit.nz p8, p0 = N, 1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f33  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f37  = [A2], SIZE
kusano 2b45e8
	tbit.nz p9, p0 = N, 0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f34  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f38  = [A2], SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) LD	f35  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f39  = [A2]
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p8 part: two elements from each line. */
	{ .mmi
kusano 2b45e8
	(p8) LD	f40  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f42  = [A2], SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) LD	f41  = [A1], SIZE
kusano 2b45e8
	(p8) LD	f43  = [A2]
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	A2 = A1, LDA
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p9 part: one element from each line. */
	{ .mmi
kusano 2b45e8
	(p9) LD	f44  = [A1]
kusano 2b45e8
	(p9) LD	f45  = [A2]
kusano 2b45e8
	adds	B2 = 4 * SIZE, BO2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores, p7 part: BO2 takes f32-f35, B2 = BO2 + 4 takes f36-f39. */
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f32,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f36,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f33,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f37,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f34,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f38,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* When p7 is set BO2 ends 8 elements further on.  The store uses */
	/* the old B2; the adds in the same group re-aims B2 at BO3 + 2. */
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f35,  5 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f39
kusano 2b45e8
	adds	B2 = 2 * SIZE, BO3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores, p8 part: BO3 takes f40, f41 and B2 takes f42, f43; when */
	/* p8 is set BO3 ends 4 elements further on. */
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f40,  1 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f42,  1 * SIZE
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f41,  3 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f43
kusano 2b45e8
	adds	B2 = 1 * SIZE, BO4
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores, p9 part: f44 at BO4 and f45 at BO4 + 1; when p9 is set */
	/* BO4 ends 2 elements further on. */
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f44,  2 * SIZE
kusano 2b45e8
	(p9) ST	[B2 ] = f45
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L300: setup for the single-line pass.
 *
 * If bit 0 of M is clear there is nothing left to do and control goes
 * to the exit code at .L999.  Otherwise:
 *   A1 = A, A2 = A + 4 * SIZE     (two load cursors on the same line)
 *   B1 = B, B2 = B + 4 * SIZE     (two store cursors)
 *   pr.rot = 0, p16 = 1, ar.ec = 3  (software-pipeline state for .L312)
 *   ar.lc = ((N + 8) >> 4) - 1    (loop count for br.ctop)
 *   I = II = N >> 3               (counters tested inside .L312)
 * If (N + 8) >> 4 is zero the loop is bypassed and control goes
 * straight to the remainder code at .L320.
 * Unlike .L200, A and B themselves are not advanced here.
 */
.L300:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	add	I = 8, N
kusano 2b45e8
	mov	A1 = A
kusano 2b45e8
	mov	pr.rot = 0
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	mov	B1 =  B
kusano 2b45e8
	adds	A2 = 4 * SIZE, A
kusano 2b45e8
	/* p6 = bit 0 of M is zero. */
	tbit.z p6, p0 = M, 0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	B2 =  4 * SIZE, B
kusano 2b45e8
	/* r0 == r0 always holds, so this sets p16 to 1. */
	cmp.eq	p16, p0 = r0, r0
kusano 2b45e8
	mov	ar.ec  = 3
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	shr	I = I, 4
kusano 2b45e8
	(p6)	br.cond.dpnt .L999
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	/* The compare sees I before the decrement in the same group. */
	cmp.eq	p8, p0 = 0, I
kusano 2b45e8
	adds	I = -1, I
kusano 2b45e8
	shr	II = N, 3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	mov	ar.lc = I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mib
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	/* I is reused as a second copy of the N >> 3 counter. */
	mov	I = II
kusano 2b45e8
	(p8)	br.cond.dpnt .L320
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L312: software-pipelined copy loop of the single-line pass
 * (br.ctop, with ar.lc / ar.ec / p16 set up at .L300).
 *
 * Stage p16 issues the loads and stage p18 issues the stores.  A value
 * loaded into fN is stored from fN+2 (e.g. f32 -> f34, f56 -> f58),
 * i.e. two register rotations later.
 *
 * One iteration copies up to 16 consecutive elements of one line:
 *   - the first 8 always (A1 reads elements 0-3, A2 reads 4-7);
 *   - the next 8 only when the counter is not 1:
 *       p12 = load stage active  and I  != 1
 *       p13 = store stage active and II != 1
 *     I and II both start at N >> 3 and drop by 2 per iteration.
 * After each group of four stores B1 and B2 are moved by
 * TEMP2 = LDB - 3 * SIZE, i.e. LDB past where that group began.
 */
.L312:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f34,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f46,  1 * SIZE
kusano 2b45e8
	/* .unc: p12 is cleared whenever p16 is off. */
	(p16) cmp.ne.unc p12, p0 = 1, I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f44  = [A2], SIZE
kusano 2b45e8
	/* .unc: p13 is cleared whenever p18 is off. */
	(p18) cmp.ne.unc p13, p0 = 1, II
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f37,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f49,  1 * SIZE
kusano 2b45e8
	/* Recomputed every iteration; the value is loop-invariant. */
	adds	TEMP2 = - 3 * SIZE, LDB
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f35  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f47  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f40,  1 * SIZE
kusano 2b45e8
	(p18) ST	[B2] = f52,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p16) LD	f38  = [A1], SIZE
kusano 2b45e8
	(p16) LD	f50  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* The fourth store has no post-increment; B1/B2 are moved by the */
	/* explicit adds in the same group. */
	{ .mmi
kusano 2b45e8
	(p18) ST	[B1] = f43
kusano 2b45e8
	(p18) ST	[B2] = f55
kusano 2b45e8
	(p18) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	/* The 5 * SIZE post-increment skips the four elements read through */
	/* the other load cursor. */
	{ .mmi
kusano 2b45e8
	(p16) LD	f41  = [A1], 5 * SIZE
kusano 2b45e8
	(p16) LD	f53  = [A2], 5 * SIZE
kusano 2b45e8
	(p18) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Second group of 8 elements, guarded by p12 (loads) and p13 */
	/* (stores).  The counters are decremented here. */
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f58,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f70,  1 * SIZE
kusano 2b45e8
	(p16) adds	I = -2, I
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f56  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f68  = [A2], SIZE
kusano 2b45e8
	(p18) adds	II = -2, II
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f61,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f73,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f59  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f71  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f64,  1 * SIZE
kusano 2b45e8
	(p13) ST	[B2] = f76,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f62  = [A1], SIZE
kusano 2b45e8
	(p12) LD	f74  = [A2], SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p13) ST	[B1] = f67
kusano 2b45e8
	(p13) ST	[B2] = f79
kusano 2b45e8
	(p13) add	B1 = B1, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	(p12) LD	f65  = [A1], 5 * SIZE
kusano 2b45e8
	(p12) LD	f77  = [A2], 5 * SIZE
kusano 2b45e8
	(p13) add	B2 = B2, TEMP2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmb
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	br.ctop.sptk .L312
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L320: leftover-element handling for the single-line pass.
 * Reached by fall-through from .L312, or directly from .L300 when the
 * loop count is zero.
 *
 * Copies the remaining elements of the line at A1 according to bits
 * 2, 1 and 0 of N:
 *   p7 (N bit 2): 4 elements -> BO2 (BO2 ends 4 elements further on)
 *   p8 (N bit 1): 2 elements -> BO3 (BO3 ends 2 elements further on)
 *   p9 (N bit 0): 1 element  -> BO4 (BO4 ends 1 element  further on)
 * A2 and B2 are second cursors running ahead of A1 and of the BOn
 * cursor, so that two loads or two stores are issued per group.
 *
 * BO2, BO3 and BO4 are defined outside this chunk; presumably they are
 * the output cursors for the 4-, 2- and 1-column remainders
 * (TODO confirm).
 */
.L320:
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	A2 = 2 * SIZE, A1
kusano 2b45e8
	adds	B2 = 2 * SIZE, BO2
kusano 2b45e8
	tbit.nz p7, p0 = N, 2
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p7 part: A1 reads elements 0 and 1, A2 reads elements 2 and 3. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f32  = [A1], SIZE
kusano 2b45e8
	(p7) LD	f34  = [A2], SIZE
kusano 2b45e8
	tbit.nz p8, p0 = N, 1
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* A1 ends four elements past where it started. */
	{ .mmi
kusano 2b45e8
	(p7) LD	f33  = [A1], 3 * SIZE
kusano 2b45e8
	(p7) LD	f35  = [A2]
kusano 2b45e8
	nop  __LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	{ .mmi
kusano 2b45e8
	adds	A2 = SIZE, A1
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p8 part: two adjacent elements, one through each cursor. */
	{ .mmi
kusano 2b45e8
	(p8) LD	f36  = [A1], 2 * SIZE
kusano 2b45e8
	(p8) LD	f37  = [A2]
kusano 2b45e8
	tbit.nz p9, p0 = N, 0
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* p9 part: the single last element. */
	{ .mmi
kusano 2b45e8
	(p9) LD	f38  = [A1]
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores, p7 part: f32, f33 through BO2 and f34, f35 through B2. */
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f32,  1 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f34,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* The store uses the old B2; the adds in the same group re-aims */
	/* B2 at BO3 + 1. */
	{ .mmi
kusano 2b45e8
	(p7) ST	[BO2] = f33,  3 * SIZE
kusano 2b45e8
	(p7) ST	[B2 ] = f35
kusano 2b45e8
	adds	B2 = SIZE, BO3
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores, p8 part: f36 at BO3 and f37 at BO3 + 1. */
	{ .mmi
kusano 2b45e8
	(p8) ST	[BO3] = f36,  2 * SIZE
kusano 2b45e8
	(p8) ST	[B2 ] = f37
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	/* Stores, p9 part. */
	{ .mmi
kusano 2b45e8
	(p9) ST	[BO4] = f38,  1 * SIZE
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	nop	__LINE__
kusano 2b45e8
	}
kusano 2b45e8
	;;
kusano 2b45e8
	.align 32
kusano 2b45e8
kusano 2b45e8
/*
 * .L999: common exit.
 * Restores every predicate register from PR (mask -1) and the loop
 * count register ar.lc from ARLC, then returns through b0.
 * PR and ARLC are defined outside this chunk; presumably they hold the
 * values saved on entry (TODO confirm against the prologue).
 */
.L999:
kusano 2b45e8
	mov pr    = PR, -1
kusano 2b45e8
	mov	 ar.lc = ARLC
kusano 2b45e8
	br.ret.sptk.many b0
kusano 2b45e8
	EPILOGUE