/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/* Prefetch distances, in elements; each is multiplied by SIZE where used. */
#define PREFETCHSIZE	 64	/* look-ahead on the source matrix A       */
#define WPREFETCHSIZE	 32	/* look-ahead on the destination buffer B  */

/*
 * Element load/store macros.  Both variants come from common.h, which is
 * not visible here; presumably the XDOUBLE build uses extended-precision
 * accesses and the default build uses 8-byte accesses -- confirm there.
 */
#ifndef XDOUBLE
#define LD	LDF8
#define ST	STF8_NTA
#else
#define LD	LDFD
#define ST	STFD_NTA
#endif

/* Loop and prefetch state. */
#define J	r15		/* remaining 8-column panels (starts at N >> 3) */
#define PREB	r17		/* prefetch pointer into B                      */
#define PREA	r18		/* prefetch pointer into A                      */

/* Per-column read pointers for the current panel (A1 = first column). */
#define A1	r19
#define A2	r20
#define A3	r21
#define A4	r22
#define A5	r23
#define A6	r24
#define A7	r25
#define A8	r26
#define B1	r27		/* second write pointer (one packed row ahead of B)    */
#define B2	r28		/* not referenced by the code visible in this file     */

#define COUNT	r9		/* iteration counter used to re-base PREA              */
#define I	r10		/* rows left, as seen by the load stage of the loop    */
#define II	r11		/* rows left, as seen by the store stage of the loop   */

/* Save slots for state restored before returning. */
#define ARLC	r29		/* caller's ar.lc               */
#define PR	r30		/* caller's predicate registers */

/*
 * Incoming operands.  r32..r36 are the first stacked registers; presumably
 * these are the routine's first five arguments in this order -- the
 * PROLOGUE macro (common.h) that sets up the frame is not visible here.
 */
#define M	r32		/* number of rows                                      */
#define N	r33		/* number of columns                                   */
#define A	r34		/* source matrix base address                          */
#define LDA	r35		/* column stride of A in elements (scaled at entry)    */
#define B	r36		/* destination buffer address                          */

/*
 * Matrix packing kernel.
 *
 * Copies the M x N matrix at A (columns LDA elements apart, consecutive
 * rows SIZE bytes apart) into the contiguous buffer at B.
 *
 * Columns are consumed in panels: J = N >> 3 panels of 8 columns, then one
 * panel of 4, 2 and 1 column(s) if bits 2, 1 and 0 of N are set.  Within a
 * panel of width W the output holds, for each row in increasing order, the
 * W elements of that row (column 0 .. W-1 of the panel) back to back.
 *
 * Every panel uses the same modulo-scheduled loop (br.ctop, ar.ec = 6):
 *   - stage p16 loads one row of the panel into f32, f38, f56, ...;
 *     p13 (I != 1) additionally loads the following row into f44, f50, ...
 *   - five rotations later, stage p21 stores the first row from f37, f43,
 *     ... through B, and p14 (II != 1) stores the second row from f49,
 *     f55, ... through B1.
 * Two rows are handled per iteration, so the trip count is (M + 1) >> 1;
 * p13/p14 suppress the second row on the last iteration when M is odd.
 *
 * The caller's predicate registers and ar.lc are saved on entry and
 * restored at .L999.
 *
 * NOTE(review): M <= 0 is not guarded here; ((M + 1) >> 1) - 1 would be
 * negative when written to ar.lc.  Presumably the caller guarantees
 * M > 0 -- confirm.
 */
	PROLOGUE
	.prologue
	PROFCODE

	.body
	{ .mii
	shladd	LDA = LDA, BASE_SHIFT, r0	/* LDA <<= BASE_SHIFT (BASE_SHIFT from common.h) */
	mov	PR = pr				/* save caller's predicates        */
	shr	J = N, 3			/* J = number of 8-column panels   */
	}
	;;
	{ .mib
	cmp.eq	p8, p0 = 0, J
	mov	ARLC = ar.lc			/* save caller's loop counter      */
	(p8) br.cond.dpnt .L20			/* fewer than 8 columns            */
	}
	;;
	.align 32

/* ---- 8-column panel: set up column pointers and loop state ---- */
.L11:
	{ .mmi
	mov	A1 = A
	add	A2 = A, LDA
	mov	pr.rot = 0			/* clear all rotating predicates   */
	}
	{ .mmi
	shladd	A3 = LDA, 1, A
	shladd	A5 = LDA, 2, A
	adds	I = 1, M
	}
	;;
	{ .mmi
	shladd	A4 = LDA, 1, A2
	shladd	A6 = LDA, 2, A2
	mov	ar.ec = 6			/* six pipeline stages (p16..p21)  */
	}
	{ .mmi
	cmp.eq	p16, p0 = r0, r0		/* enable stage 0 for iteration 1  */
	shladd	A7 = LDA, 2, A3
	shr	I = I, 1			/* I = (M + 1) / 2 row pairs       */
	}
	;;
	{ .mmi
	adds	B1 = 8 * SIZE, B		/* B1 writes the second row        */
	shladd	A8 = LDA, 2, A4
	shladd	A = LDA, 3, A			/* A -> first column of next panel */
	}
	{ .mmi
	adds	I = -1, I			/* ar.lc wants trip count - 1      */
	mov	COUNT = 0
	adds	J = -1, J
	}
	;;
	{ .mmi
	/* A was already advanced above, so this starts in the next panel. */
	adds	PREA = PREFETCHSIZE * SIZE, A
	adds	PREB = WPREFETCHSIZE * SIZE, B
	mov	ar.lc = I
	}
	{ .mmi
	mov	I = M				/* rows left, load stage           */
	mov	II = M				/* rows left, store stage          */
	cmp.ne	p14, p0 = r0, r0		/* p14 = 0: nothing to store yet   */
	}
	;;
	.align 32

/* ---- 8-column panel: pipelined copy loop, two rows per iteration ---- */
.L12:
	{ .mmi
	(p21) ST	[B ] = f37, 1 * SIZE
	(p14) ST	[B1] = f49, 1 * SIZE
	(p16) cmp.ne.unc p13, p0 = 1, I		/* p13: a second row exists        */
	}
	{ .mmi
	lfetch.nt1	[PREA], LDA		/* prefetch, then step one column  */
	lfetch.excl.nt1	[PREB]
	adds	PREB = 16 * SIZE, PREB
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f43, 1 * SIZE
	(p14) ST	[B1] = f55, 1 * SIZE
	cmp.eq	p9, p0 = 8, COUNT		/* time to re-base PREA?           */
	}
	{ .mmi
	(p16) LD	f32 = [A1], SIZE
	(p16) LD	f38 = [A2], SIZE
	(p16) adds	I = -2, I
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f61, 1 * SIZE
	(p14) ST	[B1] = f73, 1 * SIZE
	(p9) mov	COUNT = 0
	}
	{ .mmi
	(p13) LD	f44 = [A1], SIZE
	(p13) LD	f50 = [A2], SIZE
	(p21) adds	II = -2, II
	}
	;;
	{ .mmb
	(p21) ST	[B ] = f67, 1 * SIZE
	(p14) ST	[B1] = f79, 1 * SIZE
	nop	__LINE__
	}
	{ .mmb
	(p16) LD	f56 = [A3], SIZE
	(p16) LD	f62 = [A4], SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f85, 1 * SIZE
	(p14) ST	[B1] = f97, 1 * SIZE
	(p9) adds	PREA = (PREFETCHSIZE - 2)* SIZE, A1
	}
	{ .mmb
	(p13) LD	f68 = [A3], SIZE
	(p13) LD	f74 = [A4], SIZE
	nop	__LINE__
	}
	;;
	{ .mmb
	(p21) ST	[B ] = f91, 1 * SIZE
	(p14) ST	[B1] = f103, 1 * SIZE
	nop	__LINE__
	}
	{ .mmb
	(p16) LD	f80 = [A5], SIZE
	(p16) LD	f86 = [A6], SIZE
	nop	__LINE__
	}
	;;
	{ .mmb
	(p21) ST	[B ] = f109, 1 * SIZE
	(p14) ST	[B1] = f121, 1 * SIZE
	nop	__LINE__
	}
	{ .mmb
	(p13) LD	f92 = [A5], SIZE
	(p13) LD	f98 = [A6], SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f115, 1 * SIZE
	(p14) ST	[B1] = f127, 9 * SIZE	/* 7 + 9 = 16: skip the row B wrote */
	(p16) adds	COUNT = 1, COUNT
	}
	{ .mmb
	(p16) LD	f104 = [A7], SIZE
	(p16) LD	f110 = [A8], SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p13) LD	f116 = [A7], SIZE
	(p13) LD	f122 = [A8], SIZE
	(p14) adds	B = 8 * SIZE, B		/* skip the row B1 just wrote      */
	}
	{ .mmb
	(p20) cmp.ne.unc p14, p0 = 1, II	/* p14 for the next iteration      */
	nop	__LINE__
	br.ctop.sptk.few .L12
	}
	;;
	{ .mmb
	cmp.ne	p6, p0 = 0, J
	nop	__LINE__
	(p6) br.cond.dptk .L11			/* next 8-column panel             */
	}
	;;
	.align 32

/* ---- 4-column panel (taken when bit 2 of N is set) ---- */
.L20:
	{ .mmi
	adds	I = 1, M
	mov	A1 = A
	mov	pr.rot = 0
	}
	{ .mmi
	add	A2 = A, LDA
	shladd	A3 = LDA, 1, A
	tbit.z	p6, p0 = N, 2			/* p6: no 4-column panel           */
	}
	;;
	{ .mmi
	shladd	A4 = LDA, 1, A2
	adds	B1 = 4 * SIZE, B
	mov	ar.ec = 6
	}
	{ .mib
	cmp.eq	p16, p0 = r0, r0
	shr	I = I, 1
	(p6) br.cond.dpnt .L30
	}
	;;
	{ .mmi
	shladd	A = LDA, 2, A			/* A -> column after this panel    */
	nop	__LINE__
	nop	__LINE__
	}
	{ .mmi
	adds	I = -1, I
	mov	COUNT = 0
	adds	J = -1, J			/* J is not read again below       */
	}
	;;
	{ .mmi
	adds	PREA = PREFETCHSIZE * SIZE, A
	adds	PREB = WPREFETCHSIZE * SIZE, B
	mov	ar.lc = I
	}
	{ .mmi
	mov	I = M
	mov	II = M
	cmp.ne	p14, p0 = r0, r0
	}
	;;
	.align 32

.L22:
	{ .mmi
	(p21) ST	[B ] = f37, 1 * SIZE
	(p14) ST	[B1] = f49, 1 * SIZE
	(p16) cmp.ne.unc p13, p0 = 1, I
	}
	{ .mmi
	lfetch.nt1	[PREA], LDA
	lfetch.excl.nt1	[PREB], 8 * SIZE
	cmp.eq	p9, p0 = 4, COUNT
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f43, 1 * SIZE
	(p14) ST	[B1] = f55, 1 * SIZE
	(p16) adds	I = -2, I
	}
	{ .mmi
	(p16) LD	f32 = [A1], SIZE
	(p16) LD	f38 = [A2], SIZE
	(p21) adds	II = -2, II
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f61, 1 * SIZE
	(p14) ST	[B1] = f73, 1 * SIZE
	(p9) mov	COUNT = 0
	}
	{ .mmi
	(p13) LD	f44 = [A1], SIZE
	(p13) LD	f50 = [A2], SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f67, 1 * SIZE
	(p14) ST	[B1] = f79, 5 * SIZE	/* 3 + 5 = 8: skip the row B wrote  */
	(p9) adds	PREA = PREFETCHSIZE * SIZE, A1
	}
	{ .mmb
	(p16) LD	f56 = [A3], SIZE
	(p16) LD	f62 = [A4], SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p13) LD	f68 = [A3], SIZE
	(p13) LD	f74 = [A4], SIZE
	(p16) adds	COUNT = 1, COUNT
	}
	{ .mmb
	(p14) adds	B = 4 * SIZE, B
	(p20) cmp.ne.unc p14, p0 = 1, II
	br.ctop.sptk.few .L22
	}
	;;
	.align 32

/* ---- 2-column panel (taken when bit 1 of N is set) ---- */
.L30:
	{ .mmi
	adds	I = 1, M
	mov	A1 = A
	mov	pr.rot = 0
	}
	{ .mmi
	add	A2 = A, LDA
	adds	B1 = 2 * SIZE, B
	tbit.z	p6, p0 = N, 1			/* p6: no 2-column panel           */
	}
	;;
	{ .mmi
	nop	__LINE__
	nop	__LINE__
	mov	ar.ec = 6
	}
	{ .mib
	cmp.eq	p16, p0 = r0, r0
	shr	I = I, 1
	(p6) br.cond.dpnt .L40
	}
	;;
	{ .mmi
	adds	I = -1, I
	;;
	shladd	A = LDA, 1, A			/* A -> column after this panel    */
	mov	ar.lc = I
	}
	{ .mmi
	mov	I = M
	mov	II = M
	cmp.ne	p14, p0 = r0, r0
	}
	;;
	.align 32

.L32:
	{ .mmi
	(p21) ST	[B ] = f37, 1 * SIZE
	(p14) ST	[B1] = f49, 1 * SIZE
	(p16) cmp.ne.unc p13, p0 = 1, I
	}
	{ .mmi
	nop	__LINE__
	nop	__LINE__
	(p21) adds	II = -2, II
	}
	;;
	{ .mmi
	(p21) ST	[B ] = f43, 1 * SIZE
	(p14) ST	[B1] = f55, 3 * SIZE	/* 1 + 3 = 4: skip the row B wrote  */
	nop	__LINE__
	}
	{ .mmi
	(p16) LD	f32 = [A1], SIZE
	(p16) LD	f38 = [A2], SIZE
	nop	__LINE__
	}
	;;
	{ .mmi
	(p13) LD	f44 = [A1], SIZE
	(p13) LD	f50 = [A2], SIZE
	(p16) adds	I = -2, I
	}
	{ .mmb
	(p14) adds	B = 2 * SIZE, B
	(p20) cmp.ne.unc p14, p0 = 1, II
	br.ctop.sptk.few .L32
	}
	;;
	.align 32

/* ---- single remaining column (taken when bit 0 of N is set) ---- */
.L40:
	{ .mmi
	adds	I = 1, M
	mov	A1 = A
	mov	pr.rot = 0
	}
	{ .mmi
	tbit.z	p6, p0 = N, 0			/* p6: no odd column left          */
	}
	;;
	{ .mmi
	nop	__LINE__
	nop	__LINE__
	mov	ar.ec = 6
	}
	{ .mib
	cmp.eq	p16, p0 = r0, r0
	shr	I = I, 1
	(p6) br.cond.dpnt .L999
	}
	;;
	{ .mmi
	adds	I = -1, I
	;;
	mov	ar.lc = I
	}
	{ .mmi
	mov	I = M
	mov	II = M
	cmp.ne	p14, p0 = r0, r0
	}
	;;
	.align 32

/* Both elements of a row pair go through B here; B1 is not used. */
.L42:
	{ .mmi
	(p21) ST	[B ] = f37, 1 * SIZE
	(p16) cmp.ne.unc p13, p0 = 1, I
	(p21) adds	II = -2, II
	}
	;;
	{ .mmi
	(p14) ST	[B ] = f49, 1 * SIZE
	(p16) LD	f32 = [A1], SIZE
	(p16) adds	I = -2, I
	}
	;;
	{ .mmb
	(p13) LD	f44 = [A1], SIZE
	(p20) cmp.ne.unc p14, p0 = 1, II
	br.ctop.sptk.few .L42
	}
	;;
	.align 32

/* ---- restore caller state and return ---- */
.L999:
	mov	pr = PR, -1			/* restore all predicate registers */
	mov	ar.lc = ARLC
	br.ret.sptk.many b0
	EPILOGUE
