|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ASSEMBLER
|
|
kusano |
2b45e8 |
#include "common.h"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* LOAD: integer load mnemonic chosen by build width -- lwz (32-bit word) */
/* unless __64BIT__ is defined, in which case ld (64-bit doubleword).     */
#ifndef __64BIT__
|
|
kusano |
2b45e8 |
#define LOAD lwz
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define LOAD ld
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* STACKSIZE: number of bytes the prologue subtracts from SP.             */
/* The prologue stores v20-v31 at byte offsets 0 .. 11*16 of that area    */
/* and r31 down to r14 starting at offset 192 (8 bytes per register with  */
/* std when __64BIT__, 4 bytes per register with stw otherwise).          */
#ifdef __64BIT__
|
|
kusano |
2b45e8 |
#define STACKSIZE 360
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define STACKSIZE 272
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* NOTE(review): ALIGN_SIZE is not referenced in the part of the kernel   */
/* visible here; the prologue realigns SP with a literal -8192 mask.      */
/* Confirm whether this macro is still used anywhere.                     */
#define ALIGN_SIZE 0xffff
|
|
kusano |
2b45e8 |
/* Byte displacements from the realigned SP of the 16-byte constants the  */
/* prologue builds with stw/stfs and the kernel later reads with lvx.     */
/* SWAP: vperm control words 0x04050607, 0x00010203, 0x0c0d0e0f,          */
/*       0x08090a0b, i.e. exchange the two 32-bit words of each 8 bytes.  */
#define SWAP 0
|
|
kusano |
2b45e8 |
/* NEG: 0x80000000 in words 0 and 2 or in words 1 and 3 (zero elsewhere), */
/*      selected by the NN/NT/.../CC build variant; applied with vxor.    */
#define NEG 16
|
|
kusano |
2b45e8 |
/* ALPHA_R: four words holding f1, or f1 alternating with -f1, depending  */
/*          on the build variant.                                         */
#define ALPHA_R 32
|
|
kusano |
2b45e8 |
/* ALPHA_I: four words holding f2, or -f2 alternating with f2, depending  */
/*          on the build variant.                                         */
#define ALPHA_I 48
|
|
kusano |
2b45e8 |
/* FZERO: a single zero word stored by the prologue.                      */
#define FZERO 64
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* M, N, K: the three integer size arguments, read from r3-r5 without     */
/* being written first. The kernel returns early (LL(999)) when any is    */
/* <= 0, iterates N >> 1 times in the outer loop, M >> 3 times in the     */
/* 8-wide inner loop and K >> 1 times (plus a K & 1 tail) per tile.       */
#define M r3
|
|
kusano |
2b45e8 |
#define N r4
|
|
kusano |
2b45e8 |
#define K r5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Register assignment of A, B, C and LDC for linux builds.               */
/* For __64BIT__ the prologue additionally loads LDC from                 */
/* 112 + STACKSIZE(SP), so r6 only serves as the destination of that      */
/* load; in the 32-bit case all four are used as they arrive.             */
#ifdef linux
|
|
kusano |
2b45e8 |
#ifndef __64BIT__
|
|
kusano |
2b45e8 |
#define A r6
|
|
kusano |
2b45e8 |
#define B r7
|
|
kusano |
2b45e8 |
#define C r8
|
|
kusano |
2b45e8 |
#define LDC r9
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define A r8
|
|
kusano |
2b45e8 |
#define B r9
|
|
kusano |
2b45e8 |
#define C r10
|
|
kusano |
2b45e8 |
#define LDC r6
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Register assignment of A, B, C and LDC for AIX / Apple builds.         */
/* 32-bit DOUBLE: only A arrives in a register (r10); the prologue loads  */
/*   B, C and LDC from 56, 60 and 64 + STACKSIZE(SP).                     */
/* Otherwise: A, B, C are used as they arrive in r8-r10 and the prologue  */
/*   loads LDC from 112 + STACKSIZE(SP) (__64BIT__) or from               */
/*   56 + STACKSIZE(SP) (32-bit).                                         */
#if defined(_AIX) || defined(__APPLE__)
|
|
kusano |
2b45e8 |
#if !defined(__64BIT__) && defined(DOUBLE)
|
|
kusano |
2b45e8 |
#define A r10
|
|
kusano |
2b45e8 |
#define B r6
|
|
kusano |
2b45e8 |
#define C r7
|
|
kusano |
2b45e8 |
#define LDC r8
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define A r8
|
|
kusano |
2b45e8 |
#define B r9
|
|
kusano |
2b45e8 |
#define C r10
|
|
kusano |
2b45e8 |
#define LDC r6
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* STACK: copy of SP taken right after the frame is allocated, i.e.       */
/* before the prologue lowers SP by 128 and masks it with -8192.          */
#define STACK r11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* I: inner (M-direction) loop counter; also used as scratch while the    */
/*    prologue assembles the SWAP and NEG constants.                      */
#define I r21
|
|
kusano |
2b45e8 |
/* J: outer loop count, initialised to N >> 1.                            */
#define J r22
|
|
kusano |
2b45e8 |
/* AO / BO: running read pointers, reset from A and B respectively.       */
#define AO r23
|
|
kusano |
2b45e8 |
#define BO r24
|
|
kusano |
2b45e8 |
/* CO1 / CO2: output pointers; CO1 = C and CO2 = C + LDC at LL(01),       */
/* after which C itself is advanced to CO2 + LDC.                         */
#define CO1 r25
|
|
kusano |
2b45e8 |
#define CO2 r26
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* PREA and PREB name the SAME register (r29): DCBT(AO, PREA) under CELL  */
/* and DCBT(BO, PREB) therefore use one shared prefetch distance.         */
#define PREA r29
|
|
kusano |
2b45e8 |
#define PREB r29
|
|
kusano |
2b45e8 |
/* PREC: index operand of the dcbtst prefetches on CO1 / CO2.             */
#define PREC r30
|
|
kusano |
2b45e8 |
/* VREG: receives the VRsave value read by the prologue before VRsave is  */
/* overwritten with -1.                                                   */
#define VREG r31
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* LOAD_A / LOAD_B: vector load mnemonics for the A and B operands; both  */
/* are lvx here.                                                          */
#define LOAD_A lvx
|
|
kusano |
2b45e8 |
#define LOAD_B lvx
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* OFFSET_n: first address operand of the indexed lvx/stvx forms.         */
/* OFFSET_0 is the literal 0; OFFSET_1 .. OFFSET_7 are registers that the */
/* prologue loads with 4*SIZE, 8*SIZE, ... 28*SIZE.                       */
#define OFFSET_0 0
|
|
kusano |
2b45e8 |
#define OFFSET_1 r14
|
|
kusano |
2b45e8 |
#define OFFSET_2 r15
|
|
kusano |
2b45e8 |
#define OFFSET_3 r16
|
|
kusano |
2b45e8 |
#define OFFSET_4 r17
|
|
kusano |
2b45e8 |
#define OFFSET_5 r18
|
|
kusano |
2b45e8 |
#define OFFSET_6 r19
|
|
kusano |
2b45e8 |
#define OFFSET_7 r20
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* c01 .. c16 (v0 .. v15): accumulators. They are cleared with vxor at    */
/* the top of each tile and updated with vmaddfp in the K loop.           */
/* In LL(18) c05-c08 and c13-c16 are permuted with `swap`, xor-ed with    */
/* `neg` and added into c01-c04 and c09-c12, which are then scaled and    */
/* stored through CO1 (c01-c04) and CO2 (c09-c12).                        */
#define c01 v0
|
|
kusano |
2b45e8 |
#define c02 v1
|
|
kusano |
2b45e8 |
#define c03 v2
|
|
kusano |
2b45e8 |
#define c04 v3
|
|
kusano |
2b45e8 |
#define c05 v4
|
|
kusano |
2b45e8 |
#define c06 v5
|
|
kusano |
2b45e8 |
#define c07 v6
|
|
kusano |
2b45e8 |
#define c08 v7
|
|
kusano |
2b45e8 |
#define c09 v8
|
|
kusano |
2b45e8 |
#define c10 v9
|
|
kusano |
2b45e8 |
#define c11 v10
|
|
kusano |
2b45e8 |
#define c12 v11
|
|
kusano |
2b45e8 |
#define c13 v12
|
|
kusano |
2b45e8 |
#define c14 v13
|
|
kusano |
2b45e8 |
#define c15 v14
|
|
kusano |
2b45e8 |
#define c16 v15
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* a1 .. a8 (v16 .. v23): vectors loaded from AO with LOAD_A.             */
#define a1 v16
|
|
kusano |
2b45e8 |
#define a2 v17
|
|
kusano |
2b45e8 |
#define a3 v18
|
|
kusano |
2b45e8 |
#define a4 v19
|
|
kusano |
2b45e8 |
#define a5 v20
|
|
kusano |
2b45e8 |
#define a6 v21
|
|
kusano |
2b45e8 |
#define a7 v22
|
|
kusano |
2b45e8 |
#define a8 v23
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* b1, b2 (v24, v25): vectors loaded from B / BO with LOAD_B.             */
#define b1 v24
|
|
kusano |
2b45e8 |
#define b2 v25
|
|
kusano |
2b45e8 |
/* bp1, bp2 (v26, v27): vspltw broadcasts of one word of b1 or b2, used   */
/* as the multiplier operand of the vmaddfp instructions.                 */
#define bp1 v26
|
|
kusano |
2b45e8 |
#define bp2 v27
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Write-back aliases. These reuse v16-v27 under second names, so using   */
/* them destroys a1-a5, b1, b2, bp1 and bp2; the kernel reloads those     */
/* operands at the start of the next tile.                                */
/* C1 .. C5 (v16 .. v20, same as a1 .. a5): vectors loaded from CO1 or    */
/* CO2 and added to the shifted results before they are stored back.      */
#define C1 v16
|
|
kusano |
2b45e8 |
#define C2 v17
|
|
kusano |
2b45e8 |
#define C3 v18
|
|
kusano |
2b45e8 |
#define C4 v19
|
|
kusano |
2b45e8 |
#define C5 v20
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* c00 (v24, same as b1): leading vector produced by                      */
/* vperm VZERO, c01/c09, PERMRSHIFTn during write-back.                   */
#define c00 v24
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* VZERO (v25, same as b2): cleared with vxor at LL(18) / LL(28).         */
#define VZERO v25
|
|
kusano |
2b45e8 |
/* PERMRSHIFT1 / PERMRSHIFT2 (v26 / v27, same as bp1 / bp2): permute      */
/* controls produced by lvsr from CO1 and CO2 respectively.               */
#define PERMRSHIFT1 v26
|
|
kusano |
2b45e8 |
#define PERMRSHIFT2 v27
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* swap, neg, alpha_r, alpha_i (v28 .. v31): loaded with lvx from SP at   */
/* OFFSET_0 .. OFFSET_3 before the write-back sequence.                   */
#define swap v28
|
|
kusano |
2b45e8 |
#define neg v29
|
|
kusano |
2b45e8 |
#define alpha_r v30
|
|
kusano |
2b45e8 |
#define alpha_i v31
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef NEEDPARAM
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
PROLOGUE
|
|
kusano |
2b45e8 |
PROFCODE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi SP, SP, -STACKSIZE
|
|
kusano |
2b45e8 |
mr STACK, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
li r0, 0 * 16
|
|
kusano |
2b45e8 |
stvx v20, SP, r0
|
|
kusano |
2b45e8 |
li r0, 1 * 16
|
|
kusano |
2b45e8 |
stvx v21, SP, r0
|
|
kusano |
2b45e8 |
li r0, 2 * 16
|
|
kusano |
2b45e8 |
stvx v22, SP, r0
|
|
kusano |
2b45e8 |
li r0, 3 * 16
|
|
kusano |
2b45e8 |
stvx v23, SP, r0
|
|
kusano |
2b45e8 |
li r0, 4 * 16
|
|
kusano |
2b45e8 |
stvx v24, SP, r0
|
|
kusano |
2b45e8 |
li r0, 5 * 16
|
|
kusano |
2b45e8 |
stvx v25, SP, r0
|
|
kusano |
2b45e8 |
li r0, 6 * 16
|
|
kusano |
2b45e8 |
stvx v26, SP, r0
|
|
kusano |
2b45e8 |
li r0, 7 * 16
|
|
kusano |
2b45e8 |
stvx v27, SP, r0
|
|
kusano |
2b45e8 |
li r0, 8 * 16
|
|
kusano |
2b45e8 |
stvx v28, SP, r0
|
|
kusano |
2b45e8 |
li r0, 9 * 16
|
|
kusano |
2b45e8 |
stvx v29, SP, r0
|
|
kusano |
2b45e8 |
li r0, 10 * 16
|
|
kusano |
2b45e8 |
stvx v30, SP, r0
|
|
kusano |
2b45e8 |
li r0, 11 * 16
|
|
kusano |
2b45e8 |
stvx v31, SP, r0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef __64BIT__
|
|
kusano |
2b45e8 |
std r31, 192(SP)
|
|
kusano |
2b45e8 |
std r30, 200(SP)
|
|
kusano |
2b45e8 |
std r29, 208(SP)
|
|
kusano |
2b45e8 |
std r28, 216(SP)
|
|
kusano |
2b45e8 |
std r27, 224(SP)
|
|
kusano |
2b45e8 |
std r26, 232(SP)
|
|
kusano |
2b45e8 |
std r25, 240(SP)
|
|
kusano |
2b45e8 |
std r24, 248(SP)
|
|
kusano |
2b45e8 |
std r23, 256(SP)
|
|
kusano |
2b45e8 |
std r22, 264(SP)
|
|
kusano |
2b45e8 |
std r21, 272(SP)
|
|
kusano |
2b45e8 |
std r20, 280(SP)
|
|
kusano |
2b45e8 |
std r19, 288(SP)
|
|
kusano |
2b45e8 |
std r18, 296(SP)
|
|
kusano |
2b45e8 |
std r17, 304(SP)
|
|
kusano |
2b45e8 |
std r16, 312(SP)
|
|
kusano |
2b45e8 |
std r15, 320(SP)
|
|
kusano |
2b45e8 |
std r14, 328(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
stw r31, 192(SP)
|
|
kusano |
2b45e8 |
stw r30, 196(SP)
|
|
kusano |
2b45e8 |
stw r29, 200(SP)
|
|
kusano |
2b45e8 |
stw r28, 204(SP)
|
|
kusano |
2b45e8 |
stw r27, 208(SP)
|
|
kusano |
2b45e8 |
stw r26, 212(SP)
|
|
kusano |
2b45e8 |
stw r25, 216(SP)
|
|
kusano |
2b45e8 |
stw r24, 220(SP)
|
|
kusano |
2b45e8 |
stw r23, 224(SP)
|
|
kusano |
2b45e8 |
stw r22, 228(SP)
|
|
kusano |
2b45e8 |
stw r21, 232(SP)
|
|
kusano |
2b45e8 |
stw r20, 236(SP)
|
|
kusano |
2b45e8 |
stw r19, 240(SP)
|
|
kusano |
2b45e8 |
stw r18, 244(SP)
|
|
kusano |
2b45e8 |
stw r17, 248(SP)
|
|
kusano |
2b45e8 |
stw r16, 252(SP)
|
|
kusano |
2b45e8 |
stw r15, 256(SP)
|
|
kusano |
2b45e8 |
stw r14, 260(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef linux
|
|
kusano |
2b45e8 |
#ifdef __64BIT__
|
|
kusano |
2b45e8 |
ld LDC, 112 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(_AIX) || defined(__APPLE__)
|
|
kusano |
2b45e8 |
#ifdef __64BIT__
|
|
kusano |
2b45e8 |
ld LDC, 112 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#ifdef DOUBLE
|
|
kusano |
2b45e8 |
lwz B, 56 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
lwz C, 60 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
lwz LDC, 64 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lwz LDC, 56 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef PREFETCHTEST
|
|
kusano |
2b45e8 |
#ifdef PPC970
|
|
kusano |
2b45e8 |
li PREC, 16 * SIZE
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef linux
|
|
kusano |
2b45e8 |
#ifndef __64BIT__
|
|
kusano |
2b45e8 |
lwz PREB, 16 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
lwz PREC, 20 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ld PREB, 136 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
ld PREC, 144 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(_AIX) || defined(__APPLE__)
|
|
kusano |
2b45e8 |
#ifdef __64BIT__
|
|
kusano |
2b45e8 |
ld PREB, 136 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
ld PREC, 144 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#ifdef DOUBLE
|
|
kusano |
2b45e8 |
lwz PREB, 72 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
lwz PREC, 76 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lwz PREB, 68 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
lwz PREC, 72 + STACKSIZE(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef PREFETCHTEST
|
|
kusano |
2b45e8 |
#ifdef CELL
|
|
kusano |
2b45e8 |
li PREB, (3 * 32 * SIZE)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
li PREB, (5 * 32 * SIZE)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
li r0, -1
|
|
kusano |
2b45e8 |
mfspr VREG, VRsave
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mtspr VRsave, r0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi SP, SP, -128
|
|
kusano |
2b45e8 |
li r0, -8192
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
and SP, SP, r0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fneg f3, f1
|
|
kusano |
2b45e8 |
fneg f4, f2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
|
kusano |
2b45e8 |
defined(NC) || defined(TC) || defined(NR) || defined(TR)
|
|
kusano |
2b45e8 |
stfs f1, ALPHA_R + 0(SP)
|
|
kusano |
2b45e8 |
stfs f1, ALPHA_R + 4(SP)
|
|
kusano |
2b45e8 |
stfs f1, ALPHA_R + 8(SP)
|
|
kusano |
2b45e8 |
stfs f1, ALPHA_R + 12(SP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stfs f4, ALPHA_I + 0(SP)
|
|
kusano |
2b45e8 |
stfs f2, ALPHA_I + 4(SP)
|
|
kusano |
2b45e8 |
stfs f4, ALPHA_I + 8(SP)
|
|
kusano |
2b45e8 |
stfs f2, ALPHA_I + 12(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
stfs f1, ALPHA_R + 0(SP)
|
|
kusano |
2b45e8 |
stfs f3, ALPHA_R + 4(SP)
|
|
kusano |
2b45e8 |
stfs f1, ALPHA_R + 8(SP)
|
|
kusano |
2b45e8 |
stfs f3, ALPHA_R + 12(SP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stfs f2, ALPHA_I + 0(SP)
|
|
kusano |
2b45e8 |
stfs f2, ALPHA_I + 4(SP)
|
|
kusano |
2b45e8 |
stfs f2, ALPHA_I + 8(SP)
|
|
kusano |
2b45e8 |
stfs f2, ALPHA_I + 12(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
li I, Address_L(0x04050607)
|
|
kusano |
2b45e8 |
addis I, I, Address_H(0x04050607)
|
|
kusano |
2b45e8 |
stw I, SWAP + 0(SP)
|
|
kusano |
2b45e8 |
li I, Address_L(0x00010203)
|
|
kusano |
2b45e8 |
addis I, I, Address_H(0x00010203)
|
|
kusano |
2b45e8 |
stw I, SWAP + 4(SP)
|
|
kusano |
2b45e8 |
li I, Address_L(0x0c0d0e0f)
|
|
kusano |
2b45e8 |
addis I, I, Address_H(0x0c0d0e0f)
|
|
kusano |
2b45e8 |
stw I, SWAP + 8(SP)
|
|
kusano |
2b45e8 |
li I, Address_L(0x08090a0b)
|
|
kusano |
2b45e8 |
addis I, I, Address_H(0x08090a0b)
|
|
kusano |
2b45e8 |
stw I, SWAP + 12(SP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) || \
|
|
kusano |
2b45e8 |
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
|
kusano |
2b45e8 |
lis I, 0x8000
|
|
kusano |
2b45e8 |
stw I, NEG + 0(SP)
|
|
kusano |
2b45e8 |
stw I, NEG + 8(SP)
|
|
kusano |
2b45e8 |
li I, 0
|
|
kusano |
2b45e8 |
stw I, NEG + 4(SP)
|
|
kusano |
2b45e8 |
stw I, NEG + 12(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
li I, 0
|
|
kusano |
2b45e8 |
stw I, NEG + 0(SP)
|
|
kusano |
2b45e8 |
stw I, NEG + 8(SP)
|
|
kusano |
2b45e8 |
lis I, 0x8000
|
|
kusano |
2b45e8 |
stw I, NEG + 4(SP)
|
|
kusano |
2b45e8 |
stw I, NEG + 12(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
li r0, 0
|
|
kusano |
2b45e8 |
stw r0, FZERO(SP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
slwi LDC, LDC, ZBASE_SHIFT
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
li OFFSET_1, 4 * SIZE
|
|
kusano |
2b45e8 |
li OFFSET_2, 8 * SIZE
|
|
kusano |
2b45e8 |
li OFFSET_3, 12 * SIZE
|
|
kusano |
2b45e8 |
li OFFSET_4, 16 * SIZE
|
|
kusano |
2b45e8 |
li OFFSET_5, 20 * SIZE
|
|
kusano |
2b45e8 |
li OFFSET_6, 24 * SIZE
|
|
kusano |
2b45e8 |
li OFFSET_7, 28 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
cmpwi cr0, M, 0
|
|
kusano |
2b45e8 |
ble LL(999)
|
|
kusano |
2b45e8 |
cmpwi cr0, N, 0
|
|
kusano |
2b45e8 |
ble LL(999)
|
|
kusano |
2b45e8 |
cmpwi cr0, K, 0
|
|
kusano |
2b45e8 |
ble LL(999)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. J, N, 1
|
|
kusano |
2b45e8 |
ble LL(50)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(01):
|
|
kusano |
2b45e8 |
mr CO1, C
|
|
kusano |
2b45e8 |
add CO2, C, LDC
|
|
kusano |
2b45e8 |
add C, CO2, LDC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr AO, A
|
|
kusano |
2b45e8 |
srawi. I, M, 3
|
|
kusano |
2b45e8 |
ble LL(20)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(11):
|
|
kusano |
2b45e8 |
vxor c01, c01, c01
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, B
|
|
kusano |
2b45e8 |
vxor c02, c02, c02
|
|
kusano |
2b45e8 |
LOAD_B b2, OFFSET_1, B
|
|
kusano |
2b45e8 |
vxor c03, c03, c03
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vxor c04, c04, c04
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
vxor c05, c05, c05
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
vxor c06, c06, c06
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
vxor c07, c07, c07
|
|
kusano |
2b45e8 |
LOAD_A a5, OFFSET_4, AO
|
|
kusano |
2b45e8 |
vxor c08, c08, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c09, c09, c09
|
|
kusano |
2b45e8 |
dcbtst CO1, PREC
|
|
kusano |
2b45e8 |
vxor c10, c10, c10
|
|
kusano |
2b45e8 |
dcbtst CO2, PREC
|
|
kusano |
2b45e8 |
vxor c11, c11, c11
|
|
kusano |
2b45e8 |
vxor c12, c12, c12
|
|
kusano |
2b45e8 |
vxor c13, c13, c13
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
vxor c14, c14, c14
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
vxor c15, c15, c15
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
vxor c16, c16, c16
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
ble LL(15)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(12):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
DCBT(BO, PREB)
|
|
kusano |
2b45e8 |
vmaddfp c03, a3, bp1, c03
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c04, a4, bp1, c04
|
|
kusano |
2b45e8 |
LOAD_A a6, OFFSET_5, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
#ifdef CELL
|
|
kusano |
2b45e8 |
DCBT(AO, PREA)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
vmaddfp c07, a3, bp2, c07
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c08, a4, bp2, c08
|
|
kusano |
2b45e8 |
LOAD_A a7, OFFSET_6, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, a1, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c10, a2, bp1, c10
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_2, BO
|
|
kusano |
2b45e8 |
vmaddfp c11, a3, bp1, c11
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c12, a4, bp1, c12
|
|
kusano |
2b45e8 |
LOAD_A a8, OFFSET_7, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c13, a1, bp2, c13
|
|
kusano |
2b45e8 |
vspltw bp1, b2, 0
|
|
kusano |
2b45e8 |
vmaddfp c14, a2, bp2, c14
|
|
kusano |
2b45e8 |
addi AO, AO, 32 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c15, a3, bp2, c15
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c16, a4, bp2, c16
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, a5, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b2, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a6, bp1, c02
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c03, a7, bp1, c03
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c04, a8, bp1, c04
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a5, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b2, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a6, bp2, c06
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c07, a7, bp2, c07
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c08, a8, bp2, c08
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, a5, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b2, 3
|
|
kusano |
2b45e8 |
vmaddfp c10, a6, bp1, c10
|
|
kusano |
2b45e8 |
LOAD_B b2, OFFSET_3, BO
|
|
kusano |
2b45e8 |
vmaddfp c11, a7, bp1, c11
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c12, a8, bp1, c12
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c13, a5, bp2, c13
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
vmaddfp c14, a6, bp2, c14
|
|
kusano |
2b45e8 |
addi BO, BO, 8 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c15, a7, bp2, c15
|
|
kusano |
2b45e8 |
LOAD_A a5, OFFSET_4, AO
|
|
kusano |
2b45e8 |
vmaddfp c16, a8, bp2, c16
|
|
kusano |
2b45e8 |
bdnz+ LL(12)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(15):
|
|
kusano |
2b45e8 |
lvx swap, OFFSET_0, SP
|
|
kusano |
2b45e8 |
lvx neg, OFFSET_1, SP
|
|
kusano |
2b45e8 |
lvx alpha_r, OFFSET_2, SP
|
|
kusano |
2b45e8 |
lvx alpha_i, OFFSET_3, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble+ LL(18)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(16):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c03, a3, bp1, c03
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c04, a4, bp1, c04
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c07, a3, bp2, c07
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
vmaddfp c08, a4, bp2, c08
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, a1, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c10, a2, bp1, c10
|
|
kusano |
2b45e8 |
addi AO, AO, 16 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c11, a3, bp1, c11
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c12, a4, bp1, c12
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c13, a1, bp2, c13
|
|
kusano |
2b45e8 |
vmaddfp c14, a2, bp2, c14
|
|
kusano |
2b45e8 |
vmaddfp c15, a3, bp2, c15
|
|
kusano |
2b45e8 |
vmaddfp c16, a4, bp2, c16
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(18):
|
|
kusano |
2b45e8 |
vxor VZERO, VZERO, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c05, c05, swap
|
|
kusano |
2b45e8 |
vperm c06, c06, c06, swap
|
|
kusano |
2b45e8 |
vperm c07, c07, c07, swap
|
|
kusano |
2b45e8 |
vperm c08, c08, c08, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c13, c13, c13, swap
|
|
kusano |
2b45e8 |
vperm c14, c14, c14, swap
|
|
kusano |
2b45e8 |
vperm c15, c15, c15, swap
|
|
kusano |
2b45e8 |
vperm c16, c16, c16, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c05, c05, neg
|
|
kusano |
2b45e8 |
vxor c06, c06, neg
|
|
kusano |
2b45e8 |
vxor c07, c07, neg
|
|
kusano |
2b45e8 |
vxor c08, c08, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c13, c13, neg
|
|
kusano |
2b45e8 |
vxor c14, c14, neg
|
|
kusano |
2b45e8 |
vxor c15, c15, neg
|
|
kusano |
2b45e8 |
vxor c16, c16, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c05
|
|
kusano |
2b45e8 |
vaddfp c02, c02, c06
|
|
kusano |
2b45e8 |
vaddfp c03, c03, c07
|
|
kusano |
2b45e8 |
vaddfp c04, c04, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c09, c09, c13
|
|
kusano |
2b45e8 |
vaddfp c10, c10, c14
|
|
kusano |
2b45e8 |
vaddfp c11, c11, c15
|
|
kusano |
2b45e8 |
vaddfp c12, c12, c16
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c01, c01, swap
|
|
kusano |
2b45e8 |
vperm c06, c02, c02, swap
|
|
kusano |
2b45e8 |
vperm c07, c03, c03, swap
|
|
kusano |
2b45e8 |
vperm c08, c04, c04, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c13, c09, c09, swap
|
|
kusano |
2b45e8 |
vperm c14, c10, c10, swap
|
|
kusano |
2b45e8 |
vperm c15, c11, c11, swap
|
|
kusano |
2b45e8 |
vperm c16, c12, c12, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_r, c01, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_r, c02, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c03, alpha_r, c03, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c04, alpha_r, c04, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_i, c05, c01
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_i, c06, c02
|
|
kusano |
2b45e8 |
vmaddfp c03, alpha_i, c07, c03
|
|
kusano |
2b45e8 |
vmaddfp c04, alpha_i, c08, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, alpha_r, c09, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c10, alpha_r, c10, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c11, alpha_r, c11, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c12, alpha_r, c12, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, alpha_i, c13, c09
|
|
kusano |
2b45e8 |
vmaddfp c10, alpha_i, c14, c10
|
|
kusano |
2b45e8 |
vmaddfp c11, alpha_i, c15, c11
|
|
kusano |
2b45e8 |
vmaddfp c12, alpha_i, c16, c12
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
lvx C3, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
lvx C4, OFFSET_3, CO1
|
|
kusano |
2b45e8 |
lvx C5, OFFSET_4, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT1, 0, CO1
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT2, 0, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c01, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c01, c01, c02, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c02, c02, c03, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c03, c03, c04, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c04, c04, VZERO, PERMRSHIFT1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c01, c01, C2
|
|
kusano |
2b45e8 |
vaddfp c02, c02, C3
|
|
kusano |
2b45e8 |
vaddfp c03, c03, C4
|
|
kusano |
2b45e8 |
vaddfp c04, c04, C5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
stvx c01, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
stvx c02, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
stvx c03, OFFSET_3, CO1
|
|
kusano |
2b45e8 |
stvx c04, OFFSET_4, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO2
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO2
|
|
kusano |
2b45e8 |
lvx C3, OFFSET_2, CO2
|
|
kusano |
2b45e8 |
lvx C4, OFFSET_3, CO2
|
|
kusano |
2b45e8 |
lvx C5, OFFSET_4, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c09, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c09, c09, c10, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c10, c10, c11, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c11, c11, c12, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c12, c12, VZERO, PERMRSHIFT2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c09, c09, C2
|
|
kusano |
2b45e8 |
vaddfp c10, c10, C3
|
|
kusano |
2b45e8 |
vaddfp c11, c11, C4
|
|
kusano |
2b45e8 |
vaddfp c12, c12, C5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO2
|
|
kusano |
2b45e8 |
stvx c09, OFFSET_1, CO2
|
|
kusano |
2b45e8 |
stvx c10, OFFSET_2, CO2
|
|
kusano |
2b45e8 |
stvx c11, OFFSET_3, CO2
|
|
kusano |
2b45e8 |
stvx c12, OFFSET_4, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi CO1, CO1, 16 * SIZE
|
|
kusano |
2b45e8 |
addi CO2, CO2, 16 * SIZE
|
|
kusano |
2b45e8 |
addic. I, I, -1
|
|
kusano |
2b45e8 |
bgt+ LL(11)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(20):
|
|
kusano |
2b45e8 |
andi. I, M, 4
|
|
kusano |
2b45e8 |
ble LL(30)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c01, c01, c01
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vxor c02, c02, c02
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
vxor c05, c05, c05
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
vxor c06, c06, c06
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
vxor c09, c09, c09
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, B
|
|
kusano |
2b45e8 |
vxor c10, c10, c10
|
|
kusano |
2b45e8 |
LOAD_B b2, OFFSET_1, B
|
|
kusano |
2b45e8 |
vxor c13, c13, c13
|
|
kusano |
2b45e8 |
vxor c14, c14, c14
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(25)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(22):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
addi AO, AO, 16 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
addi BO, BO, 8 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, a1, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, BO
|
|
kusano |
2b45e8 |
vmaddfp c10, a2, bp1, c10
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c13, a1, bp2, c13
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vspltw bp1, b2, 0
|
|
kusano |
2b45e8 |
vmaddfp c14, a2, bp2, c14
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, a3, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b2, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a4, bp1, c02
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a3, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b2, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a4, bp2, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, a3, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b2, 3
|
|
kusano |
2b45e8 |
LOAD_B b2, OFFSET_1, BO
|
|
kusano |
2b45e8 |
vmaddfp c10, a4, bp1, c10
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c13, a3, bp2, c13
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
vmaddfp c14, a4, bp2, c14
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
bdnz LL(22)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(25):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble+ LL(28)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(26):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, a1, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c10, a2, bp1, c10
|
|
kusano |
2b45e8 |
addi AO, AO, 8 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c13, a1, bp2, c13
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c14, a2, bp2, c14
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(28):
|
|
kusano |
2b45e8 |
vxor VZERO, VZERO, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx swap, OFFSET_0, SP
|
|
kusano |
2b45e8 |
lvx neg, OFFSET_1, SP
|
|
kusano |
2b45e8 |
lvx alpha_r, OFFSET_2, SP
|
|
kusano |
2b45e8 |
lvx alpha_i, OFFSET_3, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c05, c05, swap
|
|
kusano |
2b45e8 |
vperm c06, c06, c06, swap
|
|
kusano |
2b45e8 |
vperm c13, c13, c13, swap
|
|
kusano |
2b45e8 |
vperm c14, c14, c14, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c05, c05, neg
|
|
kusano |
2b45e8 |
vxor c06, c06, neg
|
|
kusano |
2b45e8 |
vxor c13, c13, neg
|
|
kusano |
2b45e8 |
vxor c14, c14, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c05
|
|
kusano |
2b45e8 |
vaddfp c02, c02, c06
|
|
kusano |
2b45e8 |
vaddfp c09, c09, c13
|
|
kusano |
2b45e8 |
vaddfp c10, c10, c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c01, c01, swap
|
|
kusano |
2b45e8 |
vperm c06, c02, c02, swap
|
|
kusano |
2b45e8 |
vperm c13, c09, c09, swap
|
|
kusano |
2b45e8 |
vperm c14, c10, c10, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_r, c01, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_r, c02, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_i, c05, c01
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_i, c06, c02
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, alpha_r, c09, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c10, alpha_r, c10, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c09, alpha_i, c13, c09
|
|
kusano |
2b45e8 |
vmaddfp c10, alpha_i, c14, c10
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
lvx C3, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT1, 0, CO1
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT2, 0, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c01, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c01, c01, c02, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c02, c02, VZERO, PERMRSHIFT1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c01, c01, C2
|
|
kusano |
2b45e8 |
vaddfp c02, c02, C3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
stvx c01, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
stvx c02, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO2
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO2
|
|
kusano |
2b45e8 |
lvx C3, OFFSET_2, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c09, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c09, c09, c10, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c10, c10, VZERO, PERMRSHIFT2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c09, c09, C2
|
|
kusano |
2b45e8 |
vaddfp c10, c10, C3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO2
|
|
kusano |
2b45e8 |
stvx c09, OFFSET_1, CO2
|
|
kusano |
2b45e8 |
stvx c10, OFFSET_2, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi CO1, CO1, 8 * SIZE
|
|
kusano |
2b45e8 |
addi CO2, CO2, 8 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(30): M & 2 remainder tile of the current two-column pass.
 * Skipped (branch to LL(40)) when bit 1 of M is clear.  The tile zeroes
 * c01, c02, c05, c06, c09, c10, c13 and c14, preloads a1/a2 from AO and
 * b1/b2 from B, runs LL(32) K >> 1 times plus LL(36) once when K is odd,
 * and writes the result through CO1 and CO2 before advancing both by
 * 4 * SIZE.
 */
LL(30):
|
|
kusano |
2b45e8 |
andi. I, M, 2
|
|
kusano |
2b45e8 |
/* andi. cannot produce a negative value, so "ble" is taken exactly when the tested bit is 0. */
ble LL(40)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c01, c01, c01
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vxor c02, c02, c02
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
vxor c05, c05, c05
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, B
|
|
kusano |
2b45e8 |
vxor c06, c06, c06
|
|
kusano |
2b45e8 |
LOAD_B b2, OFFSET_1, B
|
|
kusano |
2b45e8 |
vxor c09, c09, c09
|
|
kusano |
2b45e8 |
vxor c10, c10, c10
|
|
kusano |
2b45e8 |
vxor c13, c13, c13
|
|
kusano |
2b45e8 |
vxor c14, c14, c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(35)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(32): unrolled K loop of the M & 2 tile; CTR was set to K >> 1.
 * a1 is multiplied by each of the four words of b1 in turn (splatted
 * alternately into bp1/bp2) and accumulated into c01, c05, c09, c13;
 * a2 is handled the same way against b2 into c02, c06, c10, c14.
 * AO and BO each move forward by 8 * SIZE and a1, a2, b1, b2 are
 * reloaded for the next pass, whose first bp1 splat is issued just
 * before the bdnz.
 */
LL(32):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
addi AO, AO, 8 * SIZE
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
addi BO, BO, 8 * SIZE
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c09, a1, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c13, a1, bp2, c13
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vspltw bp1, b2, 0
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, BO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
vspltw bp2, b2, 1
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
vspltw bp1, b2, 2
|
|
kusano |
2b45e8 |
vmaddfp c10, a2, bp1, c10
|
|
kusano |
2b45e8 |
vspltw bp2, b2, 3
|
|
kusano |
2b45e8 |
LOAD_B b2, OFFSET_1, BO
|
|
kusano |
2b45e8 |
vmaddfp c14, a2, bp2, c14
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
bdnz LL(32)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(35):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble+ LL(38)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(36):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c09, a1, bp1, c09
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c13, a1, bp2, c13
|
|
kusano |
2b45e8 |
addi AO, AO, 4 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(38): finish the M & 2 tile.
 *  1. Fold the second accumulator set into the first
 *     (c01 += c02, c05 += c06, c09 += c10, c13 += c14).
 *  2. Reload swap, neg, alpha_r and alpha_i from OFFSET_0..OFFSET_3 off
 *     SP, then merge c05 into c01 and c13 into c09 as
 *     cXX += vperm(cYY, swap) ^ neg.
 *  3. Scale: cXX = alpha_r * cXX + alpha_i * vperm(cXX, swap).
 *  4. Shift the result with the lvsr-generated permute (zero filled from
 *     VZERO), add it into the two vectors loaded at OFFSET_0/OFFSET_1 of
 *     CO1 (for c01) and CO2 (for c09), and store both vectors back.
 * NOTE(review): the contents of swap/neg are set up outside this view;
 * presumably swap exchanges the words of each real/imaginary pair and neg
 * carries the sign pattern for the selected conjugation mode -- confirm.
 */
LL(38):
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c02
|
|
kusano |
2b45e8 |
vaddfp c05, c05, c06
|
|
kusano |
2b45e8 |
vaddfp c09, c09, c10
|
|
kusano |
2b45e8 |
vaddfp c13, c13, c14
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor VZERO, VZERO, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx swap, OFFSET_0, SP
|
|
kusano |
2b45e8 |
lvx neg, OFFSET_1, SP
|
|
kusano |
2b45e8 |
lvx alpha_r, OFFSET_2, SP
|
|
kusano |
2b45e8 |
lvx alpha_i, OFFSET_3, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c05, c05, swap
|
|
kusano |
2b45e8 |
vperm c13, c13, c13, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c05, c05, neg
|
|
kusano |
2b45e8 |
vxor c13, c13, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c05
|
|
kusano |
2b45e8 |
vaddfp c09, c09, c13
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c01, c01, swap
|
|
kusano |
2b45e8 |
vperm c13, c09, c09, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_r, c01, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_i, c05, c01
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c09, alpha_r, c09, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c09, alpha_i, c13, c09
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT1, 0, CO1
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT2, 0, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c01, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c01, c01, VZERO, PERMRSHIFT1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c01, c01, C2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
stvx c01, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO2
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c09, PERMRSHIFT2
|
|
kusano |
2b45e8 |
vperm c09, c09, VZERO, PERMRSHIFT2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c09, c09, C2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO2
|
|
kusano |
2b45e8 |
stvx c09, OFFSET_1, CO2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi CO1, CO1, 4 * SIZE
|
|
kusano |
2b45e8 |
addi CO2, CO2, 4 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(40): M & 1 remainder tile of the two-column pass, done on the scalar
 * FPU.  Skipped (branch to LL(49)) when bit 0 of M is clear.
 * f8/f9 hold two consecutive values from AO, f10..f13 four consecutive
 * values from BO, and f0..f7 (all initialised from the FZERO slot on the
 * stack) accumulate the eight pairwise products.
 */
LL(40):
|
|
kusano |
2b45e8 |
andi. I, M, 1
|
|
kusano |
2b45e8 |
ble LL(49)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f9, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f10, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f11, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f12, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f13, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lfs f0, FZERO(SP)
|
|
kusano |
2b45e8 |
fmr f1, f0
|
|
kusano |
2b45e8 |
fmr f2, f0
|
|
kusano |
2b45e8 |
fmr f3, f0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmr f4, f0
|
|
kusano |
2b45e8 |
fmr f5, f0
|
|
kusano |
2b45e8 |
fmr f6, f0
|
|
kusano |
2b45e8 |
fmr f7, f0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(45)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(42): scalar K loop, two K steps per pass (CTR = K >> 1).
 * Step one uses the operands already in f8..f13; step two reloads them
 * from 2..3 * SIZE(AO) and 4..7 * SIZE(BO).  Operands for the next pass
 * are then fetched from 4..5 * SIZE(AO) and 8..11 * SIZE(BO) before AO and
 * BO advance by 4 * SIZE and 8 * SIZE, so the loop always enters with
 * f8..f13 primed.
 */
LL(42):
|
|
kusano |
2b45e8 |
fmadd f0, f8, f10, f0
|
|
kusano |
2b45e8 |
fmadd f2, f8, f11, f2
|
|
kusano |
2b45e8 |
fmadd f4, f8, f12, f4
|
|
kusano |
2b45e8 |
fmadd f6, f8, f13, f6
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f1, f9, f10, f1
|
|
kusano |
2b45e8 |
fmadd f3, f9, f11, f3
|
|
kusano |
2b45e8 |
fmadd f5, f9, f12, f5
|
|
kusano |
2b45e8 |
fmadd f7, f9, f13, f7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f9, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f10, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f11, 5 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f12, 6 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f13, 7 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f0, f8, f10, f0
|
|
kusano |
2b45e8 |
fmadd f2, f8, f11, f2
|
|
kusano |
2b45e8 |
fmadd f4, f8, f12, f4
|
|
kusano |
2b45e8 |
fmadd f6, f8, f13, f6
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f1, f9, f10, f1
|
|
kusano |
2b45e8 |
fmadd f3, f9, f11, f3
|
|
kusano |
2b45e8 |
fmadd f5, f9, f12, f5
|
|
kusano |
2b45e8 |
fmadd f7, f9, f13, f7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f9, 5 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f10, 8 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f11, 9 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f12, 10 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f13, 11 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi AO, AO, 4 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 8 * SIZE
|
|
kusano |
2b45e8 |
bdnz LL(42)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(45):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble LL(48)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(46):
|
|
kusano |
2b45e8 |
fmadd f0, f8, f10, f0
|
|
kusano |
2b45e8 |
fmadd f2, f8, f11, f2
|
|
kusano |
2b45e8 |
fmadd f4, f8, f12, f4
|
|
kusano |
2b45e8 |
fmadd f6, f8, f13, f6
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f1, f9, f10, f1
|
|
kusano |
2b45e8 |
fmadd f3, f9, f11, f3
|
|
kusano |
2b45e8 |
fmadd f5, f9, f12, f5
|
|
kusano |
2b45e8 |
fmadd f7, f9, f13, f7
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi AO, AO, 2 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(48): turn the eight partial sums into the (f0, f1) and (f4, f5)
 * result pairs, then update C.
 * On entry f0/f1 = sum of A word 0 / word 1 times B word 0, f2/f3 the
 * same against B word 1, f4/f5 against B word 2 and f6/f7 against B
 * word 3.  The preprocessor block below selects the add/subtract pattern
 * per transpose/conjugate variant; its final #else branch (RR, RC, CR, CC)
 * repeats the first branch instruction for instruction, and those variants
 * differ only afterwards, where the alpha update arranges fnmsub/fmadd
 * differently under its own #if.
 * The pairs are then combined with f12/f13 (loaded from ALPHA_R + 0(SP)
 * and ALPHA_I + 4(SP)) and accumulated into the two values at CO1 and the
 * two values at CO2.
 */
LL(48):
|
|
kusano |
2b45e8 |
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
|
kusano |
2b45e8 |
fsub f0, f0, f3
|
|
kusano |
2b45e8 |
fadd f1, f1, f2
|
|
kusano |
2b45e8 |
fsub f4, f4, f7
|
|
kusano |
2b45e8 |
fadd f5, f5, f6
|
|
kusano |
2b45e8 |
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
|
kusano |
2b45e8 |
fadd f0, f0, f3
|
|
kusano |
2b45e8 |
fsub f1, f1, f2
|
|
kusano |
2b45e8 |
fadd f4, f4, f7
|
|
kusano |
2b45e8 |
fsub f5, f5, f6
|
|
kusano |
2b45e8 |
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
|
kusano |
2b45e8 |
fadd f0, f0, f3
|
|
kusano |
2b45e8 |
fsub f1, f2, f1
|
|
kusano |
2b45e8 |
fadd f4, f4, f7
|
|
kusano |
2b45e8 |
fsub f5, f6, f5
|
|
kusano |
2b45e8 |
#else /* RR, RC, CR, CC */
|
|
kusano |
2b45e8 |
fsub f0, f0, f3
|
|
kusano |
2b45e8 |
fadd f1, f1, f2
|
|
kusano |
2b45e8 |
fsub f4, f4, f7
|
|
kusano |
2b45e8 |
fadd f5, f5, f6
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
LFD f9, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
LFD f10, 0 * SIZE(CO2)
|
|
kusano |
2b45e8 |
LFD f11, 1 * SIZE(CO2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lfs f12, ALPHA_R + 0(SP)
|
|
kusano |
2b45e8 |
lfs f13, ALPHA_I + 4(SP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
|
kusano |
2b45e8 |
fmadd f8, f12, f0, f8
|
|
kusano |
2b45e8 |
fnmsub f9, f12, f1, f9
|
|
kusano |
2b45e8 |
fmadd f10, f12, f4, f10
|
|
kusano |
2b45e8 |
fnmsub f11, f12, f5, f11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f8, f13, f1, f8
|
|
kusano |
2b45e8 |
fmadd f9, f13, f0, f9
|
|
kusano |
2b45e8 |
fmadd f10, f13, f5, f10
|
|
kusano |
2b45e8 |
fmadd f11, f13, f4, f11
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
fmadd f8, f12, f0, f8
|
|
kusano |
2b45e8 |
fmadd f9, f12, f1, f9
|
|
kusano |
2b45e8 |
fmadd f10, f12, f4, f10
|
|
kusano |
2b45e8 |
fmadd f11, f12, f5, f11
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fnmsub f8, f13, f1, f8
|
|
kusano |
2b45e8 |
fmadd f9, f13, f0, f9
|
|
kusano |
2b45e8 |
fnmsub f10, f13, f5, f10
|
|
kusano |
2b45e8 |
fmadd f11, f13, f4, f11
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
STFD f8, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
STFD f9, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
STFD f10, 0 * SIZE(CO2)
|
|
kusano |
2b45e8 |
STFD f11, 1 * SIZE(CO2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(49): end of one two-column pass.  B is moved up to BO, J is
 * decremented, and control returns to LL(01) while J stays positive.
 * This label is also reached directly when the M & 1 tile is skipped, in
 * which case BO still holds the value left by an earlier tile.  The M & 2
 * and M & 1 tiles above both restart BO from B and advance it by
 * 4 * SIZE per K step; the wider tiles are outside this view, so
 * presumably they leave BO at the same position -- confirm.
 */
LL(49):
|
|
kusano |
2b45e8 |
mr B, BO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addic. J, J, -1
|
|
kusano |
2b45e8 |
bgt LL(01)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(50): trailing single-column pass, entered only when bit 0 of N is
 * set; otherwise control goes straight to the exit at LL(999).
 * Right after this test CO1 restarts from C and AO from A, and
 * I = M >> 3 counts the tiles handled at LL(61); LL(70) takes over when
 * that count is zero.
 */
LL(50):
|
|
kusano |
2b45e8 |
andi. J, N, 1
|
|
kusano |
2b45e8 |
ble LL(999)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr CO1, C
|
|
kusano |
2b45e8 |
mr AO, A
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. I, M, 3
|
|
kusano |
2b45e8 |
ble LL(70)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(61): one full-width tile of the single-column pass (I = M >> 3 of
 * them).  c01..c08 are cleared, b1 is loaded from B and a1..a4 from
 * OFFSET_0..OFFSET_3 of AO, BO restarts from B, and CTR is set to K >> 1
 * for the loop at LL(62).
 * NOTE(review): the setup also issues "dcbtst CO2, PREC", but CO2 is not
 * assigned anywhere in this single-column pass; it looks like a leftover
 * from the two-column code.  dcbtst is only a cache hint -- confirm
 * whether it should be dropped.
 */
LL(61):
|
|
kusano |
2b45e8 |
vxor c01, c01, c01
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, B
|
|
kusano |
2b45e8 |
vxor c02, c02, c02
|
|
kusano |
2b45e8 |
vxor c03, c03, c03
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vxor c04, c04, c04
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
vxor c05, c05, c05
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
vxor c06, c06, c06
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
vxor c07, c07, c07
|
|
kusano |
2b45e8 |
vxor c08, c08, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
dcbtst CO1, PREC
|
|
kusano |
2b45e8 |
dcbtst CO2, PREC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(65)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(62): unrolled K loop of the full-width single-column tile.
 * a5..a8 (OFFSET_4..OFFSET_7 of AO) carry the second K step.  Words 0 and
 * 1 of b1 are splatted for a1..a4 (into c01..c04 and c05..c08), words 2
 * and 3 for a5..a8 (into the same accumulators).  b1 is then reloaded from
 * OFFSET_1 of BO, AO advances by 32 * SIZE, BO by 4 * SIZE, and a1..a4 are
 * reloaded for the next pass.
 */
LL(62):
|
|
kusano |
2b45e8 |
LOAD_A a5, OFFSET_4, AO
|
|
kusano |
2b45e8 |
LOAD_A a6, OFFSET_5, AO
|
|
kusano |
2b45e8 |
LOAD_A a7, OFFSET_6, AO
|
|
kusano |
2b45e8 |
LOAD_A a8, OFFSET_7, AO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
vmaddfp c03, a3, bp1, c03
|
|
kusano |
2b45e8 |
vmaddfp c04, a4, bp1, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
vmaddfp c07, a3, bp2, c07
|
|
kusano |
2b45e8 |
vmaddfp c08, a4, bp2, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, a5, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c02, a6, bp1, c02
|
|
kusano |
2b45e8 |
vmaddfp c03, a7, bp1, c03
|
|
kusano |
2b45e8 |
vmaddfp c04, a8, bp1, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_1, BO
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a5, bp2, c05
|
|
kusano |
2b45e8 |
vmaddfp c06, a6, bp2, c06
|
|
kusano |
2b45e8 |
vmaddfp c07, a7, bp2, c07
|
|
kusano |
2b45e8 |
vmaddfp c08, a8, bp2, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi AO, AO, 32 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
bdnz LL(62)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(65):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble+ LL(68)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(66):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
addi AO, AO, 16 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c03, a3, bp1, c03
|
|
kusano |
2b45e8 |
addi BO, BO, 2 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c04, a4, bp1, c04
|
|
kusano |
2b45e8 |
nop
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
vmaddfp c07, a3, bp2, c07
|
|
kusano |
2b45e8 |
vmaddfp c08, a4, bp2, c08
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(68): write-back of the full-width single-column tile.
 * c05..c08 are permuted with swap, XORed with neg and added into
 * c01..c04; each of c01..c04 is then scaled as
 * alpha_r * c + alpha_i * vperm(c, swap).  The four result vectors are
 * shifted by the lvsr permute for CO1, which spreads them over five
 * vectors (OFFSET_0..OFFSET_4 of CO1); those five are loaded, summed with
 * the shifted results and stored back.  CO1 then advances by 16 * SIZE
 * and the tile loop repeats from LL(61) while I stays positive.
 */
LL(68):
|
|
kusano |
2b45e8 |
vxor VZERO, VZERO, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx swap, OFFSET_0, SP
|
|
kusano |
2b45e8 |
lvx neg, OFFSET_1, SP
|
|
kusano |
2b45e8 |
lvx alpha_r, OFFSET_2, SP
|
|
kusano |
2b45e8 |
lvx alpha_i, OFFSET_3, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c05, c05, swap
|
|
kusano |
2b45e8 |
vperm c06, c06, c06, swap
|
|
kusano |
2b45e8 |
vperm c07, c07, c07, swap
|
|
kusano |
2b45e8 |
vperm c08, c08, c08, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c05, c05, neg
|
|
kusano |
2b45e8 |
vxor c06, c06, neg
|
|
kusano |
2b45e8 |
vxor c07, c07, neg
|
|
kusano |
2b45e8 |
vxor c08, c08, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c05
|
|
kusano |
2b45e8 |
vaddfp c02, c02, c06
|
|
kusano |
2b45e8 |
vaddfp c03, c03, c07
|
|
kusano |
2b45e8 |
vaddfp c04, c04, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c01, c01, swap
|
|
kusano |
2b45e8 |
vperm c06, c02, c02, swap
|
|
kusano |
2b45e8 |
vperm c07, c03, c03, swap
|
|
kusano |
2b45e8 |
vperm c08, c04, c04, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_r, c01, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_r, c02, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c03, alpha_r, c03, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c04, alpha_r, c04, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_i, c05, c01
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_i, c06, c02
|
|
kusano |
2b45e8 |
vmaddfp c03, alpha_i, c07, c03
|
|
kusano |
2b45e8 |
vmaddfp c04, alpha_i, c08, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
lvx C3, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
lvx C4, OFFSET_3, CO1
|
|
kusano |
2b45e8 |
lvx C5, OFFSET_4, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT1, 0, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c01, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c01, c01, c02, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c02, c02, c03, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c03, c03, c04, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c04, c04, VZERO, PERMRSHIFT1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c01, c01, C2
|
|
kusano |
2b45e8 |
vaddfp c02, c02, C3
|
|
kusano |
2b45e8 |
vaddfp c03, c03, C4
|
|
kusano |
2b45e8 |
vaddfp c04, c04, C5
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
stvx c01, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
stvx c02, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
stvx c03, OFFSET_3, CO1
|
|
kusano |
2b45e8 |
stvx c04, OFFSET_4, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi CO1, CO1, 16 * SIZE
|
|
kusano |
2b45e8 |
addic. I, I, -1
|
|
kusano |
2b45e8 |
bgt+ LL(61)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(70): M & 4 tile of the single-column pass; skipped to LL(80) when
 * bit 2 of M is clear.  a1/a2 feed the first K step of each LL(72) pass
 * (into c01/c02 and c05/c06) and a3/a4 the second (into c03/c04 and
 * c07/c08); LL(78) adds the two halves together before the write-back,
 * after which CO1 advances by 8 * SIZE.
 */
LL(70):
|
|
kusano |
2b45e8 |
andi. I, M, 4
|
|
kusano |
2b45e8 |
ble LL(80)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c01, c01, c01
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, B
|
|
kusano |
2b45e8 |
vxor c02, c02, c02
|
|
kusano |
2b45e8 |
vxor c03, c03, c03
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
vxor c04, c04, c04
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
vxor c05, c05, c05
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
vxor c06, c06, c06
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
vxor c07, c07, c07
|
|
kusano |
2b45e8 |
vxor c08, c08, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(75)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(72):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c03, a3, bp1, c03
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
vmaddfp c04, a4, bp1, c04
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_1, BO
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c07, a3, bp2, c07
|
|
kusano |
2b45e8 |
vmaddfp c08, a4, bp2, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi AO, AO, 16 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
LOAD_A a3, OFFSET_2, AO
|
|
kusano |
2b45e8 |
LOAD_A a4, OFFSET_3, AO
|
|
kusano |
2b45e8 |
bdnz LL(72)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(75):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble+ LL(78)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(76):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
addi AO, AO, 8 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
addi BO, BO, 2 * SIZE
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(78):
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c03
|
|
kusano |
2b45e8 |
vaddfp c02, c02, c04
|
|
kusano |
2b45e8 |
vaddfp c05, c05, c07
|
|
kusano |
2b45e8 |
vaddfp c06, c06, c08
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor VZERO, VZERO, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx swap, OFFSET_0, SP
|
|
kusano |
2b45e8 |
lvx neg, OFFSET_1, SP
|
|
kusano |
2b45e8 |
lvx alpha_r, OFFSET_2, SP
|
|
kusano |
2b45e8 |
lvx alpha_i, OFFSET_3, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c05, c05, swap
|
|
kusano |
2b45e8 |
vperm c06, c06, c06, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c05, c05, neg
|
|
kusano |
2b45e8 |
vxor c06, c06, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c05
|
|
kusano |
2b45e8 |
vaddfp c02, c02, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c01, c01, swap
|
|
kusano |
2b45e8 |
vperm c06, c02, c02, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_r, c01, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_r, c02, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_i, c05, c01
|
|
kusano |
2b45e8 |
vmaddfp c02, alpha_i, c06, c02
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
lvx C3, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT1, 0, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c01, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c01, c01, c02, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c02, c02, VZERO, PERMRSHIFT1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c01, c01, C2
|
|
kusano |
2b45e8 |
vaddfp c02, c02, C3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
stvx c01, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
stvx c02, OFFSET_2, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi CO1, CO1, 8 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(80): M & 2 tile of the single-column pass; skipped to LL(90) when
 * bit 1 of M is clear.  Only c01, c02, c05 and c06 are cleared here:
 * LL(82) accumulates a1 times words 0/1 of b1 into c01/c05 and a2 times
 * words 2/3 into c02/c06, and LL(86) adds one more a1 step when K is odd.
 */
LL(80):
|
|
kusano |
2b45e8 |
andi. I, M, 2
|
|
kusano |
2b45e8 |
ble LL(90)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c01, c01, c01
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_0, B
|
|
kusano |
2b45e8 |
vxor c02, c02, c02
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
vxor c05, c05, c05
|
|
kusano |
2b45e8 |
vxor c06, c06, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(85)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(82):
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c02, a2, bp1, c02
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOAD_B b1, OFFSET_1, BO
|
|
kusano |
2b45e8 |
vspltw bp1, b1, 0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c06, a2, bp2, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi AO, AO, 8 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOAD_A a1, OFFSET_0, AO
|
|
kusano |
2b45e8 |
LOAD_A a2, OFFSET_1, AO
|
|
kusano |
2b45e8 |
bdnz LL(82)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(85):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble+ LL(88)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(86):
|
|
kusano |
2b45e8 |
vspltw bp2, b1, 1
|
|
kusano |
2b45e8 |
vmaddfp c01, a1, bp1, c01
|
|
kusano |
2b45e8 |
vmaddfp c05, a1, bp2, c05
|
|
kusano |
2b45e8 |
addi AO, AO, 4 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 2 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(88): finish the M & 2 tile of the single-column pass.
 * Fold the second-K-step accumulators into the first (c01 += c02,
 * c05 += c06).  Only these four registers are cleared and accumulated on
 * this path; c09/c10/c13/c14 belong to the two-column tiles and are never
 * read below, so they are deliberately not summed here.
 */
LL(88):
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c02
|
|
kusano |
2b45e8 |
vaddfp c05, c05, c06
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor VZERO, VZERO, VZERO
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx swap, OFFSET_0, SP
|
|
kusano |
2b45e8 |
lvx neg, OFFSET_1, SP
|
|
kusano |
2b45e8 |
lvx alpha_r, OFFSET_2, SP
|
|
kusano |
2b45e8 |
lvx alpha_i, OFFSET_3, SP
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c05, c05, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vxor c05, c05, neg
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c01, c01, c05
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c05, c01, c01, swap
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_r, c01, VZERO
|
|
kusano |
2b45e8 |
vmaddfp c01, alpha_i, c05, c01
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvx C1, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
lvx C2, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lvsr PERMRSHIFT1, 0, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vperm c00, VZERO, c01, PERMRSHIFT1
|
|
kusano |
2b45e8 |
vperm c01, c01, VZERO, PERMRSHIFT1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
vaddfp c00, c00, C1
|
|
kusano |
2b45e8 |
vaddfp c01, c01, C2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
stvx c00, OFFSET_0, CO1
|
|
kusano |
2b45e8 |
stvx c01, OFFSET_1, CO1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi CO1, CO1, 4 * SIZE
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(90): M & 1 tile of the single-column pass on the scalar FPU; when
 * bit 0 of M is clear the routine exits through LL(999).
 * f8/f9 hold two consecutive values from AO and f0..f3 the four partial
 * sums.  f10/f11 and f12/f13 are preloaded from 0..1 and 2..3 * SIZE(BO);
 * the loop at LL(92) uses the first pair for one K step and the second
 * pair for the following one.
 */
LL(90):
|
|
kusano |
2b45e8 |
andi. I, M, 1
|
|
kusano |
2b45e8 |
ble LL(999)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mr BO, B
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 0 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f9, 1 * SIZE(AO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f10, 0 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f11, 1 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f12, 2 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f13, 3 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lfs f0, FZERO(SP)
|
|
kusano |
2b45e8 |
fmr f1, f0
|
|
kusano |
2b45e8 |
fmr f2, f0
|
|
kusano |
2b45e8 |
fmr f3, f0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
srawi. r0, K, 1
|
|
kusano |
2b45e8 |
mtspr CTR, r0
|
|
kusano |
2b45e8 |
ble LL(95)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(92): scalar K loop, two K steps per pass (CTR = K >> 1).
 * The first step multiplies f8/f9 by f10/f11; f8/f9 are then reloaded
 * from 2..3 * SIZE(AO) and the second step multiplies them by f12/f13.
 * Each B pair is refilled once it is no longer needed (f10/f11 from
 * 4..5 * SIZE(BO), f12/f13 from 6..7 * SIZE(BO)), f8/f9 are refilled from
 * 4..5 * SIZE(AO), and AO and BO both advance by 4 * SIZE.
 */
LL(92):
|
|
kusano |
2b45e8 |
fmadd f0, f8, f10, f0
|
|
kusano |
2b45e8 |
fmadd f2, f8, f11, f2
|
|
kusano |
2b45e8 |
fmadd f1, f9, f10, f1
|
|
kusano |
2b45e8 |
fmadd f3, f9, f11, f3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 2 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f9, 3 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f10, 4 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f11, 5 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f0, f8, f12, f0
|
|
kusano |
2b45e8 |
fmadd f2, f8, f13, f2
|
|
kusano |
2b45e8 |
fmadd f1, f9, f12, f1
|
|
kusano |
2b45e8 |
fmadd f3, f9, f13, f3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 4 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f9, 5 * SIZE(AO)
|
|
kusano |
2b45e8 |
LFD f12, 6 * SIZE(BO)
|
|
kusano |
2b45e8 |
LFD f13, 7 * SIZE(BO)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
addi AO, AO, 4 * SIZE
|
|
kusano |
2b45e8 |
addi BO, BO, 4 * SIZE
|
|
kusano |
2b45e8 |
bdnz LL(92)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(95):
|
|
kusano |
2b45e8 |
andi. r0, K, 1
|
|
kusano |
2b45e8 |
ble LL(98)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LL(96):
|
|
kusano |
2b45e8 |
fmadd f0, f8, f10, f0
|
|
kusano |
2b45e8 |
fmadd f2, f8, f11, f2
|
|
kusano |
2b45e8 |
fmadd f1, f9, f10, f1
|
|
kusano |
2b45e8 |
fmadd f3, f9, f11, f3
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * LL(98): combine the four partial sums into one (f0, f1) pair and update
 * the two values at CO1.  f0/f1 hold A word 0 / word 1 times the first B
 * word of each K step, f2/f3 the same against the second B word.  The
 * preprocessor block below picks the add/subtract pattern per variant (its
 * final #else branch repeats the first one); the pair is then combined
 * with f12/f13, loaded from ALPHA_R + 0(SP) and ALPHA_I + 4(SP), using an
 * fnmsub/fmadd arrangement selected by a second #if.
 */
LL(98):
|
|
kusano |
2b45e8 |
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
|
kusano |
2b45e8 |
fsub f0, f0, f3
|
|
kusano |
2b45e8 |
fadd f1, f1, f2
|
|
kusano |
2b45e8 |
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
|
kusano |
2b45e8 |
fadd f0, f0, f3
|
|
kusano |
2b45e8 |
fsub f1, f1, f2
|
|
kusano |
2b45e8 |
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
|
kusano |
2b45e8 |
fadd f0, f0, f3
|
|
kusano |
2b45e8 |
fsub f1, f2, f1
|
|
kusano |
2b45e8 |
#else /* RR, RC, CR, CC */
|
|
kusano |
2b45e8 |
fsub f0, f0, f3
|
|
kusano |
2b45e8 |
fadd f1, f1, f2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LFD f8, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
LFD f9, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lfs f12, ALPHA_R + 0(SP)
|
|
kusano |
2b45e8 |
lfs f13, ALPHA_I + 4(SP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
|
kusano |
2b45e8 |
fmadd f8, f12, f0, f8
|
|
kusano |
2b45e8 |
fnmsub f9, f12, f1, f9
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fmadd f8, f13, f1, f8
|
|
kusano |
2b45e8 |
fmadd f9, f13, f0, f9
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
fmadd f8, f12, f0, f8
|
|
kusano |
2b45e8 |
fmadd f9, f12, f1, f9
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fnmsub f8, f13, f1, f8
|
|
kusano |
2b45e8 |
fmadd f9, f13, f0, f9
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
STFD f8, 0 * SIZE(CO1)
|
|
kusano |
2b45e8 |
STFD f9, 1 * SIZE(CO1)
|
|
kusano |
2b45e8 |
.align 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* LL(999): exit path. Restores v20-v31, VRsave and r14-r31 from the   */
/* stack frame, releases STACKSIZE bytes and returns.                  */
LL(999):
|
|
kusano |
2b45e8 |
/* Copy STACK into SP. NOTE(review): STACK presumably holds the frame  */
/* base saved by the prologue (not visible here) -- confirm.           */
mr SP, STACK
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Reload v20..v31 from consecutive 16-byte slots at SP+0 .. SP+176.   */
/* lvx takes a register index operand, so each byte offset is placed   */
/* in r0 first.                                                        */
li r0, 0 * 16
|
|
kusano |
2b45e8 |
lvx v20, SP, r0
|
|
kusano |
2b45e8 |
li r0, 1 * 16
|
|
kusano |
2b45e8 |
lvx v21, SP, r0
|
|
kusano |
2b45e8 |
li r0, 2 * 16
|
|
kusano |
2b45e8 |
lvx v22, SP, r0
|
|
kusano |
2b45e8 |
li r0, 3 * 16
|
|
kusano |
2b45e8 |
lvx v23, SP, r0
|
|
kusano |
2b45e8 |
li r0, 4 * 16
|
|
kusano |
2b45e8 |
lvx v24, SP, r0
|
|
kusano |
2b45e8 |
li r0, 5 * 16
|
|
kusano |
2b45e8 |
lvx v25, SP, r0
|
|
kusano |
2b45e8 |
li r0, 6 * 16
|
|
kusano |
2b45e8 |
lvx v26, SP, r0
|
|
kusano |
2b45e8 |
li r0, 7 * 16
|
|
kusano |
2b45e8 |
lvx v27, SP, r0
|
|
kusano |
2b45e8 |
li r0, 8 * 16
|
|
kusano |
2b45e8 |
lvx v28, SP, r0
|
|
kusano |
2b45e8 |
li r0, 9 * 16
|
|
kusano |
2b45e8 |
lvx v29, SP, r0
|
|
kusano |
2b45e8 |
li r0, 10 * 16
|
|
kusano |
2b45e8 |
lvx v30, SP, r0
|
|
kusano |
2b45e8 |
li r0, 11 * 16
|
|
kusano |
2b45e8 |
lvx v31, SP, r0
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Write VREG back into the VRsave special-purpose register.           */
/* NOTE(review): VREG presumably holds the caller's VRsave value       */
/* captured in the prologue -- confirm.                                */
mtspr VRsave, VREG
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Reload r31..r14 from the frame, starting at SP+192 (just past the   */
/* twelve 16-byte vector slots): 8-byte slots up to SP+328 in 64-bit   */
/* builds, 4-byte slots up to SP+260 otherwise.                        */
#ifdef __64BIT__
|
|
kusano |
2b45e8 |
ld r31, 192(SP)
|
|
kusano |
2b45e8 |
ld r30, 200(SP)
|
|
kusano |
2b45e8 |
ld r29, 208(SP)
|
|
kusano |
2b45e8 |
ld r28, 216(SP)
|
|
kusano |
2b45e8 |
ld r27, 224(SP)
|
|
kusano |
2b45e8 |
ld r26, 232(SP)
|
|
kusano |
2b45e8 |
ld r25, 240(SP)
|
|
kusano |
2b45e8 |
ld r24, 248(SP)
|
|
kusano |
2b45e8 |
ld r23, 256(SP)
|
|
kusano |
2b45e8 |
ld r22, 264(SP)
|
|
kusano |
2b45e8 |
ld r21, 272(SP)
|
|
kusano |
2b45e8 |
ld r20, 280(SP)
|
|
kusano |
2b45e8 |
ld r19, 288(SP)
|
|
kusano |
2b45e8 |
ld r18, 296(SP)
|
|
kusano |
2b45e8 |
ld r17, 304(SP)
|
|
kusano |
2b45e8 |
ld r16, 312(SP)
|
|
kusano |
2b45e8 |
ld r15, 320(SP)
|
|
kusano |
2b45e8 |
ld r14, 328(SP)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
lwz r31, 192(SP)
|
|
kusano |
2b45e8 |
lwz r30, 196(SP)
|
|
kusano |
2b45e8 |
lwz r29, 200(SP)
|
|
kusano |
2b45e8 |
lwz r28, 204(SP)
|
|
kusano |
2b45e8 |
lwz r27, 208(SP)
|
|
kusano |
2b45e8 |
lwz r26, 212(SP)
|
|
kusano |
2b45e8 |
lwz r25, 216(SP)
|
|
kusano |
2b45e8 |
lwz r24, 220(SP)
|
|
kusano |
2b45e8 |
lwz r23, 224(SP)
|
|
kusano |
2b45e8 |
lwz r22, 228(SP)
|
|
kusano |
2b45e8 |
lwz r21, 232(SP)
|
|
kusano |
2b45e8 |
lwz r20, 236(SP)
|
|
kusano |
2b45e8 |
lwz r19, 240(SP)
|
|
kusano |
2b45e8 |
lwz r18, 244(SP)
|
|
kusano |
2b45e8 |
lwz r17, 248(SP)
|
|
kusano |
2b45e8 |
lwz r16, 252(SP)
|
|
kusano |
2b45e8 |
lwz r15, 256(SP)
|
|
kusano |
2b45e8 |
lwz r14, 260(SP)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Release the STACKSIZE-byte frame and return to the caller. */
addi SP, SP, STACKSIZE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blr
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
EPILOGUE
|
|
kusano |
2b45e8 |
#endif
|