|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* This implementation is completely wrong. I'll rewrite this */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef SYMCOPY_H
|
|
kusano |
2b45e8 |
#define SYMCOPY_H
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * SYMCOPY_L - expand the lower triangle of a symmetric matrix into a full
 * dense square matrix.
 *
 *   m   : order of the matrix.
 *   a   : source, column-major with column stride lda.  Only elements on or
 *         below the diagonal are read.
 *   lda : column stride of a, counted in FLOAT elements.
 *   b   : destination, column-major with column stride m.  All m * m
 *         elements are written: the lower triangle is copied from a and the
 *         strict upper triangle is filled with the mirrored values.
 *
 * The matrix is walked two columns at a time.  For each column pair the
 * bb pointers walk down the two destination columns (the direct copy) while
 * the cc pointers walk across the two matching destination rows (the
 * mirrored copy).
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a12;
  FLOAT a21, a22;

  b1 = b;
  b2 = b;

  for (js = 0; js < m; js += 2){

    /* a, b1 and b2 all sit on the diagonal element (js, js). */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda + 2;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 1 * m;
    b1 += 2 * m + 2;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 1 * m;
    b2 += 2 * m + 2;

    if (m - js >= 2){

      /* 2x2 diagonal block: the element above the diagonal is never read,
         the sub-diagonal element a21 is stored on both sides. */
      a11 = aa1[0];
      a21 = aa1[1];

      a22 = aa2[1];

      bb1[0] = a11;
      bb1[1] = a21;
      bb2[0] = a21;
      bb2[1] = a22;

      aa1 += 2;
      aa2 += 2;
      bb1 += 2;
      bb2 += 2;

      cc1 += 2 * m;
      cc2 += 2 * m;

      /* Full 2x2 blocks below the diagonal block. */
      for (is = ((m - js - 2) >> 1); is > 0; is --){
        a11 = aa1[0];
        a21 = aa1[1];
        a12 = aa2[0];
        a22 = aa2[1];

        aa1 += 2;
        aa2 += 2;

        /* Direct copy into the lower triangle. */
        bb1[0] = a11;
        bb1[1] = a21;
        bb2[0] = a12;
        bb2[1] = a22;

        /* Transposed copy into the upper triangle. */
        cc1[0] = a11;
        cc1[1] = a12;
        cc2[0] = a21;
        cc2[1] = a22;

        bb1 += 2;
        bb2 += 2;

        cc1 += 2 * m;
        cc2 += 2 * m;
      }

      /* One leftover row when the remaining row count is odd. */
      is = ((m - js - 2) & 1);

      if (is == 1){
        a11 = aa1[0];
        a12 = aa2[0];

        bb1[0] = a11;
        bb2[0] = a12;

        cc1[0] = a11;
        cc1[1] = a12;
      }
    }

    /* Odd m: the last column consists of its diagonal element only. */
    if (m - js == 1){
      a11 = aa1[0];
      bb1[0] = a11;
    }

  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * SYMCOPY_U - expand the upper triangle of a symmetric matrix into a full
 * dense square matrix.
 *
 *   m   : order of the matrix.
 *   a   : source, column-major with column stride lda.  Only elements on or
 *         above the diagonal are read.
 *   lda : column stride of a, counted in FLOAT elements.
 *   b   : destination, column-major with column stride m.  All m * m
 *         elements are written: the upper triangle is copied from a and the
 *         strict lower triangle is filled with the mirrored values.
 *
 * The matrix is walked two columns at a time.  For each column pair the
 * bb pointers walk down the two destination columns from row 0 to the
 * diagonal (the direct copy) while the cc pointers walk across the two
 * matching destination rows from column 0 (the mirrored copy).
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a12;
  FLOAT a21, a22;

  b1 = b;
  b2 = b;

  for (js = 0; js < m; js += 2){

    /* a and b1 sit on the top of column js; b2 sits on row js of column 0. */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 1 * m;
    b1 += 2 * m;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 1 * m;
    b2 += 2;

    if (m - js >= 2){

      /* Full 2x2 blocks above the diagonal block. */
      for (is = 0; is < js; is += 2){

        a11 = aa1[0];
        a21 = aa1[1];
        a12 = aa2[0];
        a22 = aa2[1];

        aa1 += 2;
        aa2 += 2;

        /* Direct copy into the upper triangle. */
        bb1[0] = a11;
        bb1[1] = a21;
        bb2[0] = a12;
        bb2[1] = a22;

        /* Transposed copy into the lower triangle. */
        cc1[0] = a11;
        cc1[1] = a12;
        cc2[0] = a21;
        cc2[1] = a22;

        bb1 += 2;
        bb2 += 2;

        cc1 += 2 * m;
        cc2 += 2 * m;
      }

      /* 2x2 diagonal block: the element below the diagonal is never read,
         the super-diagonal element a12 is stored on both sides. */
      a11 = aa1[0];

      a12 = aa2[0];
      a22 = aa2[1];

      bb1[0] = a11;
      bb1[1] = a12;
      bb2[0] = a12;
      bb2[1] = a22;
    }

    /* Odd m: a single trailing column remains. */
    if (m - js == 1){
      for (is = 0; is < js; is += 2){

        a11 = aa1[0];
        a21 = aa1[1];
        aa1 += 2;

        bb1[0] = a11;
        bb1[1] = a21;

        /* Mirror the two values into the last row. */
        cc1[0] = a11;
        cc2[0] = a21;

        bb1 += 2;

        cc1 += 2 * m;
        cc2 += 2 * m;
      }

      /* Final diagonal element. */
      a11 = aa1[0];
      bb1[0] = a11;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZSYMCOPY_L - expand the lower triangle of a symmetric matrix whose
 * elements each occupy two consecutive FLOATs (presumably real and
 * imaginary part) into a full dense square matrix.
 *
 *   m   : order of the matrix, in two-FLOAT elements.
 *   a   : source, column-major.  Only elements on or below the diagonal are
 *         read.
 *   lda : column stride of a in two-FLOAT elements (doubled internally to
 *         obtain a stride in FLOATs).
 *   b   : destination, column-major with a column stride of 2 * m FLOATs.
 *         The lower triangle is copied from a and the strict upper triangle
 *         receives the mirrored values, unchanged (no conjugation).
 *
 * The matrix is walked two columns at a time.  The bb pointers walk down
 * the two destination columns, the cc pointers walk across the two matching
 * destination rows.
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a21, a31, a41;
  FLOAT a12, a22, a32, a42;

  b1 = b;
  b2 = b;

  /* From here on lda is a stride in FLOATs. */
  lda *= 2;

  for (js = 0; js < m; js += 2){

    /* a, b1 and b2 all sit on the diagonal element (js, js). */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda + 4;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 2 * m;
    b1 += 4 * m + 4;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 2 * m;
    b2 += 4 * m + 4;

    if (m - js >= 2){

      /* 2x2 diagonal block: (a11,a21) and (a12,a22) are the two diagonal
         elements, (a31,a41) is the sub-diagonal element stored on both
         sides.  The element above the diagonal is never read. */
      a11 = aa1[0];
      a21 = aa1[1];
      a31 = aa1[2];
      a41 = aa1[3];

      a12 = aa2[2];
      a22 = aa2[3];

      bb1[0] = a11;
      bb1[1] = a21;
      bb1[2] = a31;
      bb1[3] = a41;

      bb2[0] = a31;
      bb2[1] = a41;
      bb2[2] = a12;
      bb2[3] = a22;

      aa1 += 4;
      aa2 += 4;
      bb1 += 4;
      bb2 += 4;

      cc1 += 4 * m;
      cc2 += 4 * m;

      /* Full 2x2 blocks below the diagonal block. */
      for (is = ((m - js - 2) >> 1); is > 0; is --){
        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];

        a12 = aa2[0];
        a22 = aa2[1];
        a32 = aa2[2];
        a42 = aa2[3];

        aa1 += 4;
        aa2 += 4;

        /* Direct copy into the lower triangle. */
        bb1[0] = a11;
        bb1[1] = a21;
        bb1[2] = a31;
        bb1[3] = a41;

        bb2[0] = a12;
        bb2[1] = a22;
        bb2[2] = a32;
        bb2[3] = a42;

        /* Transposed copy into the upper triangle. */
        cc1[0] = a11;
        cc1[1] = a21;
        cc1[2] = a12;
        cc1[3] = a22;

        cc2[0] = a31;
        cc2[1] = a41;
        cc2[2] = a32;
        cc2[3] = a42;

        bb1 += 4;
        bb2 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* Odd m: one leftover row below the processed blocks. */
      if (m & 1){
        a11 = aa1[0];
        a21 = aa1[1];
        a12 = aa2[0];
        a22 = aa2[1];

        bb1[0] = a11;
        bb1[1] = a21;
        bb2[0] = a12;
        bb2[1] = a22;

        cc1[0] = a11;
        cc1[1] = a21;
        cc1[2] = a12;
        cc1[3] = a22;
      }
    }

    /* Odd m: the last column consists of its diagonal element only. */
    if (m - js == 1){
      a11 = aa1[0];
      a21 = aa1[1];
      bb1[0] = a11;
      bb1[1] = a21;
    }

  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZSYMCOPY_U - expand the upper triangle of a symmetric matrix whose
 * elements each occupy two consecutive FLOATs (presumably real and
 * imaginary part) into a full dense square matrix.
 *
 *   m   : order of the matrix, in two-FLOAT elements.
 *   a   : source, column-major.  Only elements on or above the diagonal are
 *         read.
 *   lda : column stride of a in two-FLOAT elements (doubled internally to
 *         obtain a stride in FLOATs).
 *   b   : destination, column-major with a column stride of 2 * m FLOATs.
 *         The upper triangle is copied from a and the strict lower triangle
 *         receives the mirrored values, unchanged (no conjugation).
 *
 * The matrix is walked two columns at a time.  The bb pointers walk down
 * the two destination columns from row 0 to the diagonal, the cc pointers
 * walk across the two matching destination rows from column 0.
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a21, a31, a41;
  FLOAT a12, a22, a32, a42;

  b1 = b;
  b2 = b;

  /* From here on lda is a stride in FLOATs. */
  lda *= 2;

  for (js = 0; js < m; js += 2){

    /* a and b1 sit on the top of column js; b2 sits on row js of column 0. */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 2 * m;
    b1 += 4 * m;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 2 * m;
    b2 += 4;

    if (m - js >= 2){

      /* Full 2x2 blocks above the diagonal block. */
      for (is = 0; is < js; is += 2){

        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];

        a12 = aa2[0];
        a22 = aa2[1];
        a32 = aa2[2];
        a42 = aa2[3];

        aa1 += 4;
        aa2 += 4;

        /* Direct copy into the upper triangle. */
        bb1[0] = a11;
        bb1[1] = a21;
        bb1[2] = a31;
        bb1[3] = a41;

        bb2[0] = a12;
        bb2[1] = a22;
        bb2[2] = a32;
        bb2[3] = a42;

        /* Transposed copy into the lower triangle. */
        cc1[0] = a11;
        cc1[1] = a21;
        cc1[2] = a12;
        cc1[3] = a22;

        cc2[0] = a31;
        cc2[1] = a41;
        cc2[2] = a32;
        cc2[3] = a42;

        bb1 += 4;
        bb2 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* 2x2 diagonal block: (a11,a21) and (a32,a42) are the two diagonal
         elements, (a12,a22) is the super-diagonal element stored on both
         sides.  The element below the diagonal is never read. */
      a11 = aa1[0];
      a21 = aa1[1];

      a12 = aa2[0];
      a22 = aa2[1];
      a32 = aa2[2];
      a42 = aa2[3];

      bb1[0] = a11;
      bb1[1] = a21;
      bb1[2] = a12;
      bb1[3] = a22;

      bb2[0] = a12;
      bb2[1] = a22;
      bb2[2] = a32;
      bb2[3] = a42;
    }

    /* Odd m: a single trailing column remains. */
    if (m - js == 1){
      for (is = 0; is < js; is += 2){

        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];
        aa1 += 4;

        bb1[0] = a11;
        bb1[1] = a21;
        bb1[2] = a31;
        bb1[3] = a41;

        /* Mirror the two elements into the last row. */
        cc1[0] = a11;
        cc1[1] = a21;
        cc2[0] = a31;
        cc2[1] = a41;

        bb1 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* Final diagonal element. */
      a11 = aa1[0];
      a21 = aa1[1];
      bb1[0] = a11;
      bb1[1] = a21;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZHEMCOPY_L - expand the lower triangle of a Hermitian matrix into a full
 * dense square matrix.
 *
 * Each element occupies two consecutive FLOATs; the second FLOAT of a pair
 * is treated as the imaginary part (it is negated when mirrored and forced
 * to zero on the diagonal).
 *
 *   m   : order of the matrix, in two-FLOAT elements.
 *   a   : source, column-major.  Only elements on or below the diagonal are
 *         read, and only the first FLOAT of each diagonal element is read.
 *   lda : column stride of a in two-FLOAT elements (doubled internally to
 *         obtain a stride in FLOATs).
 *   b   : destination, column-major with a column stride of 2 * m FLOATs.
 *         The strict lower triangle is copied from a unchanged, the strict
 *         upper triangle receives the mirrored values with the second FLOAT
 *         negated, and diagonal elements get a second FLOAT of zero.
 *
 * The matrix is walked two columns at a time.  The bb pointers walk down
 * the two destination columns, the cc pointers walk across the two matching
 * destination rows.
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a21, a31, a41;
  FLOAT a12, a22, a32, a42;

  b1 = b;
  b2 = b;

  /* From here on lda is a stride in FLOATs. */
  lda *= 2;

  for (js = 0; js < m; js += 2){

    /* a, b1 and b2 all sit on the diagonal element (js, js). */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda + 4;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 2 * m;
    b1 += 4 * m + 4;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 2 * m;
    b2 += 4 * m + 4;

    if (m - js >= 2){

      /* 2x2 diagonal block: a11 and a12 are the first FLOATs of the two
         diagonal elements, (a31,a41) is the sub-diagonal element. */
      a11 = aa1[0];
      a31 = aa1[2];
      a41 = aa1[3];

      a12 = aa2[2];

      bb1[0] = a11;
      bb1[1] = 0.;
      bb1[2] = a31;
      bb1[3] = a41;

      /* Mirrored sub-diagonal element, second FLOAT negated. */
      bb2[0] = a31;
      bb2[1] = -a41;
      bb2[2] = a12;
      bb2[3] = 0.;

      aa1 += 4;
      aa2 += 4;
      bb1 += 4;
      bb2 += 4;

      cc1 += 4 * m;
      cc2 += 4 * m;

      /* Full 2x2 blocks below the diagonal block. */
      for (is = ((m - js - 2) >> 1); is > 0; is --){
        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];

        a12 = aa2[0];
        a22 = aa2[1];
        a32 = aa2[2];
        a42 = aa2[3];

        aa1 += 4;
        aa2 += 4;

        /* Direct copy into the lower triangle. */
        bb1[0] = a11;
        bb1[1] = a21;
        bb1[2] = a31;
        bb1[3] = a41;

        bb2[0] = a12;
        bb2[1] = a22;
        bb2[2] = a32;
        bb2[3] = a42;

        /* Transposed copy with negated second FLOATs into the upper
           triangle. */
        cc1[0] = a11;
        cc1[1] = -a21;
        cc1[2] = a12;
        cc1[3] = -a22;

        cc2[0] = a31;
        cc2[1] = -a41;
        cc2[2] = a32;
        cc2[3] = -a42;

        bb1 += 4;
        bb2 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* Odd m: one leftover row below the processed blocks. */
      if (m & 1){
        a11 = aa1[0];
        a21 = aa1[1];
        a12 = aa2[0];
        a22 = aa2[1];

        bb1[0] = a11;
        bb1[1] = a21;
        bb2[0] = a12;
        bb2[1] = a22;

        cc1[0] = a11;
        cc1[1] = -a21;
        cc1[2] = a12;
        cc1[3] = -a22;
      }
    }

    /* Odd m: the last column consists of its diagonal element only. */
    if (m - js == 1){
      a11 = aa1[0];
      bb1[0] = a11;
      bb1[1] = 0.;
    }

  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZHEMCOPY_U - expand the upper triangle of a Hermitian matrix into a full
 * dense square matrix.
 *
 * Each element occupies two consecutive FLOATs; the second FLOAT of a pair
 * is treated as the imaginary part (it is negated when mirrored and forced
 * to zero on the diagonal).
 *
 *   m   : order of the matrix, in two-FLOAT elements.
 *   a   : source, column-major.  Only elements on or above the diagonal are
 *         read, and only the first FLOAT of each diagonal element is read.
 *   lda : column stride of a in two-FLOAT elements (doubled internally to
 *         obtain a stride in FLOATs).
 *   b   : destination, column-major with a column stride of 2 * m FLOATs.
 *         The strict upper triangle is copied from a unchanged, the strict
 *         lower triangle receives the mirrored values with the second FLOAT
 *         negated, and diagonal elements get a second FLOAT of zero.
 *
 * The matrix is walked two columns at a time.  The bb pointers walk down
 * the two destination columns from row 0 to the diagonal, the cc pointers
 * walk across the two matching destination rows from column 0.
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a21, a31, a41;
  FLOAT a12, a22, a32, a42;

  b1 = b;
  b2 = b;

  /* From here on lda is a stride in FLOATs. */
  lda *= 2;

  for (js = 0; js < m; js += 2){

    /* a and b1 sit on the top of column js; b2 sits on row js of column 0. */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 2 * m;
    b1 += 4 * m;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 2 * m;
    b2 += 4;

    if (m - js >= 2){

      /* Full 2x2 blocks above the diagonal block. */
      for (is = 0; is < js; is += 2){

        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];

        a12 = aa2[0];
        a22 = aa2[1];
        a32 = aa2[2];
        a42 = aa2[3];

        aa1 += 4;
        aa2 += 4;

        /* Direct copy into the upper triangle. */
        bb1[0] = a11;
        bb1[1] = a21;
        bb1[2] = a31;
        bb1[3] = a41;

        bb2[0] = a12;
        bb2[1] = a22;
        bb2[2] = a32;
        bb2[3] = a42;

        /* Transposed copy with negated second FLOATs into the lower
           triangle. */
        cc1[0] = a11;
        cc1[1] = -a21;
        cc1[2] = a12;
        cc1[3] = -a22;

        cc2[0] = a31;
        cc2[1] = -a41;
        cc2[2] = a32;
        cc2[3] = -a42;

        bb1 += 4;
        bb2 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* 2x2 diagonal block: a11 and a32 are the first FLOATs of the two
         diagonal elements, (a12,a22) is the super-diagonal element. */
      a11 = aa1[0];

      a12 = aa2[0];
      a22 = aa2[1];
      a32 = aa2[2];

      /* Mirrored super-diagonal element, second FLOAT negated. */
      bb1[0] = a11;
      bb1[1] = 0.;
      bb1[2] = a12;
      bb1[3] = -a22;

      bb2[0] = a12;
      bb2[1] = a22;
      bb2[2] = a32;
      bb2[3] = 0.;
    }

    /* Odd m: a single trailing column remains. */
    if (m - js == 1){
      for (is = 0; is < js; is += 2){

        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];
        aa1 += 4;

        bb1[0] = a11;
        bb1[1] = a21;
        bb1[2] = a31;
        bb1[3] = a41;

        /* Mirror the two elements into the last row, second FLOAT
           negated. */
        cc1[0] = a11;
        cc1[1] = -a21;
        cc2[0] = a31;
        cc2[1] = -a41;

        bb1 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* Final diagonal element. */
      a11 = aa1[0];
      bb1[0] = a11;
      bb1[1] = 0.;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZHEMCOPY_M - expand the lower triangle of a Hermitian matrix into a full
 * dense square matrix, with the sign of the second FLOAT flipped in the
 * copied triangle rather than in the mirrored one.
 *
 * Each element occupies two consecutive FLOATs; the second FLOAT of a pair
 * is treated as the imaginary part (it is negated in the lower triangle of
 * the output and forced to zero on the diagonal).
 *
 *   m   : order of the matrix, in two-FLOAT elements.
 *   a   : source, column-major.  Only elements on or below the diagonal are
 *         read, and only the first FLOAT of each diagonal element is read.
 *   lda : column stride of a in two-FLOAT elements (doubled internally to
 *         obtain a stride in FLOATs).
 *   b   : destination, column-major with a column stride of 2 * m FLOATs.
 *         The strict lower triangle receives the source values with the
 *         second FLOAT negated, the strict upper triangle receives the
 *         mirrored source values unchanged, and diagonal elements get a
 *         second FLOAT of zero.
 *
 * The matrix is walked two columns at a time.  The bb pointers walk down
 * the two destination columns, the cc pointers walk across the two matching
 * destination rows.
 *
 * NOTE(review): a and b are assumed not to overlap -- confirm with callers.
 */
static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;

  FLOAT *aa1, *aa2;
  FLOAT *b1, *b2;
  FLOAT *bb1, *bb2;
  FLOAT *cc1, *cc2;
  FLOAT a11, a21, a31, a41;
  FLOAT a12, a22, a32, a42;

  b1 = b;
  b2 = b;

  /* From here on lda is a stride in FLOATs. */
  lda *= 2;

  for (js = 0; js < m; js += 2){

    /* a, b1 and b2 all sit on the diagonal element (js, js). */
    aa1 = a + 0 * lda;
    aa2 = a + 1 * lda;
    a  += 2 * lda + 4;

    bb1 = b1 + 0 * m;
    bb2 = b1 + 2 * m;
    b1 += 4 * m + 4;

    cc1 = b2 + 0 * m;
    cc2 = b2 + 2 * m;
    b2 += 4 * m + 4;

    if (m - js >= 2){

      /* 2x2 diagonal block: a11 and a12 are the first FLOATs of the two
         diagonal elements, (a31,a41) is the sub-diagonal element. */
      a11 = aa1[0];
      a31 = aa1[2];
      a41 = aa1[3];

      a12 = aa2[2];

      /* Sub-diagonal slot gets the negated second FLOAT ... */
      bb1[0] = a11;
      bb1[1] = 0.;
      bb1[2] = a31;
      bb1[3] = -a41;

      /* ... and the super-diagonal slot gets the value as read. */
      bb2[0] = a31;
      bb2[1] = a41;
      bb2[2] = a12;
      bb2[3] = 0.;

      aa1 += 4;
      aa2 += 4;
      bb1 += 4;
      bb2 += 4;

      cc1 += 4 * m;
      cc2 += 4 * m;

      /* Full 2x2 blocks below the diagonal block. */
      for (is = ((m - js - 2) >> 1); is > 0; is --){
        a11 = aa1[0];
        a21 = aa1[1];
        a31 = aa1[2];
        a41 = aa1[3];

        a12 = aa2[0];
        a22 = aa2[1];
        a32 = aa2[2];
        a42 = aa2[3];

        aa1 += 4;
        aa2 += 4;

        /* Lower triangle: copy with negated second FLOATs. */
        bb1[0] = a11;
        bb1[1] = -a21;
        bb1[2] = a31;
        bb1[3] = -a41;

        bb2[0] = a12;
        bb2[1] = -a22;
        bb2[2] = a32;
        bb2[3] = -a42;

        /* Upper triangle: transposed copy, values unchanged. */
        cc1[0] = a11;
        cc1[1] = a21;
        cc1[2] = a12;
        cc1[3] = a22;

        cc2[0] = a31;
        cc2[1] = a41;
        cc2[2] = a32;
        cc2[3] = a42;

        bb1 += 4;
        bb2 += 4;

        cc1 += 4 * m;
        cc2 += 4 * m;
      }

      /* Odd m: one leftover row below the processed blocks. */
      if (m & 1){
        a11 = aa1[0];
        a21 = aa1[1];
        a12 = aa2[0];
        a22 = aa2[1];

        bb1[0] = a11;
        bb1[1] = -a21;
        bb2[0] = a12;
        bb2[1] = -a22;

        cc1[0] = a11;
        cc1[1] = a21;
        cc1[2] = a12;
        cc1[3] = a22;
      }
    }

    /* Odd m: the last column consists of its diagonal element only. */
    if (m - js == 1){
      a11 = aa1[0];
      bb1[0] = a11;
      bb1[1] = 0.;
    }

  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZHEMCOPY_V - expand the upper triangle of `a` into the full m x m buffer `b`.
 *
 * Every matrix element occupies two consecutive FLOATs (presumably the real
 * and imaginary parts of a complex number).  Columns of `a` are `lda` elements
 * apart, columns of `b` are `m` elements apart.
 *
 * For each entry a(i, j) with i < j that is read:
 *   - b(i, j) receives the pair with its second component negated;
 *   - b(j, i) receives the pair exactly as read.
 * For diagonal entries only the first component is read and the second
 * component written to `b` is 0.
 *
 * Only entries on or above the diagonal of `a` are read.  Columns are handled
 * two at a time; a trailing single column is handled when m is odd.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in elements (pairs of FLOATs)
 *   b   : destination buffer, 2 * m * m FLOATs are addressed
 */
static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  const BLASLONG src_col = 2 * lda;  /* FLOATs between consecutive columns of a */
  const BLASLONG dst_col = 2 * m;    /* FLOATs between consecutive columns of b */
  BLASLONG is, js;
  FLOAT p_re, p_im, q_re, q_im;      /* column js    : rows is and is + 1 */
  FLOAT r_re, r_im, s_re, s_im;      /* column js + 1: rows is and is + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * src_col;  /* a(0, js) */
    FLOAT *dst0 = b + js * dst_col;  /* b(0, js) */
    FLOAT *mir0 = b + 2 * js;        /* b(js, 0): mirrored writes, even columns */
    FLOAT *mir1 = mir0 + dst_col;    /* b(js, 1): mirrored writes, odd columns */

    if (m - js >= 2){
      FLOAT *src1 = src0 + src_col;  /* a(0, js + 1) */
      FLOAT *dst1 = dst0 + dst_col;  /* b(0, js + 1) */

      /* 2 x 2 blocks strictly above the diagonal block. */
      for (is = 0; is < js; is += 2){
        p_re = src0[0];
        p_im = src0[1];
        q_re = src0[2];
        q_im = src0[3];

        r_re = src1[0];
        r_im = src1[1];
        s_re = src1[2];
        s_im = src1[3];

        /* Same position in b, second component negated. */
        dst0[0] =  p_re;
        dst0[1] = -p_im;
        dst0[2] =  q_re;
        dst0[3] = -q_im;

        dst1[0] =  r_re;
        dst1[1] = -r_im;
        dst1[2] =  s_re;
        dst1[3] = -s_im;

        /* Mirrored position in b, values as read. */
        mir0[0] = p_re;
        mir0[1] = p_im;
        mir0[2] = r_re;
        mir0[3] = r_im;

        mir1[0] = q_re;
        mir1[1] = q_im;
        mir1[2] = s_re;
        mir1[3] = s_im;

        src0 += 4;
        src1 += 4;
        dst0 += 4;
        dst1 += 4;
        mir0 += 2 * dst_col;
        mir1 += 2 * dst_col;
      }

      /* 2 x 2 diagonal block: a(js+1, js) is never read. */
      p_re = src0[0];   /* first component of a(js, js) */
      r_re = src1[0];   /* a(js, js + 1) */
      r_im = src1[1];
      s_re = src1[2];   /* first component of a(js + 1, js + 1) */

      dst0[0] = p_re;
      dst0[1] = 0.;
      dst0[2] = r_re;
      dst0[3] = r_im;

      dst1[0] =  r_re;
      dst1[1] = -r_im;
      dst1[2] =  s_re;
      dst1[3] = 0.;
    } else {
      /* m - js == 1: single trailing column. */
      for (is = 0; is < js; is += 2){
        p_re = src0[0];
        p_im = src0[1];
        q_re = src0[2];
        q_im = src0[3];

        dst0[0] =  p_re;
        dst0[1] = -p_im;
        dst0[2] =  q_re;
        dst0[3] = -q_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir1[0] = q_re;
        mir1[1] = q_im;

        src0 += 4;
        dst0 += 4;
        mir0 += 2 * dst_col;
        mir1 += 2 * dst_col;
      }

      p_re = src0[0];
      dst0[0] = p_re;
      dst0[1] = 0.;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * TRMCOPY_NL - expand the lower triangle of `a` into the full m x m buffer `b`.
 *
 * Columns of `a` are `lda` FLOATs apart, columns of `b` are `m` FLOATs apart.
 * Every entry a(i, j) with i >= j is copied to b(i, j) and, for i > j, also
 * to the mirrored position b(j, i).  Entries above the diagonal of `a` are
 * never read.  Columns are processed two at a time, starting at the diagonal;
 * a trailing single column is handled when m is odd.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in FLOATs
 *   b   : destination buffer, m * m FLOATs are addressed
 */
static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG js, left;
  FLOAT v00, v10;   /* column js    : rows i and i + 1 */
  FLOAT v01, v11;   /* column js + 1: rows i and i + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * lda + js;  /* a(js, js) */
    FLOAT *dst0 = b + js * m + js;    /* b(js, js) */

    if (m - js >= 2){
      FLOAT *src1 = src0 + lda;       /* a(js, js + 1) */
      FLOAT *dst1 = dst0 + m;         /* b(js, js + 1) */
      FLOAT *mir0 = dst0 + 2 * m;     /* b(js, js + 2): mirrored writes */
      FLOAT *mir1 = mir0 + m;         /* b(js, js + 3): mirrored writes */

      /* 2 x 2 diagonal block: a(js, js + 1) is never read. */
      v00 = src0[0];
      v10 = src0[1];
      v11 = src1[1];

      dst0[0] = v00;
      dst0[1] = v10;
      dst1[0] = v10;
      dst1[1] = v11;

      src0 += 2;
      src1 += 2;
      dst0 += 2;
      dst1 += 2;

      /* Full 2 x 2 blocks below the diagonal block. */
      for (left = (m - js - 2) >> 1; left > 0; left--){
        v00 = src0[0];
        v10 = src0[1];
        v01 = src1[0];
        v11 = src1[1];

        dst0[0] = v00;
        dst0[1] = v10;
        dst1[0] = v01;
        dst1[1] = v11;

        mir0[0] = v00;
        mir0[1] = v01;
        mir1[0] = v10;
        mir1[1] = v11;

        src0 += 2;
        src1 += 2;
        dst0 += 2;
        dst1 += 2;
        mir0 += 2 * m;
        mir1 += 2 * m;
      }

      /* One leftover row when the remaining height is odd. */
      if ((m - js - 2) & 1){
        v00 = src0[0];
        v01 = src1[0];

        dst0[0] = v00;
        dst1[0] = v01;

        mir0[0] = v00;
        mir0[1] = v01;
      }
    } else {
      /* m - js == 1: only the last diagonal entry remains. */
      dst0[0] = src0[0];
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * TRMCOPY_TL - expand the lower triangle of `a` into the full m x m buffer `b`.
 *
 * Columns of `a` are `lda` FLOATs apart, columns of `b` are `m` FLOATs apart.
 * Each entry a(i, j) with i >= j is written to b(i, j); off-diagonal entries
 * are additionally written to b(j, i).  The strict upper triangle of `a` is
 * never read.  Work proceeds in pairs of columns from the diagonal downwards,
 * with a single-column step at the end when m is odd.
 *
 * NOTE(review): the visible body performs the same operations as TRMCOPY_NL;
 * confirm with the callers whether that is intended.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in FLOATs
 *   b   : destination buffer, m * m FLOATs are addressed
 */
static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG js, left;
  FLOAT v00, v10;   /* column js    : rows i and i + 1 */
  FLOAT v01, v11;   /* column js + 1: rows i and i + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * lda + js;  /* a(js, js) */
    FLOAT *dst0 = b + js * m + js;    /* b(js, js) */

    if (m - js >= 2){
      FLOAT *src1 = src0 + lda;       /* a(js, js + 1) */
      FLOAT *dst1 = dst0 + m;         /* b(js, js + 1) */
      FLOAT *mir0 = dst0 + 2 * m;     /* b(js, js + 2): mirrored writes */
      FLOAT *mir1 = mir0 + m;         /* b(js, js + 3): mirrored writes */

      /* Diagonal 2 x 2 block; the entry above the diagonal is not read. */
      v00 = src0[0];
      v10 = src0[1];
      v11 = src1[1];

      dst0[0] = v00;
      dst0[1] = v10;
      dst1[0] = v10;
      dst1[1] = v11;

      src0 += 2;
      src1 += 2;
      dst0 += 2;
      dst1 += 2;

      /* Remaining full 2 x 2 blocks of this column pair. */
      for (left = (m - js - 2) >> 1; left > 0; left--){
        v00 = src0[0];
        v10 = src0[1];
        v01 = src1[0];
        v11 = src1[1];

        dst0[0] = v00;
        dst0[1] = v10;
        dst1[0] = v01;
        dst1[1] = v11;

        mir0[0] = v00;
        mir0[1] = v01;
        mir1[0] = v10;
        mir1[1] = v11;

        src0 += 2;
        src1 += 2;
        dst0 += 2;
        dst1 += 2;
        mir0 += 2 * m;
        mir1 += 2 * m;
      }

      /* Odd remaining height: one last row of the column pair. */
      if ((m - js - 2) & 1){
        v00 = src0[0];
        v01 = src1[0];

        dst0[0] = v00;
        dst1[0] = v01;

        mir0[0] = v00;
        mir0[1] = v01;
      }
    } else {
      /* m - js == 1: copy the final diagonal entry. */
      dst0[0] = src0[0];
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * TRMCOPY_NU - expand the upper triangle of `a` into the full m x m buffer `b`.
 *
 * Columns of `a` are `lda` FLOATs apart, columns of `b` are `m` FLOATs apart.
 * Every entry a(i, j) with i <= j is copied to b(i, j) and, for i < j, also
 * to the mirrored position b(j, i).  Entries below the diagonal of `a` are
 * never read.  Columns are processed two at a time from row 0 down to the
 * diagonal; a trailing single column is handled when m is odd.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in FLOATs
 *   b   : destination buffer, m * m FLOATs are addressed
 */
static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;
  FLOAT v00, v10;   /* column js    : rows is and is + 1 */
  FLOAT v01, v11;   /* column js + 1: rows is and is + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * lda;  /* a(0, js) */
    FLOAT *dst0 = b + js * m;    /* b(0, js) */
    FLOAT *mir0 = b + js;        /* b(js, 0): mirrored writes, even columns */
    FLOAT *mir1 = mir0 + m;      /* b(js, 1): mirrored writes, odd columns */

    if (m - js >= 2){
      FLOAT *src1 = src0 + lda;  /* a(0, js + 1) */
      FLOAT *dst1 = dst0 + m;    /* b(0, js + 1) */

      /* 2 x 2 blocks strictly above the diagonal block. */
      for (is = 0; is < js; is += 2){
        v00 = src0[0];
        v10 = src0[1];
        v01 = src1[0];
        v11 = src1[1];

        dst0[0] = v00;
        dst0[1] = v10;
        dst1[0] = v01;
        dst1[1] = v11;

        mir0[0] = v00;
        mir0[1] = v01;
        mir1[0] = v10;
        mir1[1] = v11;

        src0 += 2;
        src1 += 2;
        dst0 += 2;
        dst1 += 2;
        mir0 += 2 * m;
        mir1 += 2 * m;
      }

      /* 2 x 2 diagonal block: a(js + 1, js) is never read. */
      v00 = src0[0];
      v01 = src1[0];
      v11 = src1[1];

      dst0[0] = v00;
      dst0[1] = v01;
      dst1[0] = v01;
      dst1[1] = v11;
    } else {
      /* m - js == 1: single trailing column. */
      for (is = 0; is < js; is += 2){
        v00 = src0[0];
        v10 = src0[1];

        dst0[0] = v00;
        dst0[1] = v10;
        mir0[0] = v00;
        mir1[0] = v10;

        src0 += 2;
        dst0 += 2;
        mir0 += 2 * m;
        mir1 += 2 * m;
      }

      dst0[0] = src0[0];
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * TRMCOPY_TU - expand the upper triangle of `a` into the full m x m buffer `b`.
 *
 * Columns of `a` are `lda` FLOATs apart, columns of `b` are `m` FLOATs apart.
 * Each entry a(i, j) with i <= j is written to b(i, j); off-diagonal entries
 * are additionally written to b(j, i).  The strict lower triangle of `a` is
 * never read.  Work proceeds in pairs of columns, top row first, with a
 * single-column step at the end when m is odd.
 *
 * NOTE(review): the visible body performs the same operations as TRMCOPY_NU;
 * confirm with the callers whether that is intended.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in FLOATs
 *   b   : destination buffer, m * m FLOATs are addressed
 */
static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  BLASLONG is, js;
  FLOAT v00, v10;   /* column js    : rows is and is + 1 */
  FLOAT v01, v11;   /* column js + 1: rows is and is + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * lda;  /* a(0, js) */
    FLOAT *dst0 = b + js * m;    /* b(0, js) */
    FLOAT *mir0 = b + js;        /* b(js, 0): mirrored writes, even columns */
    FLOAT *mir1 = mir0 + m;      /* b(js, 1): mirrored writes, odd columns */

    if (m - js >= 2){
      FLOAT *src1 = src0 + lda;  /* a(0, js + 1) */
      FLOAT *dst1 = dst0 + m;    /* b(0, js + 1) */

      /* Blocks above the diagonal: straight copy plus mirrored copy. */
      for (is = 0; is < js; is += 2){
        v00 = src0[0];
        v10 = src0[1];
        v01 = src1[0];
        v11 = src1[1];

        dst0[0] = v00;
        dst0[1] = v10;
        dst1[0] = v01;
        dst1[1] = v11;

        mir0[0] = v00;
        mir0[1] = v01;
        mir1[0] = v10;
        mir1[1] = v11;

        src0 += 2;
        src1 += 2;
        dst0 += 2;
        dst1 += 2;
        mir0 += 2 * m;
        mir1 += 2 * m;
      }

      /* Diagonal 2 x 2 block; the entry below the diagonal is not read. */
      v00 = src0[0];
      v01 = src1[0];
      v11 = src1[1];

      dst0[0] = v00;
      dst0[1] = v01;
      dst1[0] = v01;
      dst1[1] = v11;
    } else {
      /* m - js == 1: last column on its own. */
      for (is = 0; is < js; is += 2){
        v00 = src0[0];
        v10 = src0[1];

        dst0[0] = v00;
        dst0[1] = v10;
        mir0[0] = v00;
        mir1[0] = v10;

        src0 += 2;
        dst0 += 2;
        mir0 += 2 * m;
        mir1 += 2 * m;
      }

      dst0[0] = src0[0];
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZTRMCOPY_NL - expand the lower triangle of `a` into the full m x m buffer
 * `b`, two FLOATs per element.
 *
 * Every matrix element occupies two consecutive FLOATs (presumably real and
 * imaginary parts).  Columns of `a` are `lda` elements apart, columns of `b`
 * are `m` elements apart.  Each entry a(i, j) with i >= j is copied unchanged
 * to b(i, j) and, for i > j, also to b(j, i); no component is negated.
 * Entries above the diagonal of `a` are never read.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in elements (pairs of FLOATs)
 *   b   : destination buffer, 2 * m * m FLOATs are addressed
 */
static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  const BLASLONG src_col = 2 * lda;  /* FLOATs between consecutive columns of a */
  const BLASLONG dst_col = 2 * m;    /* FLOATs between consecutive columns of b */
  BLASLONG js, left;
  FLOAT p_re, p_im, q_re, q_im;      /* column js    : rows i and i + 1 */
  FLOAT r_re, r_im, s_re, s_im;      /* column js + 1: rows i and i + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * src_col + 2 * js;  /* a(js, js) */
    FLOAT *dst0 = b + js * dst_col + 2 * js;  /* b(js, js) */

    if (m - js >= 2){
      FLOAT *src1 = src0 + src_col;      /* a(js, js + 1) */
      FLOAT *dst1 = dst0 + dst_col;      /* b(js, js + 1) */
      FLOAT *mir0 = dst0 + 2 * dst_col;  /* b(js, js + 2): mirrored writes */
      FLOAT *mir1 = mir0 + dst_col;      /* b(js, js + 3): mirrored writes */

      /* 2 x 2 diagonal block: a(js, js + 1) is never read. */
      p_re = src0[0];
      p_im = src0[1];
      q_re = src0[2];
      q_im = src0[3];

      s_re = src1[2];
      s_im = src1[3];

      dst0[0] = p_re;
      dst0[1] = p_im;
      dst0[2] = q_re;
      dst0[3] = q_im;

      dst1[0] = q_re;
      dst1[1] = q_im;
      dst1[2] = s_re;
      dst1[3] = s_im;

      src0 += 4;
      src1 += 4;
      dst0 += 4;
      dst1 += 4;

      /* Full 2 x 2 blocks below the diagonal block. */
      for (left = (m - js - 2) >> 1; left > 0; left--){
        p_re = src0[0];
        p_im = src0[1];
        q_re = src0[2];
        q_im = src0[3];

        r_re = src1[0];
        r_im = src1[1];
        s_re = src1[2];
        s_im = src1[3];

        dst0[0] = p_re;
        dst0[1] = p_im;
        dst0[2] = q_re;
        dst0[3] = q_im;

        dst1[0] = r_re;
        dst1[1] = r_im;
        dst1[2] = s_re;
        dst1[3] = s_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir0[2] = r_re;
        mir0[3] = r_im;

        mir1[0] = q_re;
        mir1[1] = q_im;
        mir1[2] = s_re;
        mir1[3] = s_im;

        src0 += 4;
        src1 += 4;
        dst0 += 4;
        dst1 += 4;
        mir0 += 2 * dst_col;
        mir1 += 2 * dst_col;
      }

      /* js is always even, so an odd m leaves one extra row here. */
      if (m & 1){
        p_re = src0[0];
        p_im = src0[1];
        r_re = src1[0];
        r_im = src1[1];

        dst0[0] = p_re;
        dst0[1] = p_im;
        dst1[0] = r_re;
        dst1[1] = r_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir0[2] = r_re;
        mir0[3] = r_im;
      }
    } else {
      /* m - js == 1: only the last diagonal entry remains. */
      p_re = src0[0];
      p_im = src0[1];
      dst0[0] = p_re;
      dst0[1] = p_im;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZTRMCOPY_TL - expand the lower triangle of `a` into the full m x m buffer
 * `b`, two FLOATs per element.
 *
 * Each element is a pair of consecutive FLOATs (presumably real and imaginary
 * parts).  Columns of `a` are `lda` elements apart, columns of `b` are `m`
 * elements apart.  Every a(i, j) with i >= j is written unchanged to b(i, j);
 * off-diagonal entries are additionally written unchanged to b(j, i).  The
 * strict upper triangle of `a` is never read.
 *
 * NOTE(review): the visible body performs the same operations as ZTRMCOPY_NL;
 * confirm with the callers whether that is intended.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in elements (pairs of FLOATs)
 *   b   : destination buffer, 2 * m * m FLOATs are addressed
 */
static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  const BLASLONG src_col = 2 * lda;  /* FLOATs between consecutive columns of a */
  const BLASLONG dst_col = 2 * m;    /* FLOATs between consecutive columns of b */
  BLASLONG js, left;
  FLOAT p_re, p_im, q_re, q_im;      /* column js    : rows i and i + 1 */
  FLOAT r_re, r_im, s_re, s_im;      /* column js + 1: rows i and i + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * src_col + 2 * js;  /* a(js, js) */
    FLOAT *dst0 = b + js * dst_col + 2 * js;  /* b(js, js) */

    if (m - js >= 2){
      FLOAT *src1 = src0 + src_col;      /* a(js, js + 1) */
      FLOAT *dst1 = dst0 + dst_col;      /* b(js, js + 1) */
      FLOAT *mir0 = dst0 + 2 * dst_col;  /* b(js, js + 2): mirrored writes */
      FLOAT *mir1 = mir0 + dst_col;      /* b(js, js + 3): mirrored writes */

      /* Diagonal 2 x 2 block; the entry above the diagonal is not read. */
      p_re = src0[0];
      p_im = src0[1];
      q_re = src0[2];
      q_im = src0[3];

      s_re = src1[2];
      s_im = src1[3];

      dst0[0] = p_re;
      dst0[1] = p_im;
      dst0[2] = q_re;
      dst0[3] = q_im;

      dst1[0] = q_re;
      dst1[1] = q_im;
      dst1[2] = s_re;
      dst1[3] = s_im;

      src0 += 4;
      src1 += 4;
      dst0 += 4;
      dst1 += 4;

      /* Remaining full 2 x 2 blocks of this column pair. */
      for (left = (m - js - 2) >> 1; left > 0; left--){
        p_re = src0[0];
        p_im = src0[1];
        q_re = src0[2];
        q_im = src0[3];

        r_re = src1[0];
        r_im = src1[1];
        s_re = src1[2];
        s_im = src1[3];

        dst0[0] = p_re;
        dst0[1] = p_im;
        dst0[2] = q_re;
        dst0[3] = q_im;

        dst1[0] = r_re;
        dst1[1] = r_im;
        dst1[2] = s_re;
        dst1[3] = s_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir0[2] = r_re;
        mir0[3] = r_im;

        mir1[0] = q_re;
        mir1[1] = q_im;
        mir1[2] = s_re;
        mir1[3] = s_im;

        src0 += 4;
        src1 += 4;
        dst0 += 4;
        dst1 += 4;
        mir0 += 2 * dst_col;
        mir1 += 2 * dst_col;
      }

      /* js is always even, so an odd m leaves one extra row here. */
      if (m & 1){
        p_re = src0[0];
        p_im = src0[1];
        r_re = src1[0];
        r_im = src1[1];

        dst0[0] = p_re;
        dst0[1] = p_im;
        dst1[0] = r_re;
        dst1[1] = r_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir0[2] = r_re;
        mir0[3] = r_im;
      }
    } else {
      /* m - js == 1: copy the final diagonal entry. */
      p_re = src0[0];
      p_im = src0[1];
      dst0[0] = p_re;
      dst0[1] = p_im;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZTRMCOPY_NU - expand the upper triangle of `a` into the full m x m buffer
 * `b`, two FLOATs per element.
 *
 * Every matrix element occupies two consecutive FLOATs (presumably real and
 * imaginary parts).  Columns of `a` are `lda` elements apart, columns of `b`
 * are `m` elements apart.  Each entry a(i, j) with i <= j is copied unchanged
 * to b(i, j) and, for i < j, also to b(j, i); no component is negated.
 * Entries below the diagonal of `a` are never read.
 *
 *   m   : matrix order
 *   a   : source matrix (not written)
 *   lda : column stride of `a`, in elements (pairs of FLOATs)
 *   b   : destination buffer, 2 * m * m FLOATs are addressed
 */
static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
  const BLASLONG src_col = 2 * lda;  /* FLOATs between consecutive columns of a */
  const BLASLONG dst_col = 2 * m;    /* FLOATs between consecutive columns of b */
  BLASLONG is, js;
  FLOAT p_re, p_im, q_re, q_im;      /* column js    : rows is and is + 1 */
  FLOAT r_re, r_im, s_re, s_im;      /* column js + 1: rows is and is + 1 */

  for (js = 0; js < m; js += 2){
    FLOAT *src0 = a + js * src_col;  /* a(0, js) */
    FLOAT *dst0 = b + js * dst_col;  /* b(0, js) */
    FLOAT *mir0 = b + 2 * js;        /* b(js, 0): mirrored writes, even columns */
    FLOAT *mir1 = mir0 + dst_col;    /* b(js, 1): mirrored writes, odd columns */

    if (m - js >= 2){
      FLOAT *src1 = src0 + src_col;  /* a(0, js + 1) */
      FLOAT *dst1 = dst0 + dst_col;  /* b(0, js + 1) */

      /* 2 x 2 blocks strictly above the diagonal block. */
      for (is = 0; is < js; is += 2){
        p_re = src0[0];
        p_im = src0[1];
        q_re = src0[2];
        q_im = src0[3];

        r_re = src1[0];
        r_im = src1[1];
        s_re = src1[2];
        s_im = src1[3];

        dst0[0] = p_re;
        dst0[1] = p_im;
        dst0[2] = q_re;
        dst0[3] = q_im;

        dst1[0] = r_re;
        dst1[1] = r_im;
        dst1[2] = s_re;
        dst1[3] = s_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir0[2] = r_re;
        mir0[3] = r_im;

        mir1[0] = q_re;
        mir1[1] = q_im;
        mir1[2] = s_re;
        mir1[3] = s_im;

        src0 += 4;
        src1 += 4;
        dst0 += 4;
        dst1 += 4;
        mir0 += 2 * dst_col;
        mir1 += 2 * dst_col;
      }

      /* 2 x 2 diagonal block: a(js + 1, js) is never read. */
      p_re = src0[0];
      p_im = src0[1];

      r_re = src1[0];
      r_im = src1[1];
      s_re = src1[2];
      s_im = src1[3];

      dst0[0] = p_re;
      dst0[1] = p_im;
      dst0[2] = r_re;
      dst0[3] = r_im;

      dst1[0] = r_re;
      dst1[1] = r_im;
      dst1[2] = s_re;
      dst1[3] = s_im;
    } else {
      /* m - js == 1: single trailing column. */
      for (is = 0; is < js; is += 2){
        p_re = src0[0];
        p_im = src0[1];
        q_re = src0[2];
        q_im = src0[3];

        dst0[0] = p_re;
        dst0[1] = p_im;
        dst0[2] = q_re;
        dst0[3] = q_im;

        mir0[0] = p_re;
        mir0[1] = p_im;
        mir1[0] = q_re;
        mir1[1] = q_im;

        src0 += 4;
        dst0 += 4;
        mir0 += 2 * dst_col;
        mir1 += 2 * dst_col;
      }

      p_re = src0[0];
      p_im = src0[1];
      dst0[0] = p_re;
      dst0[1] = p_im;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ZTRMCOPY_TU
 *
 * Expands one triangle of the m x m source `a` into a full, mirrored
 * m x m buffer `b`.
 *
 * The source is walked as "strips" that are 2 * lda FLOATs apart; the
 * destination is written as strips that are 2 * m FLOATs apart.  Data is
 * always moved two FLOATs at a time.
 * NOTE(review): presumably every element is an interleaved (real, imag)
 * pair, which would explain the doubled strides -- confirm with callers.
 *
 * For source strip j only the elements 0 .. j are read.  Element i of
 * source strip j is stored twice: at element i of destination strip j and
 * at element j of destination strip i.
 *
 *   m   : number of strips / elements per strip
 *   a   : source data (read only through this routine)
 *   lda : distance between source strips, in elements (doubled internally)
 *   b   : destination, receives 2 * m * m FLOATs
 */
static inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){

  const BLASLONG src_stride = 2 * lda;   /* FLOATs between source strips      */
  const BLASLONG dst_stride = 2 * m;     /* FLOATs between destination strips */

  BLASLONG js, left, blocks;

  FLOAT *src0, *src1;      /* source strips js and js + 1                     */
  FLOAT *row0, *row1;      /* destination strips js and js + 1                */
  FLOAT *mir0, *mir1;      /* mirrored targets inside the earlier dest strips */

  FLOAT u0, u1, u2, u3;    /* two elements taken from src0 */
  FLOAT v0, v1, v2, v3;    /* two elements taken from src1 */

  /* Strips are handled two at a time. */
  for (js = 0; js < m; js += 2){

    left = m - js;

    src0 = a + js * src_stride;
    src1 = src0 + src_stride;

    row0 = b + js * dst_stride;
    row1 = row0 + dst_stride;

    /* Element js of destination strip 0 and of destination strip 1. */
    mir0 = b + 2 * js;
    mir1 = mir0 + dst_stride;

    if (left >= 2){

      /* Off-diagonal part: 2 x 2 element tiles ahead of the diagonal tile. */
      for (blocks = js / 2; blocks > 0; blocks--){

        /* Load the whole tile before storing anything. */
        u0 = src0[0];
        u1 = src0[1];
        u2 = src0[2];
        u3 = src0[3];

        v0 = src1[0];
        v1 = src1[1];
        v2 = src1[2];
        v3 = src1[3];

        src0 += 4;
        src1 += 4;

        /* Straight copy into destination strips js and js + 1. */
        row0[0] = u0;
        row0[1] = u1;
        row0[2] = u2;
        row0[3] = u3;

        row1[0] = v0;
        row1[1] = v1;
        row1[2] = v2;
        row1[3] = v3;

        /* Mirrored copy: same tile with the two element indices swapped. */
        mir0[0] = u0;
        mir0[1] = u1;
        mir0[2] = v0;
        mir0[3] = v1;

        mir1[0] = u2;
        mir1[1] = u3;
        mir1[2] = v2;
        mir1[3] = v3;

        row0 += 4;
        row1 += 4;

        /* Step two destination strips further on. */
        mir0 += 4 * m;
        mir1 += 4 * m;
      }

      /*
       * Diagonal 2 x 2 tile.  The second element of src0 is not read here;
       * its slot in row0 is filled from the first element of src1 instead.
       */
      u0 = src0[0];
      u1 = src0[1];

      v0 = src1[0];
      v1 = src1[1];
      v2 = src1[2];
      v3 = src1[3];

      row0[0] = u0;
      row0[1] = u1;
      row0[2] = v0;
      row0[3] = v1;

      row1[0] = v0;
      row1[1] = v1;
      row1[2] = v2;
      row1[3] = v3;

    } else if (left == 1){

      /* m is odd: one last strip remains, so src1 / row1 are left unused. */
      for (blocks = js / 2; blocks > 0; blocks--){

        u0 = src0[0];
        u1 = src0[1];
        u2 = src0[2];
        u3 = src0[3];
        src0 += 4;

        row0[0] = u0;
        row0[1] = u1;
        row0[2] = u2;
        row0[3] = u3;

        /* Only one element per mirrored strip is written in this case. */
        mir0[0] = u0;
        mir0[1] = u1;
        mir1[0] = u2;
        mir1[1] = u3;

        row0 += 4;

        mir0 += 4 * m;
        mir1 += 4 * m;
      }

      /* Final diagonal element. */
      u0 = src0[0];
      u1 = src0[1];
      row0[0] = u0;
      row0[1] = u1;
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|