#include "common.h"
/*
 * Generic C micro-kernel that fills C with alpha * (ba . bb) using 2x2
 * register tiles.
 *
 * Parameters:
 *   bm, bn  - number of rows / columns of C to produce; pairs are handled by
 *             the 2-wide loops, an odd trailing row/column by the "&1" loops.
 *   bk      - depth of the ba/bb panels; bb advances by 2*bk per column pair
 *             and by bk for a trailing single column.
 *   alpha   - scalar applied to every accumulated result before it is stored.
 *   ba, bb  - input panels. For a two-row tile ba is read as two consecutive
 *             values per k step (one value per step for a single row); bb is
 *             read the same way per column pair / single column.
 *   C, ldc  - output; the second column of a tile lives at C0 + ldc.
 *   offset  - (TRMMKERNEL builds only) starting diagonal offset used to
 *             derive how many k steps ("temp") each tile accumulates.
 *
 * Results are stored to C, not accumulated into it. Always returns 0.
 *
 * NOTE(review): `off` is only assigned when TRMMKERNEL is defined, yet it is
 * read unconditionally; this file appears to be intended for TRMMKERNEL
 * builds only - confirm against the build rules.
 */
int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc
#ifdef TRMMKERNEL
		,BLASLONG offset
#endif
		)
{
   BLASLONG i,j,k;
   FLOAT *C0,*C1,*ptrba,*ptrbb;
   FLOAT res0,res1,res2,res3,load0,load1,load2,load3,load4,load5,load6,load7;
   BLASLONG off, temp;
#if defined(TRMMKERNEL) && !defined(LEFT)
   off = -offset;
#endif
   /* ---- column pairs ---- */
   for (j=0; j<bn/2; j+=1)
     {
        C0 = C;
        C1 = C0+ldc;
#if defined(TRMMKERNEL) && defined(LEFT)
        off = offset;
#endif
        ptrba = ba;
        /* 2x2 tiles: two rows by two columns */
        for (i=0; i<bm/2; i+=1)
          {
#if (defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
             ptrbb = bb;
#else
             /* skip the first `off` k steps of both panels (2 values each) */
             ptrba += off*2;
             ptrbb = bb + off*2;
#endif
             res0 = 0;
             res1 = 0;
             res2 = 0;
             res3 = 0;
             /* temp = number of k steps accumulated for this tile */
#if (defined(LEFT) && !defined(TRANSA)) || \
    (!defined(LEFT) && defined(TRANSA))
             temp = bk-off;
#elif defined(LEFT)
             temp = off+2;
#else
             temp = off+2;
#endif
             /* main loop: four k steps per iteration */
             for (k=0; k<temp/4; k+=1)
               {
                  load0 = ptrba[2*0+0];
                  load1 = ptrbb[2*0+0];
                  res0 = res0+load0*load1;
                  load2 = ptrba[2*0+1];
                  res1 = res1+load2*load1;
                  load3 = ptrbb[2*0+1];
                  res2 = res2+load0*load3;
                  res3 = res3+load2*load3;
                  load4 = ptrba[2*1+0];
                  load5 = ptrbb[2*1+0];
                  res0 = res0+load4*load5;
                  load6 = ptrba[2*1+1];
                  res1 = res1+load6*load5;
                  load7 = ptrbb[2*1+1];
                  res2 = res2+load4*load7;
                  res3 = res3+load6*load7;
                  load0 = ptrba[2*2+0];
                  load1 = ptrbb[2*2+0];
                  res0 = res0+load0*load1;
                  load2 = ptrba[2*2+1];
                  res1 = res1+load2*load1;
                  load3 = ptrbb[2*2+1];
                  res2 = res2+load0*load3;
                  res3 = res3+load2*load3;
                  load4 = ptrba[2*3+0];
                  load5 = ptrbb[2*3+0];
                  res0 = res0+load4*load5;
                  load6 = ptrba[2*3+1];
                  res1 = res1+load6*load5;
                  load7 = ptrbb[2*3+1];
                  res2 = res2+load4*load7;
                  res3 = res3+load6*load7;
                  ptrba = ptrba+8;
                  ptrbb = ptrbb+8;
               }
             /* tail: the remaining temp%4 k steps, one at a time */
             for (k=0; k<(temp&3); k+=1)
               {
                  load0 = ptrba[2*0+0];
                  load1 = ptrbb[2*0+0];
                  res0 = res0+load0*load1;
                  load2 = ptrba[2*0+1];
                  res1 = res1+load2*load1;
                  load3 = ptrbb[2*0+1];
                  res2 = res2+load0*load3;
                  res3 = res3+load2*load3;
                  ptrba = ptrba+2;
                  ptrbb = ptrbb+2;
               }
             res0 = res0*alpha;
             C0[0] = res0;
             res1 = res1*alpha;
             C0[1] = res1;
             res2 = res2*alpha;
             C1[0] = res2;
             res3 = res3*alpha;
             C1[1] = res3;
#if ( defined(LEFT) && defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
             /* step both panel pointers over the k steps that were not consumed */
             temp = bk - off;
#ifdef LEFT
             temp -= 2;
#else
             temp -= 2;
#endif
             ptrba += temp*2;
             ptrbb += temp*2;
#endif
#ifdef LEFT
             off += 2;
#endif
             C0 = C0+2;
             C1 = C1+2;
          }
        /* trailing single row against the column pair (1x2 tile) */
        for (i=0; i<(bm&1); i+=1)
          {
#if (defined(LEFT) &&  defined(TRANSA)) ||(!defined(LEFT) && !defined(TRANSA))
             ptrbb = bb;
#else
             ptrba += off;
             ptrbb = bb+off*2;
#endif
             res0 = 0;
             res1 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
             temp = bk-off;
#elif defined(LEFT)
             temp = off+1;
#else
             temp = off+2;
#endif
             for (k=0; k<temp; k+=1)
               {
                  load0 = ptrba[0+0];
                  load1 = ptrbb[2*0+0];
                  res0 = res0+load0*load1;
                  load2 = ptrbb[2*0+1];
                  res1 = res1+load0*load2;
                  ptrba = ptrba+1;
                  ptrbb = ptrbb+2;
               }
             res0 = res0*alpha;
             C0[0] = res0;
             res1 = res1*alpha;
             C1[0] = res1;
#if ( defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
             temp = bk-off;
#ifdef LEFT
             temp -= 1;
#else
             temp -= 2;
#endif
             ptrba += temp;
             ptrbb += temp*2;
#endif
#ifdef LEFT
             off += 1;
#endif
             C0 = C0+1;
             C1 = C1+1;
          }
#if defined(TRMMKERNEL) && !defined(LEFT)
        off += 2;
#endif
        /* advance to the next pair of columns */
        k = (bk<<1);
        bb = bb+k;
        i = (ldc<<1);
        C = C+i;
     }
   /* ---- trailing single column ---- */
   for (j=0; j<(bn&1); j+=1)
     {
        C0 = C;
#if defined(TRMMKERNEL) &&  defined(LEFT)
        off = offset;
#endif
        ptrba = ba;
        /* two rows against one column (2x1 tile) */
        for (i=0; i<bm/2; i+=1)
          {
#if (defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
             ptrbb = bb;
#else
             ptrba += off*2;
             ptrbb = bb + off;
#endif
             res0 = 0;
             res1 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || \
    (!defined(LEFT) && defined(TRANSA))
             temp = bk-off;
#elif defined(LEFT)
             temp = off+2;
#else
             temp = off+1;
#endif
             for (k=0; k<temp; k+=1)
               {
                  load0 = ptrba[2*0+0];
                  load1 = ptrbb[0+0];
                  res0 = res0+load0*load1;
                  load2 = ptrba[2*0+1];
                  res1 = res1+load2*load1;
                  ptrba = ptrba+2;
                  ptrbb = ptrbb+1;
               }
             res0 = res0*alpha;
             C0[0] = res0;
             res1 = res1*alpha;
             C0[1] = res1;
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
             temp = bk - off;
#ifdef LEFT
             temp -= 2;
#else
             temp -= 1;
#endif
             ptrba += temp*2;
             ptrbb += temp;
#endif
#ifdef LEFT
             off += 2;
#endif

             C0 = C0+2;
          }
        /* final 1x1 element */
        for (i=0; i<(bm&1); i+=1)
          {
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
             ptrbb = bb;
#else
             ptrba += off;
             ptrbb = bb+off;
#endif
             res0 = 0;
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
             temp = bk-off;
#elif defined(LEFT)
             temp = off + 1;
#else
             temp = off + 1;
#endif
             for (k=0; k<temp; k+=1)
               {
                  load0 = ptrba[0+0];
                  load1 = ptrbb[0+0];
                  res0 = res0+load0*load1;
                  ptrba = ptrba+1;
                  ptrbb = ptrbb+1;
               }
             res0 = res0*alpha;
             C0[0] = res0;
#if ( defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
             temp = bk-off;
#ifdef LEFT
             temp -= 1;
#else
             temp -= 1;
#endif
             ptrba += temp;
             ptrbb += temp;
#endif
#ifdef LEFT
             off += 1;
#endif
             C0 = C0+1;
          }
#if defined(TRMMKERNEL) && !defined(LEFT)
        off += 1;
#endif
        k = (bk<<0);
        bb = bb+k;
        C = C+ldc;
     }
   return 0;
}