Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/driver/others/parameter.c

kusano 2b45e8
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "common.h"
kusano 2b45e8
kusano 2b45e8
int get_L2_size(void);
kusano 2b45e8
kusano 2b45e8
/* Fallback values for the global blocking parameters defined in this file.
   A fallback is used when the matching configuration macro (SGEMM_P,
   GEMM_OFFSET_A, ...) does not evaluate to a non-zero numeric constant in a
   preprocessor expression. */
#define DEFAULT_GEMM_P 128
#define DEFAULT_GEMM_Q 128
#define DEFAULT_GEMM_R 128
#define DEFAULT_GEMM_OFFSET_A 0
#define DEFAULT_GEMM_OFFSET_B 0
kusano 2b45e8
kusano 2b45e8
/* Global Parameter */
kusano 2b45e8
#if GEMM_OFFSET_A == gemm_offset_a
kusano 2b45e8
BLASLONG gemm_offset_a = DEFAULT_GEMM_OFFSET_A;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG gemm_offset_a = GEMM_OFFSET_A;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if GEMM_OFFSET_B == gemm_offset_b
kusano 2b45e8
BLASLONG gemm_offset_b = DEFAULT_GEMM_OFFSET_B;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if SGEMM_P == sgemm_p
kusano 2b45e8
BLASLONG sgemm_p = DEFAULT_GEMM_P;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG sgemm_p = SGEMM_P;
kusano 2b45e8
#endif
kusano 2b45e8
#if DGEMM_P == dgemm_p
kusano 2b45e8
BLASLONG dgemm_p = DEFAULT_GEMM_P;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG dgemm_p = DGEMM_P;
kusano 2b45e8
#endif
kusano 2b45e8
#if CGEMM_P == cgemm_p
kusano 2b45e8
BLASLONG cgemm_p = DEFAULT_GEMM_P;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG cgemm_p = CGEMM_P;
kusano 2b45e8
#endif
kusano 2b45e8
#if ZGEMM_P == zgemm_p
kusano 2b45e8
BLASLONG zgemm_p = DEFAULT_GEMM_P;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG zgemm_p = ZGEMM_P;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if SGEMM_Q == sgemm_q
kusano 2b45e8
BLASLONG sgemm_q = DEFAULT_GEMM_Q;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG sgemm_q = SGEMM_Q;
kusano 2b45e8
#endif
kusano 2b45e8
#if DGEMM_Q == dgemm_q
kusano 2b45e8
BLASLONG dgemm_q = DEFAULT_GEMM_Q;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG dgemm_q = DGEMM_Q;
kusano 2b45e8
#endif
kusano 2b45e8
#if CGEMM_Q == cgemm_q
kusano 2b45e8
BLASLONG cgemm_q = DEFAULT_GEMM_Q;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG cgemm_q = CGEMM_Q;
kusano 2b45e8
#endif
kusano 2b45e8
#if ZGEMM_Q == zgemm_q
kusano 2b45e8
BLASLONG zgemm_q = DEFAULT_GEMM_Q;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG zgemm_q = ZGEMM_Q;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if SGEMM_R == sgemm_r
kusano 2b45e8
BLASLONG sgemm_r = DEFAULT_GEMM_R;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG sgemm_r = SGEMM_R;
kusano 2b45e8
#endif
kusano 2b45e8
#if DGEMM_R == dgemm_r
kusano 2b45e8
BLASLONG dgemm_r = DEFAULT_GEMM_R;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG dgemm_r = DGEMM_R;
kusano 2b45e8
#endif
kusano 2b45e8
#if CGEMM_R == cgemm_r
kusano 2b45e8
BLASLONG cgemm_r = DEFAULT_GEMM_R;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG cgemm_r = CGEMM_R;
kusano 2b45e8
#endif
kusano 2b45e8
#if ZGEMM_R == zgemm_r
kusano 2b45e8
BLASLONG zgemm_r = DEFAULT_GEMM_R;
kusano 2b45e8
#else
kusano 2b45e8
BLASLONG zgemm_r = ZGEMM_R;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* P, Q and R blocking parameters for the q* and x* GEMM variants.  They only
   exist when EXPRECISION or QUAD_PRECISION is defined.  As with the other
   parameters, a variable starts from the DEFAULT_ value when its
   configuration macro is undefined, expands to the variable's own name, or
   is 0; otherwise it starts from the configured constant. */
#if defined(EXPRECISION) || defined(QUAD_PRECISION)
#if QGEMM_P == qgemm_p
BLASLONG qgemm_p = DEFAULT_GEMM_P;
#else
BLASLONG qgemm_p = QGEMM_P;
#endif
#if XGEMM_P == xgemm_p
BLASLONG xgemm_p = DEFAULT_GEMM_P;
#else
BLASLONG xgemm_p = XGEMM_P;
#endif
#if QGEMM_Q == qgemm_q
BLASLONG qgemm_q = DEFAULT_GEMM_Q;
#else
BLASLONG qgemm_q = QGEMM_Q;
#endif
#if XGEMM_Q == xgemm_q
BLASLONG xgemm_q = DEFAULT_GEMM_Q;
#else
BLASLONG xgemm_q = XGEMM_Q;
#endif
#if QGEMM_R == qgemm_r
BLASLONG qgemm_r = DEFAULT_GEMM_R;
#else
BLASLONG qgemm_r = QGEMM_R;
#endif
#if XGEMM_R == xgemm_r
BLASLONG xgemm_r = DEFAULT_GEMM_R;
#else
BLASLONG xgemm_r = XGEMM_R;
#endif
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(ARCH_X86) || defined(ARCH_X86_64)
kusano 2b45e8
kusano 2b45e8
/*
 * Query CPUID for the size of the L2 cache.
 *
 * Returns the size reported by the processor (presumably in KB, following
 * the CPUID documentation -- callers in this file only scale it with
 * shifts), or 0 when no known L2 descriptor is found.
 *
 * For the cores listed in the first branch the value is taken from
 * ECX[31:16] of extended leaf 0x80000006.  For every other core the cache
 * descriptor bytes of leaf 2 are scanned and the first recognised L2
 * descriptor decides the result.
 *
 * NOTE(review): cpuid() and BITMASK() come from a project header that is
 * not visible here; BITMASK(v, s, m) is assumed to be ((v >> s) & m).
 */
int get_L2_size(void){

  int eax, ebx, ecx, edx;

#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || \
    defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
    defined(CORE_NEHALEM) || defined(ATOM) || defined(GENERIC)

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  return BITMASK(ecx, 16, 0xffff);

#else

  int info[15];
  int i;

  cpuid(2, &eax, &ebx, &ecx, &edx);

  /* A register whose bit 31 is set carries no valid descriptors.  Clear it
     so that all of its bytes read as 0x00, which matches no case below. */
  if ((unsigned int)eax & 0x80000000u) eax = 0;
  if ((unsigned int)ebx & 0x80000000u) ebx = 0;
  if ((unsigned int)ecx & 0x80000000u) ecx = 0;
  if ((unsigned int)edx & 0x80000000u) edx = 0;

  /* The low byte of EAX is not a descriptor, so it is skipped. */
  info[ 0] = BITMASK(eax,  8, 0xff);
  info[ 1] = BITMASK(eax, 16, 0xff);
  info[ 2] = BITMASK(eax, 24, 0xff);

  info[ 3] = BITMASK(ebx,  0, 0xff);
  info[ 4] = BITMASK(ebx,  8, 0xff);
  info[ 5] = BITMASK(ebx, 16, 0xff);
  info[ 6] = BITMASK(ebx, 24, 0xff);

  info[ 7] = BITMASK(ecx,  0, 0xff);
  info[ 8] = BITMASK(ecx,  8, 0xff);
  info[ 9] = BITMASK(ecx, 16, 0xff);
  info[10] = BITMASK(ecx, 24, 0xff);

  info[11] = BITMASK(edx,  0, 0xff);
  info[12] = BITMASK(edx,  8, 0xff);
  info[13] = BITMASK(edx, 16, 0xff);
  info[14] = BITMASK(edx, 24, 0xff);

  /* The first descriptor that names an L2 cache wins. */
  for (i = 0; i < 15; i++){

    switch (info[i]){
      case 0x3b :
      case 0x41 :
      case 0x79 :
	return  128;

      case 0x3c :
      case 0x42 :
      case 0x7a :
      case 0x7e :
      case 0x82 :
	return  256;

      case 0x43 :
      case 0x7b :
      case 0x7f :
      case 0x83 :
      case 0x86 :
	return  512;

      case 0x44 :
      case 0x78 :
      case 0x7c :
      case 0x84 :
      case 0x87 :
	return 1024;

      case 0x45 :
      case 0x7d :
      case 0x85 :
	return 2048;

      case 0x49 :
	return 4096;

      default :
	break;
    }
  }

  /* No recognised L2 descriptor: report "unknown". */
  return 0;
#endif
}
kusano 2b45e8
kusano 2b45e8
void blas_set_parameter(void){
kusano 2b45e8
kusano 2b45e8
  char *p;
kusano 2b45e8
  int factor;
kusano 2b45e8
  int size = get_L2_size();
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS)
kusano 2b45e8
  size >>= 7;
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_BANIAS) && (HAVE_HIT > 1)
kusano 2b45e8
  sgemm_p =  64 / HAVE_HIT * size;
kusano 2b45e8
  dgemm_p =  32 / HAVE_HIT * size;
kusano 2b45e8
  cgemm_p =  32 / HAVE_HIT * size;
kusano 2b45e8
  zgemm_p =  16 / HAVE_HIT * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  16 / HAVE_HIT * size;
kusano 2b45e8
  xgemm_p =   8 / HAVE_HIT * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =   8 / HAVE_HIT * size;
kusano 2b45e8
  xgemm_p =   4 / HAVE_HIT * size;
kusano 2b45e8
#endif
kusano 2b45e8
#else
kusano 2b45e8
  sgemm_p =  64 * size;
kusano 2b45e8
  dgemm_p =  32 * size;
kusano 2b45e8
  cgemm_p =  32 * size;
kusano 2b45e8
  zgemm_p =  16 * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  16 * size;
kusano 2b45e8
  xgemm_p =   8 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =   8 * size;
kusano 2b45e8
  xgemm_p =   4 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_NORTHWOOD) 
kusano 2b45e8
  size >>= 7;
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_HUGETLB
kusano 2b45e8
  sgemm_p = 128 * size;
kusano 2b45e8
  dgemm_p =  64 * size;
kusano 2b45e8
  cgemm_p =  64 * size;
kusano 2b45e8
  zgemm_p =  32 * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  32 * size;
kusano 2b45e8
  xgemm_p =  16 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  16 * size;
kusano 2b45e8
  xgemm_p =   8 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#else
kusano 2b45e8
  sgemm_p =  96 * size;
kusano 2b45e8
  dgemm_p =  48 * size;
kusano 2b45e8
  cgemm_p =  48 * size;
kusano 2b45e8
  zgemm_p =  24 * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  24 * size;
kusano 2b45e8
  xgemm_p =  12 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  12 * size;
kusano 2b45e8
  xgemm_p =   6 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_CORE2)
kusano 2b45e8
kusano 2b45e8
  size >>= 9;
kusano 2b45e8
kusano 2b45e8
  sgemm_p =  92 * size;
kusano 2b45e8
  dgemm_p =  46 * size;
kusano 2b45e8
  cgemm_p =  46 * size;
kusano 2b45e8
  zgemm_p =  23 * size;
kusano 2b45e8
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  23 * size;
kusano 2b45e8
  xgemm_p =  11 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  11 * size;
kusano 2b45e8
  xgemm_p =   5 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(PENRYN)
kusano 2b45e8
kusano 2b45e8
  size >>= 9;
kusano 2b45e8
kusano 2b45e8
  sgemm_p = 1024;
kusano 2b45e8
  dgemm_p =  512;
kusano 2b45e8
  cgemm_p =  512;
kusano 2b45e8
  zgemm_p =  256;
kusano 2b45e8
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  256;
kusano 2b45e8
  xgemm_p =  128;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  21 * size + 4;
kusano 2b45e8
  xgemm_p =  10 * size + 2;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(DUNNINGTON)
kusano 2b45e8
kusano 2b45e8
  size >>= 9;
kusano 2b45e8
kusano 2b45e8
  sgemm_p = 384;
kusano 2b45e8
  dgemm_p = 384;
kusano 2b45e8
  cgemm_p = 384;
kusano 2b45e8
  zgemm_p = 384;
kusano 2b45e8
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p = 384;
kusano 2b45e8
  xgemm_p = 384;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  21 * size + 4;
kusano 2b45e8
  xgemm_p =  10 * size + 2;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(NEHALEM)
kusano 2b45e8
  sgemm_p = 1024;
kusano 2b45e8
  dgemm_p =  512;
kusano 2b45e8
  cgemm_p =  512;
kusano 2b45e8
  zgemm_p =  256;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  256;
kusano 2b45e8
  xgemm_p =  128;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_PRESCOTT)  || defined(GENERIC)
kusano 2b45e8
  size >>= 6;
kusano 2b45e8
kusano 2b45e8
  if (size > 16) size = 16;
kusano 2b45e8
kusano 2b45e8
  sgemm_p =  56 * size;
kusano 2b45e8
  dgemm_p =  28 * size;
kusano 2b45e8
  cgemm_p =  28 * size;
kusano 2b45e8
  zgemm_p =  14 * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  14 * size;
kusano 2b45e8
  xgemm_p =   7 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =   7 * size;
kusano 2b45e8
  xgemm_p =   3 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_OPTERON) 
kusano 2b45e8
  sgemm_p =  224 + 14 * (size >> 5);
kusano 2b45e8
  dgemm_p =  112 + 14 * (size >> 6);
kusano 2b45e8
  cgemm_p =  116 + 14 * (size >> 6);
kusano 2b45e8
  zgemm_p =   58 + 14 * (size >> 7);
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =   58 + 14 * (size >> 7);
kusano 2b45e8
  xgemm_p =   29 + 14 * (size >> 8);
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =   29 + 14 * (size >> 8);
kusano 2b45e8
  xgemm_p =   15 + 14 * (size >> 9);
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(ATOM)
kusano 2b45e8
  size >>= 8;
kusano 2b45e8
kusano 2b45e8
  sgemm_p =  256;
kusano 2b45e8
  dgemm_p =  128;
kusano 2b45e8
  cgemm_p =  128;
kusano 2b45e8
  zgemm_p =   64;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =   64;
kusano 2b45e8
  xgemm_p =   32;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =   32;
kusano 2b45e8
  xgemm_p =   16;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(CORE_BARCELONA)
kusano 2b45e8
  size >>= 8;
kusano 2b45e8
kusano 2b45e8
  sgemm_p = 232 * size;
kusano 2b45e8
  dgemm_p = 116 * size;
kusano 2b45e8
  cgemm_p = 116 * size;
kusano 2b45e8
  zgemm_p =  58 * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  58 * size;
kusano 2b45e8
  xgemm_p =  26 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  26 * size;
kusano 2b45e8
  xgemm_p =  13 * size;
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  p = getenv("GOTO_BLOCK_FACTOR");
kusano 2b45e8
kusano 2b45e8
  if (p) {
kusano 2b45e8
    factor = atoi(p);
kusano 2b45e8
    if (factor <  10) factor =  10;
kusano 2b45e8
    if (factor > 200) factor = 200;
kusano 2b45e8
    
kusano 2b45e8
    sgemm_p = ((long)((double)sgemm_p * (double)factor * 1.e-2)) & ~7L;
kusano 2b45e8
    dgemm_p = ((long)((double)dgemm_p * (double)factor * 1.e-2)) & ~7L;
kusano 2b45e8
    cgemm_p = ((long)((double)cgemm_p * (double)factor * 1.e-2)) & ~7L;
kusano 2b45e8
    zgemm_p = ((long)((double)zgemm_p * (double)factor * 1.e-2)) & ~7L;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
    qgemm_p = ((long)((double)qgemm_p * (double)factor * 1.e-2)) & ~7L;
kusano 2b45e8
    xgemm_p = ((long)((double)xgemm_p * (double)factor * 1.e-2)) & ~7L;
kusano 2b45e8
#endif
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
  if (sgemm_p == 0) sgemm_p = 64;
kusano 2b45e8
  if (dgemm_p == 0) dgemm_p = 64;
kusano 2b45e8
  if (cgemm_p == 0) cgemm_p = 64;
kusano 2b45e8
  if (zgemm_p == 0) zgemm_p = 64;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  if (qgemm_p == 0) qgemm_p = 64;
kusano 2b45e8
  if (xgemm_p == 0) xgemm_p = 64;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  if (qgemm_p == 0) qgemm_p = 64;
kusano 2b45e8
  if (xgemm_p == 0) xgemm_p = 64;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  sgemm_p = (sgemm_p + SGEMM_UNROLL_M - 1) & ~(SGEMM_UNROLL_M - 1);
kusano 2b45e8
  dgemm_p = (dgemm_p + DGEMM_UNROLL_M - 1) & ~(DGEMM_UNROLL_M - 1);
kusano 2b45e8
  cgemm_p = (cgemm_p + CGEMM_UNROLL_M - 1) & ~(CGEMM_UNROLL_M - 1);
kusano 2b45e8
  zgemm_p = (zgemm_p + ZGEMM_UNROLL_M - 1) & ~(ZGEMM_UNROLL_M - 1);
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p = (qgemm_p + QGEMM_UNROLL_M - 1) & ~(QGEMM_UNROLL_M - 1);
kusano 2b45e8
  xgemm_p = (xgemm_p + XGEMM_UNROLL_M - 1) & ~(XGEMM_UNROLL_M - 1);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q *  4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q *  4)) - 15) & ~15;
kusano 2b45e8
  dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q *  8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q *  8)) - 15) & ~15;
kusano 2b45e8
  cgemm_r = (((BUFFER_SIZE - ((CGEMM_P * CGEMM_Q *  8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (CGEMM_Q *  8)) - 15) & ~15;
kusano 2b45e8
  zgemm_r = (((BUFFER_SIZE - ((ZGEMM_P * ZGEMM_Q * 16 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (ZGEMM_Q * 16)) - 15) & ~15;
kusano 2b45e8
#if defined(EXPRECISION) || defined(QUAD_PRECISION)
kusano 2b45e8
  qgemm_r = (((BUFFER_SIZE - ((QGEMM_P * QGEMM_Q * 16 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (QGEMM_Q * 16)) - 15) & ~15;
kusano 2b45e8
  xgemm_r = (((BUFFER_SIZE - ((XGEMM_P * XGEMM_Q * 32 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (XGEMM_Q * 32)) - 15) & ~15;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if 0
kusano 2b45e8
  fprintf(stderr, "SGEMM ... %3d, %3d, %3d\n", SGEMM_P, SGEMM_Q, SGEMM_R);
kusano 2b45e8
  fprintf(stderr, "DGEMM ... %3d, %3d, %3d\n", DGEMM_P, DGEMM_Q, DGEMM_R);
kusano 2b45e8
  fprintf(stderr, "CGEMM ... %3d, %3d, %3d\n", CGEMM_P, CGEMM_Q, CGEMM_R);
kusano 2b45e8
  fprintf(stderr, "ZGEMM ... %3d, %3d, %3d\n", ZGEMM_P, ZGEMM_Q, ZGEMM_R);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#if 0
kusano 2b45e8
kusano 2b45e8
/*
 * Debug helper: read core and thread information through CPUID and print
 * the APIC id and the core count to stderr.  Always returns 0.
 *
 * The CPUID queries exist only for CORE_PRESCOTT and CORE_OPTERON builds.
 * In any other build the counters keep their zero initial values, so the
 * function reports APIC id 0 and one core.
 */
int get_current_cpu_info(void){

  /* Start from zero so the increment below is well defined even when none
     of the core-specific sections is compiled in. */
  int nlprocs   = 0;
  int ncores    = 0;
  int cmplegacy = 0;
  int htt       = 0;
  int apicid    = 0;

#if defined(CORE_PRESCOTT) || defined(CORE_OPTERON)
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  nlprocs = BITMASK(ebx, 16, 0xff);
  apicid  = BITMASK(ebx, 24, 0xff);
  htt     = BITMASK(edx, 28, 0x01);
#endif

#if defined(CORE_PRESCOTT)
  cpuid(4, &eax, &ebx, &ecx, &edx);
  ncores = BITMASK(eax, 26, 0x3f);

  if (htt == 0)  nlprocs = 0;
#endif

#if defined(CORE_OPTERON)
  cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
  ncores = BITMASK(ecx,  0, 0xff);

  cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
  cmplegacy = BITMASK(ecx,  1, 0x01);

  if (htt == 0) {
    nlprocs = 0;
    ncores  = 0;
    cmplegacy = 0;
  }
#endif

  /* The value read above is treated as "count - 1". */
  ncores  ++;

  /* These are collected but not reported; reference them to keep the
     compiler quiet. */
  (void)nlprocs;
  (void)cmplegacy;
  (void)htt;

  fprintf(stderr, "APICID = %d  Number of core = %d\n", apicid, ncores);

  return 0;
}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(ARCH_IA64)
kusano 2b45e8
kusano 2b45e8
/*
 * IA-64: read the CPUID register selected by regnum and return its value.
 *
 * When __ECC is defined the __getIndReg intrinsic is used; otherwise the
 * register is read with inline assembly.
 */
static inline BLASULONG cpuid(BLASULONG regnum){
  BLASULONG value;

#ifndef __ECC
  asm ("mov %0=cpuid[%r1]" : "=r"(value) : "rO"(regnum));
#else
  value = __getIndReg(_IA64_REG_INDR_CPUID, regnum);
#endif

  return value;
}
kusano 2b45e8
kusano 2b45e8
#if 1
kusano 2b45e8
kusano 2b45e8
void blas_set_parameter(void){
kusano 2b45e8
  
kusano 2b45e8
  BLASULONG cpuid3, size;
kusano 2b45e8
kusano 2b45e8
  cpuid3 = cpuid(3);
kusano 2b45e8
  
kusano 2b45e8
  size = BITMASK(cpuid3, 16, 0xff);
kusano 2b45e8
kusano 2b45e8
  sgemm_p = 192 * (size + 1);
kusano 2b45e8
  dgemm_p =  96 * (size + 1);
kusano 2b45e8
  cgemm_p =  96 * (size + 1);
kusano 2b45e8
  zgemm_p =  48 * (size + 1);
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  64 * (size + 1);
kusano 2b45e8
  xgemm_p =  32 * (size + 1);
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
  qgemm_p =  32 * (size + 1);
kusano 2b45e8
  xgemm_p =  16 * (size + 1);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q *  4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q *  4)) - 15) & ~15;
kusano 2b45e8
  dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q *  8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q *  8)) - 15) & ~15;
kusano 2b45e8
  cgemm_r = (((BUFFER_SIZE - ((CGEMM_P * CGEMM_Q *  8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (CGEMM_Q *  8)) - 15) & ~15;
kusano 2b45e8
  zgemm_r = (((BUFFER_SIZE - ((ZGEMM_P * ZGEMM_Q * 16 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (ZGEMM_Q * 16)) - 15) & ~15;
kusano 2b45e8
#if defined(EXPRECISION) || defined(QUAD_PRECISION)
kusano 2b45e8
  qgemm_r = (((BUFFER_SIZE - ((QGEMM_P * QGEMM_Q * 16 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (QGEMM_Q * 16)) - 15) & ~15;
kusano 2b45e8
  xgemm_r = (((BUFFER_SIZE - ((XGEMM_P * XGEMM_Q * 32 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (XGEMM_Q * 32)) - 15) & ~15;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
kusano 2b45e8
#define IA64_SYS_NAME  "/sys/devices/system/cpu/cpu0/cache/index3/size"
kusano 2b45e8
#define IA64_PROC_NAME "/proc/pal/cpu0/cache_info"
kusano 2b45e8
kusano 2b45e8
void blas_set_parameter(void){
kusano 2b45e8
  
kusano 2b45e8
  BLASULONG cpuid3;
kusano 2b45e8
  int size = 0;
kusano 2b45e8
kusano 2b45e8
#if 1
kusano 2b45e8
  char buffer[128];
kusano 2b45e8
  FILE *infile;
kusano 2b45e8
kusano 2b45e8
  if ((infile = fopen(IA64_SYS_NAME, "r")) != NULL) {
kusano 2b45e8
kusano 2b45e8
    fgets(buffer, sizeof(buffer), infile);
kusano 2b45e8
    fclose(infile);
kusano 2b45e8
kusano 2b45e8
    size = atoi(buffer) / 1536;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  if (size <= 0) {
kusano 2b45e8
    if ((infile = fopen(IA64_PROC_NAME, "r")) != NULL) {
kusano 2b45e8
      
kusano 2b45e8
      while(fgets(buffer, sizeof(buffer), infile) != NULL) {
kusano 2b45e8
	if ((!strncmp("Data/Instruction Cache level 3", buffer, 30))) break;
kusano 2b45e8
      }
kusano 2b45e8
      
kusano 2b45e8
      fgets(buffer, sizeof(buffer), infile);
kusano 2b45e8
      
kusano 2b45e8
      fclose(infile);
kusano 2b45e8
      
kusano 2b45e8
      *strstr(buffer, "bytes") = (char)NULL;
kusano 2b45e8
      
kusano 2b45e8
      size = atoi(strchr(buffer, ':') + 1) / 1572864;
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  /* The last resort */
kusano 2b45e8
kusano 2b45e8
  if (size <= 0) {
kusano 2b45e8
    cpuid3 = cpuid(3);
kusano 2b45e8
 
kusano 2b45e8
    size = BITMASK(cpuid3, 16, 0xff) + 1;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  sgemm_p = 320 * size;
kusano 2b45e8
  dgemm_p = 160 * size;
kusano 2b45e8
  cgemm_p = 160 * size;
kusano 2b45e8
  zgemm_p =  80 * size;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_p =  80 * size;
kusano 2b45e8
  xgemm_p =  40 * size;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q *  4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q *  4)) - 15) & ~15;
kusano 2b45e8
  dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q *  8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q *  8)) - 15) & ~15;
kusano 2b45e8
  cgemm_r = (((BUFFER_SIZE - ((CGEMM_P * CGEMM_Q *  8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (CGEMM_Q *  8)) - 15) & ~15;
kusano 2b45e8
  zgemm_r = (((BUFFER_SIZE - ((ZGEMM_P * ZGEMM_Q * 16 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (ZGEMM_Q * 16)) - 15) & ~15;
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
  qgemm_r = (((BUFFER_SIZE - ((QGEMM_P * QGEMM_Q * 16 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (QGEMM_Q * 16)) - 15) & ~15;
kusano 2b45e8
  xgemm_r = (((BUFFER_SIZE - ((XGEMM_P * XGEMM_Q * 32 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (XGEMM_Q * 32)) - 15) & ~15;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(ARCH_MIPS64) 
kusano 2b45e8
/*
 * MIPS64: choose dgemm_r for Loongson 3A / 3B builds.
 *
 * Without SMP the larger value is always used.  With SMP the larger value
 * is used for a low thread count (1 thread on 3A, 1 or 2 threads on 3B) and
 * the smaller value otherwise.  When neither LOONGSON3A nor LOONGSON3B is
 * defined the function does nothing.
 */
void blas_set_parameter(void){
#if defined(LOONGSON3A)
#ifdef SMP
  /* one thread: 1024, several threads: 200 */
  dgemm_r = (blas_num_threads == 1) ? 1024 : 200;
#else
  dgemm_r = 1024;
#endif
#endif

#if defined(LOONGSON3B)
#ifdef SMP
  /* one or two threads: 640, more threads: 160 */
  dgemm_r = (blas_num_threads == 1 || blas_num_threads == 2) ? 640 : 160;
#else
  dgemm_r = 640;
#endif
#endif

}
kusano 2b45e8
#endif