kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#ifndef COMMON_H
kusano 2b45e8
#define COMMON_H
kusano 2b45e8
kusano 2b45e8
#ifdef __cplusplus
kusano 2b45e8
extern "C" {
kusano 2b45e8
	/* Assume C declarations for C++ */
kusano 2b45e8
#endif  /* __cplusplus */
kusano 2b45e8
kusano 2b45e8
#ifndef _GNU_SOURCE
kusano 2b45e8
#define _GNU_SOURCE
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef __USE_XOPEN
kusano 2b45e8
#define __USE_XOPEN
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef __USE_SVID
kusano 2b45e8
#define __USE_SVID
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef BUILD_KERNEL
kusano 2b45e8
#include "config_kernel.h"
kusano 2b45e8
#else
kusano 2b45e8
#include "config.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#undef ENABLE_SSE_EXCEPTION
kusano 2b45e8
kusano 2b45e8
#if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
kusano 2b45e8
#define SMP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_Interix)
kusano 2b45e8
#define WINDOWS_ABI
kusano 2b45e8
#define OS_WINDOWS
kusano 2b45e8
kusano 2b45e8
#ifdef DOUBLE
kusano 2b45e8
#define DOUBLE_DEFINED DOUBLE
kusano 2b45e8
#undef  DOUBLE
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if !defined(NOINCLUDE) && !defined(ASSEMBLER)
kusano 2b45e8
#include <stdio.h>
kusano 2b45e8
#include <stdlib.h>
kusano 2b45e8
#include <string.h>
kusano 2b45e8
#include <unistd.h>
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
#include <malloc.h>
kusano 2b45e8
#include <sched.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_DARWIN
kusano 2b45e8
#include <sched.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_WINDOWS
kusano 2b45e8
#ifdef  ATOM
kusano 2b45e8
#define GOTO_ATOM ATOM
kusano 2b45e8
#undef  ATOM
kusano 2b45e8
#endif
kusano 2b45e8
#include <windows.h>
kusano 2b45e8
#include <math.h>
kusano 2b45e8
#ifdef  GOTO_ATOM
kusano 2b45e8
#define ATOM GOTO_ATOM
kusano 2b45e8
#undef  GOTO_ATOM
kusano 2b45e8
#endif
kusano 2b45e8
#else
kusano 2b45e8
#include <sys/mman.h>
kusano 2b45e8
#include <sys/shm.h>
kusano 2b45e8
#include <sys/time.h>
kusano 2b45e8
#include <unistd.h>
kusano 2b45e8
#include <math.h>
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
#include <pthread.h>
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_SUNOS)
kusano 2b45e8
#include <thread.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef __DECC
kusano 2b45e8
#include <c_asm.h>
kusano 2b45e8
#include <machine/builtins.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
kusano 2b45e8
#include <fenv.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
kusano 2b45e8
#define DOUBLE DOUBLE_DEFINED
kusano 2b45e8
#undef DOUBLE_DEFINED
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#undef DEBUG_INFO
kusano 2b45e8
#define SMP_DEBUG
kusano 2b45e8
#undef MALLOC_DEBUG
kusano 2b45e8
#undef SMP_ALLOC_DEBUG
kusano 2b45e8
kusano 2b45e8
#ifndef ZERO
kusano 2b45e8
#ifdef XDOUBLE
kusano 2b45e8
#define ZERO  0.e0L
kusano 2b45e8
#elif defined DOUBLE
kusano 2b45e8
#define ZERO  0.e0
kusano 2b45e8
#else
kusano 2b45e8
#define ZERO  0.e0f
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef ONE
kusano 2b45e8
#ifdef XDOUBLE
kusano 2b45e8
#define ONE  1.e0L
kusano 2b45e8
#elif defined DOUBLE
kusano 2b45e8
#define ONE  1.e0
kusano 2b45e8
#else
kusano 2b45e8
#define ONE  1.e0f
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Shift a right by b bits, then keep only the bits selected by mask c. */
#define BITMASK(a, b, c) ((c) & ((a) >> (b)))
kusano 2b45e8
kusano 2b45e8
#define ALLOCA_ALIGN 63UL
kusano 2b45e8
kusano 2b45e8
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
kusano 2b45e8
kusano 2b45e8
#ifdef NEEDBUNDERSCORE
kusano 2b45e8
#define BLASFUNC(FUNC) FUNC##_
kusano 2b45e8
#else
kusano 2b45e8
#define BLASFUNC(FUNC) FUNC
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#undef	USE_PTHREAD_LOCK
kusano 2b45e8
#undef	USE_PTHREAD_SPINLOCK
kusano 2b45e8
kusano 2b45e8
#if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
kusano 2b45e8
#error "You can't specify both LOCK operation!"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
#define USE_PTHREAD_LOCK
kusano 2b45e8
#undef	USE_PTHREAD_SPINLOCK
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_WINDOWS
kusano 2b45e8
#undef	USE_PTHREAD_LOCK
kusano 2b45e8
#undef	USE_PTHREAD_SPINLOCK
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if   defined(USE_PTHREAD_LOCK)
kusano 2b45e8
#define   LOCK_COMMAND(x)   pthread_mutex_lock(x)
kusano 2b45e8
#define UNLOCK_COMMAND(x)   pthread_mutex_unlock(x)
kusano 2b45e8
#elif defined(USE_PTHREAD_SPINLOCK)
kusano 2b45e8
#ifndef ASSEMBLER
kusano 2b45e8
typedef volatile int pthread_spinlock_t;
kusano 2b45e8
int pthread_spin_lock (pthread_spinlock_t *__lock);
kusano 2b45e8
int pthread_spin_unlock (pthread_spinlock_t *__lock);
kusano 2b45e8
#endif
kusano 2b45e8
#define   LOCK_COMMAND(x)   pthread_spin_lock(x)
kusano 2b45e8
#define UNLOCK_COMMAND(x)   pthread_spin_unlock(x)
kusano 2b45e8
#else
kusano 2b45e8
#define   LOCK_COMMAND(x)   blas_lock(x)
kusano 2b45e8
#define UNLOCK_COMMAND(x)   blas_unlock(x)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#define GOTO_SHMID	0x510510
kusano 2b45e8
kusano 2b45e8
#if 0
kusano 2b45e8
#ifndef __CUDACC__
kusano 2b45e8
#define __global__
kusano 2b45e8
#define __device__
kusano 2b45e8
#define __host__
kusano 2b45e8
#define __shared__
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef ASSEMBLER
kusano 2b45e8
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
/* Representation of xdouble in QUAD_PRECISION builds: the value is carried
   as two unsigned long words rather than as a native floating-point type. */
typedef struct {
  unsigned long x[2];
}  xdouble;
kusano 2b45e8
#elif defined EXPRECISION
kusano 2b45e8
#define xdouble long double
kusano 2b45e8
#else
kusano 2b45e8
#define xdouble double
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_WINDOWS) && defined(__64BIT__)
kusano 2b45e8
typedef long long BLASLONG;
kusano 2b45e8
typedef unsigned long long BLASULONG;
kusano 2b45e8
#else
kusano 2b45e8
typedef long BLASLONG;
kusano 2b45e8
typedef unsigned long BLASULONG;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef USE64BITINT
kusano 2b45e8
typedef BLASLONG blasint;
kusano 2b45e8
#else
kusano 2b45e8
typedef int blasint;
kusano 2b45e8
#endif
kusano 2b45e8
#else
kusano 2b45e8
#ifdef USE64BITINT
kusano 2b45e8
#define INTSHIFT	3
kusano 2b45e8
#define INTSIZE		8
kusano 2b45e8
#else
kusano 2b45e8
#define INTSHIFT	2
kusano 2b45e8
#define INTSIZE		4
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef XDOUBLE
kusano 2b45e8
#define FLOAT	xdouble
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
#define XFLOAT	xidouble
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
#define SIZE	32
kusano 2b45e8
#define  BASE_SHIFT 5
kusano 2b45e8
#define ZBASE_SHIFT 6
kusano 2b45e8
#else
kusano 2b45e8
#define SIZE	16
kusano 2b45e8
#define  BASE_SHIFT 4
kusano 2b45e8
#define ZBASE_SHIFT 5
kusano 2b45e8
#endif
kusano 2b45e8
#elif defined(DOUBLE)
kusano 2b45e8
#define FLOAT	double
kusano 2b45e8
#define SIZE	8
kusano 2b45e8
#define  BASE_SHIFT 3
kusano 2b45e8
#define ZBASE_SHIFT 4
kusano 2b45e8
#else
kusano 2b45e8
#define FLOAT	float
kusano 2b45e8
#define SIZE    4
kusano 2b45e8
#define  BASE_SHIFT 2
kusano 2b45e8
#define ZBASE_SHIFT 3
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef XFLOAT
kusano 2b45e8
#define XFLOAT	FLOAT
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef COMPLEX
kusano 2b45e8
#define COMPSIZE  1
kusano 2b45e8
#else
kusano 2b45e8
#define COMPSIZE  2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(C_PGI) || defined(C_SUN)
kusano 2b45e8
#define CREAL(X)	(*((FLOAT *)&X + 0))
kusano 2b45e8
#define CIMAG(X)	(*((FLOAT *)&X + 1))
kusano 2b45e8
#else
kusano 2b45e8
#define CREAL	__real__
kusano 2b45e8
#define CIMAG	__imag__
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Split an integer x into a high part and a 16-bit low remainder so that
   (Address_H(x) << 16) + Address_L(x) == x.  Address_H adds 1<<15 before
   shifting, i.e. it rounds to nearest, so for non-negative x the remainder
   Address_L(x) is signed and lies in [-32768, 32767].  x is evaluated more
   than once; do not pass expressions with side effects. */
#define Address_H(x) (((x)+(1<<15))>>16)
#define Address_L(x) ((x)-((Address_H(x))<<16))
kusano 2b45e8
kusano 2b45e8
#ifndef MAX_CPU_NUMBER
kusano 2b45e8
#define MAX_CPU_NUMBER 2
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_SUNOS)
kusano 2b45e8
#define YIELDING	thr_yield()
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_WINDOWS)
kusano 2b45e8
#define YIELDING	SwitchToThread()
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef YIELDING
kusano 2b45e8
#define YIELDING	sched_yield()
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef QUAD_PRECISION
kusano 2b45e8
#include "common_quad.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ARCH_ALPHA
kusano 2b45e8
#include "common_alpha.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ARCH_X86
kusano 2b45e8
#include "common_x86.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ARCH_X86_64
kusano 2b45e8
#include "common_x86_64.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ARCH_IA64
kusano 2b45e8
#include "common_ia64.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ARCH_POWER
kusano 2b45e8
#include "common_power.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef sparc
kusano 2b45e8
#include "common_sparc.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ARCH_MIPS64
kusano 2b45e8
#include "common_mips64.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
#include "common_linux.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
kusano 2b45e8
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
kusano 2b45e8
kusano 2b45e8
#include "param.h"
kusano 2b45e8
#include "common_param.h"
kusano 2b45e8
kusano 2b45e8
#ifndef STDERR
kusano 2b45e8
#define STDERR stderr
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef MASK
kusano 2b45e8
/* Round a up to the next multiple of b.  The bit trick is only valid when
   b is a power of two.  Both arguments are evaluated more than once. */
#define MASK(a, b) (~((b) - 1) & ((a) + ((b) - 1)))
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(XDOUBLE) || defined(DOUBLE)
kusano 2b45e8
#define FLOATRET	FLOAT
kusano 2b45e8
#else
kusano 2b45e8
#ifdef NEED_F2CCONV
kusano 2b45e8
#define FLOATRET	double
kusano 2b45e8
#else
kusano 2b45e8
#define FLOATRET	float
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef ASSEMBLER
kusano 2b45e8
#ifndef NOINCLUDE
kusano 2b45e8
/* Inclusion of a standard header file is needed for definition of __STDC_*
kusano 2b45e8
   predefined macros with some compilers (e.g. GCC 4.7 on Linux).  This occurs
kusano 2b45e8
   as a side effect of including either <features.h> or <stdc-predef.h>. */
kusano 2b45e8
#include <stdio.h>
kusano 2b45e8
#endif  // NOINCLUDE
kusano 2b45e8
kusano 2b45e8
/* C99 supports complex floating numbers natively, which GCC also offers as an
kusano 2b45e8
   extension since version 3.0.  If neither are available, use a compatible
kusano 2b45e8
   structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
kusano 2b45e8
/* Select the representation of single/double complex values.
   OPENBLAS_COMPLEX_C99 is defined when the native _Complex types are used;
   OPENBLAS_COMPLEX_STRUCT is defined when the {real, imag} struct fallback
   is used instead.  The version macros are tested with defined() first so
   the check is also clean under -Wundef (an undefined macro evaluates to 0
   in #if, so the selected branch is unchanged). */
#if defined(__STDC_IEC_559_COMPLEX__) || \
    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
    (defined(__GNUC__) && __GNUC__ >= 3)
  #define OPENBLAS_COMPLEX_C99
  typedef float _Complex openblas_complex_float;
  typedef double _Complex openblas_complex_double;
#else
  #define OPENBLAS_COMPLEX_STRUCT
  typedef struct { float real, imag; } openblas_complex_float;
  typedef struct { double real, imag; } openblas_complex_double;
#endif
kusano 2b45e8
#endif  // ASSEMBLER
kusano 2b45e8
kusano 2b45e8
#ifndef IFLUSH
kusano 2b45e8
#define IFLUSH
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef IFLUSH_HALF
kusano 2b45e8
#define IFLUSH_HALF
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
kusano 2b45e8
#ifdef USE_OPENMP
kusano 2b45e8
#undef USE_OPENMP
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef ASSEMBLER
kusano 2b45e8
kusano 2b45e8
#ifndef MIN
/* Smaller of a and b (returns a when they compare equal).  Arguments are
   fully parenthesised so operator expressions such as MIN(x & m, y) group
   as intended.  Each argument may be evaluated twice; avoid side effects. */
#define MIN(a,b)   ((a) > (b) ? (b) : (a))
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef MAX
/* Larger of a and b (returns a when they compare equal).  Arguments are
   fully parenthesised so operator expressions such as MAX(x & m, y) group
   as intended.  Each argument may be evaluated twice; avoid side effects. */
#define MAX(a,b)   ((a) < (b) ? (b) : (a))
#endif
kusano 2b45e8
kusano 2b45e8
/* In-place upper-casing of an ASCII character lvalue: any value above 0x60
   (i.e. from 'a' upward) has 0x20 subtracted.  There is no upper bound
   check, so codes above 'z' are shifted as well.  Expands to a brace block. */
#define TOUPPER(a) {if (0x60 < (a)) { (a) -= 0x20; }}
kusano 2b45e8
kusano 2b45e8
#if defined(__FreeBSD__) || defined(__APPLE__)
kusano 2b45e8
#define MAP_ANONYMOUS MAP_ANON
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Common Memory Management Routine */
kusano 2b45e8
void  blas_set_parameter(void);
kusano 2b45e8
int   blas_get_cpu_number(void);
kusano 2b45e8
void *blas_memory_alloc  (int);
kusano 2b45e8
void  blas_memory_free   (void *);
kusano 2b45e8
kusano 2b45e8
int  get_num_procs (void);
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
kusano 2b45e8
int  get_num_nodes (void);
kusano 2b45e8
int get_num_proc   (int);
kusano 2b45e8
int get_node_equal (void);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
void goto_set_num_threads(int);
kusano 2b45e8
kusano 2b45e8
void gotoblas_affinity_init(void);
kusano 2b45e8
void gotoblas_affinity_quit(void);
kusano 2b45e8
void gotoblas_dynamic_init(void);
kusano 2b45e8
void gotoblas_dynamic_quit(void);
kusano 2b45e8
void gotoblas_profile_init(void);
kusano 2b45e8
void gotoblas_profile_quit(void);
kusano 2b45e8
kusano 2b45e8
#ifdef USE_OPENMP
kusano 2b45e8
int omp_in_parallel(void);
kusano 2b45e8
int omp_get_num_procs(void);
kusano 2b45e8
#else
kusano 2b45e8
#ifdef __ELF__
kusano 2b45e8
int omp_in_parallel  (void) __attribute__ ((weak));
kusano 2b45e8
int omp_get_num_procs(void) __attribute__ ((weak));
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
static __inline void blas_unlock(volatile BLASULONG *address){
kusano 2b45e8
  MB;
kusano 2b45e8
  *address = 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/*
 * Read an environment variable as an integer.
 *
 * env: name of the variable, passed straight to getenv().
 *
 * Returns 0 when the variable is not set; otherwise the result of atoi()
 * on its value, so text that does not start with a number also yields 0.
 */
static __inline int readenv(char *env) {
  char *p = getenv(env);

  return (p == NULL) ? 0 : atoi(p);
}
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
kusano 2b45e8
kusano 2b45e8
/*
 * Write the reciprocal of the complex number (ar + i*ai) to b[0] (real
 * part) and b[1] (imaginary part).
 *
 * The division is scaled by whichever of ar / ai has the larger magnitude:
 * the smaller component is divided by the larger one and the result is
 * built from that ratio, so ar*ar + ai*ai is never formed directly
 * (presumably to limit intermediate overflow/underflow).
 *
 * When UNIT is defined the inputs are ignored and (ONE, ZERO) is stored.
 * No check is made for ar == ai == 0.
 */
static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){

#ifndef UNIT
  FLOAT ratio, den;

  if (
#ifdef XDOUBLE
      (fabsl(ar)) >= (fabsl(ai))
#elif defined DOUBLE
      (fabs (ar)) >= (fabs (ai))
#else
      (fabsf(ar)) >= (fabsf(ai))
#endif
      ) {
    /* |ar| >= |ai|: den = ar / (ar^2 + ai^2) */
    ratio = ai / ar;
    den   = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
    ar =  den;
    ai = -ratio * den;
  } else {
    /* |ar| < |ai|: den = ai / (ar^2 + ai^2) */
    ratio = ar / ai;
    den   = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
    ar =  ratio * den;
    ai = -den;
  }
  b[0] = ar;
  b[1] = ai;
#else
  b[0] = ONE;
  b[1] = ZERO;
#endif

}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef MALLOC_DEBUG
kusano 2b45e8
void *blas_debug_alloc(int);
kusano 2b45e8
void *blas_debug_free(void *);
kusano 2b45e8
#undef malloc
kusano 2b45e8
#undef free
kusano 2b45e8
#define malloc(a) blas_debug_alloc(a)
kusano 2b45e8
#define free(a)   blas_debug_free (a)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef COPYOVERHEAD
kusano 2b45e8
#define GEMMRETTYPE  int
kusano 2b45e8
#else
kusano 2b45e8
kusano 2b45e8
/* Value type used as GEMMRETTYPE when COPYOVERHEAD is defined.
   NOTE(review): the fields look like per-phase timings (outer copy, inner
   copy, kernel) plus an MFLOPS figure -- confirm units with the code that
   fills them in. */
typedef struct {
  double outercopy;
  double innercopy;
  double kernel;
  double mflops;
} copyoverhead_t;
kusano 2b45e8
kusano 2b45e8
#define GEMMRETTYPE  copyoverhead_t
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef BUILD_KERNEL
kusano 2b45e8
#define KNAME(A, B) A
kusano 2b45e8
#else
kusano 2b45e8
#define KNAME(A, B) A##B
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#include "common_interface.h"
kusano 2b45e8
#ifdef SANITY_CHECK
kusano 2b45e8
#include "common_reference.h"
kusano 2b45e8
#endif
kusano 2b45e8
#include "common_macro.h"
kusano 2b45e8
#include "common_level1.h"
kusano 2b45e8
#include "common_level2.h"
kusano 2b45e8
#include "common_level3.h"
kusano 2b45e8
#include "common_lapack.h"
kusano 2b45e8
#ifdef CBLAS
kusano 2b45e8
#include "cblas.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef ASSEMBLER
kusano 2b45e8
#if 0
kusano 2b45e8
#include "symcopy.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
kusano 2b45e8
#error Both SMP_SERVER and SMP_ONDEMAND are specified.
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
kusano 2b45e8
#include "common_thread.h"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#define INFO_NUM 99
kusano 2b45e8
kusano 2b45e8
#ifndef DEFAULT_CPU_NUMBER
kusano 2b45e8
#define DEFAULT_CPU_NUMBER 4
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef IDEBUG_START
kusano 2b45e8
#define IDEBUG_START
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef IDEBUG_END
kusano 2b45e8
#define IDEBUG_END
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
kusano 2b45e8
kusano 2b45e8
/* One entry of function_profile_table[].  FUNCTION_PROFILE_END() bumps
   calls by one and accumulates area, fops, cycles and tcycles for the entry
   indexed by PROFILE_FUNC_NAME; func is not touched by that macro. */
typedef struct {
  int func;
  unsigned long long calls, fops, area, cycles, tcycles;
} func_profile_t;
kusano 2b45e8
kusano 2b45e8
extern func_profile_t function_profile_table[];
kusano 2b45e8
extern int gotoblas_profile;
kusano 2b45e8
kusano 2b45e8
#ifdef XDOUBLE
kusano 2b45e8
#define NUMOPT	QNUMOPT
kusano 2b45e8
#elif defined DOUBLE
kusano 2b45e8
#define NUMOPT	DNUMOPT
kusano 2b45e8
#else
kusano 2b45e8
#define NUMOPT	SNUMOPT
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/*
 * Profiling brackets.  FUNCTION_PROFILE_START() opens a brace scope and
 * records rpcc() in profile_start; FUNCTION_PROFILE_END() closes that scope,
 * so the two must be used as a pair within the same function.
 *
 * When gotoblas_profile is non-zero, FUNCTION_PROFILE_END(COMP, AREA, OPS)
 * updates function_profile_table[PROFILE_FUNC_NAME]:
 *   calls   += 1
 *   area    += SIZE * COMPSIZE * AREA
 *   fops    += COMP * OPS / NUMOPT
 *   cycles  += elapsed rpcc() ticks
 *   tcycles += elapsed ticks, multiplied by blas_cpu_number in SMP builds
 *
 * PROFILE_FUNC_NAME, rpcc() and blas_cpu_number are not defined in this
 * header; the including file must provide them.
 */
#define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
#ifdef SMP
#define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
	if (gotoblas_profile) { \
	profile_end = rpcc(); \
	function_profile_table[PROFILE_FUNC_NAME].calls ++; \
	function_profile_table[PROFILE_FUNC_NAME].area    += SIZE * COMPSIZE * (AREA); \
	function_profile_table[PROFILE_FUNC_NAME].fops    += (COMP) * (OPS) / NUMOPT; \
	function_profile_table[PROFILE_FUNC_NAME].cycles  += (profile_end - profile_start); \
	function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
	} \
	}
#else
#define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
	if (gotoblas_profile) { \
	profile_end = rpcc(); \
	function_profile_table[PROFILE_FUNC_NAME].calls ++; \
	function_profile_table[PROFILE_FUNC_NAME].area    += SIZE * COMPSIZE * (AREA); \
	function_profile_table[PROFILE_FUNC_NAME].fops    += (COMP) * (OPS) / NUMOPT; \
	function_profile_table[PROFILE_FUNC_NAME].cycles  += (profile_end - profile_start); \
	function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
	} \
	}
#endif
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
#define FUNCTION_PROFILE_START()
kusano 2b45e8
#define FUNCTION_PROFILE_END(COMP, AREA, OPS)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if 1
kusano 2b45e8
#define PRINT_DEBUG_CNAME
kusano 2b45e8
#define PRINT_DEBUG_NAME
kusano 2b45e8
#else
kusano 2b45e8
#define PRINT_DEBUG_CNAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
kusano 2b45e8
#define PRINT_DEBUG_NAME  if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef __cplusplus
kusano 2b45e8
}
kusano 2b45e8
     
kusano 2b45e8
#endif  /* __cplusplus */
kusano 2b45e8
kusano 2b45e8
#endif