|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef COMMON_H
|
|
kusano |
2b45e8 |
#define COMMON_H
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef __cplusplus
|
|
kusano |
2b45e8 |
extern "C" {
|
|
kusano |
2b45e8 |
/* Assume C declarations for C++ */
|
|
kusano |
2b45e8 |
#endif /* __cplusplus */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef _GNU_SOURCE
|
|
kusano |
2b45e8 |
#define _GNU_SOURCE
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef __USE_XOPEN
|
|
kusano |
2b45e8 |
#define __USE_XOPEN
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef __USE_SVID
|
|
kusano |
2b45e8 |
#define __USE_SVID
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef BUILD_KERNEL
|
|
kusano |
2b45e8 |
#include "config_kernel.h"
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#include "config.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#undef ENABLE_SSE_EXCEPTION
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
|
|
kusano |
2b45e8 |
#define SMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_Interix)
|
|
kusano |
2b45e8 |
#define WINDOWS_ABI
|
|
kusano |
2b45e8 |
#define OS_WINDOWS
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DOUBLE
|
|
kusano |
2b45e8 |
#define DOUBLE_DEFINED DOUBLE
|
|
kusano |
2b45e8 |
#undef DOUBLE
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(NOINCLUDE) && !defined(ASSEMBLER)
|
|
kusano |
2b45e8 |
#include <stdio.h>
|
|
kusano |
2b45e8 |
#include <stdlib.h>
|
|
kusano |
2b45e8 |
#include <string.h>
|
|
kusano |
2b45e8 |
#include <unistd.h>
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef OS_LINUX
|
|
kusano |
2b45e8 |
#include <malloc.h>
|
|
kusano |
2b45e8 |
#include <sched.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef OS_DARWIN
|
|
kusano |
2b45e8 |
#include <sched.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef OS_WINDOWS
|
|
kusano |
2b45e8 |
#ifdef ATOM
|
|
kusano |
2b45e8 |
#define GOTO_ATOM ATOM
|
|
kusano |
2b45e8 |
#undef ATOM
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#include <windows.h>
|
|
kusano |
2b45e8 |
#include <math.h>
|
|
kusano |
2b45e8 |
#ifdef GOTO_ATOM
|
|
kusano |
2b45e8 |
#define ATOM GOTO_ATOM
|
|
kusano |
2b45e8 |
#undef GOTO_ATOM
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#include <sys/mman.h>
|
|
kusano |
2b45e8 |
#include <sys/shm.h>
|
|
kusano |
2b45e8 |
#include <sys/time.h>
|
|
kusano |
2b45e8 |
#include <unistd.h>
|
|
kusano |
2b45e8 |
#include <math.h>
|
|
kusano |
2b45e8 |
#ifdef SMP
|
|
kusano |
2b45e8 |
#include <pthread.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_SUNOS)
|
|
kusano |
2b45e8 |
#include <thread.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef __DECC
|
|
kusano |
2b45e8 |
#include <c_asm.h>
|
|
kusano |
2b45e8 |
#include <machine/builtins.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
|
|
kusano |
2b45e8 |
#include <fenv.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
|
|
kusano |
2b45e8 |
#define DOUBLE DOUBLE_DEFINED
|
|
kusano |
2b45e8 |
#undef DOUBLE_DEFINED
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#undef DEBUG_INFO
|
|
kusano |
2b45e8 |
#define SMP_DEBUG
|
|
kusano |
2b45e8 |
#undef MALLOC_DEBUG
|
|
kusano |
2b45e8 |
#undef SMP_ALLOC_DEBUG
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ZERO
|
|
kusano |
2b45e8 |
#ifdef XDOUBLE
|
|
kusano |
2b45e8 |
#define ZERO 0.e0L
|
|
kusano |
2b45e8 |
#elif defined DOUBLE
|
|
kusano |
2b45e8 |
#define ZERO 0.e0
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define ZERO 0.e0f
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ONE
|
|
kusano |
2b45e8 |
#ifdef XDOUBLE
|
|
kusano |
2b45e8 |
#define ONE 1.e0L
|
|
kusano |
2b45e8 |
#elif defined DOUBLE
|
|
kusano |
2b45e8 |
#define ONE 1.e0
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define ONE 1.e0f
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ALLOCA_ALIGN 63UL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef NEEDBUNDERSCORE
|
|
kusano |
2b45e8 |
#define BLASFUNC(FUNC) FUNC##_
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define BLASFUNC(FUNC) FUNC
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#undef USE_PTHREAD_LOCK
|
|
kusano |
2b45e8 |
#undef USE_PTHREAD_SPINLOCK
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
|
|
kusano |
2b45e8 |
#error "You can't specify both LOCK operation!"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP
|
|
kusano |
2b45e8 |
#define USE_PTHREAD_LOCK
|
|
kusano |
2b45e8 |
#undef USE_PTHREAD_SPINLOCK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef OS_WINDOWS
|
|
kusano |
2b45e8 |
#undef USE_PTHREAD_LOCK
|
|
kusano |
2b45e8 |
#undef USE_PTHREAD_SPINLOCK
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(USE_PTHREAD_LOCK)
|
|
kusano |
2b45e8 |
#define LOCK_COMMAND(x) pthread_mutex_lock(x)
|
|
kusano |
2b45e8 |
#define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
|
|
kusano |
2b45e8 |
#elif defined(USE_PTHREAD_SPINLOCK)
|
|
kusano |
2b45e8 |
#ifndef ASSEMBLER
|
|
kusano |
2b45e8 |
typedef volatile int pthread_spinlock_t;
|
|
kusano |
2b45e8 |
int pthread_spin_lock (pthread_spinlock_t *__lock);
|
|
kusano |
2b45e8 |
int pthread_spin_unlock (pthread_spinlock_t *__lock);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#define LOCK_COMMAND(x) pthread_spin_lock(x)
|
|
kusano |
2b45e8 |
#define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define LOCK_COMMAND(x) blas_lock(x)
|
|
kusano |
2b45e8 |
#define UNLOCK_COMMAND(x) blas_unlock(x)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GOTO_SHMID 0x510510
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if 0
|
|
kusano |
2b45e8 |
#ifndef __CUDACC__
|
|
kusano |
2b45e8 |
#define __global__
|
|
kusano |
2b45e8 |
#define __device__
|
|
kusano |
2b45e8 |
#define __host__
|
|
kusano |
2b45e8 |
#define __shared__
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ASSEMBLER
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef QUAD_PRECISION
|
|
kusano |
2b45e8 |
/*
 * Representation of `xdouble` used when QUAD_PRECISION is defined:
 * an opaque pair of unsigned long words rather than a native
 * floating-point type.
 * NOTE(review): presumably the two words hold one 128-bit quad value on
 * LP64 targets -- confirm against common_quad.h.
 */
typedef struct {
  unsigned long x[2];
} xdouble;
|
|
kusano |
2b45e8 |
#elif defined EXPRECISION
|
|
kusano |
2b45e8 |
#define xdouble long double
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define xdouble double
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
|
kusano |
2b45e8 |
typedef long long BLASLONG;
|
|
kusano |
2b45e8 |
typedef unsigned long long BLASULONG;
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
typedef long BLASLONG;
|
|
kusano |
2b45e8 |
typedef unsigned long BLASULONG;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE64BITINT
|
|
kusano |
2b45e8 |
typedef BLASLONG blasint;
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
typedef int blasint;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#ifdef USE64BITINT
|
|
kusano |
2b45e8 |
#define INTSHIFT 3
|
|
kusano |
2b45e8 |
#define INTSIZE 8
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define INTSHIFT 2
|
|
kusano |
2b45e8 |
#define INTSIZE 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef XDOUBLE
|
|
kusano |
2b45e8 |
#define FLOAT xdouble
|
|
kusano |
2b45e8 |
#ifdef QUAD_PRECISION
|
|
kusano |
2b45e8 |
#define XFLOAT xidouble
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#ifdef QUAD_PRECISION
|
|
kusano |
2b45e8 |
#define SIZE 32
|
|
kusano |
2b45e8 |
#define BASE_SHIFT 5
|
|
kusano |
2b45e8 |
#define ZBASE_SHIFT 6
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SIZE 16
|
|
kusano |
2b45e8 |
#define BASE_SHIFT 4
|
|
kusano |
2b45e8 |
#define ZBASE_SHIFT 5
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#elif defined(DOUBLE)
|
|
kusano |
2b45e8 |
#define FLOAT double
|
|
kusano |
2b45e8 |
#define SIZE 8
|
|
kusano |
2b45e8 |
#define BASE_SHIFT 3
|
|
kusano |
2b45e8 |
#define ZBASE_SHIFT 4
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define FLOAT float
|
|
kusano |
2b45e8 |
#define SIZE 4
|
|
kusano |
2b45e8 |
#define BASE_SHIFT 2
|
|
kusano |
2b45e8 |
#define ZBASE_SHIFT 3
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef XFLOAT
|
|
kusano |
2b45e8 |
#define XFLOAT FLOAT
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef COMPLEX
|
|
kusano |
2b45e8 |
#define COMPSIZE 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define COMPSIZE 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(C_PGI) || defined(C_SUN)
|
|
kusano |
2b45e8 |
#define CREAL(X) (*((FLOAT *)&X + 0))
|
|
kusano |
2b45e8 |
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define CREAL __real__
|
|
kusano |
2b45e8 |
#define CIMAG __imag__
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define Address_H(x) (((x)+(1<<15))>>16)
|
|
kusano |
2b45e8 |
#define Address_L(x) ((x)-((Address_H(x))<<16))
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef MAX_CPU_NUMBER
|
|
kusano |
2b45e8 |
#define MAX_CPU_NUMBER 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_SUNOS)
|
|
kusano |
2b45e8 |
#define YIELDING thr_yield()
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_WINDOWS)
|
|
kusano |
2b45e8 |
#define YIELDING SwitchToThread()
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef YIELDING
|
|
kusano |
2b45e8 |
#define YIELDING sched_yield()
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef QUAD_PRECISION
|
|
kusano |
2b45e8 |
#include "common_quad.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_ALPHA
|
|
kusano |
2b45e8 |
#include "common_alpha.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_X86
|
|
kusano |
2b45e8 |
#include "common_x86.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_X86_64
|
|
kusano |
2b45e8 |
#include "common_x86_64.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_IA64
|
|
kusano |
2b45e8 |
#include "common_ia64.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_POWER
|
|
kusano |
2b45e8 |
#include "common_power.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef sparc
|
|
kusano |
2b45e8 |
#include "common_sparc.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_MIPS64
|
|
kusano |
2b45e8 |
#include "common_mips64.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef OS_LINUX
|
|
kusano |
2b45e8 |
#include "common_linux.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
|
kusano |
2b45e8 |
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#include "param.h"
|
|
kusano |
2b45e8 |
#include "common_param.h"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef STDERR
|
|
kusano |
2b45e8 |
#define STDERR stderr
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef MASK
|
|
kusano |
2b45e8 |
#define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(XDOUBLE) || defined(DOUBLE)
|
|
kusano |
2b45e8 |
#define FLOATRET FLOAT
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#ifdef NEED_F2CCONV
|
|
kusano |
2b45e8 |
#define FLOATRET double
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define FLOATRET float
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ASSEMBLER
|
|
kusano |
2b45e8 |
#ifndef NOINCLUDE
|
|
kusano |
2b45e8 |
/* Inclusion of a standard header file is needed for definition of __STDC_*
|
|
kusano |
2b45e8 |
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
|
|
kusano |
2b45e8 |
as a side effect of including either <features.h> or <stdc-predef.h>. */
|
|
kusano |
2b45e8 |
#include <stdio.h>
|
|
kusano |
2b45e8 |
#endif // NOINCLUDE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
|
kusano |
2b45e8 |
extension since version 3.0. If neither are available, use a compatible
|
|
kusano |
2b45e8 |
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
|
kusano |
2b45e8 |
#if defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || __GNUC__ >= 3
|
|
kusano |
2b45e8 |
#define OPENBLAS_COMPLEX_C99
|
|
kusano |
2b45e8 |
typedef float _Complex openblas_complex_float;
|
|
kusano |
2b45e8 |
typedef double _Complex openblas_complex_double;
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define OPENBLAS_COMPLEX_STRUCT
|
|
kusano |
2b45e8 |
typedef struct { float real, imag; } openblas_complex_float;
|
|
kusano |
2b45e8 |
typedef struct { double real, imag; } openblas_complex_double;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif // ASSEMBLER
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef IFLUSH
|
|
kusano |
2b45e8 |
#define IFLUSH
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef IFLUSH_HALF
|
|
kusano |
2b45e8 |
#define IFLUSH_HALF
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
|
|
kusano |
2b45e8 |
#undef USE_OPENMP
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ASSEMBLER
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef MIN
|
|
kusano |
2b45e8 |
/* Smaller of a and b (a is returned when they compare equal).  Both
   arguments are fully parenthesized so operator precedence inside an
   argument cannot change the comparison; each argument may still be
   evaluated twice, so avoid arguments with side effects. */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef MAX
|
|
kusano |
2b45e8 |
/* Larger of a and b (a is returned when they compare equal).  Both
   arguments are fully parenthesized so operator precedence inside an
   argument cannot change the comparison; each argument may still be
   evaluated twice, so avoid arguments with side effects. */
#define MAX(a,b) ((a) < (b) ? (b) : (a))
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(__FreeBSD__) || defined(__APPLE__)
|
|
kusano |
2b45e8 |
#define MAP_ANONYMOUS MAP_ANON
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Common Memory Management Routine */
|
|
kusano |
2b45e8 |
void blas_set_parameter(void);
|
|
kusano |
2b45e8 |
int blas_get_cpu_number(void);
|
|
kusano |
2b45e8 |
void *blas_memory_alloc (int);
|
|
kusano |
2b45e8 |
void blas_memory_free (void *);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int get_num_procs (void);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
|
|
kusano |
2b45e8 |
int get_num_nodes (void);
|
|
kusano |
2b45e8 |
int get_num_proc (int);
|
|
kusano |
2b45e8 |
int get_node_equal (void);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
void goto_set_num_threads(int);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
void gotoblas_affinity_init(void);
|
|
kusano |
2b45e8 |
void gotoblas_affinity_quit(void);
|
|
kusano |
2b45e8 |
void gotoblas_dynamic_init(void);
|
|
kusano |
2b45e8 |
void gotoblas_dynamic_quit(void);
|
|
kusano |
2b45e8 |
void gotoblas_profile_init(void);
|
|
kusano |
2b45e8 |
void gotoblas_profile_quit(void);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
|
|
kusano |
2b45e8 |
int omp_in_parallel(void);
|
|
kusano |
2b45e8 |
int omp_get_num_procs(void);
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#ifdef __ELF__
|
|
kusano |
2b45e8 |
int omp_in_parallel (void) __attribute__ ((weak));
|
|
kusano |
2b45e8 |
int omp_get_num_procs(void) __attribute__ ((weak));
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static __inline void blas_unlock(volatile BLASULONG *address){
|
|
kusano |
2b45e8 |
MB;
|
|
kusano |
2b45e8 |
*address = 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Read the environment variable named `env` and return its value parsed
 * as a decimal integer with atoi().
 *
 * Returns 0 when the variable is not set.  Because atoi() also yields 0
 * when no digits can be converted, an unset variable, a non-numeric value
 * and an explicit "0" are indistinguishable to the caller.
 *
 * The name is taken as `const char *` since it is only read; existing
 * callers passing `char *` or string literals are unaffected.
 */
static __inline int readenv(const char *env) {

  const char *value = getenv(env);

  if (value == NULL) {
    return 0;
  }

  return atoi(value);
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Store the reciprocal of the complex number (ar + i*ai) into b:
 * b[0] receives the real part and b[1] the imaginary part.
 *
 * When UNIT is defined the inputs are ignored and (ONE, ZERO) is stored.
 *
 * Otherwise the larger-magnitude component is used as the divisor of
 * `ratio`, so |ratio| <= 1 and the result is built from
 * 1 / (c * (1 + ratio^2)) instead of forming ar*ar + ai*ai directly.
 * The fabs variant is chosen to match FLOAT (fabsl / fabs / fabsf for
 * XDOUBLE / DOUBLE / default).
 *
 * No special handling is done for ar == ai == 0; the divisions are
 * performed as written.
 */
static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){

#ifndef UNIT
  FLOAT ratio, den;

  if (
#ifdef XDOUBLE
      (fabsl(ar)) >= (fabsl(ai))
#elif defined DOUBLE
      (fabs (ar)) >= (fabs (ai))
#else
      (fabsf(ar)) >= (fabsf(ai))
#endif
      ) {
    /* |ar| >= |ai|: scale by ar. */
    ratio = ai / ar;
    den   = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
    ar =  den;
    ai = -ratio * den;
  } else {
    /* |ai| > |ar|: scale by ai. */
    ratio = ar / ai;
    den   = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
    ar =  ratio * den;
    ai = -den;
  }
  b[0] = ar;
  b[1] = ai;
#else
  b[0] = ONE;
  b[1] = ZERO;
#endif

}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MALLOC_DEBUG
|
|
kusano |
2b45e8 |
void *blas_debug_alloc(int);
|
|
kusano |
2b45e8 |
void *blas_debug_free(void *);
|
|
kusano |
2b45e8 |
#undef malloc
|
|
kusano |
2b45e8 |
#undef free
|
|
kusano |
2b45e8 |
#define malloc(a) blas_debug_alloc(a)
|
|
kusano |
2b45e8 |
#define free(a) blas_debug_free (a)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef COPYOVERHEAD
|
|
kusano |
2b45e8 |
#define GEMMRETTYPE int
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Record of four double-precision measurements; this type becomes
 * GEMMRETTYPE when COPYOVERHEAD is defined.
 * NOTE(review): field meanings are inferred from their names only
 * (outer/inner copy cost, kernel cost, achieved MFLOPS) -- confirm
 * against the code that fills them in.
 */
typedef struct {
  double outercopy;
  double innercopy;
  double kernel;
  double mflops;
} copyoverhead_t;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMMRETTYPE copyoverhead_t
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef BUILD_KERNEL
|
|
kusano |
2b45e8 |
#define KNAME(A, B) A
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define KNAME(A, B) A##B
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#include "common_interface.h"
|
|
kusano |
2b45e8 |
#ifdef SANITY_CHECK
|
|
kusano |
2b45e8 |
#include "common_reference.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#include "common_macro.h"
|
|
kusano |
2b45e8 |
#include "common_level1.h"
|
|
kusano |
2b45e8 |
#include "common_level2.h"
|
|
kusano |
2b45e8 |
#include "common_level3.h"
|
|
kusano |
2b45e8 |
#include "common_lapack.h"
|
|
kusano |
2b45e8 |
#ifdef CBLAS
|
|
kusano |
2b45e8 |
#include "cblas.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ASSEMBLER
|
|
kusano |
2b45e8 |
#if 0
|
|
kusano |
2b45e8 |
#include "symcopy.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
|
|
kusano |
2b45e8 |
#error Both SMP_SERVER and SMP_ONDEMAND are specified.
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
|
|
kusano |
2b45e8 |
#include "common_thread.h"
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define INFO_NUM 99
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef DEFAULT_CPU_NUMBER
|
|
kusano |
2b45e8 |
#define DEFAULT_CPU_NUMBER 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef IDEBUG_START
|
|
kusano |
2b45e8 |
#define IDEBUG_START
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef IDEBUG_END
|
|
kusano |
2b45e8 |
#define IDEBUG_END
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * One entry of function_profile_table[], accumulated by
 * FUNCTION_PROFILE_END when gotoblas_profile is non-zero:
 *   calls   - incremented once per profiled call
 *   fops    - accumulated (COMP) * (OPS) / NUMOPT
 *   area    - accumulated SIZE * COMPSIZE * (AREA)
 *   cycles  - accumulated rpcc() difference between start and end
 *   tcycles - same difference, multiplied by blas_cpu_number under SMP
 * `func` is not written by the macros in this header.
 * NOTE(review): presumably `func` identifies the profiled routine -- confirm.
 */
typedef struct {
  int func;
  unsigned long long calls, fops, area, cycles, tcycles;
} func_profile_t;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
extern func_profile_t function_profile_table[];
|
|
kusano |
2b45e8 |
extern int gotoblas_profile;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef XDOUBLE
|
|
kusano |
2b45e8 |
#define NUMOPT QNUMOPT
|
|
kusano |
2b45e8 |
#elif defined DOUBLE
|
|
kusano |
2b45e8 |
#define NUMOPT DNUMOPT
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define NUMOPT SNUMOPT
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
|
|
kusano |
2b45e8 |
#ifdef SMP
|
|
kusano |
2b45e8 |
/*
 * SMP variant.  Closes the scope opened by FUNCTION_PROFILE_START() (the
 * final unmatched '}' is intentional) and, when gotoblas_profile is
 * non-zero, adds this call to function_profile_table[PROFILE_FUNC_NAME].
 * tcycles is charged with the elapsed rpcc() count multiplied by
 * blas_cpu_number.  Relies on profile_start / profile_end declared by
 * FUNCTION_PROFILE_START().
 */
#define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
	if (gotoblas_profile) { \
	  profile_end = rpcc(); \
	  function_profile_table[PROFILE_FUNC_NAME].calls ++; \
	  function_profile_table[PROFILE_FUNC_NAME].area    += SIZE * COMPSIZE * (AREA); \
	  function_profile_table[PROFILE_FUNC_NAME].fops    += (COMP) * (OPS) / NUMOPT; \
	  function_profile_table[PROFILE_FUNC_NAME].cycles  += (profile_end - profile_start); \
	  function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
	} \
	}
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
/*
 * Non-SMP variant.  Closes the scope opened by FUNCTION_PROFILE_START()
 * (the final unmatched '}' is intentional) and, when gotoblas_profile is
 * non-zero, adds this call to function_profile_table[PROFILE_FUNC_NAME].
 * cycles and tcycles both receive the same elapsed rpcc() count.  Relies
 * on profile_start / profile_end declared by FUNCTION_PROFILE_START().
 */
#define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
	if (gotoblas_profile) { \
	  profile_end = rpcc(); \
	  function_profile_table[PROFILE_FUNC_NAME].calls ++; \
	  function_profile_table[PROFILE_FUNC_NAME].area    += SIZE * COMPSIZE * (AREA); \
	  function_profile_table[PROFILE_FUNC_NAME].fops    += (COMP) * (OPS) / NUMOPT; \
	  function_profile_table[PROFILE_FUNC_NAME].cycles  += (profile_end - profile_start); \
	  function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
	} \
	}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define FUNCTION_PROFILE_START()
|
|
kusano |
2b45e8 |
#define FUNCTION_PROFILE_END(COMP, AREA, OPS)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if 1
|
|
kusano |
2b45e8 |
#define PRINT_DEBUG_CNAME
|
|
kusano |
2b45e8 |
#define PRINT_DEBUG_NAME
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define PRINT_DEBUG_CNAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
|
|
kusano |
2b45e8 |
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef __cplusplus
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif /* __cplusplus */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|