|
kusano |
2b45e8 |
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef COMMON_THREAD
|
|
kusano |
2b45e8 |
#define COMMON_THREAD
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
/* OpenMP runtime API; num_cpu_avail() below uses omp_in_parallel() and
   omp_get_max_threads(). */
#include <omp.h>
/* Called by num_cpu_avail() with the value of omp_get_max_threads() when it
   differs from blas_cpu_number. Defined in another translation unit. */
extern void goto_set_num_threads(int nthreads);
#endif

/* Basic Thread Debugging */
/* Forced off here; the SMP_DEBUG-only member of blas_queue_t is compiled out. */
#undef SMP_DEBUG

/* Thread Timing Debugging */
/* Forced off here; the TIMING_DEBUG-only member of blas_queue_t is compiled out. */
#undef TIMING_DEBUG

/* Global Parameter */
/* Thread count consulted and returned by num_cpu_avail(). */
extern int blas_cpu_number;
/* NOTE(review): not referenced in this header; presumably the configured
   thread limit -- confirm against the definition. */
extern int blas_num_threads;
/* NOTE(review): not referenced in this header; presumably records whether the
   library was linked with OpenMP -- confirm against the definition. */
extern int blas_omp_linked;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Bit flags and bit fields packed into one unsigned word.
 * NOTE(review): presumably the `mode` value carried by blas_queue_t and the
 * *_thread() drivers -- confirm against the callers.
 */

/* Single-bit flags in the high bits. */
#define BLAS_LEGACY		0x8000U
#define BLAS_PTHREAD		0x4000U
#define BLAS_NODE		0x2000U

/* Precision: BLAS_PREC is the mask (bits 0-1); the values below fit inside it. */
#define BLAS_PREC		0x0003U
#define BLAS_SINGLE		0x0000U
#define BLAS_DOUBLE		0x0001U
#define BLAS_XDOUBLE		0x0002U

/* Real/complex selector (bit 2). */
#define BLAS_REAL		0x0000U
#define BLAS_COMPLEX		0x0004U

/* Operation on A: BLAS_TRANSA is the mask; values are (0..3) << BLAS_TRANSA_SHIFT. */
#define BLAS_TRANSA		0x0030U	/* 2bit */
#define BLAS_TRANSA_N		0x0000U
#define BLAS_TRANSA_T		0x0010U
#define BLAS_TRANSA_R		0x0020U
#define BLAS_TRANSA_C		0x0030U
#define BLAS_TRANSA_SHIFT	4

/* Operation on B: BLAS_TRANSB is the mask; values are (0..3) << BLAS_TRANSB_SHIFT. */
#define BLAS_TRANSB		0x0300U	/* 2bit */
#define BLAS_TRANSB_N		0x0000U
#define BLAS_TRANSB_T		0x0100U
#define BLAS_TRANSB_R		0x0200U
#define BLAS_TRANSB_C		0x0300U
#define BLAS_TRANSB_SHIFT	8

/* Single-bit fields, each paired with its bit position. */
#define BLAS_RSIDE		0x0400U
#define BLAS_RSIDE_SHIFT	10
#define BLAS_UPLO		0x0800U
#define BLAS_UPLO_SHIFT		11

/* Status codes. NOTE(review): presumably stored in blas_queue_t.status -- confirm. */
#define BLAS_STATUS_NOTYET	0
#define BLAS_STATUS_QUEUED	1
#define BLAS_STATUS_RUNNING	2
#define BLAS_STATUS_FINISHED	4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
typedef struct blas_queue {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
void *routine;
|
|
kusano |
2b45e8 |
BLASLONG position;
|
|
kusano |
2b45e8 |
BLASLONG assigned;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_arg_t *args;
|
|
kusano |
2b45e8 |
void *range_m;
|
|
kusano |
2b45e8 |
void *range_n;
|
|
kusano |
2b45e8 |
void *sa, *sb;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
struct blas_queue *next;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined( __WIN32__) || defined(__CYGWIN32__)
|
|
kusano |
2b45e8 |
CRITICAL_SECTION lock;
|
|
kusano |
2b45e8 |
HANDLE finish;
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
pthread_mutex_t lock;
|
|
kusano |
2b45e8 |
pthread_cond_t finished;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int mode, status;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef CONSISTENT_FPCSR
|
|
kusano |
2b45e8 |
unsigned int sse_mode, x87_mode;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_DEBUG
|
|
kusano |
2b45e8 |
int num;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
unsigned int clocks;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
} blas_queue_t;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_SERVER
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Declared only when SMP_SERVER is defined; the definition lives in another
   translation unit. NOTE(review): presumably non-zero once the thread server
   has been started -- confirm against the definition. */
extern int blas_server_avail;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static __inline int num_cpu_avail(int level) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
|
|
kusano |
2b45e8 |
int openmp_nthreads=0;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((blas_cpu_number == 1)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
|
|
kusano |
2b45e8 |
|| omp_in_parallel()
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
) return 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
|
|
kusano |
2b45e8 |
openmp_nthreads=omp_get_max_threads();
|
|
kusano |
2b45e8 |
if (blas_cpu_number != openmp_nthreads) {
|
|
kusano |
2b45e8 |
goto_set_num_threads(openmp_nthreads);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return blas_cpu_number;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Reset the pointer members of a queue entry: sa, sb and next become NULL.
 * No other member is touched. `queue` is dereferenced unconditionally, so it
 * must not be NULL.
 */
static __inline void blas_queue_init(blas_queue_t *queue) {
  queue->sa   = NULL;
  queue->sb   = NULL;
  queue->next = NULL;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int blas_thread_init(void);
|
|
kusano |
2b45e8 |
int BLASFUNC(blas_thread_shutdown)(void);
|
|
kusano |
2b45e8 |
int exec_blas(BLASLONG, blas_queue_t *);
|
|
kusano |
2b45e8 |
int exec_blas_async(BLASLONG, blas_queue_t *);
|
|
kusano |
2b45e8 |
int exec_blas_async_wait(BLASLONG, blas_queue_t *);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
/* Variants declared when SMP_SERVER is not defined: these take a
   blas_param_t and pthread_t handles instead of a blas_queue_t list. */
int exec_blas_async(BLASLONG num_cpu, blas_param_t *param, pthread_t *);
int exec_blas_async_wait(BLASLONG num_cpu, pthread_t *blas_threads);
int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef ASSEMBLER
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
|
|
kusano |
2b45e8 |
void *a, BLASLONG lda,
|
|
kusano |
2b45e8 |
void *b, BLASLONG ldb,
|
|
kusano |
2b45e8 |
void *c, BLASLONG ldc, int (*function)(), int threads);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
|
|
kusano |
2b45e8 |
double alpha_r, double alpha_i,
|
|
kusano |
2b45e8 |
void *a, BLASLONG lda,
|
|
kusano |
2b45e8 |
void *c, BLASLONG ldc, int (*function)(), void *buffer);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int beta_thread(int mode, BLASLONG m, BLASLONG n,
|
|
kusano |
2b45e8 |
double alpha_r, double alpha_i,
|
|
kusano |
2b45e8 |
void *c, BLASLONG ldc, int (*fuction)());
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
|
|
kusano |
2b45e8 |
void *offsetA, BLASLONG lda,
|
|
kusano |
2b45e8 |
void *offsetB, BLASLONG jb,
|
|
kusano |
2b45e8 |
void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif /* ENDIF ASSEMBLER */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|