/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#ifndef COMMON_THREAD
kusano 2b45e8
#define COMMON_THREAD
kusano 2b45e8
kusano 2b45e8
#ifdef USE_OPENMP
/* OpenMP runtime API: num_cpu_avail() in this header calls
   omp_in_parallel() and omp_get_max_threads(). */
#include <omp.h>
/* Defined elsewhere; num_cpu_avail() calls it with the value of
   omp_get_max_threads() when that differs from blas_cpu_number. */
extern void goto_set_num_threads(int nthreads);
#endif
kusano 2b45e8
kusano 2b45e8
/* Basic Thread Debugging: forced off here.  The `num` member of
   struct blas_queue is only compiled when SMP_DEBUG is defined. */
#undef SMP_DEBUG

/* Thread Timing Debugging: forced off here.  The `clocks` member of
   struct blas_queue is only compiled when TIMING_DEBUG is defined. */
#undef TIMING_DEBUG
kusano 2b45e8
kusano 2b45e8
/* Global Parameter (all defined elsewhere) */

/* Thread count that num_cpu_avail() compares against 1 and returns. */
extern int blas_cpu_number;
/* NOTE(review): not referenced in this header; presumably the configured
   thread count -- confirm against the defining translation unit. */
extern int blas_num_threads;
/* NOTE(review): not referenced in this header; presumably records whether
   the OpenMP build is in use -- confirm. */
extern int blas_omp_linked;
kusano 2b45e8
kusano 2b45e8
/* Single-bit flags in the upper part of a 16-bit mode word.
   NOTE(review): presumably OR-ed into the `mode` argument / the `mode`
   member of struct blas_queue -- confirm against callers. */
#define BLAS_LEGACY	0x8000U
#define BLAS_PTHREAD	0x4000U
#define BLAS_NODE	0x2000U

/* Precision: BLAS_PREC masks the two low bits, which hold one of
   BLAS_SINGLE (0), BLAS_DOUBLE (1) or BLAS_XDOUBLE (2). */
#define BLAS_PREC	0x0003U
#define BLAS_SINGLE	0x0000U
#define BLAS_DOUBLE	0x0001U
#define BLAS_XDOUBLE	0x0002U
/* Real/complex selector: bit 2.  BLAS_REAL is the absence of the bit, so it
   has the same numeric value (0) as BLAS_SINGLE. */
#define BLAS_REAL	0x0000U
#define BLAS_COMPLEX	0x0004U
kusano 2b45e8
kusano 2b45e8
/* Two-bit field at bits 4-5.  BLAS_TRANSA is the mask; the _N/_T/_R/_C
   values are 0..3 shifted left by BLAS_TRANSA_SHIFT.
   NOTE(review): N/T/R/C presumably mean no-transpose, transpose,
   conjugate and conjugate-transpose for operand A -- confirm. */
#define BLAS_TRANSA	0x0030U	/* 2bit */
#define BLAS_TRANSA_N	0x0000U
#define BLAS_TRANSA_T	0x0010U
#define BLAS_TRANSA_R	0x0020U
#define BLAS_TRANSA_C	0x0030U
#define BLAS_TRANSA_SHIFT     4

/* Same layout for the second operand: two-bit field at bits 8-9, values
   0..3 shifted left by BLAS_TRANSB_SHIFT. */
#define BLAS_TRANSB	0x0300U	/* 2bit */
#define BLAS_TRANSB_N	0x0000U
#define BLAS_TRANSB_T	0x0100U
#define BLAS_TRANSB_R	0x0200U
#define BLAS_TRANSB_C	0x0300U
#define BLAS_TRANSB_SHIFT     8
kusano 2b45e8
kusano 2b45e8
/* Single-bit flags; each mask equals 1 shifted left by its *_SHIFT.
   NOTE(review): presumably "right side" and "upper/lower" selectors --
   confirm which polarity a set bit means. */
#define BLAS_RSIDE      0x0400U
#define BLAS_RSIDE_SHIFT     10
#define BLAS_UPLO       0x0800U
#define BLAS_UPLO_SHIFT      11
kusano 2b45e8
kusano 2b45e8
/* Status codes.  Note that the sequence is 0, 1, 2, 4: FINISHED is 4,
   not 3.
   NOTE(review): presumably stored in the `status` member of
   struct blas_queue -- confirm. */
#define BLAS_STATUS_NOTYET	0
#define BLAS_STATUS_QUEUED	1
#define BLAS_STATUS_RUNNING	2
#define BLAS_STATUS_FINISHED	4
kusano 2b45e8
kusano 2b45e8
typedef struct blas_queue {
kusano 2b45e8
kusano 2b45e8
  void *routine;
kusano 2b45e8
  BLASLONG position;
kusano 2b45e8
  BLASLONG assigned;
kusano 2b45e8
kusano 2b45e8
  blas_arg_t *args;
kusano 2b45e8
  void *range_m;
kusano 2b45e8
  void *range_n;
kusano 2b45e8
  void *sa, *sb;
kusano 2b45e8
kusano 2b45e8
  struct blas_queue *next;
kusano 2b45e8
kusano 2b45e8
#if defined( __WIN32__) || defined(__CYGWIN32__)
kusano 2b45e8
  CRITICAL_SECTION lock;
kusano 2b45e8
  HANDLE finish;
kusano 2b45e8
#else
kusano 2b45e8
  pthread_mutex_t	 lock;
kusano 2b45e8
  pthread_cond_t	 finished;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  int mode, status;
kusano 2b45e8
kusano 2b45e8
#ifdef CONSISTENT_FPCSR
kusano 2b45e8
  unsigned int sse_mode, x87_mode;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
  int    num;
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef TIMING_DEBUG
kusano 2b45e8
  unsigned int clocks;
kusano 2b45e8
#endif
kusano 2b45e8
} blas_queue_t;
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_SERVER
kusano 2b45e8
kusano 2b45e8
/* Declared only when SMP_SERVER is defined; the definition lives elsewhere.
   NOTE(review): presumably non-zero once the thread server has been
   started -- confirm against the defining translation unit. */
extern int blas_server_avail;
kusano 2b45e8
kusano 2b45e8
static __inline int num_cpu_avail(int level) {
kusano 2b45e8
kusano 2b45e8
#ifdef USE_OPENMP
kusano 2b45e8
	int openmp_nthreads=0;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if ((blas_cpu_number == 1) 
kusano 2b45e8
kusano 2b45e8
#ifdef USE_OPENMP
kusano 2b45e8
      || omp_in_parallel()
kusano 2b45e8
#endif
kusano 2b45e8
      ) return 1;
kusano 2b45e8
kusano 2b45e8
#ifdef USE_OPENMP
kusano 2b45e8
  openmp_nthreads=omp_get_max_threads();
kusano 2b45e8
  if (blas_cpu_number != openmp_nthreads) {
kusano 2b45e8
	  goto_set_num_threads(openmp_nthreads);
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return blas_cpu_number;
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/*
 * Reset the `sa`, `sb` and `next` members of *queue to NULL.
 *
 * No other member is written, so the caller is responsible for every
 * remaining field.  `queue` is dereferenced unconditionally and must not
 * be NULL.
 */
static __inline void blas_queue_init(blas_queue_t *queue){

  queue->sa   = NULL;
  queue->sb   = NULL;
  queue->next = NULL;
}
kusano 2b45e8
kusano 2b45e8
int blas_thread_init(void);
kusano 2b45e8
int BLASFUNC(blas_thread_shutdown)(void);
kusano 2b45e8
int exec_blas(BLASLONG, blas_queue_t *);
kusano 2b45e8
int exec_blas_async(BLASLONG, blas_queue_t *);
kusano 2b45e8
int exec_blas_async_wait(BLASLONG, blas_queue_t *);
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
/* Variants declared when SMP_SERVER is not defined.  They take a
   blas_param_t and raw pthread_t handles instead of a blas_queue_t list,
   so this branch needs pthread_t to be declared. */
int exec_blas_async(BLASLONG num_cpu, blas_param_t *param, pthread_t *);
int exec_blas_async_wait(BLASLONG num_cpu, pthread_t *blas_threads);
int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef ASSEMBLER
kusano 2b45e8
kusano 2b45e8
/*
 * NOTE(review): `function` is declared with an empty parameter list, so
 * the compiler does not check the arguments it is called with; the actual
 * kernel signature must be taken from the definition.
 * `mode` presumably carries the BLAS_* mode flags -- confirm.
 */
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
		       void *a, BLASLONG lda,
		       void *b, BLASLONG ldb,
		       void *c, BLASLONG ldc, int (*function)(), int threads);
kusano 2b45e8
kusano 2b45e8
/*
 * The first three drivers share one signature; gemm_thread_variable takes
 * one extra trailing BLASLONG.  Most parameters are unnamed in these
 * prototypes.
 * NOTE(review): the two BLASLONG pointers are presumably the range_m /
 * range_n arrays and the two void pointers the sa / sb buffers seen in
 * struct blas_queue -- confirm against the definitions.
 */
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);

int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);

int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);

int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
kusano 2b45e8
kusano 2b45e8
/*
 * alpha is passed as two doubles regardless of the precision selected
 * through `mode`.
 * NOTE(review): alpha_r / alpha_i are presumably the real and imaginary
 * parts of the scalar -- confirm.
 */
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
		double alpha_r, double alpha_i,
		void *a, BLASLONG lda,
		void *c, BLASLONG ldc, int (*function)(), void *buffer);
kusano 2b45e8
kusano 2b45e8
/* Same parameter list as gemm_thread_m / gemm_thread_n / gemm_thread_mn.
   NOTE(review): the unnamed parameters are presumably range_m, range_n,
   sa, sb and a thread count -- confirm against the definition. */
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
kusano 2b45e8
kusano 2b45e8
/*
 * Takes only the output matrix `c` with its leading dimension, plus a
 * scalar passed as two doubles.
 * NOTE(review): alpha_r / alpha_i are presumably the real and imaginary
 * parts of the scalar applied to c -- confirm.
 */
int beta_thread(int mode, BLASLONG m, BLASLONG n,
		double alpha_r, double alpha_i,
		void *c, BLASLONG ldc, int (*function)());
kusano 2b45e8
kusano 2b45e8
/*
 * NOTE(review): offsetA / offsetB are untyped pointers, presumably into
 * the matrix being factorised, and ipiv presumably the pivot array --
 * confirm against the definition.
 */
int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
		 void *offsetA, BLASLONG lda,
		 void *offsetB, BLASLONG jb,
		 void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
kusano 2b45e8
kusano 2b45e8
#endif  /* ENDIF ASSEMBLER */
kusano 2b45e8
kusano 2b45e8
#endif