Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/driver/others/blas_server.c

kusano 2b45e8
/*****************************************************************************
kusano 2b45e8
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
kusano 2b45e8
All rights reserved.
kusano 2b45e8
kusano 2b45e8
Redistribution and use in source and binary forms, with or without
kusano 2b45e8
modification, are permitted provided that the following conditions are
kusano 2b45e8
met:
kusano 2b45e8
kusano 2b45e8
   1. Redistributions of source code must retain the above copyright
kusano 2b45e8
      notice, this list of conditions and the following disclaimer.
kusano 2b45e8
kusano 2b45e8
   2. Redistributions in binary form must reproduce the above copyright
kusano 2b45e8
      notice, this list of conditions and the following disclaimer in
kusano 2b45e8
      the documentation and/or other materials provided with the
kusano 2b45e8
      distribution.
kusano 2b45e8
   3. Neither the name of the ISCAS nor the names of its contributors may 
kusano 2b45e8
      be used to endorse or promote products derived from this software 
kusano 2b45e8
      without specific prior written permission.
kusano 2b45e8
kusano 2b45e8
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
kusano 2b45e8
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
kusano 2b45e8
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
kusano 2b45e8
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
kusano 2b45e8
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
kusano 2b45e8
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
kusano 2b45e8
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
kusano 2b45e8
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
kusano 2b45e8
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
kusano 2b45e8
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
kusano 2b45e8
kusano 2b45e8
**********************************************************************************/
kusano 2b45e8
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#include "common.h"
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
#include <dlfcn.h>
kusano 2b45e8
#include <sys/resource.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_SERVER
kusano 2b45e8
kusano 2b45e8
#undef MONITOR
kusano 2b45e8
#undef TIMING
kusano 2b45e8
#undef TIMING_DEBUG
kusano 2b45e8
#undef NEED_STACKATTR
kusano 2b45e8
kusano 2b45e8
#define ATTRIBUTE_SIZE 128
kusano 2b45e8
kusano 2b45e8
/* This is a thread server model implementation.  The threads are   */
kusano 2b45e8
/* spawned at first access to the BLAS library and remain until the */
kusano 2b45e8
/* destruction routine is called.  The number of threads is         */
kusano 2b45e8
/* equal to "OMP_NUM_THREADS - 1" and thread only wakes up when     */
kusano 2b45e8
/* a job is queued.                                                 */
kusano 2b45e8
kusano 2b45e8
/* We need this global for checking if initialization is finished.  */
kusano 2b45e8
int blas_server_avail   __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
kusano 2b45e8
kusano 2b45e8
/* Local Variables */
kusano 2b45e8
/* server_lock is taken with LOCK_COMMAND / UNLOCK_COMMAND around the    */
/* creation of worker threads in blas_thread_init() and                  */
/* goto_set_num_threads().  Its type depends on which of                 */
/* USE_PTHREAD_LOCK / USE_PTHREAD_SPINLOCK is defined at build time.     */
#if   defined(USE_PTHREAD_LOCK)
kusano 2b45e8
static pthread_mutex_t  server_lock    = PTHREAD_MUTEX_INITIALIZER;
kusano 2b45e8
#elif defined(USE_PTHREAD_SPINLOCK)
kusano 2b45e8
static pthread_spinlock_t  server_lock = 0;
kusano 2b45e8
#else
kusano 2b45e8
static unsigned long server_lock       = 0;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Values of thread_status_t.status.  A worker sets SLEEP just before it */
/* blocks on its condition variable; exec_blas_async() sets WAKEUP       */
/* before signalling it.  WAKEUP is also the initial value.              */
#define THREAD_STATUS_SLEEP		2
kusano 2b45e8
#define THREAD_STATUS_WAKEUP		4
kusano 2b45e8
kusano 2b45e8
/* pthread handles of the worker threads, indexed by worker slot. */
static pthread_t       blas_threads [MAX_CPU_NUMBER];
kusano 2b45e8
kusano 2b45e8
/* Per-worker control block; one entry of thread_status[] per worker slot. */
typedef struct {
kusano 2b45e8
  /* Job slot polled by the worker.  As used in this file:               */
  /*   NULL                 - idle, the worker spins / sleeps on it      */
  /*   (blas_queue_t *)1    - the worker is executing a job              */
  /*   (blas_queue_t *)-1   - the worker leaves its loop and exits       */
  /*   anything else        - job handed over by exec_blas_async()       */
  blas_queue_t * volatile queue   __attribute__((aligned(ATTRIBUTE_SIZE)));
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
kusano 2b45e8
  /* Value returned by gotoblas_set_affinity() for this worker; compared */
  /* against get_node() when a job is queued in BLAS_NODE mode.          */
  int	node;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  /* THREAD_STATUS_SLEEP or THREAD_STATUS_WAKEUP. */
  volatile long		 status;
kusano 2b45e8
kusano 2b45e8
  /* Mutex / condition variable pair the worker blocks on after it has   */
  /* been idle for longer than thread_timeout ticks.                     */
  pthread_mutex_t	 lock;
kusano 2b45e8
  pthread_cond_t	 wakeup;
kusano 2b45e8
kusano 2b45e8
} thread_status_t;
kusano 2b45e8
kusano 2b45e8
/* Control blocks of all worker slots; slot i belongs to blas_threads[i]. */
static thread_status_t thread_status[MAX_CPU_NUMBER] __attribute__((aligned(ATTRIBUTE_SIZE)));
kusano 2b45e8
kusano 2b45e8
/* Default idle timeout, expressed as a power-of-two exponent. */
#ifndef THREAD_TIMEOUT
kusano 2b45e8
#define THREAD_TIMEOUT	28
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Number of rpcc() ticks an idle worker keeps polling before it blocks  */
/* on its condition variable.  blas_thread_init() may replace it from    */
/* the THREAD_TIMEOUT / GOTO_THREAD_TIMEOUT environment variables.       */
static unsigned int thread_timeout = (1U << (THREAD_TIMEOUT));
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
kusano 2b45e8
/* Monitor is a function to show each thread's status every second. */
kusano 2b45e8
/* Usually it is turned off; it is meant for debugging.            */
kusano 2b45e8
kusano 2b45e8
/* Handle of the monitor thread created by blas_thread_init(). */
static pthread_t      monitor_thread;
kusano 2b45e8
/* Last state reported by each worker (one of the MAIN_* codes below);  */
/* written by blas_thread_server() and printed by blas_monitor().       */
static int main_status[MAX_CPU_NUMBER];
kusano 2b45e8
#define MAIN_ENTER	 0x01
kusano 2b45e8
#define MAIN_EXIT	 0x02
kusano 2b45e8
#define MAIN_TRYLOCK	 0x03
kusano 2b45e8
#define MAIN_LOCKSUCCESS 0x04
kusano 2b45e8
#define MAIN_QUEUING	 0x05
kusano 2b45e8
#define MAIN_RECEIVING   0x06
kusano 2b45e8
#define MAIN_RUNNING1    0x07
kusano 2b45e8
#define MAIN_RUNNING2    0x08
kusano 2b45e8
#define MAIN_RUNNING3    0x09
kusano 2b45e8
#define MAIN_WAITING	 0x0a
kusano 2b45e8
#define MAIN_SLEEPING	 0x0b
kusano 2b45e8
#define MAIN_FINISH      0x0c
kusano 2b45e8
#define MAIN_DONE	 0x0d
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* NOTE(review): these two codes are not referenced in the visible part */
/* of this file - confirm whether they are still used elsewhere.        */
#define BLAS_QUEUE_FINISHED	3
kusano 2b45e8
#define BLAS_QUEUE_RUNNING	4
kusano 2b45e8
kusano 2b45e8
#ifdef TIMING
kusano 2b45e8
/* rpcc() value each worker records at the top of its service loop. */
BLASLONG	exit_time[MAX_CPU_NUMBER];
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Call `func` through the legacy kernel calling convention:            */
/*                                                                      */
/*   func(m, n, k, alpha_r [, alpha_i], a, lda, b, ldb, c, ldc, sb)     */
/*                                                                      */
/* The scalar/pointer element type is selected by the precision bits of */
/* `mode` (BLAS_XDOUBLE when EXPRECISION is enabled, then BLAS_DOUBLE,  */
/* otherwise single).  With BLAS_COMPLEX set the second component of    */
/* args->alpha is passed as an additional scalar argument.              */
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){

  const int is_complex = (mode & BLAS_COMPLEX) != 0;

#ifdef EXPRECISION
  if (mode & BLAS_XDOUBLE) {
    xdouble *alpha = (xdouble *)args -> alpha;

    if (is_complex) {
      /* COMPLEX / Extended Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
                     xdouble *, BLASLONG, xdouble *, BLASLONG,
                     xdouble *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0], alpha[1],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Extended Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble,
                     xdouble *, BLASLONG, xdouble *, BLASLONG,
                     xdouble *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
    return;
  }
#endif

  if (mode & BLAS_DOUBLE) {
    double *alpha = (double *)args -> alpha;

    if (is_complex) {
      /* COMPLEX / Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, double, double,
                     double *, BLASLONG, double *, BLASLONG,
                     double *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0], alpha[1],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, double,
                     double *, BLASLONG, double *, BLASLONG,
                     double *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
    return;
  }

  {
    float *alpha = (float *)args -> alpha;

    if (is_complex) {
      /* COMPLEX / Single */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, float, float,
                     float *, BLASLONG, float *, BLASLONG,
                     float *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0], alpha[1],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Single */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, float,
                     float *, BLASLONG, float *, BLASLONG,
                     float *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
  }
}
kusano 2b45e8
kusano 2b45e8
/* Affinity helpers defined outside this file. */
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
kusano 2b45e8
int gotoblas_set_affinity(int);
kusano 2b45e8
int gotoblas_set_affinity2(int);
kusano 2b45e8
int get_node(void);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Set to 1 by goto_set_num_threads() once it adds workers after start-  */
/* up.  Workers that start afterwards call gotoblas_set_affinity(-1)     */
/* instead of gotoblas_set_affinity(cpu + 1).                            */
static int increased_threads = 0;
kusano 2b45e8
kusano 2b45e8
static int blas_thread_server(void *arg){
kusano 2b45e8
kusano 2b45e8
  /* Thread identifier */
kusano 2b45e8
  BLASLONG  cpu = (BLASLONG)arg;
kusano 2b45e8
  unsigned int last_tick;
kusano 2b45e8
  void *buffer, *sa, *sb;
kusano 2b45e8
  blas_queue_t	*queue;
kusano 2b45e8
#ifdef TIMING_DEBUG
kusano 2b45e8
  unsigned long start, stop;
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
kusano 2b45e8
  if (!increased_threads)
kusano 2b45e8
    thread_status[cpu].node = gotoblas_set_affinity(cpu + 1);
kusano 2b45e8
  else  
kusano 2b45e8
    thread_status[cpu].node = gotoblas_set_affinity(-1);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
  main_status[cpu] = MAIN_ENTER;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  buffer = blas_memory_alloc(2);
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
  fprintf(STDERR, "Server[%2ld] Thread has just been spawned!\n", cpu);
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
  while (1){
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
    main_status[cpu] = MAIN_QUEUING;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef TIMING
kusano 2b45e8
    exit_time[cpu] = rpcc();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
      last_tick = (unsigned int)rpcc();
kusano 2b45e8
      
kusano 2b45e8
      while (!thread_status[cpu].queue) {
kusano 2b45e8
	
kusano 2b45e8
	YIELDING;
kusano 2b45e8
kusano 2b45e8
	if ((unsigned int)rpcc() - last_tick > thread_timeout) {
kusano 2b45e8
	  
kusano 2b45e8
	  pthread_mutex_lock  (&thread_status[cpu].lock);
kusano 2b45e8
	  
kusano 2b45e8
	  if (!thread_status[cpu].queue) {
kusano 2b45e8
	    thread_status[cpu].status = THREAD_STATUS_SLEEP;
kusano 2b45e8
	    while (thread_status[cpu].status == THREAD_STATUS_SLEEP) {
kusano 2b45e8
	      
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
	      main_status[cpu] = MAIN_SLEEPING;
kusano 2b45e8
#endif
kusano 2b45e8
	      
kusano 2b45e8
	      pthread_cond_wait(&thread_status[cpu].wakeup, &thread_status[cpu].lock);
kusano 2b45e8
	    }
kusano 2b45e8
	  }
kusano 2b45e8
	  
kusano 2b45e8
	  pthread_mutex_unlock(&thread_status[cpu].lock);
kusano 2b45e8
	  
kusano 2b45e8
	  last_tick = (unsigned int)rpcc();
kusano 2b45e8
	}
kusano 2b45e8
	
kusano 2b45e8
      }
kusano 2b45e8
      
kusano 2b45e8
    queue = thread_status[cpu].queue;
kusano 2b45e8
kusano 2b45e8
    if ((long)queue == -1) break;
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
    main_status[cpu] = MAIN_RECEIVING;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef TIMING_DEBUG
kusano 2b45e8
    start = rpcc();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    if (queue) {
kusano 2b45e8
      int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
kusano 2b45e8
	
kusano 2b45e8
      thread_status[cpu].queue = (blas_queue_t *)1;
kusano 2b45e8
kusano 2b45e8
      sa = queue -> sa;
kusano 2b45e8
      sb = queue -> sb;
kusano 2b45e8
      
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
      if (queue -> args) {
kusano 2b45e8
	fprintf(STDERR, "Server[%2ld] Calculation started.  Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
kusano 2b45e8
		cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
kusano 2b45e8
      }
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
#ifdef CONSISTENT_FPCSR
kusano 2b45e8
      __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
kusano 2b45e8
      __asm__ __volatile__ ("fldcw %0"   : : "m" (queue -> x87_mode));
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
      main_status[cpu] = MAIN_RUNNING1;
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
      if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
kusano 2b45e8
kusano 2b45e8
      if (sb == NULL) {
kusano 2b45e8
	if (!(queue -> mode & BLAS_COMPLEX)){
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
	  if (queue -> mode & BLAS_XDOUBLE){
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble) 
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  } else 
kusano 2b45e8
#endif
kusano 2b45e8
	  if (queue -> mode & BLAS_DOUBLE){
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	    
kusano 2b45e8
	  } else {
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  }
kusano 2b45e8
	} else {
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
	  if (queue -> mode & BLAS_XDOUBLE){
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  } else
kusano 2b45e8
#endif
kusano 2b45e8
	  if (queue -> mode & BLAS_DOUBLE){
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  } else {
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  }
kusano 2b45e8
	}
kusano 2b45e8
      }
kusano 2b45e8
	
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
	main_status[cpu] = MAIN_RUNNING2;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
      if (queue -> mode & BLAS_LEGACY) {
kusano 2b45e8
	legacy_exec(routine, queue -> mode, queue -> args, sb);
kusano 2b45e8
      } else
kusano 2b45e8
	if (queue -> mode & BLAS_PTHREAD) {
kusano 2b45e8
	  void (*pthreadcompat)(void *) = queue -> routine;
kusano 2b45e8
	  (pthreadcompat)(queue -> args);
kusano 2b45e8
	} else
kusano 2b45e8
	  (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
      fprintf(STDERR, "Server[%2ld] Calculation finished!\n", cpu);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
      main_status[cpu] = MAIN_FINISH;
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
      thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0);  /* Need a trick */
kusano 2b45e8
      WMB;
kusano 2b45e8
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
      main_status[cpu] = MAIN_DONE;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef TIMING_DEBUG
kusano 2b45e8
    stop = rpcc();
kusano 2b45e8
    
kusano 2b45e8
    fprintf(STDERR, "Thread[%ld] : %16lu %16lu (%8lu cycles)\n", cpu + 1,
kusano 2b45e8
	    start, stop,
kusano 2b45e8
	    stop - start);
kusano 2b45e8
#endif
kusano 2b45e8
    
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  /* Shutdown procedure */
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
      fprintf(STDERR, "Server[%2ld] Shutdown!\n",  cpu);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  blas_memory_free(buffer);
kusano 2b45e8
kusano 2b45e8
  pthread_exit(NULL);
kusano 2b45e8
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR

/* Incremented by exec_blas_async() each time it takes a worker's lock */
/* in order to wake that worker up.                                     */
static BLASLONG num_suspend = 0;

/* Debug thread body: once per second print the last state recorded in */
/* main_status[] for every worker slot, followed by the num_suspend     */
/* counter.  `arg` is unused.  The loop never terminates.               */
static int blas_monitor(void *arg){
  int worker;

  for (;;) {
    worker = 0;

    while (worker < blas_num_threads - 1) {

      switch (main_status[worker]) {
      case MAIN_ENTER :
        fprintf(STDERR, "THREAD[%2d] : Entering.\n", worker);
        break;
      case MAIN_EXIT :
        fprintf(STDERR, "THREAD[%2d] : Exiting.\n", worker);
        break;
      case MAIN_TRYLOCK :
        fprintf(STDERR, "THREAD[%2d] : Trying lock operation.\n", worker);
        break;
      case MAIN_QUEUING :
        fprintf(STDERR, "THREAD[%2d] : Queuing.\n", worker);
        break;
      case MAIN_RECEIVING :
        fprintf(STDERR, "THREAD[%2d] : Receiving.\n", worker);
        break;
      case MAIN_RUNNING1 :
        fprintf(STDERR, "THREAD[%2d] : Running1.\n", worker);
        break;
      case MAIN_RUNNING2 :
        fprintf(STDERR, "THREAD[%2d] : Running2.\n", worker);
        break;
      case MAIN_RUNNING3 :
        fprintf(STDERR, "THREAD[%2d] : Running3.\n", worker);
        break;
      case MAIN_WAITING :
        fprintf(STDERR, "THREAD[%2d] : Waiting.\n", worker);
        break;
      case MAIN_SLEEPING :
        fprintf(STDERR, "THREAD[%2d] : Sleeping.\n", worker);
        break;
      case MAIN_FINISH :
        fprintf(STDERR, "THREAD[%2d] : Finishing.\n", worker);
        break;
      case MAIN_DONE :
        fprintf(STDERR, "THREAD[%2d] : Job is done.\n", worker);
        break;
      }

      fprintf(stderr, "Total number of suspended ... %ld\n", num_suspend);

      worker ++;
    }

    sleep(1);
  }

  return 0;
}
#endif
kusano 2b45e8
kusano 2b45e8
/* Initializing routine */
kusano 2b45e8
int blas_thread_init(void){
kusano 2b45e8
  BLASLONG i;
kusano 2b45e8
  int ret;
kusano 2b45e8
#ifdef NEED_STACKATTR
kusano 2b45e8
  pthread_attr_t attr;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if (blas_server_avail) return 0;
kusano 2b45e8
  
kusano 2b45e8
#ifdef NEED_STACKATTR
kusano 2b45e8
  pthread_attr_init(&attr);
kusano 2b45e8
  pthread_attr_setguardsize(&attr,  0x1000U);
kusano 2b45e8
  pthread_attr_setstacksize( &attr, 0x1000U);
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
  LOCK_COMMAND(&server_lock);
kusano 2b45e8
kusano 2b45e8
  if (!blas_server_avail){
kusano 2b45e8
kusano 2b45e8
    char *p;
kusano 2b45e8
kusano 2b45e8
    p = getenv("THREAD_TIMEOUT");
kusano 2b45e8
kusano 2b45e8
    if (p) {
kusano 2b45e8
      thread_timeout = atoi(p);
kusano 2b45e8
      if (thread_timeout <  4) thread_timeout =  4;
kusano 2b45e8
      if (thread_timeout > 30) thread_timeout = 30;
kusano 2b45e8
      thread_timeout = (1 << thread_timeout);
kusano 2b45e8
    }else{
kusano 2b45e8
		p = getenv("GOTO_THREAD_TIMEOUT");
kusano 2b45e8
		if (p) {
kusano 2b45e8
			thread_timeout = atoi(p);
kusano 2b45e8
			if (thread_timeout <  4) thread_timeout =  4;
kusano 2b45e8
			if (thread_timeout > 30) thread_timeout = 30;
kusano 2b45e8
			thread_timeout = (1 << thread_timeout);
kusano 2b45e8
		}
kusano 2b45e8
	}
kusano 2b45e8
	
kusano 2b45e8
kusano 2b45e8
    for(i = 0; i < blas_num_threads - 1; i++){
kusano 2b45e8
kusano 2b45e8
      thread_status[i].queue  = (blas_queue_t *)NULL;
kusano 2b45e8
      thread_status[i].status = THREAD_STATUS_WAKEUP;
kusano 2b45e8
      
kusano 2b45e8
      pthread_mutex_init(&thread_status[i].lock, NULL);
kusano 2b45e8
      pthread_cond_init (&thread_status[i].wakeup, NULL);
kusano 2b45e8
      
kusano 2b45e8
#ifdef NEED_STACKATTR
kusano 2b45e8
      ret=pthread_create(&blas_threads[i], &attr, 
kusano 2b45e8
		     (void *)&blas_thread_server, (void *)i);
kusano 2b45e8
#else
kusano 2b45e8
      ret=pthread_create(&blas_threads[i], NULL, 
kusano 2b45e8
		     (void *)&blas_thread_server, (void *)i);
kusano 2b45e8
#endif
kusano 2b45e8
      if(ret!=0){
kusano 2b45e8
	fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret);
kusano 2b45e8
	exit(1);
kusano 2b45e8
      }
kusano 2b45e8
    }
kusano 2b45e8
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
    pthread_create(&monitor_thread, NULL, 
kusano 2b45e8
		     (void *)&blas_monitor, (void *)NULL);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    blas_server_avail = 1;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  UNLOCK_COMMAND(&server_lock);
kusano 2b45e8
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/* 
kusano 2b45e8
   User can call one of two routines.
kusano 2b45e8
kusano 2b45e8
     exec_blas_async ... immediately returns after jobs are queued.
kusano 2b45e8
kusano 2b45e8
     exec_blas       ... returns after jobs are finished.
kusano 2b45e8
*/
kusano 2b45e8
kusano 2b45e8
/* Lock word passed to blas_lock() / blas_unlock() in exec_blas_async() */
/* while jobs are being assigned to worker slots.                        */
static BLASULONG exec_queue_lock = 0;
kusano 2b45e8
kusano 2b45e8
/* Hand every job of the linked list `queue` to an idle worker and      */
/* return without waiting for completion.                                */
/*                                                                       */
/*   pos   - position number stored in the first job; it is incremented  */
/*           for each following job.                                     */
/*   queue - head of the job list (linked through ->next).               */
/*                                                                       */
/* For each job an idle slot (thread_status[i].queue == NULL) is         */
/* searched; the search spins until one becomes free.  With affinity     */
/* support and BLAS_NODE set in the job's mode, slots on the caller's    */
/* node are tried first and the remaining nodes afterwards.  The chosen  */
/* slot is recorded in queue->assigned.  After all jobs are assigned,    */
/* workers found in THREAD_STATUS_SLEEP are woken through their          */
/* condition variable.  Always returns 0.                                */
int exec_blas_async(BLASLONG pos, blas_queue_t *queue){

  BLASLONG i = 0;
  blas_queue_t *current = queue;
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
  int node  = get_node();
  int nodes = get_num_nodes();
#endif

#ifdef SMP_DEBUG
  int exec_count = 0;
  /* Arguments are cast so that they match the conversion specifiers. */
  fprintf(STDERR, "Exec_blas_async is called. Position = %d\n", (int)pos);
#endif

  blas_lock(&exec_queue_lock);

  while (queue) {
    queue -> position  = pos;

#ifdef CONSISTENT_FPCSR
    /* Capture the caller's FP control state; the worker restores it. */
    __asm__ __volatile__ ("fnstcw %0"  : "=m" (queue -> x87_mode));
    __asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
#endif

#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)

    /* Node Mapping Mode */

    if (queue -> mode & BLAS_NODE) {

      for (;;) {
        /* The bounds test comes first so that no slot beyond the live */
        /* workers is ever read.                                        */
        while ((i < blas_num_threads - 1) &&
               (thread_status[i].node != node || thread_status[i].queue)) i ++;

        if (i < blas_num_threads - 1) break;

        /* No idle worker on this node: rescan from slot 0 on the next one. */
        i = 0;
        node ++;
        if (node >= nodes) node = 0;
      }

    } else {
      while(thread_status[i].queue) {
        i ++;
        if (i >= blas_num_threads - 1) i = 0;
      }
    }
#else
    while(thread_status[i].queue) {
      i ++;
      if (i >= blas_num_threads - 1) i = 0;
    }
#endif

    /* Record the slot before the job is published to the worker. */
    queue -> assigned = i;
    WMB;
    thread_status[i].queue = queue;
    WMB;

    queue = queue -> next;
    pos ++;
#ifdef SMP_DEBUG
    exec_count ++;
#endif

  }

  blas_unlock(&exec_queue_lock);

#ifdef SMP_DEBUG
  fprintf(STDERR, "Done(Number of threads = %2ld).\n", (long)exec_count);
#endif

  /* Second pass: wake the workers that went to sleep on their condvar. */
  while (current) {

    pos = current -> assigned;

    /* A value above 1 means the worker has not picked the job up yet. */
    if ((BLASULONG)thread_status[pos].queue > 1) {

      if (thread_status[pos].status == THREAD_STATUS_SLEEP) {

        pthread_mutex_lock  (&thread_status[pos].lock);

#ifdef MONITOR
        num_suspend ++;
#endif

        /* Re-check under the lock before signalling. */
        if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
          thread_status[pos].status = THREAD_STATUS_WAKEUP;
          pthread_cond_signal(&thread_status[pos].wakeup);
        }
        pthread_mutex_unlock(&thread_status[pos].lock);
      }
    }

    current = current -> next;
  }

  return 0;
}
kusano 2b45e8
kusano 2b45e8
/* Wait until the first `num` jobs of the list `queue` have finished.   */
/* A job counts as finished once the slot it was assigned to            */
/* (queue->assigned) holds NULL again; the caller yields while waiting. */
/* Always returns 0.                                                     */
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){

  blas_queue_t *job;
  BLASLONG remaining;

  for (job = queue, remaining = num;
       (remaining > 0) && job;
       job = job -> next, remaining --) {

    while (thread_status[job -> assigned].queue) {
      YIELDING;
    }
  }

#ifdef SMP_DEBUG
  fprintf(STDERR, "Done.\n\n");
#endif

  return 0;
}
kusano 2b45e8
kusano 2b45e8
/* Execute Threads */
kusano 2b45e8
/* Execute Threads                                                       */
/*                                                                       */
/* Runs the job list `queue` and returns when it has completed: the      */
/* jobs after the first are handed to the workers through                */
/* exec_blas_async(), the first job is executed by the calling thread,   */
/* and exec_blas_async_wait() then waits for the others.  Returns 0,     */
/* also when num <= 0 or queue is NULL (nothing is done in that case).   */
int exec_blas(BLASLONG num, blas_queue_t *queue){

  int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);

#ifdef TIMING_DEBUG
  BLASULONG start, stop;
#endif

  if ((num <= 0) || (queue == NULL)) return 0;

#ifdef SMP_DEBUG
  fprintf(STDERR, "Exec_blas is called. Number of executing threads : %ld\n", num);
#endif

#ifdef __ELF__
  /* omp_in_parallel is tested as a symbol first and only called when it */
  /* is non-null.                                                         */
  if (omp_in_parallel && (num > 1) && (omp_in_parallel() > 0)) {
    fprintf(stderr,
            "OpenBLAS Warning : Detect OpenMP Loop and this application may hang. "
            "Please rebuild the library with USE_OPENMP=1 option.\n");
  }
#endif

  /* Start the remaining jobs on the workers before running our own. */
  if ((num > 1) && queue -> next) {
    exec_blas_async(1, queue -> next);
  }

#ifdef TIMING_DEBUG
  start = rpcc();

  fprintf(STDERR, "\n");
#endif

  routine = queue -> routine;

  /* The first job runs on the calling thread, at position 0. */
  if (queue -> mode & BLAS_LEGACY) {
    legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
  } else if (queue -> mode & BLAS_PTHREAD) {
    void (*pthreadcompat)(void *) = queue -> routine;
    (pthreadcompat)(queue -> args);
  } else {
    (routine)(queue -> args, queue -> range_m, queue -> range_n,
              queue -> sa, queue -> sb, 0);
  }

#ifdef TIMING_DEBUG
  stop = rpcc();
#endif

  if ((num > 1) && queue -> next) {
    exec_blas_async_wait(num - 1, queue -> next);
  }

#ifdef TIMING_DEBUG
  fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
          start, stop,
          stop - start);
#endif

  return 0;
}
kusano 2b45e8
kusano 2b45e8
void goto_set_num_threads(int num_threads) {
kusano 2b45e8
kusano 2b45e8
  long i;
kusano 2b45e8
kusano 2b45e8
  if (num_threads < 1) num_threads = blas_num_threads;
kusano 2b45e8
kusano 2b45e8
  if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
kusano 2b45e8
kusano 2b45e8
  if (num_threads > blas_num_threads) {
kusano 2b45e8
kusano 2b45e8
    LOCK_COMMAND(&server_lock);
kusano 2b45e8
    
kusano 2b45e8
    increased_threads = 1;
kusano 2b45e8
kusano 2b45e8
    for(i = blas_num_threads - 1; i < num_threads - 1; i++){
kusano 2b45e8
      
kusano 2b45e8
      thread_status[i].queue  = (blas_queue_t *)NULL;
kusano 2b45e8
      thread_status[i].status = THREAD_STATUS_WAKEUP;
kusano 2b45e8
      
kusano 2b45e8
      pthread_mutex_init(&thread_status[i].lock, NULL);
kusano 2b45e8
      pthread_cond_init (&thread_status[i].wakeup, NULL);
kusano 2b45e8
      
kusano 2b45e8
#ifdef NEED_STACKATTR
kusano 2b45e8
      pthread_create(&blas_threads[i], &attr, 
kusano 2b45e8
		     (void *)&blas_thread_server, (void *)i);
kusano 2b45e8
#else
kusano 2b45e8
      pthread_create(&blas_threads[i], NULL, 
kusano 2b45e8
		     (void *)&blas_thread_server, (void *)i);
kusano 2b45e8
#endif
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    blas_num_threads = num_threads;
kusano 2b45e8
kusano 2b45e8
    UNLOCK_COMMAND(&server_lock);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  blas_cpu_number  = num_threads;
kusano 2b45e8
kusano 2b45e8
#if defined(ARCH_MIPS64) 
kusano 2b45e8
  //set parameters for different number of threads.
kusano 2b45e8
  blas_set_parameter();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/*
 * OpenBLAS-named entry point for changing the thread count.
 * Simply forwards num_threads to goto_set_num_threads().
 */
void openblas_set_num_threads(int num_threads)
{
  goto_set_num_threads(num_threads);
}
kusano 2b45e8
kusano 2b45e8
/* Compatible function with pthread_create / join */
kusano 2b45e8
kusano 2b45e8
int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {
kusano 2b45e8
kusano 2b45e8
  blas_queue_t queue[MAX_CPU_NUMBER];
kusano 2b45e8
  int i;
kusano 2b45e8
kusano 2b45e8
  if (numthreads <= 0) return 0;
kusano 2b45e8
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
  if (blas_cpu_number == 0) blas_get_cpu_number();
kusano 2b45e8
#ifdef SMP_SERVER
kusano 2b45e8
  if (blas_server_avail == 0) blas_thread_init();
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  for (i = 0; i < numthreads; i ++) {
kusano 2b45e8
kusano 2b45e8
    queue[i].mode    = BLAS_PTHREAD;
kusano 2b45e8
    queue[i].routine = function;
kusano 2b45e8
    queue[i].args    = args;
kusano 2b45e8
    queue[i].range_m = NULL;
kusano 2b45e8
    queue[i].range_n = NULL;
kusano 2b45e8
    queue[i].sa	     = args;
kusano 2b45e8
    queue[i].sb	     = args;
kusano 2b45e8
    queue[i].next    = &queue[i + 1];
kusano 2b45e8
kusano 2b45e8
    args += stride;
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
  queue[numthreads - 1].next = NULL;
kusano 2b45e8
    
kusano 2b45e8
  exec_blas(numthreads, queue);
kusano 2b45e8
  
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/* Shutdown procedure. The user does not have to call this routine;  */
kusano 2b45e8
/* the kernel automatically kills the threads.                       */
kusano 2b45e8
kusano 2b45e8
int BLASFUNC(blas_thread_shutdown)(void){
kusano 2b45e8
kusano 2b45e8
  int i;
kusano 2b45e8
kusano 2b45e8
  if (!blas_server_avail) return 0;
kusano 2b45e8
  
kusano 2b45e8
  LOCK_COMMAND(&server_lock);
kusano 2b45e8
kusano 2b45e8
  for (i = 0; i < blas_num_threads - 1; i++) {
kusano 2b45e8
kusano 2b45e8
    blas_lock(&exec_queue_lock);
kusano 2b45e8
  
kusano 2b45e8
    thread_status[i].queue = (blas_queue_t *)-1;
kusano 2b45e8
kusano 2b45e8
    blas_unlock(&exec_queue_lock);
kusano 2b45e8
  
kusano 2b45e8
    pthread_mutex_lock  (&thread_status[i].lock);
kusano 2b45e8
kusano 2b45e8
    thread_status[i].status = THREAD_STATUS_WAKEUP;
kusano 2b45e8
kusano 2b45e8
    pthread_cond_signal (&thread_status[i].wakeup);
kusano 2b45e8
kusano 2b45e8
    pthread_mutex_unlock(&thread_status[i].lock);
kusano 2b45e8
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  for(i = 0; i < blas_num_threads - 1; i++){
kusano 2b45e8
    pthread_join(blas_threads[i], NULL);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  for(i = 0; i < blas_num_threads - 1; i++){
kusano 2b45e8
    pthread_mutex_destroy(&thread_status[i].lock);
kusano 2b45e8
    pthread_cond_destroy (&thread_status[i].wakeup);
kusano 2b45e8
  }      
kusano 2b45e8
kusano 2b45e8
#ifdef NEED_STACKATTR
kusano 2b45e8
  pthread_attr_destory(&attr);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  blas_server_avail = 0;
kusano 2b45e8
  
kusano 2b45e8
  UNLOCK_COMMAND(&server_lock);
kusano 2b45e8
  
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8