Blame thirdparty/openblas/xianyi-OpenBLAS-e6e87a2/driver/others/blas_server_win32.c

kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
kusano 2b45e8
kusano 2b45e8
/* This is a lazy thread implementation for Win32. */
kusano 2b45e8
kusano 2b45e8
/* Thread server common infomation */
kusano 2b45e8
typedef struct{
kusano 2b45e8
  CRITICAL_SECTION lock;
kusano 2b45e8
  HANDLE filled;
kusano 2b45e8
  HANDLE killed;
kusano 2b45e8
kusano 2b45e8
  blas_queue_t	*queue;    /* Parameter Pointer */
kusano 2b45e8
  int		shutdown;  /* server shutdown flag */
kusano 2b45e8
kusano 2b45e8
} blas_pool_t;
kusano 2b45e8
kusano 2b45e8
/* We need this grobal for cheking if initialization is finished.   */
kusano 2b45e8
int blas_server_avail = 0;
kusano 2b45e8
kusano 2b45e8
/* Local Variables */
kusano 2b45e8
static BLASULONG server_lock       = 0;
kusano 2b45e8
kusano 2b45e8
static blas_pool_t   pool;
kusano 2b45e8
static HANDLE	    blas_threads   [MAX_CPU_NUMBER];
kusano 2b45e8
static DWORD	    blas_threads_id[MAX_CPU_NUMBER];
kusano 2b45e8
kusano 2b45e8
/* Call a "legacy" kernel through an untyped pointer.                      */
/*                                                                         */
/* `func` is called as (m, n, k, alpha..., a, lda, b, ldb, c, ldc, sb),    */
/* with the values taken from `args`.  `mode` selects the element type:    */
/*   - BLAS_XDOUBLE (only when EXPRECISION is defined) -> xdouble          */
/*   - BLAS_DOUBLE                                     -> double           */
/*   - otherwise                                       -> float            */
/* With BLAS_COMPLEX set, alpha is passed as two scalars (alpha[0] and     */
/* alpha[1]); otherwise only alpha[0] is passed.                           */
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){

  const int is_complex = (mode & BLAS_COMPLEX) != 0;

#ifdef EXPRECISION
  if (mode & BLAS_XDOUBLE){
    xdouble *alpha = (xdouble *)args -> alpha;

    if (is_complex){
      /* COMPLEX / Extended Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
                     xdouble *, BLASLONG, xdouble *, BLASLONG,
                     xdouble *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0], alpha[1],
             args -> a, args -> lda, args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Extended Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble,
                     xdouble *, BLASLONG, xdouble *, BLASLONG,
                     xdouble *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0],
             args -> a, args -> lda, args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
    return;
  }
#endif

  if (mode & BLAS_DOUBLE){
    double *alpha = (double *)args -> alpha;

    if (is_complex){
      /* COMPLEX / Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, double, double,
                     double *, BLASLONG, double *, BLASLONG,
                     double *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0], alpha[1],
             args -> a, args -> lda, args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, double,
                     double *, BLASLONG, double *, BLASLONG,
                     double *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0],
             args -> a, args -> lda, args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
    return;
  }

  {
    float *alpha = (float *)args -> alpha;

    if (is_complex){
      /* COMPLEX / Single */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, float, float,
                     float *, BLASLONG, float *, BLASLONG,
                     float *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0], alpha[1],
             args -> a, args -> lda, args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Single */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, float,
                     float *, BLASLONG, float *, BLASLONG,
                     float *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k, alpha[0],
             args -> a, args -> lda, args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
  }
}
kusano 2b45e8
kusano 2b45e8
/* This is a main routine of threads. Each thread waits until job is */
kusano 2b45e8
/* queued.                                                           */
kusano 2b45e8
kusano 2b45e8
static DWORD WINAPI blas_thread_server(void *arg){
kusano 2b45e8
kusano 2b45e8
  /* Thread identifier */
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
  BLASLONG  cpu = (BLASLONG)arg;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  void *buffer, *sa, *sb;
kusano 2b45e8
  blas_queue_t	*queue;
kusano 2b45e8
  DWORD action;
kusano 2b45e8
  HANDLE handles[] = {pool.filled, pool.killed};
kusano 2b45e8
  
kusano 2b45e8
  /* Each server needs each buffer */
kusano 2b45e8
  buffer   = blas_memory_alloc(2);
kusano 2b45e8
  
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
  fprintf(STDERR, "Server[%2ld] Thread is started!\n", cpu);
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
  while (1){
kusano 2b45e8
    
kusano 2b45e8
    /* Waiting for Queue */
kusano 2b45e8
    
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
    fprintf(STDERR, "Server[%2ld] Waiting for Queue.\n", cpu);
kusano 2b45e8
#endif
kusano 2b45e8
    
kusano 2b45e8
    do {
kusano 2b45e8
      action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
kusano 2b45e8
    } while ((action != WAIT_OBJECT_0) && (action == WAIT_OBJECT_0 + 1));
kusano 2b45e8
    
kusano 2b45e8
    if (action == WAIT_OBJECT_0 + 1) break;
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
    fprintf(STDERR, "Server[%2ld] Got it.\n", cpu);
kusano 2b45e8
#endif
kusano 2b45e8
    
kusano 2b45e8
    EnterCriticalSection(&pool.lock);
kusano 2b45e8
    
kusano 2b45e8
    queue = pool.queue;
kusano 2b45e8
    if (queue) pool.queue = queue->next;
kusano 2b45e8
    
kusano 2b45e8
    LeaveCriticalSection(&pool.lock);
kusano 2b45e8
    
kusano 2b45e8
    if (queue)  {
kusano 2b45e8
      int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
kusano 2b45e8
      
kusano 2b45e8
      if (pool.queue) SetEvent(pool.filled);
kusano 2b45e8
      
kusano 2b45e8
      sa = queue -> sa;
kusano 2b45e8
      sb = queue -> sb;
kusano 2b45e8
      
kusano 2b45e8
#ifdef CONSISTENT_FPCSR
kusano 2b45e8
      __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
kusano 2b45e8
      __asm__ __volatile__ ("fldcw %0"   : : "m" (queue -> x87_mode));
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
      fprintf(STDERR, "Server[%2ld] Started.  Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
kusano 2b45e8
	      cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
      // fprintf(stderr, "queue start[%ld]!!!\n", cpu);
kusano 2b45e8
      
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
      main_status[cpu] = MAIN_RUNNING1;
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
      if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
kusano 2b45e8
      
kusano 2b45e8
      if (sb == NULL) {
kusano 2b45e8
	if (!(queue -> mode & BLAS_COMPLEX)){
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
	  if (queue -> mode & BLAS_XDOUBLE){
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble) 
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  } else 
kusano 2b45e8
#endif
kusano 2b45e8
	    if (queue -> mode & BLAS_DOUBLE){
kusano 2b45e8
	      sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
kusano 2b45e8
					  + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	      
kusano 2b45e8
	    } else {
kusano 2b45e8
	      sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
kusano 2b45e8
					  + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	    }
kusano 2b45e8
	} else {
kusano 2b45e8
#ifdef EXPRECISION
kusano 2b45e8
	  if (queue -> mode & BLAS_XDOUBLE){
kusano 2b45e8
	    sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
kusano 2b45e8
					+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	  } else
kusano 2b45e8
#endif
kusano 2b45e8
	    if (queue -> mode & BLAS_DOUBLE){
kusano 2b45e8
	      sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
kusano 2b45e8
					  + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	    } else {
kusano 2b45e8
	      sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
kusano 2b45e8
					  + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
kusano 2b45e8
	    }
kusano 2b45e8
	}
kusano 2b45e8
      }
kusano 2b45e8
	
kusano 2b45e8
#ifdef MONITOR
kusano 2b45e8
      main_status[cpu] = MAIN_RUNNING2;
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
      if (!(queue -> mode & BLAS_LEGACY)) {
kusano 2b45e8
kusano 2b45e8
	(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
kusano 2b45e8
      } else {
kusano 2b45e8
	legacy_exec(routine, queue -> mode, queue -> args, sb);
kusano 2b45e8
      }
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
    fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
kusano 2b45e8
#endif
kusano 2b45e8
    
kusano 2b45e8
    EnterCriticalSection(&queue->lock);
kusano 2b45e8
    
kusano 2b45e8
    queue -> status = BLAS_STATUS_FINISHED;
kusano 2b45e8
    
kusano 2b45e8
    LeaveCriticalSection(&queue->lock);
kusano 2b45e8
    
kusano 2b45e8
    SetEvent(queue->finish);
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
  /* Shutdown procedure */
kusano 2b45e8
  
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
  fprintf(STDERR, "Server[%2ld] Shutdown!\n",  cpu);
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
  blas_memory_free(buffer);
kusano 2b45e8
  
kusano 2b45e8
  return 0;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
/* Initializing routine */
kusano 2b45e8
int blas_thread_init(void){
kusano 2b45e8
  BLASLONG i;
kusano 2b45e8
kusano 2b45e8
  if (blas_server_avail || (blas_cpu_number <= 1)) return 0;
kusano 2b45e8
  
kusano 2b45e8
  LOCK_COMMAND(&server_lock);
kusano 2b45e8
kusano 2b45e8
#ifdef SMP_DEBUG
kusano 2b45e8
  fprintf(STDERR, "Initializing Thread(Num. threads = %d)\n", 
kusano 2b45e8
	  blas_cpu_number);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if (!blas_server_avail){
kusano 2b45e8
kusano 2b45e8
    InitializeCriticalSection(&pool.lock);
kusano 2b45e8
    pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
kusano 2b45e8
    pool.killed = CreateEvent(NULL, TRUE,  FALSE, NULL);
kusano 2b45e8
kusano 2b45e8
    pool.shutdown = 0;
kusano 2b45e8
    pool.queue    = NULL;
kusano 2b45e8
kusano 2b45e8
    for(i = 0; i < blas_cpu_number - 1; i++){
kusano 2b45e8
      blas_threads[i] = CreateThread(NULL, 0, 
kusano 2b45e8
				     blas_thread_server, (void *)i,
kusano 2b45e8
				     0, &blas_threads_id[i]);
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    blas_server_avail = 1;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  UNLOCK_COMMAND(&server_lock);
kusano 2b45e8
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/* 
kusano 2b45e8
   User can call one of two routines.
kusano 2b45e8
kusano 2b45e8
     exec_blas_async ... immediately returns after jobs are queued.
kusano 2b45e8
kusano 2b45e8
     exec_blas       ... returns after jobs are finished.
kusano 2b45e8
*/
kusano 2b45e8
kusano 2b45e8
int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
kusano 2b45e8
kusano 2b45e8
  blas_queue_t *current;
kusano 2b45e8
kusano 2b45e8
  current = queue;
kusano 2b45e8
kusano 2b45e8
  while (current) {
kusano 2b45e8
    InitializeCriticalSection(¤t -> lock);
kusano 2b45e8
    current -> finish = CreateEvent(NULL, FALSE, FALSE, NULL);
kusano 2b45e8
    current -> position = pos;
kusano 2b45e8
kusano 2b45e8
#ifdef CONSISTENT_FPCSR
kusano 2b45e8
    __asm__ __volatile__ ("fnstcw %0"  : "=m" (current -> x87_mode));
kusano 2b45e8
    __asm__ __volatile__ ("stmxcsr %0" : "=m" (current -> sse_mode));
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    current = current -> next;
kusano 2b45e8
    pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  EnterCriticalSection(&pool.lock);
kusano 2b45e8
kusano 2b45e8
  if (pool.queue) {
kusano 2b45e8
    current = pool.queue;
kusano 2b45e8
    while (current -> next) current = current -> next;
kusano 2b45e8
    current -> next = queue;
kusano 2b45e8
  } else {
kusano 2b45e8
    pool.queue = queue;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  LeaveCriticalSection(&pool.lock);
kusano 2b45e8
kusano 2b45e8
  SetEvent(pool.filled);
kusano 2b45e8
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/* Wait for `num` queued jobs, starting at `queue`, to finish.             */
/*                                                                         */
/* For each job, in list order, this blocks on the job's `finish` event    */
/* and then releases the event handle and the critical section created by  */
/* exec_blas_async().  The loop stops early if the list ends before `num`  */
/* jobs have been seen, and does nothing when `num` is not positive.       */
/* Returns 0.                                                              */
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){

#ifdef SMP_DEBUG
    fprintf(STDERR, "Synchronization Waiting.\n");
#endif

    while ((num > 0) && (queue != NULL)){
#ifdef SMP_DEBUG
    fprintf(STDERR, "Waiting Queue ..\n");
#endif

      WaitForSingleObject(queue->finish, INFINITE);

      CloseHandle(queue->finish);
      DeleteCriticalSection(&queue -> lock);

      queue = queue -> next;
      num --;
    }

#ifdef SMP_DEBUG
    fprintf(STDERR, "Completely Done.\n\n");
#endif

  return 0;
}
kusano 2b45e8
kusano 2b45e8
/* Execute Threads */
kusano 2b45e8
int exec_blas(BLASLONG num, blas_queue_t *queue){
kusano 2b45e8
kusano 2b45e8
#ifndef ALL_THREADED
kusano 2b45e8
   int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if ((num <= 0) || (queue == NULL)) return 0;
kusano 2b45e8
kusano 2b45e8
  if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next);
kusano 2b45e8
kusano 2b45e8
  routine = queue -> routine;
kusano 2b45e8
  
kusano 2b45e8
    if (!(queue -> mode & BLAS_LEGACY)) {
kusano 2b45e8
      (routine)(queue -> args, queue -> range_m, queue -> range_n,
kusano 2b45e8
		queue -> sa, queue -> sb, 0);
kusano 2b45e8
    } else {
kusano 2b45e8
      legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
kusano 2b45e8
    }
kusano 2b45e8
kusano 2b45e8
  if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
kusano 2b45e8
kusano 2b45e8
  return 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/* Shutdown procedure, but user don't have to call this routine. The */
kusano 2b45e8
/* kernel automatically kill threads.                                */
kusano 2b45e8
kusano 2b45e8
int blas_thread_shutdown_(void){
kusano 2b45e8
kusano 2b45e8
  int i;
kusano 2b45e8
kusano 2b45e8
  if (!blas_server_avail) return 0;
kusano 2b45e8
  
kusano 2b45e8
  LOCK_COMMAND(&server_lock);
kusano 2b45e8
kusano 2b45e8
  if (blas_server_avail){
kusano 2b45e8
kusano 2b45e8
    SetEvent(pool.killed);
kusano 2b45e8
    
kusano 2b45e8
    for(i = 0; i < blas_cpu_number - 1; i++){
kusano 2b45e8
      WaitForSingleObject(blas_threads[i], INFINITE);
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    blas_server_avail = 0;
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
  UNLOCK_COMMAND(&server_lock);
kusano 2b45e8
  
kusano 2b45e8
  return 0;
kusano 2b45e8
}