|
kusano |
2b45e8 |
/*****************************************************************************
|
|
kusano |
2b45e8 |
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
|
kusano |
2b45e8 |
All rights reserved.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
Redistribution and use in source and binary forms, with or without
|
|
kusano |
2b45e8 |
modification, are permitted provided that the following conditions are
|
|
kusano |
2b45e8 |
met:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
1. Redistributions of source code must retain the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
2. Redistributions in binary form must reproduce the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer in
|
|
kusano |
2b45e8 |
the documentation and/or other materials provided with the
|
|
kusano |
2b45e8 |
distribution.
|
|
kusano |
2b45e8 |
3. Neither the name of the ISCAS nor the names of its contributors may
|
|
kusano |
2b45e8 |
be used to endorse or promote products derived from this software
|
|
kusano |
2b45e8 |
without specific prior written permission.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
kusano |
2b45e8 |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
kusano |
2b45e8 |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
kusano |
2b45e8 |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
kusano |
2b45e8 |
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
kusano |
2b45e8 |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
kusano |
2b45e8 |
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
kusano |
2b45e8 |
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
kusano |
2b45e8 |
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
kusano |
2b45e8 |
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
**********************************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#include "common.h"
|
|
kusano |
2b45e8 |
#ifdef OS_LINUX
|
|
kusano |
2b45e8 |
#include <dlfcn.h>
|
|
kusano |
2b45e8 |
#include <sys/resource.h>
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_SERVER
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#undef MONITOR
|
|
kusano |
2b45e8 |
#undef TIMING
|
|
kusano |
2b45e8 |
#undef TIMING_DEBUG
|
|
kusano |
2b45e8 |
#undef NEED_STACKATTR
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ATTRIBUTE_SIZE 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* This is a thread server model implementation. The threads are */
|
|
kusano |
2b45e8 |
/* spawned at first access to blas library, and remain alive until */
|
|
kusano |
2b45e8 |
/* destruction routine is called. The number of threads is */
|
|
kusano |
2b45e8 |
/* equal to "OMP_NUM_THREADS - 1" and thread only wakes up when */
|
|
kusano |
2b45e8 |
/* a job is queued. */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* We need this global for checking if initialization is finished. */
|
|
kusano |
2b45e8 |
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Local Variables */
|
|
kusano |
2b45e8 |
#if defined(USE_PTHREAD_LOCK)
|
|
kusano |
2b45e8 |
static pthread_mutex_t server_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
kusano |
2b45e8 |
#elif defined(USE_PTHREAD_SPINLOCK)
|
|
kusano |
2b45e8 |
static pthread_spinlock_t server_lock = 0;
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
static unsigned long server_lock = 0;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define THREAD_STATUS_SLEEP 2
|
|
kusano |
2b45e8 |
#define THREAD_STATUS_WAKEUP 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static pthread_t blas_threads [MAX_CPU_NUMBER];
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
typedef struct {
|
|
kusano |
2b45e8 |
blas_queue_t * volatile queue __attribute__((aligned(ATTRIBUTE_SIZE)));
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
|
|
kusano |
2b45e8 |
int node;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
volatile long status;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_t lock;
|
|
kusano |
2b45e8 |
pthread_cond_t wakeup;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
} thread_status_t;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static thread_status_t thread_status[MAX_CPU_NUMBER] __attribute__((aligned(ATTRIBUTE_SIZE)));
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef THREAD_TIMEOUT
|
|
kusano |
2b45e8 |
#define THREAD_TIMEOUT 28
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static unsigned int thread_timeout = (1U << (THREAD_TIMEOUT));
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Monitor is a function that prints every thread's status once per second. */
|
|
kusano |
2b45e8 |
/* It is normally compiled out (MONITOR is #undef'd above); it is for debugging. */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static pthread_t monitor_thread;
|
|
kusano |
2b45e8 |
static int main_status[MAX_CPU_NUMBER];
|
|
kusano |
2b45e8 |
#define MAIN_ENTER 0x01
|
|
kusano |
2b45e8 |
#define MAIN_EXIT 0x02
|
|
kusano |
2b45e8 |
#define MAIN_TRYLOCK 0x03
|
|
kusano |
2b45e8 |
#define MAIN_LOCKSUCCESS 0x04
|
|
kusano |
2b45e8 |
#define MAIN_QUEUING 0x05
|
|
kusano |
2b45e8 |
#define MAIN_RECEIVING 0x06
|
|
kusano |
2b45e8 |
#define MAIN_RUNNING1 0x07
|
|
kusano |
2b45e8 |
#define MAIN_RUNNING2 0x08
|
|
kusano |
2b45e8 |
#define MAIN_RUNNING3 0x09
|
|
kusano |
2b45e8 |
#define MAIN_WAITING 0x0a
|
|
kusano |
2b45e8 |
#define MAIN_SLEEPING 0x0b
|
|
kusano |
2b45e8 |
#define MAIN_FINISH 0x0c
|
|
kusano |
2b45e8 |
#define MAIN_DONE 0x0d
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define BLAS_QUEUE_FINISHED 3
|
|
kusano |
2b45e8 |
#define BLAS_QUEUE_RUNNING 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING
|
|
kusano |
2b45e8 |
BLASLONG exit_time[MAX_CPU_NUMBER];
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Call a kernel that uses the old "legacy" calling convention.              */
/*                                                                           */
/*   func : kernel address; reinterpreted as a function pointer whose alpha  */
/*          parameter type follows the precision/complex bits of mode        */
/*   mode : BLAS_COMPLEX / BLAS_XDOUBLE / BLAS_DOUBLE flags pick the variant */
/*          (neither precision bit set means single precision)               */
/*   args : supplies m, n, k, alpha, a/lda, b/ldb, c/ldc                     */
/*   sb   : passed through unchanged as the kernel's last argument           */
/*                                                                           */
/* Real kernels receive alpha[0]; complex kernels receive alpha[0] and       */
/* alpha[1] as two separate scalars.                                         */
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){

  const int is_complex = (mode & BLAS_COMPLEX) != 0;

#ifdef EXPRECISION
  if (mode & BLAS_XDOUBLE) {
    xdouble *alpha = (xdouble *)args -> alpha;

    if (is_complex) {
      /* COMPLEX / Extended Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
                     xdouble *, BLASLONG, xdouble *, BLASLONG,
                     xdouble *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k,
             alpha[0], alpha[1],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Extended Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble,
                     xdouble *, BLASLONG, xdouble *, BLASLONG,
                     xdouble *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k,
             alpha[0],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
    return;
  }
#endif

  if (mode & BLAS_DOUBLE) {
    double *alpha = (double *)args -> alpha;

    if (is_complex) {
      /* COMPLEX / Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, double, double,
                     double *, BLASLONG, double *, BLASLONG,
                     double *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k,
             alpha[0], alpha[1],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Double */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, double,
                     double *, BLASLONG, double *, BLASLONG,
                     double *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k,
             alpha[0],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
    return;
  }

  {
    float *alpha = (float *)args -> alpha;

    if (is_complex) {
      /* COMPLEX / Single */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, float, float,
                     float *, BLASLONG, float *, BLASLONG,
                     float *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k,
             alpha[0], alpha[1],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    } else {
      /* REAL / Single */
      void (*kernel)(BLASLONG, BLASLONG, BLASLONG, float,
                     float *, BLASLONG, float *, BLASLONG,
                     float *, BLASLONG, void *) = func;

      kernel(args -> m, args -> n, args -> k,
             alpha[0],
             args -> a, args -> lda,
             args -> b, args -> ldb,
             args -> c, args -> ldc, sb);
    }
  }
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
|
|
kusano |
2b45e8 |
int gotoblas_set_affinity(int);
|
|
kusano |
2b45e8 |
int gotoblas_set_affinity2(int);
|
|
kusano |
2b45e8 |
int get_node(void);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static int increased_threads = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static int blas_thread_server(void *arg){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Thread identifier */
|
|
kusano |
2b45e8 |
BLASLONG cpu = (BLASLONG)arg;
|
|
kusano |
2b45e8 |
unsigned int last_tick;
|
|
kusano |
2b45e8 |
void *buffer, *sa, *sb;
|
|
kusano |
2b45e8 |
blas_queue_t *queue;
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
unsigned long start, stop;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_LINUX) && !defined(NO_AFFINITY)
|
|
kusano |
2b45e8 |
if (!increased_threads)
|
|
kusano |
2b45e8 |
thread_status[cpu].node = gotoblas_set_affinity(cpu + 1);
|
|
kusano |
2b45e8 |
else
|
|
kusano |
2b45e8 |
thread_status[cpu].node = gotoblas_set_affinity(-1);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_ENTER;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
buffer = blas_memory_alloc(2);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_DEBUG
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Server[%2ld] Thread has just been spawned!\n", cpu);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while (1){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_QUEUING;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING
|
|
kusano |
2b45e8 |
exit_time[cpu] = rpcc();
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
last_tick = (unsigned int)rpcc();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while (!thread_status[cpu].queue) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
YIELDING;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((unsigned int)rpcc() - last_tick > thread_timeout) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_lock (&thread_status[cpu].lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (!thread_status[cpu].queue) {
|
|
kusano |
2b45e8 |
thread_status[cpu].status = THREAD_STATUS_SLEEP;
|
|
kusano |
2b45e8 |
while (thread_status[cpu].status == THREAD_STATUS_SLEEP) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_SLEEPING;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_cond_wait(&thread_status[cpu].wakeup, &thread_status[cpu].lock);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_unlock(&thread_status[cpu].lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
last_tick = (unsigned int)rpcc();
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
queue = thread_status[cpu].queue;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((long)queue == -1) break;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_RECEIVING;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
start = rpcc();
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (queue) {
|
|
kusano |
2b45e8 |
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
thread_status[cpu].queue = (blas_queue_t *)1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sa = queue -> sa;
|
|
kusano |
2b45e8 |
sb = queue -> sb;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_DEBUG
|
|
kusano |
2b45e8 |
if (queue -> args) {
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Server[%2ld] Calculation started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
|
|
kusano |
2b45e8 |
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef CONSISTENT_FPCSR
|
|
kusano |
2b45e8 |
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
|
|
kusano |
2b45e8 |
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_RUNNING1;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (sb == NULL) {
|
|
kusano |
2b45e8 |
if (!(queue -> mode & BLAS_COMPLEX)){
|
|
kusano |
2b45e8 |
#ifdef EXPRECISION
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_XDOUBLE){
|
|
kusano |
2b45e8 |
sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
|
|
kusano |
2b45e8 |
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
|
kusano |
2b45e8 |
} else
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_DOUBLE){
|
|
kusano |
2b45e8 |
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
|
|
kusano |
2b45e8 |
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
} else {
|
|
kusano |
2b45e8 |
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
|
|
kusano |
2b45e8 |
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
} else {
|
|
kusano |
2b45e8 |
#ifdef EXPRECISION
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_XDOUBLE){
|
|
kusano |
2b45e8 |
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
|
|
kusano |
2b45e8 |
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
|
kusano |
2b45e8 |
} else
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_DOUBLE){
|
|
kusano |
2b45e8 |
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
|
|
kusano |
2b45e8 |
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
|
kusano |
2b45e8 |
} else {
|
|
kusano |
2b45e8 |
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
|
|
kusano |
2b45e8 |
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_RUNNING2;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_LEGACY) {
|
|
kusano |
2b45e8 |
legacy_exec(routine, queue -> mode, queue -> args, sb);
|
|
kusano |
2b45e8 |
} else
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_PTHREAD) {
|
|
kusano |
2b45e8 |
void (*pthreadcompat)(void *) = queue -> routine;
|
|
kusano |
2b45e8 |
(pthreadcompat)(queue -> args);
|
|
kusano |
2b45e8 |
} else
|
|
kusano |
2b45e8 |
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_DEBUG
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Server[%2ld] Calculation finished!\n", cpu);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_FINISH;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */
|
|
kusano |
2b45e8 |
WMB;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
main_status[cpu] = MAIN_DONE;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
stop = rpcc();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Thread[%ld] : %16lu %16lu (%8lu cycles)\n", cpu + 1,
|
|
kusano |
2b45e8 |
start, stop,
|
|
kusano |
2b45e8 |
stop - start);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Shutdown procedure */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_DEBUG
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Server[%2ld] Shutdown!\n", cpu);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_memory_free(buffer);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_exit(NULL);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static BLASLONG num_suspend = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static int blas_monitor(void *arg){
|
|
kusano |
2b45e8 |
int i;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while(1){
|
|
kusano |
2b45e8 |
for (i = 0; i < blas_num_threads - 1; i++){
|
|
kusano |
2b45e8 |
switch (main_status[i]) {
|
|
kusano |
2b45e8 |
case MAIN_ENTER :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Entering.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_EXIT :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Exiting.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_TRYLOCK :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Trying lock operation.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_QUEUING :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Queuing.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_RECEIVING :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Receiving.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_RUNNING1 :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Running1.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_RUNNING2 :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Running2.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_RUNNING3 :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Running3.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_WAITING :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Waiting.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_SLEEPING :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Sleeping.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_FINISH :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Finishing.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
case MAIN_DONE :
|
|
kusano |
2b45e8 |
fprintf(STDERR, "THREAD[%2d] : Job is done.\n", i);
|
|
kusano |
2b45e8 |
break;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fprintf(stderr, "Total number of suspended ... %ld\n", num_suspend);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
sleep(1);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Initializing routine */
|
|
kusano |
2b45e8 |
int blas_thread_init(void){
|
|
kusano |
2b45e8 |
BLASLONG i;
|
|
kusano |
2b45e8 |
int ret;
|
|
kusano |
2b45e8 |
#ifdef NEED_STACKATTR
|
|
kusano |
2b45e8 |
pthread_attr_t attr;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (blas_server_avail) return 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef NEED_STACKATTR
|
|
kusano |
2b45e8 |
pthread_attr_init(&attr);
|
|
kusano |
2b45e8 |
pthread_attr_setguardsize(&attr, 0x1000U);
|
|
kusano |
2b45e8 |
pthread_attr_setstacksize( &attr, 0x1000U);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOCK_COMMAND(&server_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (!blas_server_avail){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
char *p;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
p = getenv("THREAD_TIMEOUT");
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (p) {
|
|
kusano |
2b45e8 |
thread_timeout = atoi(p);
|
|
kusano |
2b45e8 |
if (thread_timeout < 4) thread_timeout = 4;
|
|
kusano |
2b45e8 |
if (thread_timeout > 30) thread_timeout = 30;
|
|
kusano |
2b45e8 |
thread_timeout = (1 << thread_timeout);
|
|
kusano |
2b45e8 |
}else{
|
|
kusano |
2b45e8 |
p = getenv("GOTO_THREAD_TIMEOUT");
|
|
kusano |
2b45e8 |
if (p) {
|
|
kusano |
2b45e8 |
thread_timeout = atoi(p);
|
|
kusano |
2b45e8 |
if (thread_timeout < 4) thread_timeout = 4;
|
|
kusano |
2b45e8 |
if (thread_timeout > 30) thread_timeout = 30;
|
|
kusano |
2b45e8 |
thread_timeout = (1 << thread_timeout);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for(i = 0; i < blas_num_threads - 1; i++){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
thread_status[i].queue = (blas_queue_t *)NULL;
|
|
kusano |
2b45e8 |
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_init(&thread_status[i].lock, NULL);
|
|
kusano |
2b45e8 |
pthread_cond_init (&thread_status[i].wakeup, NULL);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef NEED_STACKATTR
|
|
kusano |
2b45e8 |
ret=pthread_create(&blas_threads[i], &attr,
|
|
kusano |
2b45e8 |
(void *)&blas_thread_server, (void *)i);
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
ret=pthread_create(&blas_threads[i], NULL,
|
|
kusano |
2b45e8 |
(void *)&blas_thread_server, (void *)i);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
if(ret!=0){
|
|
kusano |
2b45e8 |
fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. Error code:%d\n",ret);
|
|
kusano |
2b45e8 |
exit(1);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef MONITOR
|
|
kusano |
2b45e8 |
pthread_create(&monitor_thread, NULL,
|
|
kusano |
2b45e8 |
(void *)&blas_monitor, (void *)NULL);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_server_avail = 1;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
UNLOCK_COMMAND(&server_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
|
|
kusano |
2b45e8 |
User can call one of two routines.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
exec_blas_async ... immediately returns after jobs are queued.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
exec_blas ... returns after jobs are finished.
|
|
kusano |
2b45e8 |
*/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static BLASULONG exec_queue_lock = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Hand every job in the linked list `queue` to a worker and return without  */
/* waiting for completion.                                                   */
/*                                                                           */
/*   pos   : position value stored in the first job; incremented per job     */
/*   queue : head of the job list (linked through ->next)                    */
/*                                                                           */
/* Phase 1 (under exec_queue_lock): for each job, spin over the first        */
/* blas_num_threads - 1 thread_status slots until one with an empty queue    */
/* is found, record its index in job->assigned and publish the job there.    */
/* With node mapping compiled in and BLAS_NODE set in the job's mode, slots  */
/* whose node matches the current node are tried first, then the next node.  */
/* Phase 2: for each job whose worker is marked THREAD_STATUS_SLEEP, signal  */
/* that worker's condition variable.  Always returns 0.                      */
int exec_blas_async(BLASLONG pos, blas_queue_t *queue){

  BLASLONG i = 0;
  blas_queue_t *current = queue;
#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)
  int node  = get_node();
  int nodes = get_num_nodes();
#endif

#ifdef SMP_DEBUG
  int exec_count = 0;
  /* pos is a BLASLONG, so print it as a long rather than with %d. */
  fprintf(STDERR, "Exec_blas_async is called. Position = %ld\n", (long)pos);
#endif

  blas_lock(&exec_queue_lock);

  while (queue) {
    queue -> position  = pos;

#ifdef CONSISTENT_FPCSR
    /* Capture the caller's x87/SSE control words into the job. */
    __asm__ __volatile__ ("fnstcw %0"  : "=m" (queue -> x87_mode));
    __asm__ __volatile__ ("stmxcsr %0" : "=m" (queue -> sse_mode));
#endif

#if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST)

    /* Node Mapping Mode */

    if (queue -> mode & BLAS_NODE) {

      do {
        /* Check the index bound first so no slot beyond the workers is read. */
        while((i < blas_num_threads - 1) &&
              (thread_status[i].node != node || thread_status[i].queue)) i ++;

        if (i < blas_num_threads - 1) break;

        /* Nothing free on this node: restart the scan on the next node. */
        i ++;
        if (i >= blas_num_threads - 1) {
          i = 0;
          node ++;
          if (node >= nodes) node = 0;
        }

      } while (1);

    } else {
      while(thread_status[i].queue) {
        i ++;
        if (i >= blas_num_threads - 1) i = 0;
      }
    }
#else
    while(thread_status[i].queue) {
      i ++;
      if (i >= blas_num_threads - 1) i = 0;
    }
#endif

    /* Record the assignment before the job becomes visible in the slot. */
    queue -> assigned = i;
    WMB;
    thread_status[i].queue = queue;
    WMB;

    queue = queue -> next;
    pos ++;
#ifdef SMP_DEBUG
    exec_count ++;
#endif

  }

  blas_unlock(&exec_queue_lock);

#ifdef SMP_DEBUG
  /* exec_count is an int, so %2d rather than %2ld. */
  fprintf(STDERR, "Done(Number of threads = %2d).\n", exec_count);
#endif

  while (current) {

    pos = current -> assigned;

    /* Only when the slot holds something other than NULL or the value 1. */
    if ((BLASULONG)thread_status[pos].queue > 1) {

      /* Unlocked status read as a filter; re-checked under the lock below. */
      if (thread_status[pos].status == THREAD_STATUS_SLEEP) {

        pthread_mutex_lock  (&thread_status[pos].lock);

#ifdef MONITOR
        num_suspend ++;
#endif

        if (thread_status[pos].status == THREAD_STATUS_SLEEP) {
          thread_status[pos].status = THREAD_STATUS_WAKEUP;
          pthread_cond_signal(&thread_status[pos].wakeup);
        }
        pthread_mutex_unlock(&thread_status[pos].lock);
      }
    }

    current = current -> next;
  }

  return 0;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Wait for queued jobs to leave their worker slots.                      */
/*                                                                        */
/* Walks at most `num` entries of the list starting at `queue` (stopping  */
/* early at the end of the list). For each entry it spins, yielding       */
/* between polls, until thread_status[entry->assigned].queue reads as     */
/* zero. Always returns 0.                                                */
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){

  blas_queue_t *job       = queue;
  BLASLONG      remaining = num;

  for (; (remaining > 0) && job; remaining --) {

    /* Poll the slot this job was assigned to until it is empty. */
    while (thread_status[job -> assigned].queue) {
      YIELDING;
    }

    job = job -> next;
  }

#ifdef SMP_DEBUG
  fprintf(STDERR, "Done.\n\n");
#endif

  return 0;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Execute Threads */
|
|
kusano |
2b45e8 |
int exec_blas(BLASLONG num, blas_queue_t *queue){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
BLASULONG start, stop;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((num <= 0) || (queue == NULL)) return 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP_DEBUG
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Exec_blas is called. Number of executing threads : %ld\n", num);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef __ELF__
|
|
kusano |
2b45e8 |
if (omp_in_parallel && (num > 1)) {
|
|
kusano |
2b45e8 |
if (omp_in_parallel() > 0) {
|
|
kusano |
2b45e8 |
fprintf(stderr,
|
|
kusano |
2b45e8 |
"OpenBLAS Warning : Detect OpenMP Loop and this application may hang. "
|
|
kusano |
2b45e8 |
"Please rebuild the library with USE_OPENMP=1 option.\n");
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
start = rpcc();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
fprintf(STDERR, "\n");
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
routine = queue -> routine;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_LEGACY) {
|
|
kusano |
2b45e8 |
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
|
|
kusano |
2b45e8 |
} else
|
|
kusano |
2b45e8 |
if (queue -> mode & BLAS_PTHREAD) {
|
|
kusano |
2b45e8 |
void (*pthreadcompat)(void *) = queue -> routine;
|
|
kusano |
2b45e8 |
(pthreadcompat)(queue -> args);
|
|
kusano |
2b45e8 |
} else
|
|
kusano |
2b45e8 |
(routine)(queue -> args, queue -> range_m, queue -> range_n,
|
|
kusano |
2b45e8 |
queue -> sa, queue -> sb, 0);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
stop = rpcc();
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef TIMING_DEBUG
|
|
kusano |
2b45e8 |
fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n",
|
|
kusano |
2b45e8 |
start, stop,
|
|
kusano |
2b45e8 |
stop - start);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
void goto_set_num_threads(int num_threads) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
long i;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (num_threads < 1) num_threads = blas_num_threads;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (num_threads > blas_num_threads) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOCK_COMMAND(&server_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
increased_threads = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
thread_status[i].queue = (blas_queue_t *)NULL;
|
|
kusano |
2b45e8 |
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_init(&thread_status[i].lock, NULL);
|
|
kusano |
2b45e8 |
pthread_cond_init (&thread_status[i].wakeup, NULL);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef NEED_STACKATTR
|
|
kusano |
2b45e8 |
pthread_create(&blas_threads[i], &attr,
|
|
kusano |
2b45e8 |
(void *)&blas_thread_server, (void *)i);
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
pthread_create(&blas_threads[i], NULL,
|
|
kusano |
2b45e8 |
(void *)&blas_thread_server, (void *)i);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_num_threads = num_threads;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
UNLOCK_COMMAND(&server_lock);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_cpu_number = num_threads;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(ARCH_MIPS64)
|
|
kusano |
2b45e8 |
//set parameters for different number of threads.
|
|
kusano |
2b45e8 |
blas_set_parameter();
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Alternative entry point: forwards the request unchanged to             */
/* goto_set_num_threads().                                                */
void openblas_set_num_threads(int num_threads) {
  goto_set_num_threads(num_threads);
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Compatible function with pthread_create / join */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int gotoblas_pthread(int numthreads, void *function, void *args, int stride) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_queue_t queue[MAX_CPU_NUMBER];
|
|
kusano |
2b45e8 |
int i;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (numthreads <= 0) return 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef SMP
|
|
kusano |
2b45e8 |
if (blas_cpu_number == 0) blas_get_cpu_number();
|
|
kusano |
2b45e8 |
#ifdef SMP_SERVER
|
|
kusano |
2b45e8 |
if (blas_server_avail == 0) blas_thread_init();
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (i = 0; i < numthreads; i ++) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
queue[i].mode = BLAS_PTHREAD;
|
|
kusano |
2b45e8 |
queue[i].routine = function;
|
|
kusano |
2b45e8 |
queue[i].args = args;
|
|
kusano |
2b45e8 |
queue[i].range_m = NULL;
|
|
kusano |
2b45e8 |
queue[i].range_n = NULL;
|
|
kusano |
2b45e8 |
queue[i].sa = args;
|
|
kusano |
2b45e8 |
queue[i].sb = args;
|
|
kusano |
2b45e8 |
queue[i].next = &queue[i + 1];
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
args += stride;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
queue[numthreads - 1].next = NULL;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
exec_blas(numthreads, queue);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Shutdown procedure, but user don't have to call this routine. The */
|
|
kusano |
2b45e8 |
/* kernel automatically kill threads. */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int BLASFUNC(blas_thread_shutdown)(void){
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int i;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (!blas_server_avail) return 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
LOCK_COMMAND(&server_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (i = 0; i < blas_num_threads - 1; i++) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_lock(&exec_queue_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
thread_status[i].queue = (blas_queue_t *)-1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_unlock(&exec_queue_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_lock (&thread_status[i].lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
thread_status[i].status = THREAD_STATUS_WAKEUP;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_cond_signal (&thread_status[i].wakeup);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pthread_mutex_unlock(&thread_status[i].lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for(i = 0; i < blas_num_threads - 1; i++){
|
|
kusano |
2b45e8 |
pthread_join(blas_threads[i], NULL);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for(i = 0; i < blas_num_threads - 1; i++){
|
|
kusano |
2b45e8 |
pthread_mutex_destroy(&thread_status[i].lock);
|
|
kusano |
2b45e8 |
pthread_cond_destroy (&thread_status[i].wakeup);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef NEED_STACKATTR
|
|
kusano |
2b45e8 |
pthread_attr_destory(&attr);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_server_avail = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
UNLOCK_COMMAND(&server_lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|