|
kusano |
2b45e8 |
/*****************************************************************************
|
|
kusano |
2b45e8 |
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
|
kusano |
2b45e8 |
All rights reserved.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
Redistribution and use in source and binary forms, with or without
|
|
kusano |
2b45e8 |
modification, are permitted provided that the following conditions are
|
|
kusano |
2b45e8 |
met:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
1. Redistributions of source code must retain the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
2. Redistributions in binary form must reproduce the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer in
|
|
kusano |
2b45e8 |
the documentation and/or other materials provided with the
|
|
kusano |
2b45e8 |
distribution.
|
|
kusano |
2b45e8 |
3. Neither the name of the ISCAS nor the names of its contributors may
|
|
kusano |
2b45e8 |
be used to endorse or promote products derived from this software
|
|
kusano |
2b45e8 |
without specific prior written permission.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
kusano |
2b45e8 |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
kusano |
2b45e8 |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
kusano |
2b45e8 |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
kusano |
2b45e8 |
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
kusano |
2b45e8 |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
kusano |
2b45e8 |
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
kusano |
2b45e8 |
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
kusano |
2b45e8 |
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
kusano |
2b45e8 |
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
**********************************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#include "common.h"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(OS_LINUX) && defined(SMP)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define _GNU_SOURCE
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#include <sys/sysinfo.h>
#include <sys/syscall.h>
#include <sys/shm.h>
#include <fcntl.h>
#include <sched.h>
#include <dirent.h>
#include <dlfcn.h>
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Capacity of the per-node and per-CPU tables kept in shared memory. */
#define MAX_NODES	16
#define MAX_CPUS	256

/* Key passed to shmget() and marker stored in shm_t.magic once initialised. */
#define SH_MAGIC	0x510510

/* sysfs locations (printf-style templates) describing the machine topology. */
#define CPUMAP_NAME	"/sys/devices/system/node/node%d/cpumap"
#define SHARE_NAME	"/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map"
#define NODE_DIR	"/sys/devices/system/node"
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
//#undef DEBUG
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Private variables */
|
|
kusano |
2b45e8 |
typedef struct {
|
|
kusano |
2b45e8 |
unsigned long lock;
|
|
kusano |
2b45e8 |
unsigned int magic;
|
|
kusano |
2b45e8 |
unsigned int shmid;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int num_nodes;
|
|
kusano |
2b45e8 |
int num_procs;
|
|
kusano |
2b45e8 |
int final_num_procs;
|
|
kusano |
2b45e8 |
unsigned long avail;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
unsigned long cpu_info [MAX_CPUS];
|
|
kusano |
2b45e8 |
unsigned long node_info [MAX_NODES];
|
|
kusano |
2b45e8 |
int cpu_use[MAX_CPUS];
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
} shm_t;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static cpu_set_t cpu_orig_mask[4];
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static int cpu_mapping[MAX_CPUS];
|
|
kusano |
2b45e8 |
static int node_mapping[MAX_CPUS * 4];
|
|
kusano |
2b45e8 |
static int cpu_sub_mapping[MAX_CPUS];
|
|
kusano |
2b45e8 |
static int disable_mapping;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Number of cores per nodes */
|
|
kusano |
2b45e8 |
static int node_cpu[MAX_NODES];
|
|
kusano |
2b45e8 |
static int node_equal = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static shm_t *common = (void *)-1;
|
|
kusano |
2b45e8 |
static int shmid, pshmid;
|
|
kusano |
2b45e8 |
static void *paddr;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static unsigned long lprocmask, lnodemask;
|
|
kusano |
2b45e8 |
static int numprocs = 1;
|
|
kusano |
2b45e8 |
static int numnodes = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * A cpu_info entry packs three 8-bit fields into one word.  The CPU id is
 * always the low byte; the two layouts differ only in which of node/core
 * occupies bits 8-15 and which occupies bits 16-23.  The position of a
 * field decides how much weight it has when entries are compared as plain
 * integers.
 */
#if 1
#define CPU_INFO_NODE_SHIFT	 8
#define CPU_INFO_CORE_SHIFT	16
#else
#define CPU_INFO_CORE_SHIFT	 8
#define CPU_INFO_NODE_SHIFT	16
#endif

#define READ_CPU(x)	( (x)				& 0xff)
#define READ_NODE(x)	(((x) >> CPU_INFO_NODE_SHIFT)	& 0xff)
#define READ_CORE(x)	(((x) >> CPU_INFO_CORE_SHIFT)	& 0xff)

#define WRITE_CPU(x)	(x)
#define WRITE_NODE(x)	((x) << CPU_INFO_NODE_SHIFT)
#define WRITE_CORE(x)	((x) << CPU_INFO_CORE_SHIFT)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Return how many bits of `number` are set. */
static inline int popcount(unsigned long number) {

  int bits_set;

  /* Examine the lowest bit, then shift it out, until nothing is left. */
  for (bits_set = 0; number != 0; number >>= 1) {
    bits_set += (int)(number & 1);
  }

  return bits_set;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Return the index of the highest set bit of `number` (0 for 1, 3 for 8),
 * or -1 when `number` is 0.
 *
 * The loop used to carry an extra `(number & 0) == 0` test, which is always
 * true and therefore never ended the loop early; it has been dropped, and
 * the results are unchanged.
 * NOTE(review): the name suggests that something else may have been
 * intended (for example a test of bit 0) - confirm before relying on it.
 */
static inline int rcount(unsigned long number) {

  int count = -1;

  while (number > 0) {
    count ++;
    number >>= 1;
  }

  return count;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/***
|
|
kusano |
2b45e8 |
Known issue: The number of CPUs/cores should less
|
|
kusano |
2b45e8 |
than sizeof(unsigned long). On 64 bits, the limit
|
|
kusano |
2b45e8 |
is 64. On 32 bits, it is 32.
|
|
kusano |
2b45e8 |
***/
|
|
kusano |
2b45e8 |
static inline unsigned long get_cpumap(int node) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int infile;
|
|
kusano |
2b45e8 |
unsigned long affinity;
|
|
kusano |
2b45e8 |
char name[160];
|
|
kusano |
2b45e8 |
char cpumap[160];
|
|
kusano |
2b45e8 |
char *p, *dummy;
|
|
kusano |
2b45e8 |
int i=0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sprintf(name, CPUMAP_NAME, node);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
infile = open(name, O_RDONLY);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
affinity = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (infile != -1) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
read(infile, cpumap, sizeof(cpumap));
|
|
kusano |
2b45e8 |
p = cpumap;
|
|
kusano |
2b45e8 |
while (*p != '\n' && i<160){
|
|
kusano |
2b45e8 |
if(*p != ',') {
|
|
kusano |
2b45e8 |
name[i++]=*p;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
p++;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
p = name;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
// while ((*p == '0') || (*p == ',')) p++;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
affinity = strtoul(p, &dummy, 16);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
close(infile);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return affinity;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static inline unsigned long get_share(int cpu, int level) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int infile;
|
|
kusano |
2b45e8 |
unsigned long affinity;
|
|
kusano |
2b45e8 |
char name[160];
|
|
kusano |
2b45e8 |
char *p;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sprintf(name, SHARE_NAME, cpu, level);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
infile = open(name, O_RDONLY);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
affinity = (1UL << cpu);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (infile != -1) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
read(infile, name, sizeof(name));
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
p = name;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while ((*p == '0') || (*p == ',')) p++;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
affinity = strtol(p, &p, 16);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
close(infile);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return affinity;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static int numa_check(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
DIR *dp;
|
|
kusano |
2b45e8 |
struct dirent *dir;
|
|
kusano |
2b45e8 |
int node;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> num_nodes = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
dp = opendir(NODE_DIR);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (dp == NULL) {
|
|
kusano |
2b45e8 |
common -> num_nodes = 1;
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (node = 0; node < MAX_NODES; node ++) common -> node_info[node] = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while ((dir = readdir(dp)) != NULL) {
|
|
kusano |
2b45e8 |
if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
node = atoi(&dir -> d_name[4]);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (node > MAX_NODES) {
|
|
kusano |
2b45e8 |
fprintf(stderr, "\nGotoBLAS Warining : MAX_NODES (NUMA) is too small. Terminated.\n");
|
|
kusano |
2b45e8 |
exit(1);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> num_nodes ++;
|
|
kusano |
2b45e8 |
common -> node_info[node] = get_cpumap(node);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
closedir(dp);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (common -> num_nodes == 1) return 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (node = 0; node < common -> num_nodes; node ++)
|
|
kusano |
2b45e8 |
fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node]);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return common -> num_nodes;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static void numa_mapping(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int node, cpu, core;
|
|
kusano |
2b45e8 |
int i, j, h;
|
|
kusano |
2b45e8 |
unsigned long work, bit;
|
|
kusano |
2b45e8 |
int count = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (node = 0; node < common -> num_nodes; node ++) {
|
|
kusano |
2b45e8 |
core = 0;
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
|
|
kusano |
2b45e8 |
if (common -> node_info[node] & common -> avail & (1UL << cpu)) {
|
|
kusano |
2b45e8 |
common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
|
|
kusano |
2b45e8 |
count ++;
|
|
kusano |
2b45e8 |
core ++;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "\nFrom /sys ...\n\n");
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < count; cpu++)
|
|
kusano |
2b45e8 |
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
h = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while (h < count) h = 2 * h + 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while (h > 1) {
|
|
kusano |
2b45e8 |
h /= 2;
|
|
kusano |
2b45e8 |
for (i = h; i < count; i++) {
|
|
kusano |
2b45e8 |
work = common -> cpu_info[i];
|
|
kusano |
2b45e8 |
bit = CPU_ISSET(i, &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
j = i - h;
|
|
kusano |
2b45e8 |
while (work < common -> cpu_info[j]) {
|
|
kusano |
2b45e8 |
common -> cpu_info[j + h] = common -> cpu_info[j];
|
|
kusano |
2b45e8 |
if (CPU_ISSET(j, &cpu_orig_mask[0])) {
|
|
kusano |
2b45e8 |
CPU_SET(j + h, &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
} else {
|
|
kusano |
2b45e8 |
CPU_CLR(j + h, &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
j -= h;
|
|
kusano |
2b45e8 |
if (j < 0) break;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
common -> cpu_info[j + h] = work;
|
|
kusano |
2b45e8 |
if (bit) {
|
|
kusano |
2b45e8 |
CPU_SET(j + h, &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
} else {
|
|
kusano |
2b45e8 |
CPU_CLR(j + h, &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "\nSorting ...\n\n");
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < count; cpu++)
|
|
kusano |
2b45e8 |
fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static void disable_hyperthread(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
unsigned long share;
|
|
kusano |
2b45e8 |
int cpu;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if(common->num_procs > 64){
|
|
kusano |
2b45e8 |
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
|
|
kusano |
2b45e8 |
exit(1);
|
|
kusano |
2b45e8 |
}else if(common->num_procs == 64){
|
|
kusano |
2b45e8 |
common -> avail = 0xFFFFFFFFFFFFFFFFUL;
|
|
kusano |
2b45e8 |
}else
|
|
kusano |
2b45e8 |
common -> avail = (1UL << common -> num_procs) - 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
share = (get_share(cpu, 1) & common -> avail);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (popcount(share) > 1) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
|
|
kusano |
2b45e8 |
cpu, share & ~(1UL << cpu));
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> avail &= ~((share & ~(1UL << cpu)));
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static void disable_affinity(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Final all available CPUs : %04lx.\n\n", common -> avail);
|
|
kusano |
2b45e8 |
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if(common->final_num_procs > 64){
|
|
kusano |
2b45e8 |
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
|
|
kusano |
2b45e8 |
exit(1);
|
|
kusano |
2b45e8 |
}else if(common->final_num_procs == 64){
|
|
kusano |
2b45e8 |
lprocmask = 0xFFFFFFFFFFFFFFFFUL;
|
|
kusano |
2b45e8 |
}else
|
|
kusano |
2b45e8 |
lprocmask = (1UL << common -> final_num_procs) - 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef USE_OPENMP
|
|
kusano |
2b45e8 |
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "I choose these CPUs : %04lx.\n\n", lprocmask);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static void setup_mempolicy(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int cpu, mynode, maxcpu;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < MAX_NODES; cpu ++) node_cpu[cpu] = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
maxcpu = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < numprocs; cpu ++) {
|
|
kusano |
2b45e8 |
mynode = READ_NODE(common -> cpu_info[cpu_sub_mapping[cpu]]);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
lnodemask |= (1UL << mynode);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
node_cpu[mynode] ++;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (maxcpu < node_cpu[mynode]) maxcpu = node_cpu[mynode];
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
node_equal = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < MAX_NODES; cpu ++) if ((node_cpu[cpu] != 0) && (node_cpu[cpu] != maxcpu)) node_equal = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (lnodemask) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Node mask = %lx\n", lnodemask);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
my_set_mempolicy(MPOL_INTERLEAVE, &lnodemask, sizeof(lnodemask) * 8);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
numnodes = popcount(lnodemask);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Query the shared-memory segment `id` with shmctl(IPC_STAT).
 * Returns the shmctl() result unchanged: 0 when the segment can be
 * queried, non-zero when the call fails (the caller treats that as
 * "the owner is gone").
 */
static inline int is_dead(int id) {

  struct shmid_ds info;
  int status;

  status = shmctl(id, IPC_STAT, &info);

  return status;
}
|
|
kusano |
2b45e8 |
static void open_shmem(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int try = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
do {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
shmid = shmget(SH_MAGIC, 4096, 0666);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (shmid == -1) {
|
|
kusano |
2b45e8 |
shmid = shmget(SH_MAGIC, 4096, IPC_CREAT | 0666);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
try ++;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
} while ((try < 10) && (shmid == -1));
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (shmid == -1) {
|
|
kusano |
2b45e8 |
fprintf(stderr, "GotoBLAS : Can't open shared memory. Terminated.\n");
|
|
kusano |
2b45e8 |
exit(1);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (shmid != -1) common = (shm_t *)shmat(shmid, NULL, 0);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Shared Memory id = %x Address = %p\n", shmid, common);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static void create_pshmem(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
pshmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
paddr = shmat(pshmid, NULL, 0);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
shmctl(pshmid, IPC_RMID, 0);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Private Shared Memory id = %x Address = %p\n", pshmid, paddr);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static void local_cpu_map(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int cpu, id, mapping;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
cpu = 0;
|
|
kusano |
2b45e8 |
mapping = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
do {
|
|
kusano |
2b45e8 |
id = common -> cpu_use[cpu];
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (id > 0) {
|
|
kusano |
2b45e8 |
if (is_dead(id)) common -> cpu_use[cpu] = 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((common -> cpu_use[cpu] == 0) && (lprocmask & (1UL << cpu))) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> cpu_use[cpu] = pshmid;
|
|
kusano |
2b45e8 |
cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
|
|
kusano |
2b45e8 |
cpu_sub_mapping[mapping] = cpu;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mapping ++;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
cpu ++;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
} while ((mapping < numprocs) && (cpu < common -> final_num_procs));
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
disable_mapping = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((mapping < numprocs) || (numprocs == 1)) {
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) {
|
|
kusano |
2b45e8 |
if (common -> cpu_use[cpu] == pshmid) common -> cpu_use[cpu] = 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
disable_mapping = 1;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < numprocs; cpu ++) {
|
|
kusano |
2b45e8 |
fprintf(stderr, "Local Mapping : %2d --> %2d (%2d)\n", cpu, cpu_mapping[cpu], cpu_sub_mapping[cpu]);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Public Functions */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Return the current value of numprocs. */
int get_num_procs(void) {
  return numprocs;
}
|
|
kusano |
2b45e8 |
/* Return the current value of numnodes. */
int get_num_nodes(void) {
  return numnodes;
}
|
|
kusano |
2b45e8 |
int get_node_equal(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return (((blas_cpu_number % numnodes) == 0) && node_equal);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int gotoblas_set_affinity(int pos) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
cpu_set_t cpu_mask;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int mynode = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* if number of threads is larger than inital condition */
|
|
kusano |
2b45e8 |
if (pos < 0) {
|
|
kusano |
2b45e8 |
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
return 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (!disable_mapping) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
mynode = READ_NODE(common -> cpu_info[cpu_sub_mapping[pos]]);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Giving Affinity[%4d %3d] --> %3d My node = %3d\n", getpid(), pos, cpu_mapping[pos], mynode);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
CPU_ZERO(&cpu_mask);
|
|
kusano |
2b45e8 |
CPU_SET (cpu_mapping[pos], &cpu_mask);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
node_mapping[WhereAmI()] = mynode;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return mynode;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int get_node(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (!disable_mapping) return node_mapping[WhereAmI()];
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
return 1;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
static int initialized = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
void gotoblas_affinity_init(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int cpu, num_avail;
|
|
kusano |
2b45e8 |
#ifndef USE_OPENMP
|
|
kusano |
2b45e8 |
cpu_set_t cpu_mask;
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (initialized) return;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
initialized = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sched_getaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef USE_OPENMP
|
|
kusano |
2b45e8 |
numprocs = 0;
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
numprocs = readenv("OPENBLAS_NUM_THREADS");
|
|
kusano |
2b45e8 |
if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS");
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
numnodes = 1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (numprocs == 1) {
|
|
kusano |
2b45e8 |
disable_mapping = 1;
|
|
kusano |
2b45e8 |
return;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
create_pshmem();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
open_shmem();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
while ((common -> lock) && (common -> magic != SH_MAGIC)) {
|
|
kusano |
2b45e8 |
if (is_dead(common -> shmid)) {
|
|
kusano |
2b45e8 |
common -> lock = 0;
|
|
kusano |
2b45e8 |
common -> shmid = 0;
|
|
kusano |
2b45e8 |
common -> magic = 0;
|
|
kusano |
2b45e8 |
} else {
|
|
kusano |
2b45e8 |
sched_yield();
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_lock(&common -> lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((common -> shmid) && is_dead(common -> shmid)) common -> magic = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> shmid = pshmid;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (common -> magic != SH_MAGIC) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Shared Memory Initialization.\n");
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> num_procs = get_nprocs();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
numa_check();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
disable_hyperthread();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (common -> num_nodes > 1) numa_mapping();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> final_num_procs = popcount(common -> avail);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
common -> magic = SH_MAGIC;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
disable_affinity();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
num_avail = popcount(lprocmask);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Number of threads = %d\n", numprocs);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
local_cpu_map();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_unlock(&common -> lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef USE_OPENMP
|
|
kusano |
2b45e8 |
if (!disable_mapping) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Giving Affinity[%3d] --> %3d\n", 0, cpu_mapping[0]);
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
CPU_ZERO(&cpu_mask);
|
|
kusano |
2b45e8 |
CPU_SET (cpu_mapping[0], &cpu_mask);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
node_mapping[WhereAmI()] = READ_NODE(common -> cpu_info[cpu_sub_mapping[0]]);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
setup_mempolicy();
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) {
|
|
kusano |
2b45e8 |
sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Initialization is done.\n");
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
void gotoblas_affinity_quit(void) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
int i;
|
|
kusano |
2b45e8 |
struct shmid_ds ds;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DEBUG
|
|
kusano |
2b45e8 |
fprintf(stderr, "Terminating ..\n");
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if ((numprocs == 1) || (initialized == 0)) return;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (!disable_mapping) {
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_lock(&common -> lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
for (i = 0; i < numprocs; i ++) common -> cpu_use[cpu_mapping[i]] = -1;
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
blas_unlock(&common -> lock);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
shmctl(shmid, IPC_STAT, &ds);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
if (ds.shm_nattch == 1) shmctl(shmid, IPC_RMID, 0);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
shmdt(common);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
shmdt(paddr);
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
initialized = 0;
|
|
kusano |
2b45e8 |
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: nothing to set up. */
void gotoblas_affinity_init(void) {
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: the argument is ignored. */
void gotoblas_set_affinity(int threads) {
  (void)threads;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: the argument is ignored. */
void gotoblas_set_affinity2(int threads) {
  (void)threads;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: nothing to reschedule. */
void gotoblas_affinity_reschedule(void) {
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: ask the C library. */
int get_num_procs(void) {
  return get_nprocs();
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: always one node. */
int get_num_nodes(void) {
  return 1;
}
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback when affinity support is compiled out: always reports node 1. */
int get_node(void) {
  return 1;
}
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|