kusano 2b45e8
/*****************************************************************************
kusano 2b45e8
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
kusano 2b45e8
All rights reserved.
kusano 2b45e8
kusano 2b45e8
Redistribution and use in source and binary forms, with or without
kusano 2b45e8
modification, are permitted provided that the following conditions are
kusano 2b45e8
met:
kusano 2b45e8
kusano 2b45e8
   1. Redistributions of source code must retain the above copyright
kusano 2b45e8
      notice, this list of conditions and the following disclaimer.
kusano 2b45e8
kusano 2b45e8
   2. Redistributions in binary form must reproduce the above copyright
kusano 2b45e8
      notice, this list of conditions and the following disclaimer in
kusano 2b45e8
      the documentation and/or other materials provided with the
kusano 2b45e8
      distribution.
kusano 2b45e8
   3. Neither the name of the ISCAS nor the names of its contributors may 
kusano 2b45e8
      be used to endorse or promote products derived from this software 
kusano 2b45e8
      without specific prior written permission.
kusano 2b45e8
kusano 2b45e8
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
kusano 2b45e8
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
kusano 2b45e8
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
kusano 2b45e8
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
kusano 2b45e8
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
kusano 2b45e8
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
kusano 2b45e8
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
kusano 2b45e8
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
kusano 2b45e8
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
kusano 2b45e8
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
kusano 2b45e8
kusano 2b45e8
**********************************************************************************/
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
#include "common.h"
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) && defined(SMP)
kusano 2b45e8
kusano 2b45e8
#define _GNU_SOURCE
kusano 2b45e8
kusano 2b45e8
#include <sys/sysinfo.h>
#include <sys/syscall.h>
#include <sys/shm.h>
#include <fcntl.h>
#include <sched.h>
#include <dirent.h>
#include <dlfcn.h>
#include <string.h>
kusano 2b45e8
kusano 2b45e8
/* Capacities of the per-node and per-CPU tables in this file. */
#define MAX_NODES	16
#define MAX_CPUS	256

/* Used as the System V IPC key of the shared segment and as the value
   stored in shm_t.magic once that segment has been filled in. */
#define SH_MAGIC	0x510510

/* sysfs locations (printf-style templates) read to discover the topology. */
#define CPUMAP_NAME	"/sys/devices/system/node/node%d/cpumap"
#define SHARE_NAME	"/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_map"
#define NODE_DIR	"/sys/devices/system/node"
kusano 2b45e8
kusano 2b45e8
//#undef DEBUG
kusano 2b45e8
kusano 2b45e8
/* Private variables */
kusano 2b45e8
typedef struct {
kusano 2b45e8
  unsigned long lock;
kusano 2b45e8
  unsigned int magic;
kusano 2b45e8
  unsigned int shmid;
kusano 2b45e8
kusano 2b45e8
  int num_nodes;
kusano 2b45e8
  int num_procs;
kusano 2b45e8
  int final_num_procs;
kusano 2b45e8
  unsigned long avail;
kusano 2b45e8
  
kusano 2b45e8
  unsigned long cpu_info   [MAX_CPUS];
kusano 2b45e8
  unsigned long node_info  [MAX_NODES];
kusano 2b45e8
  int cpu_use[MAX_CPUS];
kusano 2b45e8
kusano 2b45e8
} shm_t;
kusano 2b45e8
kusano 2b45e8
static cpu_set_t cpu_orig_mask[4];
kusano 2b45e8
kusano 2b45e8
static int  cpu_mapping[MAX_CPUS];
kusano 2b45e8
static int  node_mapping[MAX_CPUS * 4];
kusano 2b45e8
static int  cpu_sub_mapping[MAX_CPUS];
kusano 2b45e8
static int  disable_mapping;
kusano 2b45e8
kusano 2b45e8
/* Number of cores per nodes */
kusano 2b45e8
static int  node_cpu[MAX_NODES];
kusano 2b45e8
static int  node_equal = 0;
kusano 2b45e8
kusano 2b45e8
static shm_t *common = (void *)-1;
kusano 2b45e8
static int shmid, pshmid;
kusano 2b45e8
static void *paddr;
kusano 2b45e8
kusano 2b45e8
static unsigned long lprocmask, lnodemask;
kusano 2b45e8
static int numprocs = 1;
kusano 2b45e8
static int numnodes = 1;
kusano 2b45e8
kusano 2b45e8
/*
 * Packing of one cpu_info entry.  Each field is 8 bits wide; the active
 * layout puts the CPU id in bits 0-7, the node in bits 8-15 and the core
 * index in bits 16-23.  The alternative layout swaps node and core.
 */
#if 1
#define READ_CPU(x)   ( (x)        & 0xff)
#define READ_NODE(x)  (((x) >>  8) & 0xff)
#define READ_CORE(x)  (((x) >> 16) & 0xff)

#define WRITE_CPU(x)    (x)
#define WRITE_NODE(x)  ((x) <<  8)
#define WRITE_CORE(x)  ((x) << 16)
#else
#define READ_CPU(x)   ( (x)        & 0xff)
#define READ_CORE(x)  (((x) >>  8) & 0xff)
#define READ_NODE(x)  (((x) >> 16) & 0xff)

#define WRITE_CPU(x)    (x)
#define WRITE_CORE(x)  ((x) <<  8)
#define WRITE_NODE(x)  ((x) << 16)
#endif
kusano 2b45e8
kusano 2b45e8
/* Returns how many bits are set in `number`. */
static inline int popcount(unsigned long number) {

  int bits;

  /* Every pass clears the lowest bit that is still set. */
  for (bits = 0; number != 0; bits ++) {
    number &= number - 1;
  }

  return bits;
}
kusano 2b45e8
kusano 2b45e8
/*
 * Returns the zero-based position of the most significant set bit of
 * `number`, or -1 when `number` is 0.
 *
 * The loop used to carry a second condition, ((number & 0) == 0), which is
 * always true; it has been removed without changing the result.
 */
static inline int rcount(unsigned long number) {

  int count = -1;

  while (number > 0) {
    count ++;
    number >>= 1;
  }

  return count;
}
kusano 2b45e8
kusano 2b45e8
/***
kusano 2b45e8
  Known issue: The number of CPUs/cores should less 
kusano 2b45e8
  than sizeof(unsigned long). On 64 bits, the limit 
kusano 2b45e8
  is 64. On 32 bits, it is 32.
kusano 2b45e8
***/
kusano 2b45e8
static inline unsigned long get_cpumap(int node) {
kusano 2b45e8
kusano 2b45e8
  int infile;
kusano 2b45e8
  unsigned long affinity;
kusano 2b45e8
  char name[160];
kusano 2b45e8
  char cpumap[160];
kusano 2b45e8
  char *p, *dummy;
kusano 2b45e8
  int i=0;
kusano 2b45e8
kusano 2b45e8
  sprintf(name, CPUMAP_NAME, node);
kusano 2b45e8
  
kusano 2b45e8
  infile = open(name, O_RDONLY);
kusano 2b45e8
kusano 2b45e8
  affinity = 0;
kusano 2b45e8
    
kusano 2b45e8
  if (infile != -1) {
kusano 2b45e8
    
kusano 2b45e8
    read(infile, cpumap, sizeof(cpumap));
kusano 2b45e8
    p = cpumap;
kusano 2b45e8
    while (*p != '\n' && i<160){
kusano 2b45e8
      if(*p != ',') {
kusano 2b45e8
	name[i++]=*p;
kusano 2b45e8
      }
kusano 2b45e8
      p++;
kusano 2b45e8
    }
kusano 2b45e8
    p = name;
kusano 2b45e8
kusano 2b45e8
    //    while ((*p == '0') || (*p == ',')) p++;
kusano 2b45e8
kusano 2b45e8
    affinity = strtoul(p, &dummy, 16);
kusano 2b45e8
   
kusano 2b45e8
    close(infile);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return affinity;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static inline unsigned long get_share(int cpu, int level) {
kusano 2b45e8
kusano 2b45e8
  int infile;
kusano 2b45e8
  unsigned long affinity;
kusano 2b45e8
  char name[160];
kusano 2b45e8
  char *p;
kusano 2b45e8
  
kusano 2b45e8
  sprintf(name, SHARE_NAME, cpu, level);
kusano 2b45e8
  
kusano 2b45e8
  infile = open(name, O_RDONLY);
kusano 2b45e8
kusano 2b45e8
  affinity = (1UL << cpu);
kusano 2b45e8
    
kusano 2b45e8
  if (infile != -1) {
kusano 2b45e8
    
kusano 2b45e8
    read(infile, name, sizeof(name));
kusano 2b45e8
   
kusano 2b45e8
    p = name;
kusano 2b45e8
kusano 2b45e8
    while ((*p == '0') || (*p == ',')) p++;
kusano 2b45e8
kusano 2b45e8
    affinity = strtol(p, &p, 16);
kusano 2b45e8
   
kusano 2b45e8
    close(infile);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return affinity;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static int numa_check(void) {
kusano 2b45e8
kusano 2b45e8
  DIR *dp;
kusano 2b45e8
  struct dirent *dir;
kusano 2b45e8
  int node;
kusano 2b45e8
kusano 2b45e8
  common -> num_nodes = 0;
kusano 2b45e8
kusano 2b45e8
  dp = opendir(NODE_DIR);
kusano 2b45e8
kusano 2b45e8
  if (dp == NULL) {
kusano 2b45e8
    common -> num_nodes = 1;
kusano 2b45e8
    return 0;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  for (node = 0; node < MAX_NODES; node ++) common -> node_info[node] = 0;
kusano 2b45e8
kusano 2b45e8
  while ((dir = readdir(dp)) != NULL) {
kusano 2b45e8
    if (*(unsigned int *) dir -> d_name == 0x065646f6eU) {
kusano 2b45e8
kusano 2b45e8
      node = atoi(&dir -> d_name[4]);
kusano 2b45e8
kusano 2b45e8
      if (node > MAX_NODES) {
kusano 2b45e8
	fprintf(stderr, "\nGotoBLAS Warining : MAX_NODES (NUMA) is too small. Terminated.\n");
kusano 2b45e8
	exit(1);
kusano 2b45e8
      }
kusano 2b45e8
kusano 2b45e8
      common -> num_nodes ++;
kusano 2b45e8
      common -> node_info[node] = get_cpumap(node);
kusano 2b45e8
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  closedir(dp);
kusano 2b45e8
kusano 2b45e8
  if (common -> num_nodes == 1) return 1;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "Numa found : number of Nodes = %2d\n", common -> num_nodes);
kusano 2b45e8
kusano 2b45e8
  for (node = 0; node < common -> num_nodes; node ++)
kusano 2b45e8
    fprintf(stderr, "MASK (%2d) : %08lx\n", node, common -> node_info[node]);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return common -> num_nodes;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void numa_mapping(void) {
kusano 2b45e8
kusano 2b45e8
  int node, cpu, core;
kusano 2b45e8
  int i, j, h;
kusano 2b45e8
  unsigned long work, bit;
kusano 2b45e8
  int count = 0;
kusano 2b45e8
kusano 2b45e8
  for (node = 0; node < common -> num_nodes; node ++) {
kusano 2b45e8
    core = 0;
kusano 2b45e8
    for (cpu = 0; cpu < common -> num_procs; cpu ++) {
kusano 2b45e8
      if (common -> node_info[node] & common -> avail & (1UL << cpu)) {
kusano 2b45e8
	common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
kusano 2b45e8
	count ++;
kusano 2b45e8
	core ++;
kusano 2b45e8
      }
kusano 2b45e8
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "\nFrom /sys ...\n\n");
kusano 2b45e8
kusano 2b45e8
  for (cpu = 0; cpu < count; cpu++) 
kusano 2b45e8
    fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  h = 1;
kusano 2b45e8
kusano 2b45e8
  while (h < count) h = 2 * h + 1;
kusano 2b45e8
kusano 2b45e8
  while (h > 1) {
kusano 2b45e8
    h /= 2;
kusano 2b45e8
    for (i = h; i < count; i++) {
kusano 2b45e8
      work = common -> cpu_info[i];
kusano 2b45e8
      bit  = CPU_ISSET(i, &cpu_orig_mask[0]);
kusano 2b45e8
      j = i - h;
kusano 2b45e8
      while (work < common -> cpu_info[j]) {
kusano 2b45e8
	common -> cpu_info[j + h] = common -> cpu_info[j];
kusano 2b45e8
	if (CPU_ISSET(j, &cpu_orig_mask[0])) {
kusano 2b45e8
	  CPU_SET(j + h, &cpu_orig_mask[0]);
kusano 2b45e8
	} else {
kusano 2b45e8
	  CPU_CLR(j + h, &cpu_orig_mask[0]);
kusano 2b45e8
	}
kusano 2b45e8
	j -= h;
kusano 2b45e8
	if (j < 0) break;
kusano 2b45e8
      }
kusano 2b45e8
      common -> cpu_info[j + h] = work;
kusano 2b45e8
      if (bit) {
kusano 2b45e8
	CPU_SET(j + h, &cpu_orig_mask[0]);
kusano 2b45e8
      } else {
kusano 2b45e8
	CPU_CLR(j + h, &cpu_orig_mask[0]);
kusano 2b45e8
      }
kusano 2b45e8
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "\nSorting ...\n\n");
kusano 2b45e8
kusano 2b45e8
  for (cpu = 0; cpu < count; cpu++) 
kusano 2b45e8
    fprintf(stderr, "CPU (%2d) : %08lx\n", cpu, common -> cpu_info[cpu]);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void disable_hyperthread(void) {
kusano 2b45e8
kusano 2b45e8
  unsigned long share;
kusano 2b45e8
  int cpu;
kusano 2b45e8
kusano 2b45e8
  if(common->num_procs > 64){
kusano 2b45e8
    fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
kusano 2b45e8
    exit(1);
kusano 2b45e8
  }else if(common->num_procs == 64){
kusano 2b45e8
    common -> avail = 0xFFFFFFFFFFFFFFFFUL;
kusano 2b45e8
  }else
kusano 2b45e8
    common -> avail = (1UL << common -> num_procs) - 1;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "\nAvail CPUs    : %04lx.\n", common -> avail);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  for (cpu = 0; cpu < common -> num_procs; cpu ++) {
kusano 2b45e8
    
kusano 2b45e8
    share = (get_share(cpu, 1) & common -> avail);
kusano 2b45e8
    
kusano 2b45e8
    if (popcount(share) > 1) {
kusano 2b45e8
      
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
      fprintf(stderr, "Detected Hyper Threading on CPU %4x; disabled CPU %04lx.\n",
kusano 2b45e8
	      cpu, share & ~(1UL << cpu));
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
      common -> avail &= ~((share & ~(1UL << cpu)));
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void disable_affinity(void) {
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    fprintf(stderr, "Final all available CPUs  : %04lx.\n\n", common -> avail);
kusano 2b45e8
    fprintf(stderr, "CPU mask                  : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if(common->final_num_procs > 64){
kusano 2b45e8
    fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
kusano 2b45e8
    exit(1);
kusano 2b45e8
  }else if(common->final_num_procs == 64){
kusano 2b45e8
    lprocmask = 0xFFFFFFFFFFFFFFFFUL;
kusano 2b45e8
  }else
kusano 2b45e8
    lprocmask = (1UL << common -> final_num_procs) - 1;
kusano 2b45e8
kusano 2b45e8
#ifndef USE_OPENMP
kusano 2b45e8
  lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    fprintf(stderr, "I choose these CPUs  : %04lx.\n\n", lprocmask);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void setup_mempolicy(void) {
kusano 2b45e8
kusano 2b45e8
  int cpu, mynode, maxcpu;
kusano 2b45e8
kusano 2b45e8
  for (cpu = 0; cpu < MAX_NODES; cpu ++) node_cpu[cpu] = 0;
kusano 2b45e8
kusano 2b45e8
  maxcpu = 0;
kusano 2b45e8
kusano 2b45e8
  for (cpu = 0; cpu < numprocs; cpu ++) {
kusano 2b45e8
    mynode = READ_NODE(common -> cpu_info[cpu_sub_mapping[cpu]]);
kusano 2b45e8
    
kusano 2b45e8
    lnodemask |= (1UL << mynode);
kusano 2b45e8
kusano 2b45e8
    node_cpu[mynode] ++;
kusano 2b45e8
kusano 2b45e8
    if (maxcpu < node_cpu[mynode]) maxcpu = node_cpu[mynode];
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  node_equal = 1;
kusano 2b45e8
kusano 2b45e8
  for (cpu = 0; cpu < MAX_NODES; cpu ++) if ((node_cpu[cpu] != 0) && (node_cpu[cpu] != maxcpu)) node_equal = 0;
kusano 2b45e8
kusano 2b45e8
  if (lnodemask) {
kusano 2b45e8
  
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    fprintf(stderr, "Node mask = %lx\n", lnodemask);
kusano 2b45e8
#endif
kusano 2b45e8
    
kusano 2b45e8
    my_set_mempolicy(MPOL_INTERLEAVE, &lnodemask, sizeof(lnodemask) * 8);
kusano 2b45e8
kusano 2b45e8
    numnodes = popcount(lnodemask);
kusano 2b45e8
  }
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/*
 * Queries shared-memory segment `id` with shmctl(IPC_STAT) and returns the
 * call's result unchanged.  Callers treat a non-zero result as "the
 * segment no longer exists".
 */
static inline int is_dead(int id) {

  struct shmid_ds info;
  int status;

  status = shmctl(id, IPC_STAT, &info);

  return status;
}
kusano 2b45e8
static void open_shmem(void) {
kusano 2b45e8
kusano 2b45e8
  int try = 0;
kusano 2b45e8
kusano 2b45e8
  do {
kusano 2b45e8
kusano 2b45e8
    shmid = shmget(SH_MAGIC, 4096, 0666);
kusano 2b45e8
    
kusano 2b45e8
    if (shmid == -1) {
kusano 2b45e8
      shmid = shmget(SH_MAGIC, 4096, IPC_CREAT | 0666);
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    try ++;
kusano 2b45e8
kusano 2b45e8
  } while ((try < 10) && (shmid == -1));
kusano 2b45e8
kusano 2b45e8
  if (shmid == -1) {
kusano 2b45e8
    fprintf(stderr, "GotoBLAS : Can't open shared memory. Terminated.\n");
kusano 2b45e8
    exit(1);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  if (shmid != -1) common = (shm_t *)shmat(shmid, NULL, 0);
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "Shared Memory id = %x  Address = %p\n", shmid, common);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void create_pshmem(void) {
kusano 2b45e8
kusano 2b45e8
  pshmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
kusano 2b45e8
kusano 2b45e8
  paddr = shmat(pshmid, NULL, 0);
kusano 2b45e8
kusano 2b45e8
  shmctl(pshmid, IPC_RMID, 0);
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "Private Shared Memory id = %x  Address = %p\n", pshmid, paddr);
kusano 2b45e8
#endif
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void local_cpu_map(void) {
kusano 2b45e8
kusano 2b45e8
  int cpu, id, mapping;
kusano 2b45e8
kusano 2b45e8
  cpu = 0;
kusano 2b45e8
  mapping = 0;
kusano 2b45e8
kusano 2b45e8
  do {
kusano 2b45e8
    id   = common -> cpu_use[cpu];
kusano 2b45e8
kusano 2b45e8
    if (id > 0) {
kusano 2b45e8
      if (is_dead(id)) common -> cpu_use[cpu] = 0;
kusano 2b45e8
    }
kusano 2b45e8
kusano 2b45e8
    if ((common -> cpu_use[cpu] == 0) && (lprocmask & (1UL << cpu))) {
kusano 2b45e8
kusano 2b45e8
      common -> cpu_use[cpu] = pshmid;
kusano 2b45e8
      cpu_mapping[mapping] = READ_CPU(common -> cpu_info[cpu]);
kusano 2b45e8
      cpu_sub_mapping[mapping] = cpu;
kusano 2b45e8
kusano 2b45e8
      mapping ++;
kusano 2b45e8
    }
kusano 2b45e8
kusano 2b45e8
    cpu ++;
kusano 2b45e8
    
kusano 2b45e8
  } while ((mapping < numprocs) && (cpu < common -> final_num_procs));
kusano 2b45e8
  
kusano 2b45e8
  disable_mapping = 0;
kusano 2b45e8
kusano 2b45e8
  if ((mapping < numprocs) || (numprocs == 1)) {
kusano 2b45e8
    for (cpu = 0; cpu < common -> final_num_procs; cpu ++) {
kusano 2b45e8
      if (common -> cpu_use[cpu] == pshmid) common -> cpu_use[cpu] = 0;
kusano 2b45e8
    }
kusano 2b45e8
    disable_mapping = 1;
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  for (cpu = 0; cpu < numprocs; cpu ++) {
kusano 2b45e8
    fprintf(stderr, "Local Mapping  : %2d --> %2d (%2d)\n", cpu, cpu_mapping[cpu], cpu_sub_mapping[cpu]);
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
/* Public Functions */
kusano 2b45e8
kusano 2b45e8
/* Returns the current value of numprocs. */
int get_num_procs(void) {
  return numprocs;
}
kusano 2b45e8
/* Returns the current value of numnodes. */
int get_num_nodes(void) {
  return numnodes;
}
kusano 2b45e8
int get_node_equal(void) { 
kusano 2b45e8
kusano 2b45e8
  return (((blas_cpu_number % numnodes) == 0) && node_equal);
kusano 2b45e8
  
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
int gotoblas_set_affinity(int pos) {
kusano 2b45e8
  
kusano 2b45e8
  cpu_set_t cpu_mask;
kusano 2b45e8
kusano 2b45e8
  int mynode = 1;
kusano 2b45e8
kusano 2b45e8
  /* if number of threads is larger than inital condition */
kusano 2b45e8
  if (pos < 0) {
kusano 2b45e8
      sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
kusano 2b45e8
      return 0;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  if (!disable_mapping) {
kusano 2b45e8
kusano 2b45e8
    mynode = READ_NODE(common -> cpu_info[cpu_sub_mapping[pos]]);
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    fprintf(stderr, "Giving Affinity[%4d   %3d] --> %3d  My node = %3d\n", getpid(), pos, cpu_mapping[pos], mynode);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    CPU_ZERO(&cpu_mask);
kusano 2b45e8
    CPU_SET (cpu_mapping[pos], &cpu_mask);
kusano 2b45e8
    
kusano 2b45e8
    sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
kusano 2b45e8
kusano 2b45e8
    node_mapping[WhereAmI()] = mynode;
kusano 2b45e8
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return mynode;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
int get_node(void) { 
kusano 2b45e8
kusano 2b45e8
  if (!disable_mapping) return node_mapping[WhereAmI()];
kusano 2b45e8
kusano 2b45e8
  return 1;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static int initialized = 0;
kusano 2b45e8
kusano 2b45e8
void gotoblas_affinity_init(void) {
kusano 2b45e8
kusano 2b45e8
  int cpu, num_avail;
kusano 2b45e8
#ifndef USE_OPENMP
kusano 2b45e8
  cpu_set_t cpu_mask;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if (initialized) return;
kusano 2b45e8
kusano 2b45e8
  initialized = 1;
kusano 2b45e8
kusano 2b45e8
  sched_getaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
kusano 2b45e8
    
kusano 2b45e8
#ifdef USE_OPENMP
kusano 2b45e8
  numprocs = 0;
kusano 2b45e8
#else
kusano 2b45e8
  numprocs = readenv("OPENBLAS_NUM_THREADS");
kusano 2b45e8
  if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS");
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS");
kusano 2b45e8
kusano 2b45e8
  numnodes = 1;
kusano 2b45e8
kusano 2b45e8
  if (numprocs == 1) {
kusano 2b45e8
    disable_mapping = 1;
kusano 2b45e8
    return;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  create_pshmem();
kusano 2b45e8
kusano 2b45e8
  open_shmem();
kusano 2b45e8
kusano 2b45e8
  while ((common -> lock) && (common -> magic != SH_MAGIC)) {
kusano 2b45e8
    if (is_dead(common -> shmid)) {
kusano 2b45e8
      common -> lock = 0;
kusano 2b45e8
      common -> shmid = 0;
kusano 2b45e8
      common -> magic = 0;
kusano 2b45e8
    } else {
kusano 2b45e8
      sched_yield();
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  blas_lock(&common -> lock);
kusano 2b45e8
kusano 2b45e8
  if ((common -> shmid) && is_dead(common -> shmid)) common -> magic = 0;
kusano 2b45e8
kusano 2b45e8
  common -> shmid = pshmid;
kusano 2b45e8
kusano 2b45e8
  if (common -> magic != SH_MAGIC) {
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    fprintf(stderr, "Shared Memory Initialization.\n");
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    common -> num_procs = get_nprocs();
kusano 2b45e8
kusano 2b45e8
    for (cpu = 0; cpu < common -> num_procs; cpu++) common -> cpu_info[cpu] = cpu;
kusano 2b45e8
    
kusano 2b45e8
    numa_check();
kusano 2b45e8
    
kusano 2b45e8
    disable_hyperthread();
kusano 2b45e8
kusano 2b45e8
    if (common -> num_nodes > 1) numa_mapping();
kusano 2b45e8
kusano 2b45e8
    common -> final_num_procs = popcount(common -> avail);
kusano 2b45e8
kusano 2b45e8
    for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] =  0;
kusano 2b45e8
kusano 2b45e8
    common -> magic = SH_MAGIC;
kusano 2b45e8
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  disable_affinity();
kusano 2b45e8
kusano 2b45e8
  num_avail = popcount(lprocmask);
kusano 2b45e8
kusano 2b45e8
  if ((numprocs <= 0) || (numprocs > num_avail)) numprocs = num_avail;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "Number of threads = %d\n", numprocs);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  local_cpu_map();
kusano 2b45e8
kusano 2b45e8
  blas_unlock(&common -> lock);
kusano 2b45e8
kusano 2b45e8
#ifndef USE_OPENMP
kusano 2b45e8
  if (!disable_mapping) {
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    fprintf(stderr, "Giving Affinity[%3d] --> %3d\n", 0, cpu_mapping[0]);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    CPU_ZERO(&cpu_mask);
kusano 2b45e8
    CPU_SET (cpu_mapping[0], &cpu_mask);
kusano 2b45e8
    
kusano 2b45e8
    sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask);
kusano 2b45e8
kusano 2b45e8
    node_mapping[WhereAmI()] = READ_NODE(common -> cpu_info[cpu_sub_mapping[0]]);
kusano 2b45e8
kusano 2b45e8
    setup_mempolicy();
kusano 2b45e8
kusano 2b45e8
    if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) {
kusano 2b45e8
      sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]);
kusano 2b45e8
    }
kusano 2b45e8
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "Initialization is done.\n");
kusano 2b45e8
#endif
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
void gotoblas_affinity_quit(void) {
kusano 2b45e8
kusano 2b45e8
  int i;
kusano 2b45e8
  struct shmid_ds ds;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  fprintf(stderr, "Terminating ..\n");
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if ((numprocs == 1) || (initialized == 0)) return;
kusano 2b45e8
kusano 2b45e8
  if (!disable_mapping) {
kusano 2b45e8
    
kusano 2b45e8
    blas_lock(&common -> lock);
kusano 2b45e8
    
kusano 2b45e8
    for (i = 0; i < numprocs; i ++) common -> cpu_use[cpu_mapping[i]] = -1;
kusano 2b45e8
    
kusano 2b45e8
    blas_unlock(&common -> lock);
kusano 2b45e8
  
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  shmctl(shmid, IPC_STAT, &ds);
kusano 2b45e8
kusano 2b45e8
  if (ds.shm_nattch == 1) shmctl(shmid, IPC_RMID, 0);
kusano 2b45e8
kusano 2b45e8
  shmdt(common);
kusano 2b45e8
kusano 2b45e8
  shmdt(paddr);
kusano 2b45e8
kusano 2b45e8
  initialized = 0;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
kusano 2b45e8
/*
 * Fallbacks used when the build is not (OS_LINUX && SMP): the affinity
 * entry points do nothing and the topology queries report a single node.
 * The stray ';' that followed each empty body has been removed; an empty
 * declaration at file scope is not valid ISO C.
 */
void gotoblas_affinity_init(void) {}

void gotoblas_set_affinity(int threads) {}

void gotoblas_set_affinity2(int threads) {}

void gotoblas_affinity_reschedule(void) {}

/* Returns the processor count reported by get_nprocs(). */
int get_num_procs(void) { return get_nprocs(); }

int get_num_nodes(void) { return 1; }

int get_node(void) { return 1; }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8