kusano 2b45e8
/*****************************************************************************
kusano 2b45e8
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
kusano 2b45e8
All rights reserved.
kusano 2b45e8
kusano 2b45e8
Redistribution and use in source and binary forms, with or without
kusano 2b45e8
modification, are permitted provided that the following conditions are
kusano 2b45e8
met:
kusano 2b45e8
kusano 2b45e8
   1. Redistributions of source code must retain the above copyright
kusano 2b45e8
      notice, this list of conditions and the following disclaimer.
kusano 2b45e8
kusano 2b45e8
   2. Redistributions in binary form must reproduce the above copyright
kusano 2b45e8
      notice, this list of conditions and the following disclaimer in
kusano 2b45e8
      the documentation and/or other materials provided with the
kusano 2b45e8
      distribution.
kusano 2b45e8
   3. Neither the name of the ISCAS nor the names of its contributors may 
kusano 2b45e8
      be used to endorse or promote products derived from this software 
kusano 2b45e8
      without specific prior written permission.
kusano 2b45e8
kusano 2b45e8
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
kusano 2b45e8
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
kusano 2b45e8
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
kusano 2b45e8
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
kusano 2b45e8
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
kusano 2b45e8
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
kusano 2b45e8
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
kusano 2b45e8
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
kusano 2b45e8
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
kusano 2b45e8
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
kusano 2b45e8
kusano 2b45e8
**********************************************************************************/
kusano 2b45e8
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
/* Copyright 2009, 2010 The University of Texas at Austin.           */
kusano 2b45e8
/* All rights reserved.                                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* Redistribution and use in source and binary forms, with or        */
kusano 2b45e8
/* without modification, are permitted provided that the following   */
kusano 2b45e8
/* conditions are met:                                               */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   1. Redistributions of source code must retain the above         */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer.                                                  */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*   2. Redistributions in binary form must reproduce the above      */
kusano 2b45e8
/*      copyright notice, this list of conditions and the following  */
kusano 2b45e8
/*      disclaimer in the documentation and/or other materials       */
kusano 2b45e8
/*      provided with the distribution.                              */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
kusano 2b45e8
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
kusano 2b45e8
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
kusano 2b45e8
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
kusano 2b45e8
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
kusano 2b45e8
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
kusano 2b45e8
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
kusano 2b45e8
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
kusano 2b45e8
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
kusano 2b45e8
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
kusano 2b45e8
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
kusano 2b45e8
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
kusano 2b45e8
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
kusano 2b45e8
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
kusano 2b45e8
/*                                                                   */
kusano 2b45e8
/* The views and conclusions contained in the software and           */
kusano 2b45e8
/* documentation are those of the authors and should not be          */
kusano 2b45e8
/* interpreted as representing official policies, either expressed   */
kusano 2b45e8
/* or implied, of The University of Texas at Austin.                 */
kusano 2b45e8
/*********************************************************************/
kusano 2b45e8
kusano 2b45e8
//#undef  DEBUG
kusano 2b45e8
kusano 2b45e8
#include "common.h"
kusano 2b45e8
#include <errno.h>
kusano 2b45e8
kusano 2b45e8
#ifdef OS_WINDOWS
kusano 2b45e8
#define ALLOC_WINDOWS
kusano 2b45e8
#ifndef MEM_LARGE_PAGES
kusano 2b45e8
#define MEM_LARGE_PAGES  0x20000000
kusano 2b45e8
#endif
kusano 2b45e8
#else
kusano 2b45e8
#define ALLOC_MMAP
kusano 2b45e8
#define ALLOC_MALLOC
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#include <stdlib.h>
kusano 2b45e8
#include <stdio.h>
kusano 2b45e8
#include <fcntl.h>
kusano 2b45e8
kusano 2b45e8
#ifndef OS_WINDOWS
kusano 2b45e8
#include <sys/mman.h>
kusano 2b45e8
#include <sys/shm.h>
kusano 2b45e8
#include <sys/ipc.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#include <sys/types.h>
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
#include <sys/sysinfo.h>
kusano 2b45e8
#include <sched.h>
kusano 2b45e8
#include <errno.h>
kusano 2b45e8
#include <linux/unistd.h>
kusano 2b45e8
#include <sys/syscall.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_FreeBSD) || defined(OS_Darwin)
kusano 2b45e8
#include <sys/sysctl.h>
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_WINDOWS) && (defined(__MINGW32__) || defined(__MINGW64__))
kusano 2b45e8
#include <conio.h>
kusano 2b45e8
#undef  printf
kusano 2b45e8
#define printf	_cprintf
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
kusano 2b45e8
#ifndef MPOL_PREFERRED
kusano 2b45e8
#define MPOL_PREFERRED  1
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if (defined(PPC440) || !defined(OS_LINUX) || defined(HPL)) && !defined(NO_WARMUP)
kusano 2b45e8
#define NO_WARMUP
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_HUGETLB
kusano 2b45e8
#define SHM_HUGETLB 04000
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef FIXED_PAGESIZE
kusano 2b45e8
#define FIXED_PAGESIZE 4096
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
kusano 2b45e8
kusano 2b45e8
#define CONSTRUCTOR	__attribute__ ((constructor)) 
kusano 2b45e8
#define DESTRUCTOR	__attribute__ ((destructor)) 
kusano 2b45e8
kusano 2b45e8
#ifdef DYNAMIC_ARCH
kusano 2b45e8
gotoblas_t *gotoblas = NULL;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifndef SMP
kusano 2b45e8
kusano 2b45e8
#define blas_cpu_number 1
kusano 2b45e8
#define blas_num_threads 1
kusano 2b45e8
kusano 2b45e8
/* Dummy Function */
kusano 2b45e8
/* Stub for builds without SMP: always reports a single processor. */
int goto_get_num_procs(void) {
  return 1;
}
kusano 2b45e8
/* Stub for builds without SMP: the requested thread count is ignored. */
void goto_set_num_threads(int num_threads) {
  (void)num_threads;  /* intentionally unused */
}
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
#ifndef NO_AFFINITY
kusano 2b45e8
int get_num_procs(void);
kusano 2b45e8
#else
kusano 2b45e8
/* Returns the processor count reported by get_nprocs().  The value is
   kept in a function-local static and only queried while that static is
   still zero. */
int get_num_procs(void) {
  static int cached_procs = 0;

  if (cached_procs == 0) {
    cached_procs = get_nprocs();
  }

  return cached_procs;
}
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_WINDOWS
kusano 2b45e8
kusano 2b45e8
int get_num_procs(void) {
kusano 2b45e8
  
kusano 2b45e8
  static int nums = 0;
kusano 2b45e8
kusano 2b45e8
  if (nums == 0) {
kusano 2b45e8
kusano 2b45e8
    SYSTEM_INFO sysinfo;
kusano 2b45e8
    
kusano 2b45e8
    GetSystemInfo(&sysinfo);
kusano 2b45e8
kusano 2b45e8
    nums = sysinfo.dwNumberOfProcessors;
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
  return nums;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_FreeBSD) || defined(OS_Darwin)
kusano 2b45e8
kusano 2b45e8
int get_num_procs(void) {
kusano 2b45e8
  
kusano 2b45e8
  static int nums = 0;
kusano 2b45e8
kusano 2b45e8
  int m[2];
kusano 2b45e8
  size_t len;
kusano 2b45e8
  
kusano 2b45e8
  if (nums == 0) {
kusano 2b45e8
    m[0] = CTL_HW;
kusano 2b45e8
    m[1] = HW_NCPU;
kusano 2b45e8
    len = sizeof(int);
kusano 2b45e8
    sysctl(m, 2, &nums, &len, NULL, 0);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return nums;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
int blas_cpu_number  = 0;
kusano 2b45e8
int blas_num_threads = 0;
kusano 2b45e8
kusano 2b45e8
int  goto_get_num_procs  (void) {
kusano 2b45e8
  return blas_cpu_number;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
int blas_get_cpu_number(void){
kusano 2b45e8
  char *p;
kusano 2b45e8
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
kusano 2b45e8
  int max_num;
kusano 2b45e8
#endif
kusano 2b45e8
  int blas_goto_num   = 0;
kusano 2b45e8
  int blas_omp_num    = 0;
kusano 2b45e8
kusano 2b45e8
  if (blas_num_threads) return blas_num_threads;
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
kusano 2b45e8
  max_num = get_num_procs();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  blas_goto_num = 0;
kusano 2b45e8
#ifndef USE_OPENMP
kusano 2b45e8
  p = getenv("OPENBLAS_NUM_THREADS");
kusano 2b45e8
  if (p) blas_goto_num = atoi(p);
kusano 2b45e8
  if (blas_goto_num < 0) blas_goto_num = 0;
kusano 2b45e8
kusano 2b45e8
  if (blas_goto_num == 0) {
kusano 2b45e8
	    p = getenv("GOTO_NUM_THREADS");
kusano 2b45e8
		if (p) blas_goto_num = atoi(p);
kusano 2b45e8
		if (blas_goto_num < 0) blas_goto_num = 0;
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  blas_omp_num = 0;
kusano 2b45e8
  p = getenv("OMP_NUM_THREADS");
kusano 2b45e8
  if (p) blas_omp_num = atoi(p);
kusano 2b45e8
  if (blas_omp_num < 0) blas_omp_num = 0;
kusano 2b45e8
kusano 2b45e8
  if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
kusano 2b45e8
  else if (blas_omp_num > 0) blas_num_threads = blas_omp_num;
kusano 2b45e8
  else blas_num_threads = MAX_CPU_NUMBER;
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FreeBSD) || defined(OS_Darwin)
kusano 2b45e8
  if (blas_num_threads > max_num) blas_num_threads = max_num;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if (blas_num_threads > MAX_CPU_NUMBER) blas_num_threads = MAX_CPU_NUMBER;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf( "Adjusted number of threads : %3d\n", blas_num_threads);
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
  blas_cpu_number = blas_num_threads;  
kusano 2b45e8
kusano 2b45e8
  return blas_num_threads;
kusano 2b45e8
}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* One record per buffer handed out by the alloc_* routines in this file;
   each routine fills a record in release_info[] on success. */
struct release_t {
  void *address;                      /* address returned by the underlying allocator */
  void (*func)(struct release_t *);   /* matching *_free routine for this record      */
  long attr;                          /* allocator-specific extra value; the routines
                                         in this file store a file descriptor or a
                                         shared-memory id here                        */
};
kusano 2b45e8
kusano 2b45e8
int hugetlb_allocated = 0;
kusano 2b45e8
kusano 2b45e8
static struct release_t release_info[NUM_BUFFERS];
kusano 2b45e8
static int release_pos = 0;
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_WARMUP)
kusano 2b45e8
static int hot_alloc = 0;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_MMAP
kusano 2b45e8
kusano 2b45e8
static void alloc_mmap_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  if (munmap(release -> address, BUFFER_SIZE)) {
kusano 2b45e8
    printf("OpenBLAS : munmap failed\n");
kusano 2b45e8
  }
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#ifdef NO_WARMUP
kusano 2b45e8
kusano 2b45e8
static void *alloc_mmap(void *address){
kusano 2b45e8
  void *map_address;
kusano 2b45e8
kusano 2b45e8
  if (address){
kusano 2b45e8
    map_address = mmap(address, 
kusano 2b45e8
		       BUFFER_SIZE, 
kusano 2b45e8
		       MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
kusano 2b45e8
  } else {
kusano 2b45e8
    map_address = mmap(address, 
kusano 2b45e8
		       BUFFER_SIZE, 
kusano 2b45e8
		       MMAP_ACCESS, MMAP_POLICY, -1, 0);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].func    = alloc_mmap_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
  my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
kusano 2b45e8
#define BENCH_ITERATION 4
kusano 2b45e8
#define SCALING		2
kusano 2b45e8
kusano 2b45e8
/* Times a pointer chase through the region [address, address + size).
   The first word of each page is expected to hold the address of the
   next location to visit (alloc_mmap sets this chain up).  For the
   duration of the measurement the word at address + size - PAGESIZE is
   redirected back to `address`; it is restored before returning.
   The chase of size / PAGESIZE steps is repeated BENCH_ITERATION times
   and the smallest rpcc() difference is returned. */
static inline BLASULONG run_bench(BLASULONG address, long size) {

  BLASULONG tail = address + size - PAGESIZE;   /* last page of the window */
  BLASULONG saved, fastest, t0, t1;
  BLASULONG *cursor;
  int pass, step, pages;

  fastest = (BLASULONG)-1;

  saved = *(BLASULONG *)tail;
  *(BLASULONG *)tail = (BLASULONG)address;

  for (pass = 0; pass < BENCH_ITERATION; pass ++) {

    cursor = (BLASULONG *)address;
    pages  = size / PAGESIZE;

    t0 = rpcc();

    for (step = 0; step < pages; step ++) {
      cursor = (BLASULONG *)(*cursor);
    }

    t1 = rpcc();

    if (t1 - t0 < fastest) fastest = t1 - t0;
  }

  *(BLASULONG *)(tail + 0) = saved;
  /* Store the final cursor so the chase result is observably used. */
  *(BLASULONG *)(tail + 8) = (BLASULONG)cursor;

  return fastest;
}
kusano 2b45e8
kusano 2b45e8
/* Maps a BUFFER_SIZE region with mmap() (variant with warm-up benchmarking).
 *
 *   address : when non-NULL, the region is mapped with MAP_FIXED at that
 *             address and no benchmarking is done.
 *
 * When address is NULL and hot_alloc is 0, a plain BUFFER_SIZE mapping is
 * made.  Otherwise BUFFER_SIZE * SCALING bytes are mapped, every
 * page-aligned start offset that fits is timed with run_bench(), the
 * BUFFER_SIZE window starting at the fastest offset is kept, the bytes
 * before and after it are unmapped, and hot_alloc is set to 2.
 *
 * A successful mapping is recorded in release_info[] with alloc_mmap_free
 * as its release routine.
 * Returns the mapped address, or (void *)-1 when mmap() fails.
 */
static void *alloc_mmap(void *address){
kusano 2b45e8
  void *map_address, *best_address;
kusano 2b45e8
  BLASULONG best, start, current;
kusano 2b45e8
  BLASULONG allocsize;
kusano 2b45e8
kusano 2b45e8
  if (address){
kusano 2b45e8
    /* A fixed address was requested: map it directly, skip the benchmark. */
kusano 2b45e8
    map_address = mmap(address, BUFFER_SIZE, MMAP_ACCESS, MMAP_POLICY | MAP_FIXED, -1, 0);
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
    /* NOTE(review): called even when mmap() returned (void *)-1 -- confirm
       my_mbind tolerates that. */
    my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  } else {
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_WARMUP)
kusano 2b45e8
    /* hot_alloc == 0: plain mapping, no benchmarking. */
    if (hot_alloc == 0) {
kusano 2b45e8
      map_address = mmap(NULL, BUFFER_SIZE, MMAP_ACCESS, MMAP_POLICY, -1, 0);
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
      my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    } else {
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
      /* Over-allocate so that a BUFFER_SIZE window can be chosen inside. */
      map_address = mmap(NULL, BUFFER_SIZE * SCALING, 
kusano 2b45e8
			 MMAP_ACCESS, MMAP_POLICY, -1, 0);
kusano 2b45e8
      
kusano 2b45e8
      if (map_address != (void *)-1) {
kusano 2b45e8
		  
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
		  int ret=0;
kusano 2b45e8
		  ret=my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
		  if(ret==-1){
kusano 2b45e8
			  /* errno is saved first because perror()/printf() may change it */
			  int errsv=errno;
kusano 2b45e8
			  perror("OpenBLAS alloc_mmap:");
kusano 2b45e8
			  /* NOTE(review): %lx is given a void * argument here */
			  printf("error code=%d,\tmap_address=%lx\n",errsv,map_address);
kusano 2b45e8
		  }
kusano 2b45e8
kusano 2b45e8
#else
kusano 2b45e8
		  my_mbind(map_address, BUFFER_SIZE * SCALING, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	
kusano 2b45e8
	/* Size of the window that run_bench() walks for each candidate. */
	allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
kusano 2b45e8
	
kusano 2b45e8
	start   = (BLASULONG)map_address;
kusano 2b45e8
	current = (SCALING - 1) * BUFFER_SIZE;
kusano 2b45e8
	
kusano 2b45e8
	/* Build a chain over the first (SCALING - 1) * BUFFER_SIZE bytes: the
	   first word of every page holds the address of the following page. */
	while(current > 0) {
kusano 2b45e8
	  *(long *)start = (long)start + PAGESIZE;
kusano 2b45e8
	  start += PAGESIZE;
kusano 2b45e8
	  current -= PAGESIZE;
kusano 2b45e8
	}
kusano 2b45e8
	
kusano 2b45e8
	/* Close the chain: the last page written points back to the start. */
	*(long *)(start - PAGESIZE) = (BLASULONG)map_address;
kusano 2b45e8
	
kusano 2b45e8
	start = (BLASULONG)map_address;
kusano 2b45e8
	
kusano 2b45e8
	/* (BLASULONG)-1 is the largest value, so any measurement beats it. */
	best = (BLASULONG)-1;
kusano 2b45e8
	best_address = map_address;
kusano 2b45e8
	
kusano 2b45e8
	/* Time every page-aligned candidate start and keep the fastest one. */
	while ((start + allocsize  < (BLASULONG)map_address + (SCALING - 1) * BUFFER_SIZE)) {
kusano 2b45e8
	  
kusano 2b45e8
	  current = run_bench(start, allocsize);
kusano 2b45e8
	  
kusano 2b45e8
	  if (best > current) {
kusano 2b45e8
	    best = current;
kusano 2b45e8
	    best_address = (void *)start;
kusano 2b45e8
	  }
kusano 2b45e8
	  
kusano 2b45e8
	  start += PAGESIZE;
kusano 2b45e8
	  
kusano 2b45e8
	}
kusano 2b45e8
	
kusano 2b45e8
      /* Unmap the bytes in front of the chosen window, if any ... */
      if ((BLASULONG)best_address > (BLASULONG)map_address)
kusano 2b45e8
	munmap(map_address,  (BLASULONG)best_address - (BLASULONG)map_address);
kusano 2b45e8
      
kusano 2b45e8
      /* ... and everything behind it, leaving exactly BUFFER_SIZE mapped. */
      munmap((void *)((BLASULONG)best_address + BUFFER_SIZE), (SCALING - 1) * BUFFER_SIZE + (BLASULONG)map_address - (BLASULONG)best_address);
kusano 2b45e8
      
kusano 2b45e8
      map_address = best_address;
kusano 2b45e8
      
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_WARMUP)
kusano 2b45e8
      hot_alloc = 2;
kusano 2b45e8
#endif
kusano 2b45e8
      }
kusano 2b45e8
    }
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_WARMUP)
kusano 2b45e8
  /* Extra brace: closes the outer else when the hot_alloc test above is
     compiled in. */
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  /* Record the mapping so that alloc_mmap_free can release it later. */
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].func    = alloc_mmap_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_MALLOC
kusano 2b45e8
kusano 2b45e8
static void alloc_malloc_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  free(release -> address);
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_malloc(void *address){
kusano 2b45e8
kusano 2b45e8
  void *map_address;
kusano 2b45e8
kusano 2b45e8
  map_address = (void *)malloc(BUFFER_SIZE + FIXED_PAGESIZE);
kusano 2b45e8
kusano 2b45e8
  if (map_address == (void *)NULL) map_address = (void *)-1;
kusano 2b45e8
kusano 2b45e8
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].func    = alloc_malloc_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_QALLOC
kusano 2b45e8
kusano 2b45e8
void *qalloc(int flags, size_t bytes);
kusano 2b45e8
void *qfree (void *address);
kusano 2b45e8
kusano 2b45e8
#define QNONCACHE 0x1
kusano 2b45e8
#define QCOMMS    0x2
kusano 2b45e8
#define QFAST     0x4
kusano 2b45e8
kusano 2b45e8
static void alloc_qalloc_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  qfree(release -> address);
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_qalloc(void *address){
kusano 2b45e8
  void *map_address;
kusano 2b45e8
kusano 2b45e8
  map_address = (void *)qalloc(QCOMMS | QFAST, BUFFER_SIZE + FIXED_PAGESIZE);
kusano 2b45e8
kusano 2b45e8
  if (map_address == (void *)NULL) map_address = (void *)-1;
kusano 2b45e8
kusano 2b45e8
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].func    = alloc_qalloc_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return (void *)(((BLASULONG)map_address + FIXED_PAGESIZE - 1) & ~(FIXED_PAGESIZE - 1));
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_WINDOWS
kusano 2b45e8
kusano 2b45e8
static void alloc_windows_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT);
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_windows(void *address){
kusano 2b45e8
  void *map_address;
kusano 2b45e8
kusano 2b45e8
  map_address  = VirtualAlloc(address,
kusano 2b45e8
			      BUFFER_SIZE,
kusano 2b45e8
			      MEM_RESERVE | MEM_COMMIT,
kusano 2b45e8
			      PAGE_READWRITE);
kusano 2b45e8
kusano 2b45e8
  if (map_address == (void *)NULL) map_address = (void *)-1;
kusano 2b45e8
kusano 2b45e8
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].func    = alloc_windows_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_DEVICEDRIVER
kusano 2b45e8
#ifndef DEVICEDRIVER_NAME
kusano 2b45e8
#define DEVICEDRIVER_NAME "/dev/mapper"
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
static void alloc_devicedirver_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  if (munmap(release -> address, BUFFER_SIZE)) {
kusano 2b45e8
    printf("OpenBLAS : Bugphysarea unmap failed.\n");
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  if (close(release -> attr)) {
kusano 2b45e8
    printf("OpenBLAS : Bugphysarea close failed.\n");
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_devicedirver(void *address){
kusano 2b45e8
  
kusano 2b45e8
  int fd;
kusano 2b45e8
  void *map_address;
kusano 2b45e8
kusano 2b45e8
  if ((fd = open(DEVICEDRIVER_NAME, O_RDWR | O_SYNC)) < 0) {
kusano 2b45e8
kusano 2b45e8
    return (void *)-1;
kusano 2b45e8
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  map_address = mmap(address, BUFFER_SIZE,
kusano 2b45e8
		     PROT_READ | PROT_WRITE,
kusano 2b45e8
		     MAP_FILE | MAP_SHARED,
kusano 2b45e8
		     fd, 0);
kusano 2b45e8
  
kusano 2b45e8
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].attr    = fd;
kusano 2b45e8
    release_info[release_pos].func    = alloc_devicedirver_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_SHM
kusano 2b45e8
kusano 2b45e8
static void alloc_shm_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  if (shmdt(release -> address)) {
kusano 2b45e8
    printf("OpenBLAS : Shared memory unmap failed.\n");
kusano 2b45e8
    }
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_shm(void *address){
kusano 2b45e8
  void *map_address;
kusano 2b45e8
  int shmid;
kusano 2b45e8
  
kusano 2b45e8
  shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);
kusano 2b45e8
  
kusano 2b45e8
  map_address = (void *)shmat(shmid, address, 0);
kusano 2b45e8
kusano 2b45e8
  if (map_address != (void *)-1){
kusano 2b45e8
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
    my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    shmctl(shmid, IPC_RMID, 0);
kusano 2b45e8
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].attr    = shmid;
kusano 2b45e8
    release_info[release_pos].func    = alloc_shm_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS
kusano 2b45e8
kusano 2b45e8
static void alloc_hugetlb_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) || defined(OS_AIX)
kusano 2b45e8
  if (shmdt(release -> address)) {
kusano 2b45e8
    printf("OpenBLAS : Hugepage unmap failed.\n");
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef __sun__
kusano 2b45e8
kusano 2b45e8
  munmap(release -> address, BUFFER_SIZE);
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_WINDOWS
kusano 2b45e8
kusano 2b45e8
  VirtualFree(release -> address, BUFFER_SIZE, MEM_LARGE_PAGES | MEM_DECOMMIT);
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_hugetlb(void *address){
kusano 2b45e8
kusano 2b45e8
  void *map_address = (void *)-1;
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) || defined(OS_AIX)
kusano 2b45e8
  int shmid;
kusano 2b45e8
  
kusano 2b45e8
  shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
		 SHM_HUGETLB |
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef OS_AIX
kusano 2b45e8
		 SHM_LGPAGE | SHM_PIN |
kusano 2b45e8
#endif
kusano 2b45e8
		 IPC_CREAT | SHM_R | SHM_W);
kusano 2b45e8
  
kusano 2b45e8
  if (shmid != -1) {
kusano 2b45e8
    map_address = (void *)shmat(shmid, address, SHM_RND);
kusano 2b45e8
    
kusano 2b45e8
#ifdef OS_LINUX
kusano 2b45e8
    my_mbind(map_address, BUFFER_SIZE, MPOL_PREFERRED, NULL, 0, 0);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
    if (map_address != (void *)-1){
kusano 2b45e8
      shmctl(shmid, IPC_RMID, 0);
kusano 2b45e8
    }
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef __sun__
kusano 2b45e8
  struct memcntl_mha mha;
kusano 2b45e8
  
kusano 2b45e8
  mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
kusano 2b45e8
  mha.mha_flags = 0;
kusano 2b45e8
  mha.mha_pagesize = HUGE_PAGESIZE;
kusano 2b45e8
  memcntl(NULL, 0, MC_HAT_ADVISE, (char *)&mha, 0, 0);
kusano 2b45e8
kusano 2b45e8
  map_address = (BLASULONG)memalign(HUGE_PAGESIZE, BUFFER_SIZE);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef OS_WINDOWS
kusano 2b45e8
kusano 2b45e8
  HANDLE hToken;
kusano 2b45e8
  TOKEN_PRIVILEGES tp;
kusano 2b45e8
kusano 2b45e8
  if (OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES, &hToken) != TRUE) return (void *) -1;
kusano 2b45e8
kusano 2b45e8
  tp.PrivilegeCount = 1;
kusano 2b45e8
  tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
kusano 2b45e8
  
kusano 2b45e8
  if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) return (void *) -1;
kusano 2b45e8
kusano 2b45e8
  if (AdjustTokenPrivileges(hToken, FALSE, (PTOKEN_PRIVILEGES)&tp, 0, NULL, NULL) != TRUE) return (void *) -1;
kusano 2b45e8
kusano 2b45e8
  map_address  = (void *)VirtualAlloc(address,
kusano 2b45e8
				      BUFFER_SIZE,
kusano 2b45e8
				      MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT,
kusano 2b45e8
				      PAGE_READWRITE);
kusano 2b45e8
kusano 2b45e8
  AdjustTokenPrivileges(hToken, TRUE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, NULL);
kusano 2b45e8
kusano 2b45e8
  if (map_address == (void *)NULL) map_address = (void *)-1;
kusano 2b45e8
  
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  if (map_address != (void *)-1){
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].func    = alloc_hugetlb_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef  ALLOC_HUGETLBFILE
kusano 2b45e8
kusano 2b45e8
static int hugetlb_pid = 0;
kusano 2b45e8
kusano 2b45e8
static void alloc_hugetlbfile_free(struct release_t *release){
kusano 2b45e8
kusano 2b45e8
  if (munmap(release -> address, BUFFER_SIZE)) {
kusano 2b45e8
    printf("OpenBLAS : HugeTLBfs unmap failed.\n");
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  if (close(release -> attr)) {
kusano 2b45e8
    printf("OpenBLAS : HugeTLBfs close failed.\n");
kusano 2b45e8
  }
kusano 2b45e8
}
kusano 2b45e8
kusano 2b45e8
static void *alloc_hugetlbfile(void *address){
kusano 2b45e8
kusano 2b45e8
  void *map_address = (void *)-1;
kusano 2b45e8
  int fd;
kusano 2b45e8
  char filename[64];
kusano 2b45e8
kusano 2b45e8
  if (!hugetlb_pid) hugetlb_pid = getpid();
kusano 2b45e8
kusano 2b45e8
  sprintf(filename, "%s/gotoblas.%d", HUGETLB_FILE_NAME, hugetlb_pid);
kusano 2b45e8
kusano 2b45e8
  if ((fd = open(filename, O_RDWR | O_CREAT, 0700)) < 0) {
kusano 2b45e8
    return (void *)-1;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  unlink(filename);
kusano 2b45e8
kusano 2b45e8
  map_address = mmap(address, BUFFER_SIZE,
kusano 2b45e8
		     PROT_READ | PROT_WRITE,
kusano 2b45e8
		     MAP_SHARED,
kusano 2b45e8
		     fd, 0);
kusano 2b45e8
  
kusano 2b45e8
  if (map_address != (void *)-1) {
kusano 2b45e8
    release_info[release_pos].address = map_address;
kusano 2b45e8
    release_info[release_pos].attr    = fd;
kusano 2b45e8
    release_info[release_pos].func    = alloc_hugetlbfile_free;
kusano 2b45e8
    release_pos ++;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  return map_address;
kusano 2b45e8
}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
/* Global lock for memory allocation */
kusano 2b45e8
kusano 2b45e8
#if   defined(USE_PTHREAD_LOCK)
kusano 2b45e8
static pthread_mutex_t    alloc_lock = PTHREAD_MUTEX_INITIALIZER;
kusano 2b45e8
#elif defined(USE_PTHREAD_SPINLOCK)
kusano 2b45e8
static pthread_spinlock_t alloc_lock = 0;
kusano 2b45e8
#else
kusano 2b45e8
static BLASULONG  alloc_lock = 0UL;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef SEEK_ADDRESS
kusano 2b45e8
static BLASULONG base_address      = 0UL;
kusano 2b45e8
#else
kusano 2b45e8
static BLASULONG base_address      = BASE_ADDRESS;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
static volatile struct {
kusano 2b45e8
  BLASULONG lock;
kusano 2b45e8
  void *addr;
kusano 2b45e8
#if defined(WHEREAMI) && !defined(USE_OPENMP)
kusano 2b45e8
  int   pos;
kusano 2b45e8
#endif
kusano 2b45e8
  int used;
kusano 2b45e8
#ifndef __64BIT__
kusano 2b45e8
  char dummy[48];
kusano 2b45e8
#else
kusano 2b45e8
  char dummy[40];
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
} memory[NUM_BUFFERS];
kusano 2b45e8
kusano 2b45e8
static int memory_initialized = 0;
kusano 2b45e8
static void gotoblas_memory_init(void);
kusano 2b45e8
kusano 2b45e8
/*       Memory allocation routine           */
kusano 2b45e8
/* procpos ... indicates where it comes from */
kusano 2b45e8
/*                0 : Level 3 functions      */
kusano 2b45e8
/*                1 : Level 2 functions      */
kusano 2b45e8
/*                2 : Thread                 */
kusano 2b45e8
kusano 2b45e8
void *blas_memory_alloc(int procpos){
kusano 2b45e8
  
kusano 2b45e8
  int position;
kusano 2b45e8
#if defined(WHEREAMI) && !defined(USE_OPENMP)
kusano 2b45e8
  int mypos;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  void *map_address;
kusano 2b45e8
kusano 2b45e8
  void *(*memoryalloc[])(void *address) = {
kusano 2b45e8
#ifdef ALLOC_DEVICEDRIVER
kusano 2b45e8
    alloc_devicedirver,
kusano 2b45e8
#endif
kusano 2b45e8
#if defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS
kusano 2b45e8
    alloc_hugetlb,
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef ALLOC_SHM
kusano 2b45e8
    alloc_shm,
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef ALLOC_MMAP
kusano 2b45e8
    alloc_mmap,
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef ALLOC_QALLOC
kusano 2b45e8
    alloc_qalloc,
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef ALLOC_WINDOWS
kusano 2b45e8
    alloc_windows,
kusano 2b45e8
#endif
kusano 2b45e8
#ifdef ALLOC_MALLOC
kusano 2b45e8
    alloc_malloc,
kusano 2b45e8
#endif
kusano 2b45e8
    NULL,
kusano 2b45e8
  };
kusano 2b45e8
  void *(**func)(void *address);
kusano 2b45e8
kusano 2b45e8
  if (!memory_initialized) {
kusano 2b45e8
    
kusano 2b45e8
    LOCK_COMMAND(&alloc_lock);
kusano 2b45e8
    
kusano 2b45e8
    if (!memory_initialized) {
kusano 2b45e8
      
kusano 2b45e8
#if defined(WHEREAMI) && !defined(USE_OPENMP)
kusano 2b45e8
      for (position = 0; position < NUM_BUFFERS; position ++){
kusano 2b45e8
	memory[position].addr   = (void *)0;
kusano 2b45e8
	memory[position].pos    = -1;
kusano 2b45e8
	memory[position].used   = 0;
kusano 2b45e8
	memory[position].lock   = 0;
kusano 2b45e8
      }
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
#ifdef DYNAMIC_ARCH
kusano 2b45e8
      gotoblas_dynamic_init();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
kusano 2b45e8
      gotoblas_affinity_init();
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
      if (!blas_num_threads) blas_cpu_number = blas_get_cpu_number();
kusano 2b45e8
#endif
kusano 2b45e8
      
kusano 2b45e8
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
kusano 2b45e8
#ifndef DYNAMIC_ARCH
kusano 2b45e8
      blas_set_parameter();
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
      memory_initialized = 1;
kusano 2b45e8
    }
kusano 2b45e8
kusano 2b45e8
    UNLOCK_COMMAND(&alloc_lock);
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf("Alloc Start ...\n");
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(WHEREAMI) && !defined(USE_OPENMP)
kusano 2b45e8
kusano 2b45e8
  mypos = WhereAmI();
kusano 2b45e8
kusano 2b45e8
  position = mypos;
kusano 2b45e8
  while (position > NUM_BUFFERS) position >>= 1;
kusano 2b45e8
kusano 2b45e8
  do {
kusano 2b45e8
    if (!memory[position].used && (memory[position].pos == mypos)) {
kusano 2b45e8
      
kusano 2b45e8
      blas_lock(&memory[position].lock);
kusano 2b45e8
      
kusano 2b45e8
      if (!memory[position].used) goto allocation;
kusano 2b45e8
      
kusano 2b45e8
      blas_unlock(&memory[position].lock);
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    position ++;
kusano 2b45e8
    
kusano 2b45e8
  } while (position < NUM_BUFFERS);
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  position = 0;
kusano 2b45e8
kusano 2b45e8
  do {
kusano 2b45e8
    if (!memory[position].used) {
kusano 2b45e8
      
kusano 2b45e8
      blas_lock(&memory[position].lock);
kusano 2b45e8
kusano 2b45e8
      if (!memory[position].used) goto allocation;
kusano 2b45e8
      
kusano 2b45e8
      blas_unlock(&memory[position].lock);
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    position ++;
kusano 2b45e8
    
kusano 2b45e8
  } while (position < NUM_BUFFERS);
kusano 2b45e8
  
kusano 2b45e8
  goto error;
kusano 2b45e8
kusano 2b45e8
  allocation :
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf("  Position -> %d\n", position);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  memory[position].used = 1;
kusano 2b45e8
kusano 2b45e8
  blas_unlock(&memory[position].lock);
kusano 2b45e8
kusano 2b45e8
  if (!memory[position].addr) {
kusano 2b45e8
    do {
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
      printf("Allocation Start : %lx\n", base_address);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
      map_address = (void *)-1;
kusano 2b45e8
kusano 2b45e8
      func = &memoryalloc[0];
kusano 2b45e8
kusano 2b45e8
      while ((func != NULL) && (map_address == (void *) -1)) {
kusano 2b45e8
kusano 2b45e8
	map_address = (*func)((void *)base_address);
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_DEVICEDRIVER
kusano 2b45e8
	if ((*func ==  alloc_devicedirver) && (map_address == (void *)-1)) {
kusano 2b45e8
	    fprintf(stderr, "OpenBLAS Warning ... Physically contigous allocation was failed.\n");
kusano 2b45e8
	}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef ALLOC_HUGETLBFILE
kusano 2b45e8
	if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
kusano 2b45e8
#ifndef OS_WINDOWS
kusano 2b45e8
	    fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
kusano 2b45e8
#endif
kusano 2b45e8
	}
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS
kusano 2b45e8
	if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
	func ++;
kusano 2b45e8
      }
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
      printf("  Success -> %08lx\n", map_address);
kusano 2b45e8
#endif
kusano 2b45e8
      if (((BLASLONG) map_address) == -1) base_address = 0UL;
kusano 2b45e8
kusano 2b45e8
      if (base_address) base_address += BUFFER_SIZE + FIXED_PAGESIZE;
kusano 2b45e8
kusano 2b45e8
    } while ((BLASLONG)map_address == -1);
kusano 2b45e8
kusano 2b45e8
    memory[position].addr = map_address; 
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
    printf("  Mapping Succeeded. %p(%d)\n", (void *)memory[position].addr, position);
kusano 2b45e8
#endif
kusano 2b45e8
  }
kusano 2b45e8
 
kusano 2b45e8
#if defined(WHEREAMI) && !defined(USE_OPENMP)
kusano 2b45e8
kusano 2b45e8
  if (memory[position].pos == -1) memory[position].pos = mypos;
kusano 2b45e8
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef DYNAMIC_ARCH
kusano 2b45e8
kusano 2b45e8
  if (memory_initialized == 1) {
kusano 2b45e8
    
kusano 2b45e8
    LOCK_COMMAND(&alloc_lock);
kusano 2b45e8
    
kusano 2b45e8
    if (memory_initialized == 1) {
kusano 2b45e8
      
kusano 2b45e8
      if (!gotoblas) gotoblas_dynamic_init();
kusano 2b45e8
      
kusano 2b45e8
      memory_initialized = 2;
kusano 2b45e8
    }
kusano 2b45e8
    
kusano 2b45e8
    UNLOCK_COMMAND(&alloc_lock);
kusano 2b45e8
    
kusano 2b45e8
  }
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf("Mapped   : %p  %3d\n\n",
kusano 2b45e8
	  (void *)memory[position].addr, position);
kusano 2b45e8
#endif  
kusano 2b45e8
  
kusano 2b45e8
  return (void *)memory[position].addr;
kusano 2b45e8
kusano 2b45e8
 error:
kusano 2b45e8
  printf("BLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n");
kusano 2b45e8
kusano 2b45e8
  return NULL;
kusano 2b45e8
}
void blas_memory_free(void *free_area){
kusano 2b45e8
kusano 2b45e8
  int position;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf("Unmapped Start : %p ...\n", free_area);
kusano 2b45e8
#endif  
kusano 2b45e8
 
kusano 2b45e8
  position = 0;
kusano 2b45e8
kusano 2b45e8
  while ((memory[position].addr != free_area)
kusano 2b45e8
	 && (position < NUM_BUFFERS)) position++;
kusano 2b45e8
kusano 2b45e8
  if (memory[position].addr != free_area) goto error;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf("  Position : %d\n", position);
kusano 2b45e8
#endif  
kusano 2b45e8
kusano 2b45e8
  memory[position].used = 0;
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  printf("Unmap Succeeded.\n\n");
kusano 2b45e8
#endif  
kusano 2b45e8
kusano 2b45e8
  return;
kusano 2b45e8
  
kusano 2b45e8
 error:
kusano 2b45e8
  printf("BLAS : Bad memory unallocation! : %4d  %p\n", position,  free_area);
kusano 2b45e8
kusano 2b45e8
#ifdef DEBUG
kusano 2b45e8
  for (position = 0; position < NUM_BUFFERS; position++) 
kusano 2b45e8
    printf("%4ld  %p : %d\n", position, memory[position].addr, memory[position].used);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  return;
kusano 2b45e8
}
void blas_shutdown(void){
kusano 2b45e8
kusano 2b45e8
  int pos;
kusano 2b45e8
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
  BLASFUNC(blas_thread_shutdown)();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  LOCK_COMMAND(&alloc_lock);
kusano 2b45e8
kusano 2b45e8
  for (pos = 0; pos < release_pos; pos ++) {
kusano 2b45e8
    release_info[pos].func(&release_info[pos]);
kusano 2b45e8
  }
kusano 2b45e8
  
kusano 2b45e8
#ifdef SEEK_ADDRESS
kusano 2b45e8
  base_address      = 0UL;
kusano 2b45e8
#else
kusano 2b45e8
  base_address      = BASE_ADDRESS;
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
  for (pos = 0; pos < NUM_BUFFERS; pos ++){
kusano 2b45e8
    memory[pos].addr   = (void *)0;
kusano 2b45e8
    memory[pos].used   = 0;
kusano 2b45e8
#if defined(WHEREAMI) && !defined(USE_OPENMP)
kusano 2b45e8
    memory[pos].pos    = -1;
kusano 2b45e8
#endif
kusano 2b45e8
    memory[pos].lock   = 0;
kusano 2b45e8
  }
kusano 2b45e8
kusano 2b45e8
  UNLOCK_COMMAND(&alloc_lock);
kusano 2b45e8
kusano 2b45e8
  return;
kusano 2b45e8
}
#if defined(OS_LINUX) && !defined(NO_WARMUP)
#ifdef SMP
/* Lock held by _touch_memory() while it writes one word per page. */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
#elif defined(USE_PTHREAD_SPINLOCK)
static pthread_spinlock_t init_lock = 0;
#else
static BLASULONG init_lock = 0UL;
#endif
#endif
/*
 * Writes into the buffer starting at sa + GEMM_OFFSET_A so that its pages
 * are touched: first one int per PAGESIZE over BUFFER_SIZE - PAGESIZE
 * bytes (under init_lock in SMP builds), then one int every 64 bytes over
 * the first MIN(BUFFER_SIZE - PAGESIZE, L2_SIZE) bytes.
 *
 * Only sa is read; arg, range_m, range_n, sb and pos are ignored.
 * NOTE(review): the parameter list presumably mirrors the queue routine
 * signature expected by exec_blas() -- confirm.
 *
 * Does nothing when hot_alloc is 2, and is empty on ARCH_POWER.
 */
static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
                          void *sa, void *sb, BLASLONG pos) {

#ifndef ARCH_POWER

  long remaining;
  BLASULONG cursor;

#if defined(OS_LINUX) && !defined(NO_WARMUP)
  if (hot_alloc == 2) return;
#endif

  /* Pass 1: one store per page. */
  cursor = (BLASULONG)sa + GEMM_OFFSET_A;

#ifdef SMP
  LOCK_COMMAND(&init_lock);
#endif

  for (remaining = BUFFER_SIZE - PAGESIZE; remaining > 0; remaining -= PAGESIZE) {
    *(int *)cursor = remaining;
    cursor += PAGESIZE;
  }

#ifdef SMP
  UNLOCK_COMMAND(&init_lock);
#endif

  /* Pass 2: one store every 64 bytes over the leading part of the buffer. */
  cursor = (BLASULONG)sa + GEMM_OFFSET_A;

  for (remaining = MIN((BUFFER_SIZE - PAGESIZE), L2_SIZE); remaining > 0; remaining -= 64) {
    *(int *)cursor = remaining;
    cursor += 64;
  }

#endif
}
#ifdef SMP

/*
 * Runs _touch_memory() once per thread through exec_blas().
 *
 * buffer ... work area handed to the first queue entry as its sa; the
 *            other entries keep whatever blas_queue_init() left in sa.
 *
 * One queue entry is built per thread, up to MAX_CPU_NUMBER (the size of
 * the local queue array).  When blas_num_threads is not positive there is
 * no entry to build, so the buffer is touched directly on the calling
 * thread instead.
 */
static void _init_thread_memory(void *buffer) {

  blas_queue_t queue[MAX_CPU_NUMBER];
  int num_cpu;
  int num_jobs = blas_num_threads;

  /* Never build more entries than queue[] can hold. */
  if (num_jobs > MAX_CPU_NUMBER) num_jobs = MAX_CPU_NUMBER;

  /* Without at least one entry, queue[num_cpu - 1] below would be
     queue[-1]; fall back to touching the buffer on this thread. */
  if (num_jobs <= 0) {
    _touch_memory(NULL, NULL, NULL, buffer, NULL, 0);
    return;
  }

  for (num_cpu = 0; num_cpu < num_jobs; num_cpu++) {

    blas_queue_init(&queue[num_cpu]);
    queue[num_cpu].mode    = BLAS_DOUBLE | BLAS_REAL;
    queue[num_cpu].routine = &_touch_memory;
    queue[num_cpu].args    = NULL;
    queue[num_cpu].next    = &queue[num_cpu + 1];
  }

  /* Terminate the chain at the last entry that was built. */
  queue[num_cpu - 1].next = NULL;
  queue[0].sa = buffer;

  exec_blas(num_cpu, queue);

}
#endif
static void gotoblas_memory_init(void) {
kusano 2b45e8
kusano 2b45e8
  void *buffer;
kusano 2b45e8
kusano 2b45e8
  hot_alloc = 1;
kusano 2b45e8
kusano 2b45e8
  buffer = (void *)blas_memory_alloc(0);
kusano 2b45e8
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
  if (blas_cpu_number == 0) blas_get_cpu_number();
kusano 2b45e8
#ifdef SMP_SERVER
kusano 2b45e8
  if (blas_server_avail == 0) blas_thread_init();
kusano 2b45e8
#endif
kusano 2b45e8
    
kusano 2b45e8
  _init_thread_memory((void *)((BLASULONG)buffer + GEMM_OFFSET_A));
kusano 2b45e8
  
kusano 2b45e8
#else
kusano 2b45e8
  
kusano 2b45e8
  _touch_memory(NULL, NULL, NULL, (void *)((BLASULONG)buffer + GEMM_OFFSET_A), NULL, 0);
kusano 2b45e8
  
kusano 2b45e8
#endif
kusano 2b45e8
  
kusano 2b45e8
  blas_memory_free(buffer);
kusano 2b45e8
}
#endif
/* Initialization for all function; this function should be called before main */
kusano 2b45e8
kusano 2b45e8
static int gotoblas_initialized = 0;
kusano 2b45e8
kusano 2b45e8
void CONSTRUCTOR gotoblas_init(void) {
kusano 2b45e8
kusano 2b45e8
  if (gotoblas_initialized) return;
kusano 2b45e8
kusano 2b45e8
#ifdef PROFILE
kusano 2b45e8
   moncontrol (0);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef DYNAMIC_ARCH
kusano 2b45e8
   gotoblas_dynamic_init();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
kusano 2b45e8
   gotoblas_affinity_init();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#if defined(OS_LINUX) && !defined(NO_WARMUP)
kusano 2b45e8
   gotoblas_memory_init();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef SMP
kusano 2b45e8
  if (blas_cpu_number == 0) blas_get_cpu_number();
kusano 2b45e8
#ifdef SMP_SERVER
kusano 2b45e8
  if (blas_server_avail == 0) blas_thread_init();
kusano 2b45e8
#endif
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
#ifdef FUNCTION_PROFILE
kusano 2b45e8
   gotoblas_profile_init();
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
   gotoblas_initialized = 1;
kusano 2b45e8
kusano 2b45e8
#ifdef PROFILE
kusano 2b45e8
   moncontrol (1);
kusano 2b45e8
#endif
kusano 2b45e8
kusano 2b45e8
}
/*
 * Library-wide teardown, declared with the DESTRUCTOR macro.
 *
 * Runs the configured shutdown steps in a fixed order and then clears
 * gotoblas_initialized.  Returns immediately when the library is not
 * currently initialized.
 */
void DESTRUCTOR gotoblas_quit(void) {

  if (!gotoblas_initialized) {
    return;
  }

#ifdef PROFILE
  /* Profiling is switched off for the duration of the teardown. */
  moncontrol (0);
#endif

#ifdef FUNCTION_PROFILE
  gotoblas_profile_quit();
#endif

#if defined(SMP) && defined(OS_LINUX) && !defined(NO_AFFINITY)
  gotoblas_affinity_quit();
#endif

#ifdef DYNAMIC_ARCH
  gotoblas_dynamic_quit();
#endif

  gotoblas_initialized = 0;

#ifdef PROFILE
  moncontrol (1);
#endif

}
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/*
 * Don't call me; this is just a workaround for a PGI / Sun bug.
 *
 * The function exists to carry the asm statements below.  The active
 * branch places a call to gotoblas_init in the .init section and a call
 * to gotoblas_quit in the .fini section; the disabled branch would
 * instead add the two functions to .ctors / .dtors.
 */
void gotoblas_dummy_for_PGI(void) {

  gotoblas_init();
  gotoblas_quit();

#if 0
  /* Disabled alternative: register through .ctors / .dtors. */
  asm ("\t.section\t.ctors,\"aw\",@progbits; .align 8; .quad gotoblas_init; .section .text");
  asm ("\t.section\t.dtors,\"aw\",@progbits; .align 8; .quad gotoblas_quit; .section .text");
#else
  /* Active: emit the calls into .init / .fini, then switch back to .text. */
  asm (".section .init,\"ax\"; call gotoblas_init@PLT; .section .text");
  asm (".section .fini,\"ax\"; call gotoblas_quit@PLT; .section .text");
#endif
}
#endif