|
kusano |
2b45e8 |
/*****************************************************************************
|
|
kusano |
2b45e8 |
Copyright (c) 2011, Lab of Parallel Software and Computational Science, ISCAS
|
|
kusano |
2b45e8 |
All rights reserved.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
Redistribution and use in source and binary forms, with or without
|
|
kusano |
2b45e8 |
modification, are permitted provided that the following conditions are
|
|
kusano |
2b45e8 |
met:
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
1. Redistributions of source code must retain the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
2. Redistributions in binary form must reproduce the above copyright
|
|
kusano |
2b45e8 |
notice, this list of conditions and the following disclaimer in
|
|
kusano |
2b45e8 |
the documentation and/or other materials provided with the
|
|
kusano |
2b45e8 |
distribution.
|
|
kusano |
2b45e8 |
3. Neither the name of the ISCAS nor the names of its contributors may
|
|
kusano |
2b45e8 |
be used to endorse or promote products derived from this software
|
|
kusano |
2b45e8 |
without specific prior written permission.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
kusano |
2b45e8 |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
kusano |
2b45e8 |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
kusano |
2b45e8 |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
kusano |
2b45e8 |
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
kusano |
2b45e8 |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
kusano |
2b45e8 |
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
kusano |
2b45e8 |
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
kusano |
2b45e8 |
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
kusano |
2b45e8 |
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
**********************************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/* All rights reserved. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* Redistribution and use in source and binary forms, with or */
|
|
kusano |
2b45e8 |
/* without modification, are permitted provided that the following */
|
|
kusano |
2b45e8 |
/* conditions are met: */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 1. Redistributions of source code must retain the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* 2. Redistributions in binary form must reproduce the above */
|
|
kusano |
2b45e8 |
/* copyright notice, this list of conditions and the following */
|
|
kusano |
2b45e8 |
/* disclaimer in the documentation and/or other materials */
|
|
kusano |
2b45e8 |
/* provided with the distribution. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
|
kusano |
2b45e8 |
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
|
kusano |
2b45e8 |
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
|
kusano |
2b45e8 |
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
|
kusano |
2b45e8 |
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
|
kusano |
2b45e8 |
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
|
kusano |
2b45e8 |
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
|
kusano |
2b45e8 |
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
|
kusano |
2b45e8 |
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
kusano |
2b45e8 |
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
|
kusano |
2b45e8 |
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
|
kusano |
2b45e8 |
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
|
kusano |
2b45e8 |
/* POSSIBILITY OF SUCH DAMAGE. */
|
|
kusano |
2b45e8 |
/* */
|
|
kusano |
2b45e8 |
/* The views and conclusions contained in the software and */
|
|
kusano |
2b45e8 |
/* documentation are those of the authors and should not be */
|
|
kusano |
2b45e8 |
/* interpreted as representing official policies, either expressed */
|
|
kusano |
2b45e8 |
/* or implied, of The University of Texas at Austin. */
|
|
kusano |
2b45e8 |
/*********************************************************************/
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifndef PARAM_H
|
|
kusano |
2b45e8 |
#define PARAM_H
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines OPTERON.
 *
 * All macros are plain integer constants except the *_DEFAULT_P and
 * *_DEFAULT_R entries, which expand to identifiers (sgemm_p, sgemm_r, ...)
 * that this section does not define and that must be provided elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef OPTERON

#define SNUMOPT 4
#define DNUMOPT 2

#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 256
#define GEMM_DEFAULT_ALIGN    0x01ffffUL

/* UNROLL_N does not depend on ARCH_X86; UNROLL_M (below) does. */
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else /* !ARCH_X86 */
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#endif /* ARCH_X86 */

/* P and R are not literals here: they forward to external identifiers. */
#define SGEMM_DEFAULT_P sgemm_p
#define DGEMM_DEFAULT_P dgemm_p
#define QGEMM_DEFAULT_P qgemm_p
#define CGEMM_DEFAULT_P cgemm_p
#define ZGEMM_DEFAULT_P zgemm_p
#define XGEMM_DEFAULT_P xgemm_p

#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

/* Q is uniform across precisions; only ALLOC_HUGETLB changes its value. */
#ifdef ALLOC_HUGETLB

#define SGEMM_DEFAULT_Q 248
#define DGEMM_DEFAULT_Q 248
#define QGEMM_DEFAULT_Q 248
#define CGEMM_DEFAULT_Q 248
#define ZGEMM_DEFAULT_Q 248
#define XGEMM_DEFAULT_Q 248

#else /* !ALLOC_HUGETLB */

#define SGEMM_DEFAULT_Q 240
#define DGEMM_DEFAULT_Q 240
#define QGEMM_DEFAULT_Q 240
#define CGEMM_DEFAULT_Q 240
#define ZGEMM_DEFAULT_Q 240
#define XGEMM_DEFAULT_Q 240

#endif /* ALLOC_HUGETLB */

#define SYMV_P 16
/* Feature flag with no value: consumers can only test it with #ifdef. */
#define HAVE_EXCLUSIVE_CACHE

#endif /* OPTERON */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines BARCELONA or SHANGHAI.
 *
 * P and Q are literal constants in this section; only the *_DEFAULT_R
 * entries and GEMM_THREAD expand to identifiers (sgemm_r, ...,
 * gemm_thread_mn) that are not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#if defined(BARCELONA) || defined(SHANGHAI)

#define SNUMOPT 8
#define DNUMOPT 4

#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 832
#define GEMM_DEFAULT_ALIGN    0x0fffUL

/* UNROLL_N does not depend on ARCH_X86; UNROLL_M (below) does. */
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else /* !ARCH_X86 */
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#endif /* ARCH_X86 */

/*
 * The "#if 0" branch is a disabled alternative P/Q set kept in the source;
 * the preprocessor always takes the #else branch.
 */
#if 0
#define SGEMM_DEFAULT_P 496
#define DGEMM_DEFAULT_P 248
#define QGEMM_DEFAULT_P 124
#define CGEMM_DEFAULT_P 248
#define ZGEMM_DEFAULT_P 124
#define XGEMM_DEFAULT_P 62

#define SGEMM_DEFAULT_Q 248
#define DGEMM_DEFAULT_Q 248
#define QGEMM_DEFAULT_Q 248
#define CGEMM_DEFAULT_Q 248
#define ZGEMM_DEFAULT_Q 248
#define XGEMM_DEFAULT_Q 248

#else /* active values */

#define SGEMM_DEFAULT_P 448
#define DGEMM_DEFAULT_P 224
#define QGEMM_DEFAULT_P 112
#define CGEMM_DEFAULT_P 224
#define ZGEMM_DEFAULT_P 112
#define XGEMM_DEFAULT_P 56

#define SGEMM_DEFAULT_Q 224
#define DGEMM_DEFAULT_Q 224
#define QGEMM_DEFAULT_Q 224
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 224
#define XGEMM_DEFAULT_Q 224

#endif

/* R is not a literal here: it forwards to external identifiers. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 16
/* Feature flag with no value: consumers can only test it with #ifdef. */
#define HAVE_EXCLUSIVE_CACHE

/* Expands to the identifier gemm_thread_mn, which is not defined here. */
#define GEMM_THREAD gemm_thread_mn

#endif /* BARCELONA || SHANGHAI */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines ATHLON.
 *
 * Nothing in this section depends on ARCH_X86 or any other macro. P and Q
 * are literal constants; the *_DEFAULT_R entries expand to identifiers
 * (sgemm_r, ...) that are not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef ATHLON

#define SNUMOPT 4
#define DNUMOPT 2

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 384
#define GEMM_DEFAULT_ALIGN    0x0ffffUL

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#define SGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 1
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1

/* R is not a literal here: it forwards to external identifiers. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SGEMM_DEFAULT_P 208
#define DGEMM_DEFAULT_P 104
#define QGEMM_DEFAULT_P 56
#define CGEMM_DEFAULT_P 104
#define ZGEMM_DEFAULT_P 56
#define XGEMM_DEFAULT_P 28

/* Q is the same for every precision. */
#define SGEMM_DEFAULT_Q 208
#define DGEMM_DEFAULT_Q 208
#define QGEMM_DEFAULT_Q 208
#define CGEMM_DEFAULT_Q 208
#define ZGEMM_DEFAULT_Q 208
#define XGEMM_DEFAULT_Q 208

#define SYMV_P 16
/* Feature flag with no value: consumers can only test it with #ifdef. */
#define HAVE_EXCLUSIVE_CACHE
#endif /* ATHLON */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines VIAC3.
 *
 * Nothing in this section depends on ARCH_X86 or any other macro. P and Q
 * are literal constants; the *_DEFAULT_R entries expand to identifiers
 * (sgemm_r, ...) that are not defined here and must come from elsewhere.
 * Unlike several sibling sections, HAVE_EXCLUSIVE_CACHE is not defined here.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef VIAC3

#define SNUMOPT 2
#define DNUMOPT 1

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 256
#define GEMM_DEFAULT_ALIGN    0x0ffffUL

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#define SGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 1
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1

/* R is not a literal here: it forwards to external identifiers. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

/* P is the same for every precision; Q is what varies in this section. */
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 128
#define QGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128
#define XGEMM_DEFAULT_P 128

#define SGEMM_DEFAULT_Q 512
#define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 128
#define XGEMM_DEFAULT_Q 128

#define SYMV_P 16
#endif /* VIAC3 */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines NANO.
 *
 * Both UNROLL_N and UNROLL_M depend on ARCH_X86 in this section. P and Q
 * are literal constants; the *_DEFAULT_R entries expand to identifiers
 * (sgemm_r, ...) that are not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef NANO

#define SNUMOPT 4
#define DNUMOPT 2

#define GEMM_DEFAULT_OFFSET_A 64
#define GEMM_DEFAULT_OFFSET_B 256
#define GEMM_DEFAULT_ALIGN    0x01ffffUL

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else /* !ARCH_X86 */
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#endif /* ARCH_X86 */

/* P is the same for every precision; Q is what varies in this section. */
#define SGEMM_DEFAULT_P 288
#define DGEMM_DEFAULT_P 288
#define QGEMM_DEFAULT_P 288
#define CGEMM_DEFAULT_P 288
#define ZGEMM_DEFAULT_P 288
#define XGEMM_DEFAULT_P 288

/* R is not a literal here: it forwards to external identifiers. */
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r

#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_Q 64
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 64
#define XGEMM_DEFAULT_Q 32

#define SYMV_P 16
/* Feature flag with no value: consumers can only test it with #ifdef. */
#define HAVE_EXCLUSIVE_CACHE

#endif /* NANO */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines PENTIUM, PENTIUM2 or
 * PENTIUM3.
 *
 * HAVE_SSE changes three things only: SNUMOPT and the SGEMM/CGEMM UNROLL_M
 * values. Q is the literal 256 for every precision; the *_DEFAULT_P and
 * *_DEFAULT_R entries expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)

#ifdef HAVE_SSE
#define SNUMOPT 2
#else
#define SNUMOPT 1
#endif
#define DNUMOPT 1

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN    0x0ffffUL

/* Only the S and C UNROLL_M values react to HAVE_SSE. */
#ifdef HAVE_SSE
#define SGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_M 4
#else
#define SGEMM_DEFAULT_UNROLL_M 4
#define CGEMM_DEFAULT_UNROLL_M 2
#endif
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 1
#define ZGEMM_DEFAULT_UNROLL_N 1
#define XGEMM_DEFAULT_UNROLL_N 1

/* Per precision: P and R forward to external identifiers, Q is a literal. */
#define SGEMM_DEFAULT_P sgemm_p
#define SGEMM_DEFAULT_Q 256
#define SGEMM_DEFAULT_R sgemm_r

#define DGEMM_DEFAULT_P dgemm_p
#define DGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_R dgemm_r

#define QGEMM_DEFAULT_P qgemm_p
#define QGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_R qgemm_r

#define CGEMM_DEFAULT_P cgemm_p
#define CGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_R cgemm_r

#define ZGEMM_DEFAULT_P zgemm_p
#define ZGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_R zgemm_r

#define XGEMM_DEFAULT_P xgemm_p
#define XGEMM_DEFAULT_Q 256
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 4

#endif /* PENTIUM || PENTIUM2 || PENTIUM3 */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines PENTIUMM.
 *
 * The UNROLL_M/UNROLL_N tables have two variants chosen by CORE_YONAH.
 * Q is the literal 256 for every precision; the *_DEFAULT_P and
 * *_DEFAULT_R entries expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef PENTIUMM

#define SNUMOPT 2
#define DNUMOPT 1

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN    0x0ffffUL

#ifdef CORE_YONAH
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_N 1
#else /* !CORE_YONAH */
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define CGEMM_DEFAULT_UNROLL_N 1
#define ZGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_N 1
#define XGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_N 1
#endif /* CORE_YONAH */

/* Per precision: P and R forward to external identifiers, Q is a literal. */
#define SGEMM_DEFAULT_P sgemm_p
#define SGEMM_DEFAULT_Q 256
#define SGEMM_DEFAULT_R sgemm_r

#define DGEMM_DEFAULT_P dgemm_p
#define DGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_R dgemm_r

#define QGEMM_DEFAULT_P qgemm_p
#define QGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_R qgemm_r

#define CGEMM_DEFAULT_P cgemm_p
#define CGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_R cgemm_r

#define ZGEMM_DEFAULT_P zgemm_p
#define ZGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_R zgemm_r

#define XGEMM_DEFAULT_P xgemm_p
#define XGEMM_DEFAULT_Q 256
#define XGEMM_DEFAULT_R xgemm_r

#define SYMV_P 4
#endif /* PENTIUMM */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines CORE_NORTHWOOD.
 *
 * Nothing in this section depends on ARCH_X86 or any other macro. Q is the
 * literal 128 for every precision; the *_DEFAULT_P and *_DEFAULT_R entries
 * expand to identifiers (sgemm_p, sgemm_r, ...) that are not defined here
 * and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef CORE_NORTHWOOD

#define SNUMOPT 4
#define DNUMOPT 2

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 32

#define GEMM_DEFAULT_ALIGN 0x0ffffUL

#define SYMV_P 8

#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 1
#define ZGEMM_DEFAULT_UNROLL_N 1
#define XGEMM_DEFAULT_UNROLL_N 1

/* P and R are not literals here: they forward to external identifiers. */
#define SGEMM_DEFAULT_P sgemm_p
#define SGEMM_DEFAULT_R sgemm_r

#define DGEMM_DEFAULT_P dgemm_p
#define DGEMM_DEFAULT_R dgemm_r

#define QGEMM_DEFAULT_P qgemm_p
#define QGEMM_DEFAULT_R qgemm_r

#define CGEMM_DEFAULT_P cgemm_p
#define CGEMM_DEFAULT_R cgemm_r

#define ZGEMM_DEFAULT_P zgemm_p
#define ZGEMM_DEFAULT_R zgemm_r

#define XGEMM_DEFAULT_P xgemm_p
#define XGEMM_DEFAULT_R xgemm_r

/* Q is the same for every precision. */
#define SGEMM_DEFAULT_Q 128
#define DGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 128
#define XGEMM_DEFAULT_Q 128
#endif /* CORE_NORTHWOOD */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines CORE_PRESCOTT.
 *
 * Two other macros influence this section: __64BIT__ selects the
 * GEMM_DEFAULT_OFFSET_A/B pair and ARCH_X86 selects the UNROLL_M table.
 * Q is the literal 128 for every precision; the *_DEFAULT_P and
 * *_DEFAULT_R entries expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef CORE_PRESCOTT

#define SNUMOPT 4
#define DNUMOPT 2

#ifndef __64BIT__
#define GEMM_DEFAULT_OFFSET_A 128
#define GEMM_DEFAULT_OFFSET_B 192
#else /* __64BIT__ */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 256
#endif /* !__64BIT__ */

#define GEMM_DEFAULT_ALIGN 0x0ffffUL

#define SYMV_P 8

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#else /* !ARCH_X86 */
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#endif /* ARCH_X86 */

/* UNROLL_N does not depend on ARCH_X86. */
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1

/* P and R are not literals here: they forward to external identifiers. */
#define SGEMM_DEFAULT_P sgemm_p
#define SGEMM_DEFAULT_R sgemm_r

#define DGEMM_DEFAULT_P dgemm_p
#define DGEMM_DEFAULT_R dgemm_r

#define QGEMM_DEFAULT_P qgemm_p
#define QGEMM_DEFAULT_R qgemm_r

#define CGEMM_DEFAULT_P cgemm_p
#define CGEMM_DEFAULT_R cgemm_r

#define ZGEMM_DEFAULT_P zgemm_p
#define ZGEMM_DEFAULT_R zgemm_r

#define XGEMM_DEFAULT_P xgemm_p
#define XGEMM_DEFAULT_R xgemm_r

/* Q is the same for every precision. */
#define SGEMM_DEFAULT_Q 128
#define DGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 128
#define XGEMM_DEFAULT_Q 128
#endif /* CORE_PRESCOTT */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when the build defines CORE2.
 *
 * ARCH_X86 selects the UNROLL_N table and additionally defines the
 * function-like macro MASK; the UNROLL_M values are the same in both
 * branches. Q is the literal 256 for every precision; the *_DEFAULT_P and
 * *_DEFAULT_R entries expand to identifiers (sgemm_p, sgemm_r, ...) that are
 * not defined here and must come from elsewhere.
 *
 * NOTE(review): the S/D/Q/C/Z/X prefixes presumably follow the usual BLAS
 * precision naming, and P/Q/R look like GEMM blocking sizes -- confirm
 * against the code that consumes these macros.
 */
#ifdef CORE2

#define SNUMOPT 8
#define DNUMOPT 4

#define GEMM_DEFAULT_OFFSET_A 448
#define GEMM_DEFAULT_OFFSET_B 128
#define GEMM_DEFAULT_ALIGN    0x03fffUL

#define SYMV_P 8

#define SWITCH_RATIO 4

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 1
#define ZGEMM_DEFAULT_UNROLL_N 1
#define XGEMM_DEFAULT_UNROLL_N 1

/*
 * MASK(a, b): ((a + b - 1) / b) * b, i.e. for positive integer operands
 * `a` rounded up to the next multiple of `b`. `b` is expanded three times,
 * so do not pass an argument with side effects. Defined only in this
 * ARCH_X86 branch.
 */
#define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))

#else /* !ARCH_X86 */
#define SGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1

#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#endif /* ARCH_X86 */

/* P and R are not literals here: they forward to external identifiers. */
#define SGEMM_DEFAULT_P sgemm_p
#define SGEMM_DEFAULT_R sgemm_r

#define DGEMM_DEFAULT_P dgemm_p
#define DGEMM_DEFAULT_R dgemm_r

#define QGEMM_DEFAULT_P qgemm_p
#define QGEMM_DEFAULT_R qgemm_r

#define CGEMM_DEFAULT_P cgemm_p
#define CGEMM_DEFAULT_R cgemm_r

#define ZGEMM_DEFAULT_P zgemm_p
#define ZGEMM_DEFAULT_R zgemm_r

#define XGEMM_DEFAULT_P xgemm_p
#define XGEMM_DEFAULT_R xgemm_r

/* Q is the same for every precision. */
#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 256
#define XGEMM_DEFAULT_Q 256

#endif /* CORE2 */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef PENRYN
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 8
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 128
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SWITCH_RATIO 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_X86
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P sgemm_p
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R sgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P dgemm_p
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R dgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_P qgemm_p
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_R qgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P cgemm_p
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R cgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P zgemm_p
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R zgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_P xgemm_p
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_R xgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GETRF_FACTOR 0.75
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef DUNNINGTON
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 8
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 128
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SWITCH_RATIO 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_X86
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P sgemm_p
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R sgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P dgemm_p
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R dgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_P qgemm_p
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_R qgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P cgemm_p
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R cgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P zgemm_p
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R zgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_P xgemm_p
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_R xgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 768
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 384
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_Q 192
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 768
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 384
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_Q 192
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GETRF_FACTOR 0.75
|
|
kusano |
2b45e8 |
#define GEMM_THREAD gemm_thread_mn
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef NEHALEM
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 8
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 32
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SWITCH_RATIO 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_X86
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 504
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R sgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 504
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R dgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_P 504
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_R qgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 252
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R cgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 252
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R zgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_P 252
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_R xgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GETRF_FACTOR 0.72
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ATOM
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 64
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ARCH_X86
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P sgemm_p
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R sgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P dgemm_p
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R dgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_P qgemm_p
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_R qgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P cgemm_p
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R cgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P zgemm_p
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R zgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_P xgemm_p
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_R xgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef ITANIUM2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 4
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 128
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P sgemm_p
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P dgemm_p
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_P qgemm_p
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P cgemm_p
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P zgemm_p
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_P xgemm_p
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R sgemm_r
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R dgemm_r
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_R qgemm_r
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R cgemm_r
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R zgemm_r
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_R xgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 16
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GETRF_FACTOR 0.65
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(EV4) || defined(EV5) || defined(EV6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * SNUMOPT/DNUMOPT selection inside the EV4/EV5/EV6 section:
 * EV4 gets 1 for both; any other CPU admitted by the enclosing
 * condition gets 2 for both.
 */
#ifdef EV4
|
|
kusano |
2b45e8 |
#define SNUMOPT 1
|
|
kusano |
2b45e8 |
#define DNUMOPT 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 512
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 512
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef EV4
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 32
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 112
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 32
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 56
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 32
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 64
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R 240
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 32
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 32
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R 240
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef EV5
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 64
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef EV6
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef CELL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 8192
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 16
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef PPCG4
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 1024
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 16
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef PPC970
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 4
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 2688
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 3072
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 16
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * SGEMM/DGEMM/CGEMM/ZGEMM P defaults for the PPC970 section, chosen by
 * the value of L2_SIZE.
 * NOTE(review): these are only defined when OS_LINUX is set; this block
 * provides no fallback for other operating systems -- confirm they are
 * supplied elsewhere.
 * NOTE(review): 1024976 is not 1 MiB (1048576). Presumably it matches the
 * exact L2_SIZE value emitted by the build configuration; confirm before
 * "correcting" it, since changing it alters which branch is selected.
 */
#ifdef OS_LINUX
|
|
kusano |
2b45e8 |
#if L2_SIZE == 1024976
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 320
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 176
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 176
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 176
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 176
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef PPC440
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0; the 32 is presumably a unit left in place so the multiplier can be tuned -- confirm. */
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Each R default is defined as the matching P default macro, so it expands to whatever *_DEFAULT_P is at the point of use. */
#define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#ifdef PPC440FP2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 4
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Both offsets are written as (32 * 0), i.e. they evaluate to 0. */
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
/*
 * Q defaults for the PPC440FP2 section. The condition is the constant 1,
 * so the first set (4096/3072/2048/1024) is always selected and the
 * #else set (512/256/256/128) is never compiled; it is kept only as an
 * alternative that can be enabled by flipping the constant.
 */
#if 1
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 4096
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 3072
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 2048
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 2048
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * POWER3-only settings: SNUMOPT/DNUMOPT plus fixed P/Q/R defaults for
 * SGEMM, DGEMM and ZGEMM.
 * NOTE(review): no CGEMM_DEFAULT_P/Q/R is defined in this block --
 * presumably supplied elsewhere or unused for this CPU; confirm.
 */
#ifdef POWER3
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 4
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 432
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R 1012
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 216
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R 1012
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 104
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R 1012
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * POWER4-only P defaults: 184 for all four precisions when ALLOC_HUGETLB
 * is defined, 144 otherwise.
 * NOTE(review): this block defines only *_DEFAULT_P; no Q or R defaults
 * for POWER4 appear here -- presumably supplied elsewhere; confirm.
 */
#if defined(POWER4)
|
|
kusano |
2b45e8 |
#ifdef ALLOC_HUGETLB
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 184
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 184
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 184
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 184
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 144
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 144
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 144
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 144
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(POWER5)
|
|
kusano |
2b45e8 |
#ifdef ALLOC_HUGETLB
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 128
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 320
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 160
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 160
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 80
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#if defined(POWER6)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 4
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 384
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 1024
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 992
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 480
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 488
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 248
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 504
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 504
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 400
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 400
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when both SPARC and V7 are defined.
 *
 * Defines the SNUMOPT/DNUMOPT values, the GEMM buffer offsets and alignment
 * mask, the per-routine UNROLL_M/UNROLL_N factors, the *_DEFAULT_P and
 * *_DEFAULT_Q values, SYMV_P and GEMM_THREAD.  No *_DEFAULT_R value is
 * defined in this block.
 *
 * NOTE(review): the S/D/C/Z prefixes presumably follow the BLAS convention
 * (single, double, complex, double complex) and P/Q presumably are blocking
 * sizes -- confirm against the code that consumes these macros.
 */
#if defined(SPARC) && defined(V7)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 4
|
|
kusano |
2b45e8 |
#define DNUMOPT 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 2048
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Unroll factors per routine family (M, then N). */
#define SGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* P values: identical (256) for all four routine families. */
#define SGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Q values: 512 for S, 256 for D and C, 128 for Z. */
#define SGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
/* Maps GEMM_THREAD to the identifier gemm_thread_mn (declared elsewhere). */
#define GEMM_THREAD gemm_thread_mn
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when both SPARC and V9 are defined.
 *
 * Same set of macros as the SPARC/V7 block (SNUMOPT/DNUMOPT, GEMM offsets
 * and alignment mask, UNROLL_M/UNROLL_N, *_DEFAULT_P, *_DEFAULT_Q, SYMV_P),
 * but with different values, and without a GEMM_THREAD definition.
 * No *_DEFAULT_R value is defined in this block.
 *
 * NOTE(review): P/Q presumably are blocking sizes -- confirm against the
 * code that consumes these macros.
 */
#if defined(SPARC) && defined(V9)
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 2048
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Unroll factors: square (M == N) for every routine family. */
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* P values: identical (512) for all four routine families. */
#define SGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 512
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Q values: 1024 for S, 512 for D and C, 256 for Z. */
#define SGEMM_DEFAULT_Q 1024
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 256
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 8
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when SICORTEX is defined.
 *
 * Defines SNUMOPT/DNUMOPT, the GEMM buffer offsets (both 0) and alignment
 * mask, UNROLL_M/UNROLL_N, and literal *_DEFAULT_P, *_DEFAULT_Q and
 * *_DEFAULT_R values for the S/D/C/Z routine families, plus SYMV_P.
 *
 * NOTE(review): P/Q/R presumably are blocking sizes -- confirm against the
 * code that consumes these macros.
 */
#ifdef SICORTEX
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Unroll factors per routine family (M, then N). */
#define SGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 8
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* P values: 108 for S and C, 112 for D and Z. */
#define SGEMM_DEFAULT_P 108
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 112
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 108
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 112
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Q values: 288 for S, 144 for D and C, 72 for Z. */
#define SGEMM_DEFAULT_Q 288
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 144
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 144
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 72
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* R values: a fixed literal (2000) for all four routine families. */
#define SGEMM_DEFAULT_R 2000
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R 2000
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R 2000
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R 2000
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 16
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when LOONGSON3A is defined.
 *
 * Defines SNUMOPT/DNUMOPT, the GEMM buffer offsets and alignment mask,
 * UNROLL_M/UNROLL_N, *_DEFAULT_P, *_DEFAULT_Q, *_DEFAULT_R, the extra
 * GEMM_OFFSET_A1/GEMM_OFFSET_B1 constants, and SYMV_P.
 *
 * NOTE(review): P/Q/R presumably are blocking sizes -- confirm against the
 * code that consumes these macros.
 */
#ifdef LOONGSON3A
|
|
kusano |
2b45e8 |
/*
 * This block started as a copy of the SICORTEX block.  SNUMOPT/DNUMOPT, the
 * GEMM offsets and the alignment mask still match it; the unroll factors and
 * the P/Q/R values below differ.
 */
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Unroll factors per routine family (M, then N). */
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 44
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 32
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 192
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 92
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 80
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * R values: S, C and Z use the literal 640, while D expands to the
 * identifier dgemm_r (declared elsewhere) rather than a constant.
 */
#define SGEMM_DEFAULT_R 640
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R dgemm_r
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R 640
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R 640
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Additional offsets, defined only in the LOONGSON3A and LOONGSON3B blocks. */
#define GEMM_OFFSET_A1 0x10000
|
|
kusano |
2b45e8 |
#define GEMM_OFFSET_B1 0x100000
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 16
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when LOONGSON3B is defined.
 *
 * Same set of macros as the LOONGSON3A block (SNUMOPT/DNUMOPT, GEMM offsets
 * and alignment mask, UNROLL_M/UNROLL_N, *_DEFAULT_P/Q/R, GEMM_OFFSET_A1/B1,
 * SYMV_P).  Here every unroll factor is 2 and every *_DEFAULT_R is the
 * literal 512.
 *
 * NOTE(review): P/Q/R presumably are blocking sizes -- confirm against the
 * code that consumes these macros.
 */
#ifdef LOONGSON3B
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Unroll factors: 2 x 2 for every routine family. */
#define SGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_P 64
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_P 24
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_P 24
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_P 20
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_Q 192
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_Q 128
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_Q 64
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_R 512
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_R 512
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_R 512
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_R 512
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Additional offsets, same values as in the LOONGSON3A block. */
#define GEMM_OFFSET_A1 0x10000
|
|
kusano |
2b45e8 |
#define GEMM_OFFSET_B1 0x100000
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 16
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Tuning parameters selected when GENERIC is defined.
 *
 * Besides the S/D/C/Z routine families this block also covers the Q and X
 * families.  UNROLL_N is fixed; UNROLL_M depends on whether ARCH_X86 is
 * defined.  Unlike the architecture-specific blocks nearby, this block does
 * not define *_DEFAULT_P/Q/R: it defines SGEMM_P..XGEMM_P and
 * SGEMM_R..XGEMM_R directly as lower-case identifiers and SGEMM_Q..XGEMM_Q
 * as the literal 128.
 *
 * NOTE(review): sgemm_p, sgemm_r, etc. are presumably variables declared
 * elsewhere -- confirm.
 */
#ifdef GENERIC
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SNUMOPT 2
|
|
kusano |
2b45e8 |
#define DNUMOPT 2
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_A 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_OFFSET_B 0
|
|
kusano |
2b45e8 |
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* N unroll factors: independent of ARCH_X86. */
#define SGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 1
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* M unroll factors: S, D, C and Z are halved when ARCH_X86 is defined. */
#ifdef ARCH_X86
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
kusano |
2b45e8 |
#define DGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define CGEMM_DEFAULT_UNROLL_M 4
|
|
kusano |
2b45e8 |
#define ZGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* P values expand to lower-case identifiers, not literals. */
#define SGEMM_P sgemm_p
|
|
kusano |
2b45e8 |
#define DGEMM_P dgemm_p
|
|
kusano |
2b45e8 |
#define QGEMM_P qgemm_p
|
|
kusano |
2b45e8 |
#define CGEMM_P cgemm_p
|
|
kusano |
2b45e8 |
#define ZGEMM_P zgemm_p
|
|
kusano |
2b45e8 |
#define XGEMM_P xgemm_p
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* R values expand to lower-case identifiers, not literals. */
#define SGEMM_R sgemm_r
|
|
kusano |
2b45e8 |
#define DGEMM_R dgemm_r
|
|
kusano |
2b45e8 |
#define QGEMM_R qgemm_r
|
|
kusano |
2b45e8 |
#define CGEMM_R cgemm_r
|
|
kusano |
2b45e8 |
#define ZGEMM_R zgemm_r
|
|
kusano |
2b45e8 |
#define XGEMM_R xgemm_r
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Q values: the literal 128 for every routine family. */
#define SGEMM_Q 128
|
|
kusano |
2b45e8 |
#define DGEMM_Q 128
|
|
kusano |
2b45e8 |
#define QGEMM_Q 128
|
|
kusano |
2b45e8 |
#define CGEMM_Q 128
|
|
kusano |
2b45e8 |
#define ZGEMM_Q 128
|
|
kusano |
2b45e8 |
#define XGEMM_Q 128
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define SYMV_P 16
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback: QGEMM M unroll factor is 2 when no earlier block defined it. */
#ifndef QGEMM_DEFAULT_UNROLL_M
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Fallback: QGEMM N unroll factor is 2 when no earlier block defined it. */
#ifndef QGEMM_DEFAULT_UNROLL_N
|
|
kusano |
2b45e8 |
#define QGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Fallback: XGEMM M unroll factor is 2 when no earlier block defined it.
 * (The GENERIC block, when active, has already set it to 1.)
 */
#ifndef XGEMM_DEFAULT_UNROLL_M
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_M 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Fallback: XGEMM N unroll factor is 2 when no earlier block defined it.
 * (The GENERIC block, when active, has already set it to 1.)
 */
#ifndef XGEMM_DEFAULT_UNROLL_N
|
|
kusano |
2b45e8 |
#define XGEMM_DEFAULT_UNROLL_N 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * When HAVE_SSE2 is not defined, map the SHUFPD_n macros to shufps instead
 * of shufpd (shufpd is an SSE2 instruction).  Each shufps immediate picks
 * the same 64-bit halves that shufpd $n would:
 *   0x44 -> low  half of first operand, low  half of second  (shufpd $0)
 *   0x4e -> high half of first operand, low  half of second  (shufpd $1)
 *   0xe4 -> low  half of first operand, high half of second  (shufpd $2)
 *   0xee -> high half of first operand, high half of second  (shufpd $3)
 * Each expansion ends in a comma; presumably the register operands are
 * written after the macro at the use site -- confirm against the callers.
 */
#ifndef HAVE_SSE2
|
|
kusano |
2b45e8 |
#define SHUFPD_0 shufps $0x44,
|
|
kusano |
2b45e8 |
#define SHUFPD_1 shufps $0x4e,
|
|
kusano |
2b45e8 |
#define SHUFPD_2 shufps $0xe4,
|
|
kusano |
2b45e8 |
#define SHUFPD_3 shufps $0xee,
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Default: SHUFPD_0 is the native shufpd with immediate 0 unless already defined. */
#ifndef SHUFPD_0
|
|
kusano |
2b45e8 |
#define SHUFPD_0 shufpd $0,
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Default: SHUFPD_1 is the native shufpd with immediate 1 unless already defined. */
#ifndef SHUFPD_1
|
|
kusano |
2b45e8 |
#define SHUFPD_1 shufpd $1,
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Default: SHUFPD_2 is the native shufpd with immediate 2 unless already defined. */
#ifndef SHUFPD_2
|
|
kusano |
2b45e8 |
#define SHUFPD_2 shufpd $2,
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Default: SHUFPD_3 is the native shufpd with immediate 3 unless already defined. */
#ifndef SHUFPD_3
|
|
kusano |
2b45e8 |
#define SHUFPD_3 shufpd $3,
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* Default: SHUFPS_39 is shufps with immediate 0x39 unless already defined. */
#ifndef SHUFPS_39
|
|
kusano |
2b45e8 |
#define SHUFPS_39 shufps $0x39,
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif
|