|
kusano |
2b45e8 |
/*
 * Include guard for this header. What follows is a set of per-CPU
 * preprocessor settings (move mnemonics, prefetch mnemonics and prefetch
 * distances) plus load/store helper macros built on top of them.
 * NOTE(review): the guard name suggests the consumers are GEMV kernels;
 * confirm against the files that include this header.
 */
#ifndef GEMV_PARAM_H
|
|
kusano |
2b45e8 |
#define GEMV_PARAM_H
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Remove any `movsd` macro left over from an earlier header so that the
 * name stands for the real instruction again from this point on.
 * In the visible part of this file only the OPTERON section aliases it
 * again (to movlps).
 */
#ifdef movsd
|
|
kusano |
2b45e8 |
#undef movsd
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Force every `movapd` token that follows to be emitted as `movaps`.
 * The #undef first discards any previous definition so that the #define
 * below cannot clash with it.
 * NOTE(review): presumably done because movaps has the shorter encoding
 * and moves the same 128 bits -- confirm the intent.
 */
#undef movapd
|
|
kusano |
2b45e8 |
#define movapd movaps
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * ATHLON settings: movaps for all five MOVUPS_* slots, prefetcht0 for
 * PREFETCH, and a prefetch distance of 64 * 3.
 * PREFETCHW is not defined in this section.
 */
#ifdef ATHLON
|
|
kusano |
2b45e8 |
/* Value-less flag; it is only set here, the code that tests it lives outside this header. */
#define ALIGNED_ACCESS
|
|
kusano |
2b45e8 |
/* Mnemonic substituted into the MOVUPS_A1 / MOVUPS_A2 load macros. */
#define MOVUPS_A movaps
|
|
kusano |
2b45e8 |
/* Mnemonic substituted into the MOVUPS_XL1 load macro. */
#define MOVUPS_XL movaps
|
|
kusano |
2b45e8 |
/* Mnemonic substituted into the MOVUPS_XS1 store macro. */
#define MOVUPS_XS movaps
|
|
kusano |
2b45e8 |
/* Mnemonic substituted into the MOVUPS_YL1 load macro. */
#define MOVUPS_YL movaps
|
|
kusano |
2b45e8 |
/* Mnemonic substituted into the MOVUPS_YS1 store macro. */
#define MOVUPS_YS movaps
|
|
kusano |
2b45e8 |
#define PREFETCH prefetcht0
|
|
kusano |
2b45e8 |
/* Expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#define PREFETCHSIZE 64 * 3
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * PENTIUM4 settings: movaps for all five MOVUPS_* slots (A, XL and YL feed
 * the load helper macros, XS and YS the store helper macros), prefetcht0
 * for PREFETCH, and a prefetch distance of 64 * 2.
 * PREFETCHW is not defined in this section.
 */
#ifdef PENTIUM4
|
|
kusano |
2b45e8 |
/* Value-less flag; it is only set here, the code that tests it lives outside this header. */
#define ALIGNED_ACCESS
|
|
kusano |
2b45e8 |
#define MOVUPS_A movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XS movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YS movaps
|
|
kusano |
2b45e8 |
#define PREFETCH prefetcht0
|
|
kusano |
2b45e8 |
/* Expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#define PREFETCHSIZE 64 * 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * CORE2 settings: movaps for all five MOVUPS_* slots (A, XL and YL feed
 * the load helper macros, XS and YS the store helper macros), prefetcht0
 * for PREFETCH, and a prefetch distance of 64 * 4.
 * PREFETCHW is not defined in this section.
 */
#ifdef CORE2
|
|
kusano |
2b45e8 |
/* Value-less flag; it is only set here, the code that tests it lives outside this header. */
#define ALIGNED_ACCESS
|
|
kusano |
2b45e8 |
#define MOVUPS_A movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XS movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YS movaps
|
|
kusano |
2b45e8 |
#define PREFETCH prefetcht0
|
|
kusano |
2b45e8 |
/* Expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#define PREFETCHSIZE 64 * 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * PENRYN settings: movaps for all five MOVUPS_* slots (A, XL and YL feed
 * the load helper macros, XS and YS the store helper macros), prefetcht0
 * for PREFETCH, and a prefetch distance of 64 * 4.
 * PREFETCHW is not defined in this section.
 */
#ifdef PENRYN
|
|
kusano |
2b45e8 |
/* Value-less flag; it is only set here, the code that tests it lives outside this header. */
#define ALIGNED_ACCESS
|
|
kusano |
2b45e8 |
#define MOVUPS_A movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XS movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YS movaps
|
|
kusano |
2b45e8 |
#define PREFETCH prefetcht0
|
|
kusano |
2b45e8 |
/* Expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#define PREFETCHSIZE 64 * 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * NEHALEM settings: movups (no alignment requirement) for all five
 * MOVUPS_* slots, prefetcht0 for both PREFETCH and PREFETCHW, and a
 * prefetch distance of 64 * 3.
 * Unlike the movaps-based sections, ALIGNED_ACCESS is not defined here.
 */
#ifdef NEHALEM
|
|
kusano |
2b45e8 |
#define MOVUPS_A movups
|
|
kusano |
2b45e8 |
#define MOVUPS_XL movups
|
|
kusano |
2b45e8 |
#define MOVUPS_XS movups
|
|
kusano |
2b45e8 |
#define MOVUPS_YL movups
|
|
kusano |
2b45e8 |
#define MOVUPS_YS movups
|
|
kusano |
2b45e8 |
#define PREFETCH prefetcht0
|
|
kusano |
2b45e8 |
/* Same mnemonic as PREFETCH: no distinct write-prefetch instruction is selected here. */
#define PREFETCHW prefetcht0
|
|
kusano |
2b45e8 |
/* Expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#define PREFETCHSIZE 64 * 3
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * OPTERON settings: prefetch / prefetchw mnemonics and a prefetch distance
 * of 64 * 1. Neither ALIGNED_ACCESS nor any MOVUPS_* mnemonic is defined
 * here, so the helper macros later in this file take their two-instruction
 * movsd + movhps fallback -- and, because of the alias at the end of this
 * section, each `movsd` in that fallback is emitted as `movlps`.
 */
#ifdef OPTERON
|
|
kusano |
2b45e8 |
#define PREFETCH prefetch
|
|
kusano |
2b45e8 |
#define PREFETCHW prefetchw
|
|
kusano |
2b45e8 |
/* Both branches below define the same value (64 * 1), so COMPLEX has no effect on PREFETCHSIZE here. */
#ifndef COMPLEX
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 64 * 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 64 * 1
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
/*
 * From here on every `movsd` token is replaced by `movlps`. The alias is a
 * plain macro, so it also applies to the source that includes this header.
 */
#define movsd movlps
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * BARCELONA / SHANGHAI settings: movaps for all five MOVUPS_* slots
 * (A, XL and YL feed the load helper macros, XS and YS the store helper
 * macros), prefetch / prefetchw mnemonics, and a prefetch distance of
 * 64 * 2 without COMPLEX or 64 * 4 with COMPLEX.
 */
#if defined(BARCELONA) || defined(SHANGHAI)
|
|
kusano |
2b45e8 |
/* Value-less flag; it is only set here, the code that tests it lives outside this header. */
#define ALIGNED_ACCESS
|
|
kusano |
2b45e8 |
#define MOVUPS_A movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XS movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YS movaps
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#define PREFETCH prefetch
|
|
kusano |
2b45e8 |
#define PREFETCHW prefetchw
|
|
kusano |
2b45e8 |
/* PREFETCHSIZE expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#ifndef COMPLEX
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 64 * 2
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 64 * 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * NANO settings: movaps for all five MOVUPS_* slots (A, XL and YL feed
 * the load helper macros, XS and YS the store helper macros), prefetcht0
 * for PREFETCH, and a prefetch distance of 64 * 1 without COMPLEX or
 * 64 * 2 with COMPLEX. PREFETCHW is not defined in this section.
 */
#ifdef NANO
|
|
kusano |
2b45e8 |
/* Value-less flag; it is only set here, the code that tests it lives outside this header. */
#define ALIGNED_ACCESS
|
|
kusano |
2b45e8 |
#define MOVUPS_A movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_XS movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YL movaps
|
|
kusano |
2b45e8 |
#define MOVUPS_YS movaps
|
|
kusano |
2b45e8 |
#define PREFETCH prefetcht0
|
|
kusano |
2b45e8 |
/* PREFETCHSIZE expands without parentheses: write (PREFETCHSIZE) when combining it with other operators. */
#ifndef COMPLEX
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 64 * 1
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define PREFETCHSIZE 64 * 2
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Default for PREOFFSET, used only when nothing defined it earlier:
 * half of L1_DATA_LINESIZE when that macro is available, otherwise 32.
 * NOTE(review): how PREOFFSET is applied (presumably as an extra offset on
 * prefetch addresses) is decided by the including code -- confirm there.
 */
#ifndef PREOFFSET
|
|
kusano |
2b45e8 |
#ifdef L1_DATA_LINESIZE
|
|
kusano |
2b45e8 |
/* Parenthesised, so it is safe inside larger expressions. */
#define PREOFFSET (L1_DATA_LINESIZE >> 1)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define PREOFFSET 32
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Default GEMV_UNROLL of 4; a definition made before this header is
 * included takes precedence.
 * NOTE(review): presumably the unroll factor of the real-valued kernels --
 * confirm against the code that reads it.
 */
#ifndef GEMV_UNROLL
|
|
kusano |
2b45e8 |
#define GEMV_UNROLL 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * Default ZGEMV_UNROLL of 4; a definition made before this header is
 * included takes precedence.
 * NOTE(review): presumably the unroll factor of the complex-valued
 * kernels -- confirm against the code that reads it.
 */
#ifndef ZGEMV_UNROLL
|
|
kusano |
2b45e8 |
#define ZGEMV_UNROLL 4
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/* #define COPY_FORCE */ /* Always copy X or Y to the buffer */
|
|
kusano |
2b45e8 |
/* #define NOCOPY_UNALIGNED */ /* Do not copy if X or Y is not aligned */
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * MOVUPS_A1(OFF, ADDR, REGS)              : load from OFF(ADDR) into REGS.
 * MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) : same, using the indexed
 *                                           operand OFF(ADDR, BASE, SCALE).
 * Operands are written in AT&T order (memory source first, register
 * destination last).
 *
 * When a CPU section defined MOVUPS_A, that single mnemonic is used.
 * Otherwise the value is fetched as two 8-byte halves: `movsd` for the low
 * half at OFF and `movhps` for the high half at OFF + 8. That fallback
 * expands to two statements separated by ';'.
 */
#ifdef MOVUPS_A
|
|
kusano |
2b45e8 |
#define MOVUPS_A1(OFF, ADDR, REGS) MOVUPS_A OFF(ADDR), REGS
|
|
kusano |
2b45e8 |
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) MOVUPS_A OFF(ADDR, BASE, SCALE), REGS
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
/* `movsd` here is itself a macro (movlps) when the OPTERON section is active. */
#define MOVUPS_A1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
|
|
kusano |
2b45e8 |
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF(ADDR, BASE, SCALE), REGS; movhps OFF + 8(ADDR, BASE, SCALE), REGS
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * MOVRPS_A1 / MOVRPS_A2: load the two 8-byte halves in swapped order.
 * The low half of REGS is filled from OFF + 8 (`movsd`), the high half
 * from OFF (`movhps`). MOVRPS_A2 does the same through the indexed operand
 * OFF(ADDR, BASE, SCALE).
 * Defined unconditionally: no per-CPU MOVUPS_* mnemonic is consulted.
 * `movsd` here is itself a macro (movlps) when the OPTERON section is active.
 */
#define MOVRPS_A1(OFF, ADDR, REGS) movsd OFF + 8(ADDR), REGS; movhps OFF(ADDR), REGS
|
|
kusano |
2b45e8 |
#define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF + 8(ADDR, BASE, SCALE), REGS; movhps OFF(ADDR, BASE, SCALE), REGS
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * MOVUPS_XL1(OFF, ADDR, REGS): load from OFF(ADDR) into REGS.
 * Uses the single MOVUPS_XL mnemonic when a CPU section defined it;
 * otherwise loads two 8-byte halves (`movsd` at OFF for the low half,
 * `movhps` at OFF + 8 for the high half), expanding to two statements
 * separated by ';'.
 */
#ifdef MOVUPS_XL
|
|
kusano |
2b45e8 |
#define MOVUPS_XL1(OFF, ADDR, REGS) MOVUPS_XL OFF(ADDR), REGS
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define MOVUPS_XL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * MOVUPS_XS1(OFF, ADDR, REGS): store REGS to OFF(ADDR).
 * Note that the macro's parameter order stays (OFF, ADDR, REGS) while the
 * emitted instruction puts the register first (AT&T source operand).
 * Uses the single MOVUPS_XS mnemonic when a CPU section defined it;
 * otherwise stores two 8-byte halves (`movsd` to OFF for the low half,
 * `movhps` to OFF + 8 for the high half), expanding to two statements
 * separated by ';'.
 */
#ifdef MOVUPS_XS
|
|
kusano |
2b45e8 |
#define MOVUPS_XS1(OFF, ADDR, REGS) MOVUPS_XS REGS, OFF(ADDR)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define MOVUPS_XS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * MOVUPS_YL1(OFF, ADDR, REGS): load from OFF(ADDR) into REGS.
 * Uses the single MOVUPS_YL mnemonic when a CPU section defined it;
 * otherwise loads two 8-byte halves (`movsd` at OFF for the low half,
 * `movhps` at OFF + 8 for the high half), expanding to two statements
 * separated by ';'.
 */
#ifdef MOVUPS_YL
|
|
kusano |
2b45e8 |
#define MOVUPS_YL1(OFF, ADDR, REGS) MOVUPS_YL OFF(ADDR), REGS
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define MOVUPS_YL1(OFF, ADDR, REGS) movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
/*
 * MOVUPS_YS1(OFF, ADDR, REGS): store REGS to OFF(ADDR).
 * Note that the macro's parameter order stays (OFF, ADDR, REGS) while the
 * emitted instruction puts the register first (AT&T source operand).
 * Uses the single MOVUPS_YS mnemonic when a CPU section defined it;
 * otherwise stores two 8-byte halves (`movsd` to OFF for the low half,
 * `movhps` to OFF + 8 for the high half), expanding to two statements
 * separated by ';'.
 */
#ifdef MOVUPS_YS
|
|
kusano |
2b45e8 |
#define MOVUPS_YS1(OFF, ADDR, REGS) MOVUPS_YS REGS, OFF(ADDR)
|
|
kusano |
2b45e8 |
#else
|
|
kusano |
2b45e8 |
#define MOVUPS_YS1(OFF, ADDR, REGS) movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
|
|
kusano |
2b45e8 |
#endif
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
|
|
kusano |
2b45e8 |
#endif /* GEMV_PARAM_H */
|