/*
 * GEMV_PARAM_H -- CPU-specific tuning macros and SSE load/store helpers.
 *
 * This header consists solely of C preprocessor directives.  The macro
 * bodies are x86 SSE instructions in AT&T operand order
 * (displacement(base, index, scale); source operand before destination).
 *
 * The including file selects a target by defining one of ATHLON, PENTIUM4,
 * CORE2, PENRYN, NEHALEM, OPTERON, BARCELONA, SHANGHAI or NANO before
 * inclusion; COMPLEX additionally changes some prefetch distances.
 * Depending on the target the header provides:
 *
 *   ALIGNED_ACCESS   defined exactly for the targets that map MOVUPS_* to
 *                    movaps
 *   MOVUPS_A, MOVUPS_XL, MOVUPS_XS, MOVUPS_YL, MOVUPS_YS
 *                    mnemonic of the single 16-byte move used by the
 *                    corresponding *1 / *2 helper macros further down
 *   PREFETCH, PREFETCHW
 *                    prefetch mnemonics (PREFETCHW only on some targets)
 *   PREFETCHSIZE     prefetch distance
 *
 * NOTE(review): PREFETCHSIZE expands to an unparenthesized product
 * ("64 * 3"); its unit is decided by the code that uses it and is not
 * visible here -- confirm against the kernels.
 * NOTE(review): the suffixes presumably mean A = matrix, X/Y = vectors,
 * L = load, S = store; the operand order of the helpers below is consistent
 * with L/S, the rest should be confirmed against the kernels.
 */
#ifndef GEMV_PARAM_H
#define GEMV_PARAM_H

/* Drop any earlier macro definition of movsd; OPTERON may redefine it below.
   (#undef of a name that is not a macro is valid and has no effect.) */
#undef movsd

/* From here on every movapd in the including file is assembled as movaps. */
#undef movapd
#define movapd movaps

#ifdef ATHLON
#define ALIGNED_ACCESS
#define MOVUPS_A        movaps
#define MOVUPS_XL       movaps
#define MOVUPS_XS       movaps
#define MOVUPS_YL       movaps
#define MOVUPS_YS       movaps
#define PREFETCH        prefetcht0
#define PREFETCHSIZE    64 * 3
#endif

#ifdef PENTIUM4
#define ALIGNED_ACCESS
#define MOVUPS_A        movaps
#define MOVUPS_XL       movaps
#define MOVUPS_XS       movaps
#define MOVUPS_YL       movaps
#define MOVUPS_YS       movaps
#define PREFETCH        prefetcht0
#define PREFETCHSIZE    64 * 2
#endif

#ifdef CORE2
#define ALIGNED_ACCESS
#define MOVUPS_A        movaps
#define MOVUPS_XL       movaps
#define MOVUPS_XS       movaps
#define MOVUPS_YL       movaps
#define MOVUPS_YS       movaps
#define PREFETCH        prefetcht0
#define PREFETCHSIZE    64 * 4
#endif

#ifdef PENRYN
#define ALIGNED_ACCESS
#define MOVUPS_A        movaps
#define MOVUPS_XL       movaps
#define MOVUPS_XS       movaps
#define MOVUPS_YL       movaps
#define MOVUPS_YS       movaps
#define PREFETCH        prefetcht0
#define PREFETCHSIZE    64 * 4
#endif

/* NEHALEM is the only target that uses movups; it does not define
   ALIGNED_ACCESS. */
#ifdef NEHALEM
#define MOVUPS_A        movups
#define MOVUPS_XL       movups
#define MOVUPS_XS       movups
#define MOVUPS_YL       movups
#define MOVUPS_YS       movups
#define PREFETCH        prefetcht0
#define PREFETCHW       prefetcht0
#define PREFETCHSIZE    64 * 3
#endif

/*
 * OPTERON defines neither ALIGNED_ACCESS nor any MOVUPS_* mnemonic, so the
 * helper macros below use their two-instruction fallbacks.  Because movsd is
 * redefined here and macro bodies are rescanned when they are expanded, the
 * "movsd" in those fallbacks (and in MOVRPS_*) is emitted as movlps.
 */
#ifdef OPTERON
#define PREFETCH        prefetch
#define PREFETCHW       prefetchw
/* Same distance with and without COMPLEX. */
#define PREFETCHSIZE    64 * 1
#define movsd           movlps
#endif

#if defined(BARCELONA) || defined(SHANGHAI)
#define ALIGNED_ACCESS
#define MOVUPS_A        movaps
#define MOVUPS_XL       movaps
#define MOVUPS_XS       movaps
#define MOVUPS_YL       movaps
#define MOVUPS_YS       movaps

#define PREFETCH        prefetch
#define PREFETCHW       prefetchw
#ifndef COMPLEX
#define PREFETCHSIZE    64 * 2
#else
#define PREFETCHSIZE    64 * 4
#endif
#endif

#ifdef NANO
#define ALIGNED_ACCESS
#define MOVUPS_A        movaps
#define MOVUPS_XL       movaps
#define MOVUPS_XS       movaps
#define MOVUPS_YL       movaps
#define MOVUPS_YS       movaps
#define PREFETCH        prefetcht0
#ifndef COMPLEX
#define PREFETCHSIZE    64 * 1
#else
#define PREFETCHSIZE    64 * 2
#endif
#endif

/* PREOFFSET defaults to half of L1_DATA_LINESIZE when that is known, and to
   32 otherwise.  A definition made before inclusion takes precedence. */
#ifndef PREOFFSET
#ifdef L1_DATA_LINESIZE
#define PREOFFSET       (L1_DATA_LINESIZE >> 1)
#else
#define PREOFFSET       32
#endif
#endif

/* Default unroll factors; a definition made before inclusion takes
   precedence. */
#ifndef GEMV_UNROLL
#define GEMV_UNROLL     4
#endif

#ifndef ZGEMV_UNROLL
#define ZGEMV_UNROLL    4
#endif

/* #define COPY_FORCE */ /* Always copy X or Y to the buffer */
/* #define NOCOPY_UNALIGNED */ /* Not copy if X or Y is not aligned */

/*
 * MOVUPS_A1(OFF, ADDR, REGS)
 *     Load 16 bytes from OFF(ADDR) into REGS.
 * MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS)
 *     Same, from OFF(ADDR, BASE, SCALE).  ADDR sits in the base slot and
 *     BASE in the index slot of the AT&T memory operand.
 *
 * When the target defined MOVUPS_A, one MOVUPS_A instruction is emitted.
 * Otherwise the register is filled in two 8-byte halves: movsd loads the low
 * half from OFF and movhps loads the high half from OFF + 8.
 *
 * OFF is substituted textually in front of "+ 8"; pass an expression that
 * binds at least as tightly as "+".
 */
#ifdef MOVUPS_A
#define MOVUPS_A1(OFF, ADDR, REGS)              MOVUPS_A OFF(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) MOVUPS_A OFF(ADDR, BASE, SCALE), REGS
#else
#define MOVUPS_A1(OFF, ADDR, REGS)              movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF(ADDR, BASE, SCALE), REGS; movhps OFF + 8(ADDR, BASE, SCALE), REGS
#endif

/*
 * MOVRPS_A1 / MOVRPS_A2: like the two-instruction MOVUPS_A1 / MOVUPS_A2
 * fallbacks, but with the halves swapped: the low half of REGS comes from
 * OFF + 8 and the high half from OFF.  There is no single-instruction
 * variant; these are emitted the same way on every target.
 */
#define MOVRPS_A1(OFF, ADDR, REGS)              movsd OFF + 8(ADDR), REGS; movhps OFF(ADDR), REGS
#define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) movsd OFF + 8(ADDR, BASE, SCALE), REGS; movhps OFF(ADDR, BASE, SCALE), REGS

/* MOVUPS_XL1(OFF, ADDR, REGS): load 16 bytes from OFF(ADDR) into REGS,
   using MOVUPS_XL if defined, else movsd (low half) + movhps (high half). */
#ifdef MOVUPS_XL
#define MOVUPS_XL1(OFF, ADDR, REGS)             MOVUPS_XL OFF(ADDR), REGS
#else
#define MOVUPS_XL1(OFF, ADDR, REGS)             movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
#endif

/* MOVUPS_XS1(OFF, ADDR, REGS): store the 16 bytes of REGS to OFF(ADDR),
   using MOVUPS_XS if defined, else movsd (low half) + movhps (high half). */
#ifdef MOVUPS_XS
#define MOVUPS_XS1(OFF, ADDR, REGS)             MOVUPS_XS REGS, OFF(ADDR)
#else
#define MOVUPS_XS1(OFF, ADDR, REGS)             movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
#endif

/* MOVUPS_YL1(OFF, ADDR, REGS): load 16 bytes from OFF(ADDR) into REGS,
   using MOVUPS_YL if defined, else movsd (low half) + movhps (high half). */
#ifdef MOVUPS_YL
#define MOVUPS_YL1(OFF, ADDR, REGS)             MOVUPS_YL OFF(ADDR), REGS
#else
#define MOVUPS_YL1(OFF, ADDR, REGS)             movsd OFF(ADDR), REGS; movhps OFF + 8(ADDR), REGS
#endif

/* MOVUPS_YS1(OFF, ADDR, REGS): store the 16 bytes of REGS to OFF(ADDR),
   using MOVUPS_YS if defined, else movsd (low half) + movhps (high half). */
#ifdef MOVUPS_YS
#define MOVUPS_YS1(OFF, ADDR, REGS)             MOVUPS_YS REGS, OFF(ADDR)
#else
#define MOVUPS_YS1(OFF, ADDR, REGS)             movsd REGS, OFF(ADDR); movhps REGS, OFF + 8(ADDR)
#endif

#endif /* GEMV_PARAM_H */