/*
 * GEMV kernel tuning parameters.
 *
 * This header is pure C-preprocessor text: it selects instruction mnemonics,
 * prefetch settings and load/store helper macros according to which CPU
 * macro (ATHLON, PENTIUM4, CORE2, ...) is defined by the including file.
 */
#ifndef GEMV_PARAM_H
#define GEMV_PARAM_H

/*
 * Drop any macro definition of movsd made before this header was included,
 * so that the bare mnemonic is used unless a CPU section below redefines it.
 */
#ifdef movsd
#undef movsd
#endif

/*
 * From here on every `movapd` token is rewritten to `movaps`.
 * NOTE(review): presumably chosen because both perform the same 128-bit
 * aligned move and movaps has the shorter encoding -- confirm intent.
 */
#undef  movapd
#define movapd movaps

/*
 * ATHLON: aligned 128-bit moves (movaps) for the matrix (A) and for the
 * X/Y vector loads (L) and stores (S); prefetcht0 for read prefetch.
 * PREFETCHW is not defined in this section.
 * ALIGNED_ACCESS is only defined here; it is consumed outside this header.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#ifdef ATHLON
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * PENTIUM4: aligned 128-bit moves (movaps) for the matrix (A) and for the
 * X/Y vector loads (L) and stores (S); prefetcht0 for read prefetch.
 * PREFETCHW is not defined in this section.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#ifdef PENTIUM4
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 2
#endif

/*
 * CORE2: aligned 128-bit moves (movaps) for the matrix (A) and for the
 * X/Y vector loads (L) and stores (S); prefetcht0 for read prefetch.
 * PREFETCHW is not defined in this section.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#ifdef CORE2
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 4
#endif

/*
 * PENRYN: same settings as the CORE2 section -- aligned movaps for all
 * five MOVUPS_* mnemonics, prefetcht0, and PREFETCHSIZE of 64 * 4.
 * PREFETCHW is not defined in this section.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#ifdef PENRYN
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 4
#endif

/*
 * NEHALEM: unaligned 128-bit moves (movups) for the matrix (A) and for the
 * X/Y vector loads (L) and stores (S). ALIGNED_ACCESS is deliberately not
 * defined in this section. Both PREFETCH and PREFETCHW map to prefetcht0.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#ifdef NEHALEM
#define MOVUPS_A	movups
#define MOVUPS_XL	movups
#define MOVUPS_XS	movups
#define MOVUPS_YL	movups
#define MOVUPS_YS	movups
#define PREFETCH	prefetcht0
#define PREFETCHW	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * OPTERON: uses the AMD prefetch / prefetchw instructions.
 *
 * No MOVUPS_* mnemonic is defined in this section, so unless one is defined
 * elsewhere the helper macros later in this header fall back to their
 * two-instruction (low half + movhps) form.
 *
 * movsd is redefined as movlps, so every `movsd` token after this point --
 * including those in the fallback helper macros -- expands to movlps.
 */
#ifdef OPTERON
#define PREFETCH	prefetch
#define PREFETCHW	prefetchw
/* Both branches currently select the same value; the split is kept so the
   real and complex kernels can be tuned independently. */
#ifndef COMPLEX
#define PREFETCHSIZE	64 * 1
#else
#define PREFETCHSIZE	64 * 1
#endif
#define movsd		movlps
#endif

/*
 * BARCELONA / SHANGHAI: aligned 128-bit moves (movaps) for the matrix (A)
 * and for the X/Y vector loads (L) and stores (S), combined with the AMD
 * prefetch / prefetchw instructions. The prefetch distance differs between
 * the real (64 * 2) and COMPLEX (64 * 4) builds.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#if defined(BARCELONA) || defined(SHANGHAI)
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetch
#define PREFETCHW	prefetchw
#ifndef COMPLEX
#define PREFETCHSIZE	64 * 2
#else
#define PREFETCHSIZE	64 * 4
#endif
#endif

/*
 * NANO: aligned 128-bit moves (movaps) for the matrix (A) and for the X/Y
 * vector loads (L) and stores (S); prefetcht0 for read prefetch.
 * PREFETCHW is not defined in this section. The prefetch distance differs
 * between the real (64 * 1) and COMPLEX (64 * 2) builds.
 * PREFETCHSIZE expands to an unparenthesized product, so use sites must
 * parenthesize it when combining it with other operators.
 */
#ifdef NANO
#define ALIGNED_ACCESS
#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps
#define PREFETCH	prefetcht0
#ifndef COMPLEX
#define PREFETCHSIZE	64 * 1
#else
#define PREFETCHSIZE	64 * 2
#endif
#endif

/*
 * Default for PREOFFSET when the including file has not set it:
 * half of L1_DATA_LINESIZE when that macro is available, otherwise 32.
 * NOTE(review): presumably a byte offset added to prefetch addresses --
 * confirm against the kernels that use it.
 */
#ifndef PREOFFSET
#ifdef L1_DATA_LINESIZE
#define PREOFFSET	(L1_DATA_LINESIZE >> 1)
#else
#define PREOFFSET	32
#endif
#endif

/* Default GEMV_UNROLL of 4; a definition made before this header wins. */
#ifndef  GEMV_UNROLL
#define  GEMV_UNROLL 4
#endif

/* Default ZGEMV_UNROLL of 4; a definition made before this header wins. */
#ifndef ZGEMV_UNROLL
#define ZGEMV_UNROLL 4
#endif

/* #define COPY_FORCE       */   /* Always copy X or Y to the buffer */
/* #define NOCOPY_UNALIGNED */   /* Do not copy if X or Y is not aligned */

/*
 * 16-byte loads (AT&T operand order: memory source first, register last).
 *
 *   MOVUPS_A1(OFF, ADDR, REGS)              loads from OFF(ADDR)
 *   MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) loads from OFF(ADDR, BASE, SCALE)
 *
 * When a CPU section defined MOVUPS_A, a single MOVUPS_A instruction is
 * emitted. Otherwise the load is split in two: movsd fills the low 8 bytes
 * from OFF and movhps fills the high 8 bytes from OFF + 8. The fallback
 * expands to two statements separated by `;`, and `movsd` is itself subject
 * to macro replacement if it has been #defined.
 */
#ifdef MOVUPS_A
#define MOVUPS_A1(OFF, ADDR, REGS)		MOVUPS_A	OFF(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS)	MOVUPS_A	OFF(ADDR, BASE, SCALE), REGS
#else
#define MOVUPS_A1(OFF, ADDR, REGS)		movsd	OFF(ADDR), REGS; movhps	OFF + 8(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS)	movsd	OFF(ADDR, BASE, SCALE), REGS; movhps	OFF + 8(ADDR, BASE, SCALE), REGS
#endif

/*
 * Half-swapped 16-byte loads: the low 8 bytes of REGS come from OFF + 8 and
 * the high 8 bytes come from OFF, i.e. the two 8-byte halves are exchanged
 * relative to their order in memory. Always emitted as two statements
 * (movsd then movhps); there is no single-instruction variant.
 *
 *   MOVRPS_A1(OFF, ADDR, REGS)              addresses OFF(ADDR)
 *   MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) addresses OFF(ADDR, BASE, SCALE)
 */
#define MOVRPS_A1(OFF, ADDR, REGS)		movsd	OFF + 8(ADDR), REGS; movhps	OFF(ADDR), REGS
#define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS)	movsd	OFF + 8(ADDR, BASE, SCALE), REGS; movhps	OFF(ADDR, BASE, SCALE), REGS

/*
 * MOVUPS_XL1(OFF, ADDR, REGS): 16-byte load from OFF(ADDR) into REGS.
 * Uses the single MOVUPS_XL instruction when a CPU section defined it;
 * otherwise loads the low 8 bytes with movsd and the high 8 bytes (from
 * OFF + 8) with movhps.
 * NOTE(review): the XL/XS/YL/YS suffixes presumably stand for X/Y vector
 * Load/Store -- confirm against the kernels.
 */
#ifdef MOVUPS_XL
#define MOVUPS_XL1(OFF, ADDR, REGS)			MOVUPS_XL	OFF(ADDR), REGS
#else
#define MOVUPS_XL1(OFF, ADDR, REGS)			movsd	OFF(ADDR), REGS; movhps	OFF + 8(ADDR), REGS
#endif

/*
 * MOVUPS_XS1(OFF, ADDR, REGS): 16-byte store of REGS to OFF(ADDR).
 * Note the argument order matches the load helpers (offset, address,
 * register) even though the emitted instruction has the register first.
 * Uses the single MOVUPS_XS instruction when a CPU section defined it;
 * otherwise stores the low 8 bytes with movsd and the high 8 bytes (to
 * OFF + 8) with movhps.
 */
#ifdef MOVUPS_XS
#define MOVUPS_XS1(OFF, ADDR, REGS)			MOVUPS_XS	REGS, OFF(ADDR)
#else
#define MOVUPS_XS1(OFF, ADDR, REGS)			movsd	REGS, OFF(ADDR); movhps	REGS, OFF + 8(ADDR)
#endif

/*
 * MOVUPS_YL1(OFF, ADDR, REGS): 16-byte load from OFF(ADDR) into REGS.
 * Uses the single MOVUPS_YL instruction when a CPU section defined it;
 * otherwise loads the low 8 bytes with movsd and the high 8 bytes (from
 * OFF + 8) with movhps.
 */
#ifdef MOVUPS_YL
#define MOVUPS_YL1(OFF, ADDR, REGS)			MOVUPS_YL	OFF(ADDR), REGS
#else
#define MOVUPS_YL1(OFF, ADDR, REGS)			movsd	OFF(ADDR), REGS; movhps	OFF + 8(ADDR), REGS
#endif

/*
 * MOVUPS_YS1(OFF, ADDR, REGS): 16-byte store of REGS to OFF(ADDR).
 * Uses the single MOVUPS_YS instruction when a CPU section defined it;
 * otherwise stores the low 8 bytes with movsd and the high 8 bytes (to
 * OFF + 8) with movhps.
 */
#ifdef MOVUPS_YS
#define MOVUPS_YS1(OFF, ADDR, REGS)			MOVUPS_YS	REGS, OFF(ADDR)
#else
#define MOVUPS_YS1(OFF, ADDR, REGS)			movsd	REGS, OFF(ADDR); movhps	REGS, OFF + 8(ADDR)
#endif



#endif /* GEMV_PARAM_H */