/* GEMV kernel parameters: per-CPU move/prefetch settings and load/store helper macros. */
/* Include guard: everything up to the final #endif is processed only once. */
#ifndef GEMV_PARAM_H
#define GEMV_PARAM_H

/*
 * Mnemonic remapping.
 *
 * Any `movsd` macro inherited from an earlier header is dropped so the
 * plain instruction name is visible again (the OPTERON section below
 * re-aliases it).  Every `movapd` is rewritten to `movaps`; both are
 * 128-bit aligned moves -- presumably chosen for the shorter encoding.
 */
#undef movsd
#undef movapd
#define movapd movaps

/*
 * ATHLON: aligned moves (movaps) for every access class, prefetcht0 for
 * prefetching.  ALIGNED_ACCESS is only a flag here; it is consumed outside
 * this header.
 */
#if defined(ATHLON)
#define ALIGNED_ACCESS

#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * PENTIUM4: aligned moves (movaps) for every access class, prefetcht0 for
 * prefetching.  ALIGNED_ACCESS is only a flag here; it is consumed outside
 * this header.
 */
#if defined(PENTIUM4)
#define ALIGNED_ACCESS

#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 2
#endif

/*
 * CORE2 and PENRYN use exactly the same settings, so they share one
 * section: aligned moves (movaps) for every access class and prefetcht0
 * with PREFETCHSIZE 64 * 4.
 */
#if defined(CORE2) || defined(PENRYN)
#define ALIGNED_ACCESS

#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetcht0
#define PREFETCHSIZE	64 * 4
#endif

/*
 * NEHALEM: unaligned moves (movups) for every access class and no
 * ALIGNED_ACCESS flag.  PREFETCH and PREFETCHW both map to prefetcht0.
 */
#if defined(NEHALEM)
#define MOVUPS_A	movups
#define MOVUPS_XL	movups
#define MOVUPS_XS	movups
#define MOVUPS_YL	movups
#define MOVUPS_YS	movups

#define PREFETCH	prefetcht0
#define PREFETCHW	prefetcht0
#define PREFETCHSIZE	64 * 3
#endif

/*
 * OPTERON: no MOVUPS_* mnemonics are chosen here, so the helper macros at
 * the end of this header fall back to their movsd/movhps pairs -- and
 * `movsd` itself is aliased to `movlps`.
 * PREFETCHSIZE is 64 * 1 whether or not COMPLEX is defined.
 */
#if defined(OPTERON)
#define movsd		movlps

#define PREFETCH	prefetch
#define PREFETCHW	prefetchw
#define PREFETCHSIZE	64 * 1
#endif

/*
 * BARCELONA / SHANGHAI: aligned moves (movaps) for every access class,
 * prefetch/prefetchw for prefetching.  PREFETCHSIZE is 64 * 4 when COMPLEX
 * is defined and 64 * 2 otherwise.
 */
#if defined(BARCELONA) || defined(SHANGHAI)
#define ALIGNED_ACCESS

#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetch
#define PREFETCHW	prefetchw

#ifdef COMPLEX
#define PREFETCHSIZE	64 * 4
#else
#define PREFETCHSIZE	64 * 2
#endif
#endif

/*
 * NANO: aligned moves (movaps) for every access class, prefetcht0 for
 * prefetching.  PREFETCHSIZE is 64 * 2 when COMPLEX is defined and 64 * 1
 * otherwise.
 */
#if defined(NANO)
#define ALIGNED_ACCESS

#define MOVUPS_A	movaps
#define MOVUPS_XL	movaps
#define MOVUPS_XS	movaps
#define MOVUPS_YL	movaps
#define MOVUPS_YS	movaps

#define PREFETCH	prefetcht0

#ifdef COMPLEX
#define PREFETCHSIZE	64 * 2
#else
#define PREFETCHSIZE	64 * 1
#endif
#endif

/*
 * PREOFFSET default.  A value supplied before this header always wins.
 * Otherwise it is half of L1_DATA_LINESIZE when that macro is available,
 * and 32 when it is not.
 */
#if !defined(PREOFFSET) && defined(L1_DATA_LINESIZE)
#define PREOFFSET	(L1_DATA_LINESIZE >> 1)
#endif

#if !defined(PREOFFSET)
#define PREOFFSET	32
#endif

/*
 * Default unroll factors, used only when nothing was supplied before this
 * header.  ZGEMV_UNROLL is presumably the complex-kernel counterpart of
 * GEMV_UNROLL -- confirm against the kernels that read it.
 */
#if !defined(GEMV_UNROLL)
#define GEMV_UNROLL	4
#endif

#if !defined(ZGEMV_UNROLL)
#define ZGEMV_UNROLL	4
#endif

/* #define COPY_FORCE       */   /* Always copy X or Y to the buffer */
/* #define NOCOPY_UNALIGNED */   /* Do not copy if X or Y is not aligned */

/*
 * MOVUPS_A1 / MOVUPS_A2 -- load 16 bytes from memory into REGS.
 *
 *   MOVUPS_A1(OFF, ADDR, REGS)               source operand OFF(ADDR)
 *   MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS)  source operand OFF(ADDR, BASE, SCALE)
 *
 * If the CPU section above picked a mnemonic (MOVUPS_A), a single move is
 * emitted.  Otherwise the load is split in two: movsd fetches the 8 bytes
 * at OFF into the low half, movhps the 8 bytes at OFF + 8 into the high half.
 */
#ifndef MOVUPS_A
#define MOVUPS_A1(OFF, ADDR, REGS) \
	movsd	OFF(ADDR), REGS; \
	movhps	OFF + 8(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) \
	movsd	OFF(ADDR, BASE, SCALE), REGS; \
	movhps	OFF + 8(ADDR, BASE, SCALE), REGS
#else
#define MOVUPS_A1(OFF, ADDR, REGS) \
	MOVUPS_A	OFF(ADDR), REGS
#define MOVUPS_A2(OFF, ADDR, BASE, SCALE, REGS) \
	MOVUPS_A	OFF(ADDR, BASE, SCALE), REGS
#endif

/*
 * MOVRPS_A1 / MOVRPS_A2 -- load 16 bytes into REGS with the two 8-byte
 * halves swapped: movsd puts the 8 bytes at OFF + 8 into the low half and
 * movhps puts the 8 bytes at OFF into the high half.  Always a two-
 * instruction sequence; there is no single-move variant.
 */
#define MOVRPS_A1(OFF, ADDR, REGS) \
	movsd	OFF + 8(ADDR), REGS; \
	movhps	OFF(ADDR), REGS
#define MOVRPS_A2(OFF, ADDR, BASE, SCALE, REGS) \
	movsd	OFF + 8(ADDR, BASE, SCALE), REGS; \
	movhps	OFF(ADDR, BASE, SCALE), REGS

/*
 * MOVUPS_XL1(OFF, ADDR, REGS) -- load 16 bytes from OFF(ADDR) into REGS
 * (presumably the "X load" helper -- confirm against the kernels).
 * Uses the MOVUPS_XL mnemonic when a CPU section defined one, otherwise a
 * movsd (low 8 bytes) + movhps (high 8 bytes) pair.
 */
#ifndef MOVUPS_XL
#define MOVUPS_XL1(OFF, ADDR, REGS) \
	movsd	OFF(ADDR), REGS; \
	movhps	OFF + 8(ADDR), REGS
#else
#define MOVUPS_XL1(OFF, ADDR, REGS) \
	MOVUPS_XL	OFF(ADDR), REGS
#endif

/*
 * MOVUPS_XS1(OFF, ADDR, REGS) -- store the 16 bytes of REGS to OFF(ADDR)
 * (presumably the "X store" helper -- confirm against the kernels).
 * Uses the MOVUPS_XS mnemonic when a CPU section defined one, otherwise a
 * movsd (low 8 bytes) + movhps (high 8 bytes) pair.
 */
#ifndef MOVUPS_XS
#define MOVUPS_XS1(OFF, ADDR, REGS) \
	movsd	REGS, OFF(ADDR); \
	movhps	REGS, OFF + 8(ADDR)
#else
#define MOVUPS_XS1(OFF, ADDR, REGS) \
	MOVUPS_XS	REGS, OFF(ADDR)
#endif

/*
 * MOVUPS_YL1(OFF, ADDR, REGS) -- load 16 bytes from OFF(ADDR) into REGS
 * (presumably the "Y load" helper -- confirm against the kernels).
 * Uses the MOVUPS_YL mnemonic when a CPU section defined one, otherwise a
 * movsd (low 8 bytes) + movhps (high 8 bytes) pair.
 */
#ifndef MOVUPS_YL
#define MOVUPS_YL1(OFF, ADDR, REGS) \
	movsd	OFF(ADDR), REGS; \
	movhps	OFF + 8(ADDR), REGS
#else
#define MOVUPS_YL1(OFF, ADDR, REGS) \
	MOVUPS_YL	OFF(ADDR), REGS
#endif

/*
 * MOVUPS_YS1(OFF, ADDR, REGS) -- store the 16 bytes of REGS to OFF(ADDR)
 * (presumably the "Y store" helper -- confirm against the kernels).
 * Uses the MOVUPS_YS mnemonic when a CPU section defined one, otherwise a
 * movsd (low 8 bytes) + movhps (high 8 bytes) pair.
 */
#ifndef MOVUPS_YS
#define MOVUPS_YS1(OFF, ADDR, REGS) \
	movsd	REGS, OFF(ADDR); \
	movhps	REGS, OFF + 8(ADDR)
#else
#define MOVUPS_YS1(OFF, ADDR, REGS) \
	MOVUPS_YS	REGS, OFF(ADDR)
#endif



#endif /* GEMV_PARAM_H */