| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| #ifndef COMMON_X86 |
| #define COMMON_X86 |
| |
| #ifndef ASSEMBLER |
| |
/* When C_SUN is defined, rewrite the GNU-style __asm__ keyword to __asm
   and make __volatile__ expand to nothing, so the inline-assembly
   statements in this header use the spelling selected for that build. */
#ifdef C_SUN
#define __asm__ __asm
#define __volatile__
#endif

/*
 * Memory barriers.
 *
 *   MB  - full barrier  (mfence when HAVE_SSE2 is defined)
 *   WMB - store barrier (sfence when HAVE_SSE2 is defined)
 *
 * Every variant carries a "memory" clobber so the compiler does not move
 * loads or stores across the barrier. Without HAVE_SSE2 no fence
 * instruction is emitted and the macros act as compiler barriers only.
 * Each macro already ends in ';', so both "MB" and "MB;" are valid
 * statements.
 */
#ifdef HAVE_SSE2
#define MB   __asm__ __volatile__ ("mfence" : : : "memory");
#define WMB  __asm__ __volatile__ ("sfence" : : : "memory");
#else
#define MB   __asm__ __volatile__ ("" : : : "memory");
#define WMB  __asm__ __volatile__ ("" : : : "memory");
#endif

| static void __inline blas_lock(volatile BLASULONG *address){ |
| |
| int ret; |
| |
| do { |
| while (*address) {YIELDING;}; |
| |
| __asm__ __volatile__( |
| "xchgl %0, %1\n" |
| : "=r"(ret), "=m"(*address) |
| : "0"(1), "m"(*address) |
| : "memory"); |
| |
| } while (ret); |
| } |
| |
| static __inline BLASULONG rpcc(void){ |
| BLASULONG a, d; |
| |
| __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); |
| |
| return ((BLASULONG)a + ((BLASULONG)d << 32)); |
| } |
| |
| #define RPCC64BIT |
| |
| static __inline BLASULONG getstackaddr(void){ |
| BLASULONG addr; |
| |
| __asm__ __volatile__ ("movq %%rsp, %0" |
| : "=r"(addr) : : "memory"); |
| |
| return addr; |
| } |
| |
/*
 * Execute CPUID for leaf `op` and store the resulting register values.
 *
 *   op   - leaf number, loaded into EAX before the instruction
 *   eax, ebx, ecx, edx - receive the four output registers
 *
 * ECX is explicitly loaded with 0 before the instruction. Several CPUID
 * leaves read a sub-leaf index from ECX; leaving the register unset would
 * make their output depend on whatever value it happened to hold.
 */
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){

  __asm__ __volatile__("cpuid"
		       : "=a" (*eax),
			 "=b" (*ebx),
			 "=c" (*ecx),
			 "=d" (*edx)
		       : "0" (op), "2" (0));
}

/* Signals that WhereAmI() is provided by this header. */
#define WHEREAMI

/*
 * Return an identifier for the CPU the caller is currently running on.
 *
 * The value is taken from CPUID leaf 1: the field selected by
 * BITMASK(ebx, 24, 0xff), which is presumably bits 31..24 of EBX (the
 * initial APIC ID) -- BITMASK is defined elsewhere, confirm.
 */
static __inline int WhereAmI(void){
  int eax, ebx, ecx, edx;
  int apicid;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  apicid = BITMASK(ebx, 24, 0xff);

  return apicid;
}

/* On CORE_BARCELONA builds IFLUSH and IFLUSH_HALF expand to calls of
   gotoblas_iflush() and gotoblas_iflush_half(), which are declared
   elsewhere. On other builds this header leaves both macros undefined. */
#ifdef CORE_BARCELONA
#define IFLUSH		gotoblas_iflush()
#define IFLUSH_HALF	gotoblas_iflush_half()
#endif

/*
 * IDEBUG_START / IDEBUG_END bracket a region of code that runs with a
 * modified MXCSR value (only available with ENABLE_SSE_EXCEPTION).
 *
 * IDEBUG_START opens a brace scope, saves the current MXCSR into
 * fp_sse_mode, clears the bits selected by 0xd00 (bits 8, 10 and 11; in
 * the MXCSR layout these are the denormal, overflow and underflow
 * exception mask bits) and loads the result. IDEBUG_END reloads the saved
 * value and closes the scope, so the two macros must always be used as a
 * pair within the same function.
 */
#ifdef ENABLE_SSE_EXCEPTION

#define IDEBUG_START \
{ \
  unsigned int fp_sse_mode, new_fp_mode; \
  __asm__ __volatile__ ("stmxcsr %0" : "=m" (fp_sse_mode) : ); \
  new_fp_mode = fp_sse_mode & ~0xd00; \
  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (new_fp_mode) );

#define IDEBUG_END \
  __asm__ __volatile__ ("ldmxcsr %0" : : "m" (fp_sse_mode) ); \
}

#endif

/*
 * GET_IMAGE(res) stores a value straight from a floating-point register
 * into the lvalue `res`:
 *
 *   XDOUBLE   - pops the x87 stack top with fstpt
 *   DOUBLE    - copies the low double of %xmm1 with movsd
 *   otherwise - copies the low float of %xmm1 with movss
 *
 * `res` must be an object in memory whose type matches the selected
 * precision.
 */
#ifdef XDOUBLE
#define GET_IMAGE(res)  __asm__ __volatile__("fstpt %0" : "=m"(res) : : "memory")
#elif defined(DOUBLE)
#define GET_IMAGE(res)  __asm__ __volatile__("movsd %%xmm1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res)  __asm__ __volatile__("movss %%xmm1, %0" : "=m"(res) : : "memory")
#endif

/* Expands to nothing in this header. */
#define GET_IMAGE_CANCEL

#ifdef SMP
#ifdef USE64BITINT
/*
 * Integer division helper for builds with USE64BITINT: plain x / y.
 * The caller must not pass y == 0.
 */
static __inline blasint blas_quickdivide(blasint x, blasint y){
  return x / y;
}
#else
/* Table indexed by the divisor; defined elsewhere. Its length is not
   visible from this header. */
extern unsigned int blas_quick_divide_table[];

/*
 * Integer division helper that replaces the division by a multiplication.
 *
 * For y <= 1 the function returns x unchanged. Otherwise it multiplies x
 * by blas_quick_divide_table[y] and returns the upper 32 bits of the
 * 64-bit product (presumably the table holds reciprocals scaled by 2^32
 * -- confirm against the table definition).
 *
 * NOTE(review): y is used as a table index without a range check; callers
 * must keep it within the bounds of blas_quick_divide_table.
 */
static __inline int blas_quickdivide(unsigned int x, unsigned int y){

  unsigned int result;

  if (y <= 1) return x;

  y = blas_quick_divide_table[y];

  /* MULL leaves the product in EDX:EAX, so EAX is overwritten as well.
     It is declared read-write ("+a") to keep the compiler from assuming
     that the register still holds x afterwards. */
  __asm__ __volatile__ ("mull %0" :"=d" (result), "+a" (x) : "0" (y));

  return result;
}
#endif
#endif

| #endif |
| |
/* Page size in bytes (4 KiB) unless PAGESIZE was already defined. */
#ifndef PAGESIZE
#define PAGESIZE	( 4 << 10)
#endif
/* Huge page size in bytes (2 MiB). */
#define HUGE_PAGESIZE	( 2 << 20)

/* Buffer size in bytes (32 MiB). */
#define BUFFER_SIZE	(32 << 20)

/* Feature flag; it has no value and is only tested with #ifdef. */
#define SEEK_ADDRESS

/*
 * Select a return-convention macro (RETURN_BY_STACK, RETURN_BY_PACKED or
 * RETURN_BY_REGS) according to the Fortran interface the build targets.
 * Exactly one F_INTERFACE_* macro is expected to be defined by the build
 * configuration; if none is defined, no RETURN_BY_* macro is set here.
 */

/* g77: return by stack; additionally requests NEED_F2CCONV. */
#ifdef F_INTERFACE_G77
#define RETURN_BY_STACK
#define NEED_F2CCONV
#endif

#ifdef F_INTERFACE_G95
#define RETURN_BY_PACKED
#endif

/* gfortran: on Windows the choice depends on the precision (registers
   when DOUBLE is not defined, stack when it is); packed everywhere else. */
#ifdef F_INTERFACE_GFORT
#ifdef OS_WINDOWS
#ifndef DOUBLE
#define RETURN_BY_REGS
#else
#define RETURN_BY_STACK
#endif
#else
#define RETURN_BY_PACKED
#endif
#endif

#ifdef F_INTERFACE_INTEL
#define RETURN_BY_STACK
#endif

#ifdef F_INTERFACE_FUJITSU
#define RETURN_BY_STACK
#endif

#ifdef F_INTERFACE_PGI
#define RETURN_BY_STACK
#endif

#ifdef F_INTERFACE_PATHSCALE
#define RETURN_BY_PACKED
#endif

#ifdef F_INTERFACE_SUN
#define RETURN_BY_PACKED
#endif

| #ifdef ASSEMBLER |
| |
/* EMMS expands to femms with HAVE_3DNOW, to emms with HAVE_MMX, and to
   nothing when neither is available. */
#if defined(HAVE_3DNOW)
#define EMMS	femms
#elif defined(HAVE_MMX)
#define EMMS	emms
#endif

#ifndef EMMS
#define EMMS
#endif

/* Single prefix bytes emitted in front of an instruction:
   0x3e and 0x2e are the DS and CS segment-override prefixes (as the macro
   names suggest, used here as branch taken / not-taken hints), 0x66 is
   the operand-size prefix (used as padding). */
#define BRANCH		.byte 0x3e
#define NOBRANCH	.byte 0x2e
#define PADDING		.byte 0x66

/*
 * Registers that carry the leading integer arguments of a function.
 * With OS_WINDOWS only ARG1..ARG4 are defined (rcx, rdx, r8, r9);
 * otherwise ARG1..ARG6 are defined (rdi, rsi, rdx, rcx, r8, r9).
 */
#ifdef OS_WINDOWS
#define ARG1	%rcx
#define ARG2	%rdx
#define ARG3	%r8
#define ARG4	%r9
#else
#define ARG1	%rdi
#define ARG2	%rsi
#define ARG3	%rdx
#define ARG4	%rcx
#define ARG5	%r8
#define ARG6	%r9
#endif

/*
 * LOCAL_BUFFER_SIZE aliases the per-type buffer size constant that matches
 * the data type being built (the constants themselves are defined
 * elsewhere):
 *
 *               real                 complex
 *   XDOUBLE     QLOCAL_BUFFER_SIZE   XLOCAL_BUFFER_SIZE
 *   DOUBLE      DLOCAL_BUFFER_SIZE   ZLOCAL_BUFFER_SIZE
 *   otherwise   SLOCAL_BUFFER_SIZE   CLOCAL_BUFFER_SIZE
 */
#ifndef COMPLEX
#ifdef XDOUBLE
#define LOCAL_BUFFER_SIZE  QLOCAL_BUFFER_SIZE
#elif defined DOUBLE
#define LOCAL_BUFFER_SIZE  DLOCAL_BUFFER_SIZE
#else
#define LOCAL_BUFFER_SIZE  SLOCAL_BUFFER_SIZE
#endif
#else
#ifdef XDOUBLE
#define LOCAL_BUFFER_SIZE  XLOCAL_BUFFER_SIZE
#elif defined DOUBLE
#define LOCAL_BUFFER_SIZE  ZLOCAL_BUFFER_SIZE
#else
#define LOCAL_BUFFER_SIZE  CLOCAL_BUFFER_SIZE
#endif
#endif

/*
 * STACK_TOUCHING stores a zero at successive 4096-byte offsets from %rsp,
 * highest offset first. The number of stores depends on
 * LOCAL_BUFFER_SIZE: one per started 4096-byte page beyond the first, up
 * to four. It is only active with OS_WINDOWS (presumably so that each
 * stack page of the local buffer is touched in order -- confirm) and
 * expands to nothing on every other target.
 */
#if defined(OS_WINDOWS)
#if   LOCAL_BUFFER_SIZE > 16384
#define STACK_TOUCHING \
	movl	$0,  4096 * 4(%rsp);\
	movl	$0,  4096 * 3(%rsp);\
	movl	$0,  4096 * 2(%rsp);\
	movl	$0,  4096 * 1(%rsp);
#elif LOCAL_BUFFER_SIZE > 12288
#define STACK_TOUCHING \
	movl	$0,  4096 * 3(%rsp);\
	movl	$0,  4096 * 2(%rsp);\
	movl	$0,  4096 * 1(%rsp);
#elif LOCAL_BUFFER_SIZE > 8192
#define STACK_TOUCHING \
	movl	$0,  4096 * 2(%rsp);\
	movl	$0,  4096 * 1(%rsp);
#elif LOCAL_BUFFER_SIZE > 4096
#define STACK_TOUCHING \
	movl	$0,  4096 * 1(%rsp);
#else
#define STACK_TOUCHING
#endif
#else
#define STACK_TOUCHING
#endif

/* On CORE2 builds the double-precision move/and mnemonics are replaced by
   their single-precision counterparts wherever they appear in the
   assembler sources. */
#if defined(CORE2)
#define movapd	movaps
#define andpd	andps
#define movlpd	movlps
#define movhpd	movhps
#endif

/* REALNAME is the symbol a kernel is emitted under: ASMFNAME when
   F_INTERFACE is defined, ASMNAME otherwise (both defined elsewhere). */
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif

/* Function framing for OS_DARWIN: PROLOGUE switches to the text section,
   aligns, exports REALNAME and places its label; EPILOGUE emits
   .subsections_via_symbols; PROFCODE is empty. */
#ifdef OS_DARWIN
#define PROLOGUE .text;.align 5; .globl REALNAME; REALNAME:
#define EPILOGUE	.subsections_via_symbols
#define PROFCODE
#endif

/*
 * SAVEREGISTERS / RESTOREREGISTERS preserve %xmm6..%xmm15 around a kernel
 * when OS_WINDOWS is defined.
 *
 * SAVEREGISTERS reserves 256 bytes below %rsp and stores the ten registers
 * with unaligned moves into the first 160 bytes. RESTOREREGISTERS reloads
 * them from the same slots and releases the 256 bytes; %rsp must have the
 * same value at both points. Both macros are empty on other targets.
 */
#ifdef OS_WINDOWS
#define SAVEREGISTERS \
	subq	$256, %rsp;\
	movups	%xmm6,    0(%rsp);\
	movups	%xmm7,   16(%rsp);\
	movups	%xmm8,   32(%rsp);\
	movups	%xmm9,   48(%rsp);\
	movups	%xmm10,  64(%rsp);\
	movups	%xmm11,  80(%rsp);\
	movups	%xmm12,  96(%rsp);\
	movups	%xmm13, 112(%rsp);\
	movups	%xmm14, 128(%rsp);\
	movups	%xmm15, 144(%rsp)

#define RESTOREREGISTERS \
	movups	   0(%rsp), %xmm6;\
	movups	  16(%rsp), %xmm7;\
	movups	  32(%rsp), %xmm8;\
	movups	  48(%rsp), %xmm9;\
	movups	  64(%rsp), %xmm10;\
	movups	  80(%rsp), %xmm11;\
	movups	  96(%rsp), %xmm12;\
	movups	 112(%rsp), %xmm13;\
	movups	 128(%rsp), %xmm14;\
	movups	 144(%rsp), %xmm15;\
	addq	$256, %rsp
#else
#define SAVEREGISTERS
#define RESTOREREGISTERS
#endif

/* Function framing for OS_WINDOWS (except with C_PGI): PROLOGUE switches
   to the text section, aligns to 16, exports REALNAME, emits its
   .def/.scl/.type/.endef symbol record and places the label; PROFCODE is
   empty; EPILOGUE emits ".end REALNAME". */
#if defined(OS_WINDOWS) && !defined(C_PGI)
#define PROLOGUE \
	.text; \
	.align 16; \
	.globl REALNAME ;\
	.def REALNAME;.scl	2;.type	32;.endef; \
REALNAME:

#define PROFCODE

#define EPILOGUE .end	 REALNAME
#endif

/*
 * Function framing for ELF-style targets (OS_LINUX, OS_FreeBSD, OS_NetBSD,
 * any compiler defining __ELF__, or C_PGI).
 *
 *   PROLOGUE - text section, ".align 512", global symbol of type
 *              @function, then the REALNAME label
 *   PROFCODE - indirect call to mcount through the GOT when PROFILE is
 *              defined, empty otherwise
 *   EPILOGUE - records the symbol size (.size REALNAME, .-REALNAME)
 */
#if defined(OS_LINUX) || defined(OS_FreeBSD) || defined(OS_NetBSD) || defined(__ELF__) || defined(C_PGI)
#define PROLOGUE \
	.text; \
	.align 512; \
	.globl REALNAME ;\
	.type REALNAME, @function; \
REALNAME:

#ifdef PROFILE
#define PROFCODE call *mcount@GOTPCREL(%rip)
#else
#define PROFCODE
#endif

#define EPILOGUE .size	 REALNAME, .-REALNAME

#endif

| #endif |
| |
/*
 * Precision-neutral instruction names. Kernels written with these macros
 * assemble to the extended (XDOUBLE), double (DOUBLE) or single precision
 * form of each instruction.
 *
 * Note that the macro names keep the double-precision spelling (MOVSD,
 * MULPD, ...) even when they expand to the single-precision instruction,
 * and that the XDOUBLE branch defines only FLD, FST and MOVQ.
 */
#ifdef XDOUBLE
#define FLD	fldt
#define FST	fstpt
#define MOVQ	movq
#elif defined(DOUBLE)
#define FLD	fldl
#define FST	fstpl
#define FSTU	fstl
#define FMUL	fmull
#define FADD	faddl
#define MOVSD	movsd
#define MULSD	mulsd
#define MULPD	mulpd
#define CMPEQPD	cmpeqpd
#define COMISD	comisd
#define PSRLQ	psrlq
#define ANDPD	andpd
#define ADDPD	addpd
#define ADDSD	addsd
#define SUBPD	subpd
#define SUBSD	subsd
#define MOVQ	movq
#define MOVUPD	movupd
#define XORPD	xorpd
#else
#define FLD	flds
#define FST	fstps
#define FSTU	fsts
#define FMUL	fmuls
#define FADD	fadds
#define MOVSD	movss
#define MULSD	mulss
#define MULPD	mulps
#define CMPEQPD	cmpeqps
#define COMISD	comiss
#define PSRLQ	psrld
#define ANDPD	andps
#define ADDPD	addps
#define ADDSD	addss
#define SUBPD	subps
#define SUBSD	subss
#define MOVQ	movd
#define MOVUPD	movups
#define XORPD	xorps
#endif

#define HALT	hlt

/*
 * ALIGN_n aligns to 2^n bytes.
 *
 * With OS_DARWIN the argument of .align is written as the exponent n (the
 * same form the Darwin PROLOGUE uses with ".align 5"); on the other
 * targets it is written as the byte count 2^n. All five macros therefore
 * need a Darwin-specific definition: falling back to the byte-count
 * defaults there would turn ALIGN_5 / ALIGN_6 into ".align 32" /
 * ".align 64", i.e. exponents of 32 and 64.
 *
 * On OS_DARWIN the ffreep mnemonic is additionally replaced by fstp.
 */
#ifdef OS_DARWIN
#define ALIGN_2 .align 2
#define ALIGN_3 .align 3
#define ALIGN_4 .align 4
#define ALIGN_5 .align 5
#define ALIGN_6 .align 6
#define ffreep	fstp
#endif

#ifndef ALIGN_2
#define ALIGN_2 .align 4
#endif

#ifndef ALIGN_3
#define ALIGN_3 .align 8
#endif

#ifndef ALIGN_4
#define ALIGN_4 .align 16
#endif

#ifndef ALIGN_5
#define ALIGN_5 .align 32
#endif

#ifndef ALIGN_6
#define ALIGN_6 .align 64
#endif

| #endif |