| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| #ifndef COMMON_X86 |
| #define COMMON_X86 |
| |
| #ifndef ASSEMBLER |
| |
/* Barrier hooks: both MB and WMB expand to nothing in this header. */
#define MB
#define WMB

/* When C_SUN is defined, remap the GNU-style inline-assembly keywords used
   in this header: __asm__ becomes __asm and __volatile__ is dropped. */
#if defined(C_SUN)
#  define __asm__ __asm
#  define __volatile__
#endif
| |
| static void __inline blas_lock(volatile BLASULONG *address){ |
| |
| int ret; |
| |
| do { |
| while (*address) {YIELDING;}; |
| |
| __asm__ __volatile__( |
| "xchgl %0, %1\n" |
| : "=r"(ret), "=m"(*address) |
| : "0"(1), "m"(*address) |
| : "memory"); |
| |
| } while (ret); |
| |
| } |
| |
/*
 * Read the processor cycle counter with the rdtsc instruction.
 *
 * rdtsc leaves the low 32 bits in EAX and the high 32 bits in EDX; the two
 * halves are merged into one 64-bit value.
 *
 * Returns: the 64-bit counter value.
 */
static __inline unsigned long long rpcc(void){
  unsigned int lo, hi;

  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

  /* The halves do not overlap, so a bitwise OR assembles the value. */
  return ((unsigned long long)hi << 32) | (unsigned long long)lo;
}
| |
/*
 * Return the current value of the %esp register.
 *
 * The asm statement names the 32-bit register %esp explicitly, so this
 * helper is only meaningful when building for 32-bit x86.
 */
static __inline unsigned long getstackaddr(void){
  unsigned long sp;

  __asm__ __volatile__ ("mov %%esp, %0"
                        : "=r"(sp) : : "memory");

  return sp;
}
| |
| |
/*
 * Square root of a long double, computed on the x87 unit.
 *
 * The argument is pushed from memory (fldt), fsqrt replaces the stack top
 * with its square root, and fstpt pops that value into the result, leaving
 * the x87 stack as it was found.
 */
static __inline long double sqrt_long(long double val) {
  long double root;

  __asm__ __volatile__ (
    "fldt %1\n"
    "fsqrt\n"
    "fstpt %0\n"
    : "=m" (root)
    : "m" (val));

  return root;
}

/* SQRT(a) forwards to sqrt_long(). */
#define SQRT(a) sqrt_long(a)
| |
| |
/* Declared here only; the definition lives outside this header. */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);

/* Signals that this header provides WhereAmI(). */
#define WHEREAMI

/*
 * Return an identifier for the processor the caller is running on.
 *
 * Calls cpuid() with op 1 and extracts a field from the EBX result via
 * BITMASK(ebx, 24, 0xff).
 * NOTE(review): BITMASK is defined elsewhere; presumably it computes
 * (ebx >> 24) & 0xff, which for CPUID leaf 1 is the initial APIC ID -
 * confirm against its definition.
 *
 * Uses __inline like the other helpers in this header.
 */
static __inline int WhereAmI(void){
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);

  return BITMASK(ebx, 24, 0xff);
}
| |
#if defined(ENABLE_SSE_EXCEPTION)

/*
 * IDEBUG_START and IDEBUG_END must be used as a pair inside one function
 * body: START opens a brace scope that END closes.
 *
 * START stores the current MXCSR into fp_sse_mode, then loads a copy with
 * bits 8, 10 and 11 (together 0xd00) cleared.  END reloads the saved value.
 * NOTE(review): in the MXCSR layout those bits are the denormal, overflow
 * and underflow exception masks, so clearing them presumably unmasks those
 * exceptions for the bracketed code - confirm this is the intended set.
 */
#define IDEBUG_START \
  { \
    unsigned int fp_sse_mode, new_fp_mode; \
    __asm__ __volatile__ ("stmxcsr %0" : "=m" (fp_sse_mode) : ); \
    new_fp_mode = fp_sse_mode & ~(0x100 | 0x400 | 0x800); \
    __asm__ __volatile__ ("ldmxcsr %0" : : "m" (new_fp_mode) );

#define IDEBUG_END \
    __asm__ __volatile__ ("ldmxcsr %0" : : "m" (fp_sse_mode) ); \
  }

#endif
| |
/*
 * GET_IMAGE(res): pop the x87 stack top into the lvalue `res`, using the
 * store width that matches the build precision (fstpt / fstpl / fstps).
 *
 * Every variant expands to one statement WITHOUT a trailing semicolon; the
 * caller writes `GET_IMAGE(x);`.  Keeping the three variants uniform lets
 * the macro sit in an unbraced if/else in any precision.
 * NOTE(review): the name suggests this fetches the imaginary part of a
 * complex return value left on the x87 stack - confirm with the callers.
 */
#ifdef XDOUBLE
#define GET_IMAGE(res)  __asm__ __volatile__("fstpt %0" : "=m"(res) : : "memory")
#elif defined(DOUBLE)
#define GET_IMAGE(res)  __asm__ __volatile__("fstpl %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res)  __asm__ __volatile__("fstps %0" : "=m"(res) : : "memory")
#endif

/* GET_IMAGE_CANCEL: issue `ffree %st` on the x87 stack top instead of
   storing it. */
#define GET_IMAGE_CANCEL __asm__ __volatile__ ("ffree %st")
| |
#ifdef SMP
/* Defined elsewhere; indexed by the divisor. */
extern unsigned int blas_quick_divide_table[];

/*
 * Divide x by y with a multiply and a table lookup.
 *
 * Returns x unchanged when y <= 1.  Otherwise returns the upper 32 bits of
 * the 64-bit product x * blas_quick_divide_table[y].
 * NOTE(review): the table presumably holds scaled reciprocals so that the
 * upper half equals x / y - confirm against the table's definition.
 * NOTE(review): y is not range-checked; the table length is not visible
 * from this header, so callers must keep y inside it.
 *
 * The product is formed in plain C rather than with a one-operand mull so
 * that the compiler tracks the EAX/EDX usage itself (mull overwrites both
 * registers).
 */
static __inline int blas_quickdivide(unsigned int x, unsigned int y){

  unsigned long long product;
  unsigned int result;

  if (y <= 1) return x;

  product = (unsigned long long)x * blas_quick_divide_table[y];
  result  = (unsigned int)(product >> 32);

  return result;
}
#endif
| |
| #endif |
| |
/* Page size: 4 << 10 bytes unless PAGESIZE was already defined. */
#if !defined(PAGESIZE)
#  define PAGESIZE ( 4 << 10)
#endif

/* Huge page size: 4 << 20 bytes. */
#define HUGE_PAGESIZE ( 4 << 20)

/* Work buffer size: 16 << 20 bytes. */
#define BUFFER_SIZE (16 << 20)

/* Feature flag; expands to nothing. */
#define SEEK_ADDRESS
| |
/* MMX load/store mnemonic: movd for the default (neither DOUBLE nor
   XDOUBLE) build, movq otherwise. */
#if !defined(DOUBLE) && !defined(XDOUBLE)
#  define MMXLOAD movd
#  define MMXSTORE movd
#else
#  define MMXLOAD movq
#  define MMXSTORE movq
#endif

/* EMMS: femms with HAVE_3DNOW, else emms with HAVE_MMX; when neither is
   set and EMMS is not already defined it expands to nothing. */
#if defined(HAVE_3DNOW)
#  define EMMS femms
#elif defined(HAVE_MMX)
#  define EMMS emms
#elif !defined(EMMS)
#  define EMMS
#endif
| |
/* On CORE2 and PENTIUM4 builds every movapd token is rewritten to movaps. */
#if defined(PENTIUM4) || defined(CORE2)
#  define movapd movaps
#endif

/*
 * Raw bytes and instructions for assembler sources.
 * NOTE(review): 0x3e and 0x2e are the DS/CS segment-override encodings,
 * presumably emitted as branch taken / not-taken hints, and 0x66 is the
 * operand-size prefix - confirm the intent with the kernels that use them.
 * PADDING deliberately carries its own trailing semicolon.
 */
#define BRANCH .byte 0x3e
#define NOBRANCH .byte 0x2e
#define PADDING .byte 0x66;
#define HALT hlt
| |
/*
 * LOCAL_BUFFER_SIZE aliases the per-type buffer-size macro selected by
 * COMPLEX and by the precision flags XDOUBLE / DOUBLE:
 *
 *                 XDOUBLE   DOUBLE   neither
 *   real             Q         D        S
 *   COMPLEX          X         Z        C
 *
 * The ?LOCAL_BUFFER_SIZE macros themselves are defined elsewhere.
 */
#if !defined(COMPLEX) && defined(XDOUBLE)
#  define LOCAL_BUFFER_SIZE QLOCAL_BUFFER_SIZE
#elif !defined(COMPLEX) && defined(DOUBLE)
#  define LOCAL_BUFFER_SIZE DLOCAL_BUFFER_SIZE
#elif !defined(COMPLEX)
#  define LOCAL_BUFFER_SIZE SLOCAL_BUFFER_SIZE
#elif defined(XDOUBLE)
#  define LOCAL_BUFFER_SIZE XLOCAL_BUFFER_SIZE
#elif defined(DOUBLE)
#  define LOCAL_BUFFER_SIZE ZLOCAL_BUFFER_SIZE
#else
#  define LOCAL_BUFFER_SIZE CLOCAL_BUFFER_SIZE
#endif
| |
/*
 * STACK_TOUCHING: on OS_WINDOWS builds, store zero to one word in each
 * 4096-byte step above %esp, starting with the highest offset, with one
 * store per 4096 bytes that LOCAL_BUFFER_SIZE exceeds (up to four).
 * Everywhere else, and for buffers of 4096 bytes or less, it is empty.
 * NOTE(review): presumably this commits stack pages in order before a
 * large local buffer is used - confirm with the kernels that expand it.
 *
 * LOCAL_BUFFER_SIZE is evaluated only inside the OS_WINDOWS branch.
 */
#if defined(OS_WINDOWS)
#  if LOCAL_BUFFER_SIZE > 16384
#    define STACK_TOUCHING \
	movl	$0,  4096 * 4(%esp);\
	movl	$0,  4096 * 3(%esp);\
	movl	$0,  4096 * 2(%esp);\
	movl	$0,  4096 * 1(%esp);
#  elif LOCAL_BUFFER_SIZE > 12288
#    define STACK_TOUCHING \
	movl	$0,  4096 * 3(%esp);\
	movl	$0,  4096 * 2(%esp);\
	movl	$0,  4096 * 1(%esp);
#  elif LOCAL_BUFFER_SIZE > 8192
#    define STACK_TOUCHING \
	movl	$0,  4096 * 2(%esp);\
	movl	$0,  4096 * 1(%esp);
#  elif LOCAL_BUFFER_SIZE > 4096
#    define STACK_TOUCHING \
	movl	$0,  4096 * 1(%esp);
#  else
#    define STACK_TOUCHING
#  endif
#else
#  define STACK_TOUCHING
#endif
| |
/* REALNAME: ASMFNAME when F_INTERFACE is defined, ASMNAME otherwise. */
#ifdef F_INTERFACE
#  define REALNAME ASMFNAME
#else
#  define REALNAME ASMNAME
#endif

/* Exactly one RETURN_BY_* flag is defined, chosen by the F_INTERFACE_*
   compiler flag; RETURN_BY_STACK is the fallback. */
#if defined(F_INTERFACE_PATHSCALE) || defined(F_INTERFACE_OPEN64)
#  define RETURN_BY_STRUCT
#elif defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
#  define RETURN_BY_COMPLEX
#else
#  define RETURN_BY_STACK
#endif
| |
/* OS_DARWIN: routine entry/exit boilerplate for assembler kernels.
   PROLOGUE switches to .text, aligns with `.align 5`, exports REALNAME and
   places its label; EPILOGUE emits .subsections_via_symbols; PROFCODE is
   empty. */
#if defined(OS_DARWIN)
#  define PROLOGUE .text;.align 5; .globl REALNAME; REALNAME:
#  define EPILOGUE .subsections_via_symbols
#  define PROFCODE
#endif
| |
/*
 * Windows-family targets: register save/restore helpers plus the routine
 * entry/exit boilerplate for assembler kernels.
 *
 * SAVEREGISTERS reserves 32 bytes of stack and stores %xmm6 and %xmm7
 * there; RESTOREREGISTERS reloads them and releases the space.  PROLOGUE
 * switches to .text, aligns, exports REALNAME, emits a .def/.endef symbol
 * record and places the label.  On every other target SAVEREGISTERS and
 * RESTOREREGISTERS are empty and PROLOGUE/EPILOGUE are not touched here.
 *
 * NOTE(review): OS_INERIX looks like a misspelling of OS_INTERIX.  Both
 * spellings are accepted so that neither build configuration is lost;
 * confirm which macro the build system really defines and drop the other.
 */
#if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || \
    defined(OS_INERIX) || defined(OS_INTERIX)

#define SAVEREGISTERS \
	subl	$32, %esp;\
	movups	%xmm6,    0(%esp);\
	movups	%xmm7,   16(%esp)

#define RESTOREREGISTERS \
	movups	   0(%esp), %xmm6;\
	movups	  16(%esp), %xmm7;\
	addl	$32, %esp

#define PROLOGUE \
	.text; \
	.align 16; \
	.globl REALNAME ;\
	.def REALNAME;.scl	2;.type	32;.endef; \
REALNAME:

#define PROFCODE

#define EPILOGUE .end	 REALNAME

#else

#define SAVEREGISTERS
#define RESTOREREGISTERS

#endif
| |
/*
 * ELF-style targets (OS_LINUX, OS_FreeBSD, OS_NetBSD or __ELF__): routine
 * entry/exit boilerplate for assembler kernels.
 *
 * PROLOGUE switches to .text, aligns, exports REALNAME, marks it as a
 * function and places its label.  PROFCODE calls mcount when PROFILE is
 * defined and is empty otherwise.  EPILOGUE records the symbol size.
 */
#if defined(__ELF__) || defined(OS_LINUX) || defined(OS_FreeBSD) || defined(OS_NetBSD)

#  define PROLOGUE \
	.text; \
	.align 16; \
	.globl REALNAME ;\
	.type REALNAME, @function; \
REALNAME:

#  if defined(PROFILE)
#    define PROFCODE call mcount
#  else
#    define PROFCODE
#  endif

#  define EPILOGUE .size REALNAME, .-REALNAME

#endif
| |
/*
 * x87 mnemonics with the operand-size suffix for the build precision:
 * `t` with XDOUBLE, `l` with DOUBLE, `s` otherwise.
 *
 *   FLD  load          FST   store and pop     FSTU  store, no pop
 *   FMUL multiply      FADD  add
 *   FSUB subtract      FSUBR reverse subtract
 *
 * NOTE(review): the XDOUBLE spellings fstt, fmult, faddt, fsubt and fsubrt
 * do not look like standard x87 mnemonics - confirm they are never
 * expanded in an XDOUBLE kernel.
 */
#if defined(XDOUBLE)
#  define FLD fldt
#  define FST fstpt
#  define FSTU fstt
#  define FMUL fmult
#  define FADD faddt
#  define FSUB fsubt
#  define FSUBR fsubrt
#elif defined(DOUBLE)
#  define FLD fldl
#  define FST fstpl
#  define FSTU fstl
#  define FMUL fmull
#  define FADD faddl
#  define FSUB fsubl
#  define FSUBR fsubrl
#else
#  define FLD flds
#  define FST fstps
#  define FSTU fsts
#  define FMUL fmuls
#  define FADD fadds
#  define FSUB fsubs
#  define FSUBR fsubrs
#endif
| #endif |
| |
/* With C_SUN, ffreep is replaced by fstp. */
#ifdef C_SUN
#define ffreep fstp
#endif

/*
 * ALIGN_n requests 2^n-byte alignment (ALIGN_2 = 4 bytes ... ALIGN_6 = 64).
 *
 * The __APPLE__ branch spells the argument of .align as the exponent n,
 * while the generic fallbacks spell it as a byte count.  All five levels
 * are defined in the __APPLE__ branch so that ALIGN_5 and ALIGN_6 do not
 * fall through to the byte-count forms `.align 32` / `.align 64` there.
 * ffreep is replaced by fstp on __APPLE__ as well.
 */
#ifdef __APPLE__
#define ALIGN_2 .align 2
#define ALIGN_3 .align 3
#define ALIGN_4 .align 4
#define ALIGN_5 .align 5
#define ALIGN_6 .align 6
#define ffreep fstp
#endif

/* Byte-count fallbacks for every level not defined above. */
#ifndef ALIGN_2
#define ALIGN_2 .align 4
#endif

#ifndef ALIGN_3
#define ALIGN_3 .align 8
#endif

#ifndef ALIGN_4
#define ALIGN_4 .align 16
#endif

#ifndef ALIGN_5
#define ALIGN_5 .align 32
#endif

#ifndef ALIGN_6
#define ALIGN_6 .align 64
#endif