| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| #ifndef COMMON_POWER /* include guard for this header */ |
| #define COMMON_POWER |
| /* Memory barrier macros: MB and WMB both expand to an inline-asm "sync" instruction. */ |
| #define MB __asm__ __volatile__ ("sync") |
| #define WMB __asm__ __volatile__ ("sync") |
| /* INLINE is the keyword used further down when declaring blas_lock. */ |
| #define INLINE inline |
| /* PPC440 builds only: STDERR is mapped to stdout, and three distinct bit flags are defined (presumably for the flags argument of qalloc -- TODO confirm). */ |
| #ifdef PPC440 |
| #define STDERR stdout |
| #define QNONCACHE 0x1 |
| #define QCOMMS 0x2 |
| #define QFAST 0x4 |
| #endif |
| /* The C-only part starts here; it is hidden from sources that define ASSEMBLER. */ |
| #ifndef ASSEMBLER |
| /* Allocator declared here and defined elsewhere: takes a flags word and a byte count, returns a pointer. */ |
| void *qalloc(int flags, size_t bytes); |
| /* Acquire the spin lock word at *address: wait while it reads non-zero, then try to store 1 with a lwarx/stwcx. pair, repeating until the value loaded by lwarx was 0. */ |
| static void INLINE blas_lock(volatile unsigned long *address){ |
| /* ret: value loaded from the lock word by lwarx; val: the value written to claim the lock. */ |
| long int ret, val = 1; |
| |
| do { |
| while (*address) {YIELDING;}; |
| /* Linux/Darwin branch through local labels 0 and 1; other targets use fixed byte offsets for the same two branches. */ |
| #if defined(OS_LINUX) || defined(OS_DARWIN) |
| __asm__ __volatile__ ( |
| "0: lwarx %0, 0, %1\n" /* load the lock word and set a reservation */ |
| " cmpwi %0, 0\n" |
| " bne- 1f\n" /* word already non-zero: leave the asm with ret != 0 */ |
| " stwcx. %2,0, %1\n" /* conditionally store val */ |
| " bne- 0b\n" /* store did not succeed: start over at the lwarx */ |
| "1: " |
| : "=&r"(ret) |
| : "r"(address), "r" (val) |
| : "cr0", "memory"); |
| #else |
| __asm__ __volatile__ ( |
| ".machine \"any\"\n" |
| " lwarx %0, 0, %1\n" |
| " cmpwi %0, 0\n" |
| " bne- $+12\n" /* forward branch over the remaining two instructions */ |
| " stwcx. %2,0, %1\n" |
| " bne- $-16\n" /* backward branch to the lwarx */ |
| : "=&r"(ret) |
| : "r"(address), "r" (val) |
| : "cr0", "memory"); |
| #endif |
| } while (ret); /* non-zero ret: the word was already set, so wait and retry */ |
| } |
| |
| /* |
|  * Read the time base with "mftb" and return it scaled up: shifted left |
|  * by 6 on POWER5/PPC970 builds and by 3 on every other build. |
|  * On AIX a ".machine any" assembler directive is emitted first. |
|  */ |
| static inline unsigned long rpcc(void){ |
|   unsigned long timebase; |
| |
| #ifdef OS_AIX |
|   __asm__ __volatile__(".machine \"any\" ;"); |
| #endif |
|   __asm__ __volatile__ ("mftb %0" : "=r" (timebase) : ); |
| |
| #if defined(POWER5) || defined(PPC970) |
|   timebase <<= 6; |
| #else |
|   timebase <<= 3; |
| #endif |
| |
|   return timebase; |
| } |
| /* RPCC64BIT is defined on 64-bit builds only. */ |
| #ifdef __64BIT__ |
| #define RPCC64BIT |
| #endif |
| |
| /* |
|  * Return the current value of general-purpose register 1, the register |
|  * that the assembler part of this header names SP. |
|  */ |
| static inline unsigned long getstackaddr(void){ |
|   unsigned long addr; |
| |
|   /* Copy register 1 into a compiler-chosen output register. */ |
|   __asm__ __volatile__ ("mr %0, 1" |
|                         : "=r"(addr) : : "memory"); |
| |
|   return addr; |
| } |
| /* GET_IMAGE(res): copy floating-point register 2 into res with "fmr"; Linux/AIX write the register as a bare number, other targets as f2. */ |
| #if defined(OS_LINUX) || defined(OS_AIX) |
| #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, 2" : "=f"(res) : : "memory") |
| #else |
| #define GET_IMAGE(res) __asm__ __volatile__("fmr %0, f2" : "=f"(res) : : "memory") |
| /* GET_IMAGE_CANCEL is defined empty, and only on the non-Linux/non-AIX path. */ |
| #define GET_IMAGE_CANCEL |
| |
| #endif |
| |
| #ifdef SMP |
| /* Plain integer division of x by y; the quotient is converted to int for the caller. */ |
| static inline int blas_quickdivide(blasint x, blasint y){ |
|   return (int)(x / y); |
| } |
| #endif |
| |
| #endif /* !ASSEMBLER */ |
| |
| /* Assembler-only section: mnemonic aliases, register names and prologue/epilogue helpers. */ |
| #ifdef ASSEMBLER |
| /* Precision-dependent aliases: with DOUBLE each name maps to a double-precision mnemonic, otherwise to the single-precision counterpart. */ |
| #ifdef DOUBLE |
| #define LFD lfd |
| #define LFDX lfdx |
| #define LFPDX lfpdx |
| #define LFSDX lfsdx |
| #define LFXDX lfxdx |
| #define LFDU lfdu |
| #define LFDUX lfdux |
| #define LFPDUX lfpdux |
| #define LFSDUX lfsdux |
| #define LFXDUX lfxdux |
| #define STFD stfd |
| #define STFDX stfdx |
| #define STFPDX stfpdx |
| #define STFSDX stfsdx |
| #define STFXDX stfxdx |
| #define STFDU stfdu |
| #define STFDUX stfdux |
| #define STFPDUX stfpdux |
| #define STFSDUX stfsdux |
| #define STFXDUX stfxdux |
| #define FMADD fmadd |
| #define FMSUB fmsub |
| #define FNMADD fnmadd |
| #define FNMSUB fnmsub |
| #define FMUL fmul |
| #define FADD fadd |
| #define FSUB fsub |
| #else |
| #define LFD lfs |
| #define LFDX lfsx |
| #define LFPDX lfpsx |
| #define LFSDX lfssx |
| #define LFXDX lfxsx |
| #define LFDU lfsu |
| #define LFDUX lfsux |
| #define LFPDUX lfpsux |
| #define LFSDUX lfssux |
| #define LFXDUX lfxsux |
| #define STFD stfs |
| #define STFDX stfsx |
| #define STFPDX stfpsx |
| #define STFSDX stfssx |
| #define STFXDX stfxsx |
| #define STFDU stfsu |
| #define STFDUX stfsux |
| #define STFPDUX stfpsux |
| #define STFSDUX stfssux |
| #define STFXDUX stfxsux |
| #define FMADD fmadds |
| #define FMSUB fmsubs |
| #define FNMADD fnmadds |
| #define FNMSUB fnmsubs |
| #define FMUL fmuls |
| #define FADD fadds |
| #define FSUB fsubs |
| #endif /* DOUBLE */ |
| /* LDLONG: load mnemonic chosen by build width (ld when __64BIT__ is defined, lwz otherwise). */ |
| #ifdef __64BIT__ |
| #define LDLONG ld |
| #else |
| #define LDLONG lwz |
| #endif |
| /* LL(x): builds a label name from x; the prefix is L on Darwin, .L on Linux and __L on anything else. */ |
| #ifdef OS_DARWIN |
| #define LL(x) L##x |
| #endif |
| |
| #ifdef OS_LINUX |
| #define LL(x) .L##x |
| #endif |
| |
| #ifndef LL |
| #define LL(x) __L##x |
| #endif |
| |
| /* LDINT: ld on 64-bit builds with USE64BITINT, lwa on 64-bit builds without it, lwz on 32-bit builds. */ |
| #if defined(__64BIT__) && defined(USE64BITINT) |
| #define LDINT ld |
| #elif defined(__64BIT__) && !defined(USE64BITINT) |
| #define LDINT lwa |
| #else |
| #define LDINT lwz |
| #endif |
| |
| |
| |
| |
| |
| /* DSTATTR_H packs SIZE and COUNT as (SIZE << 8) | COUNT; DSTATTR_L is just STRIDE. Each macro ignores the arguments it does not use. */ |
| #define DSTATTR_H(SIZE, COUNT, STRIDE) ((SIZE << 8) | (COUNT)) |
| #define DSTATTR_L(SIZE, COUNT, STRIDE) (STRIDE) |
| /* HAVE_PREFETCH is set for the CPU types listed in the condition. */ |
| #if defined(PPC970) || defined(POWER3) || defined(POWER4) || defined(POWER5) || defined(PPCG4) |
| #define HAVE_PREFETCH |
| #endif |
| /* DCBT_ARG is 0 on POWER3/POWER6/PPCG4/CELL and 8 elsewhere; a non-zero value is emitted as an extra leading operand by DCBT/DCBTST below. */ |
| #if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) |
| #define DCBT_ARG 0 |
| #else |
| #define DCBT_ARG 8 |
| #endif |
| /* Per-CPU prefetch tuning: the L1_DUALFETCH flag and L1_PREFETCHSIZE (unit not shown here -- presumably bytes, TODO confirm). */ |
| #ifdef CELL |
| #define L1_DUALFETCH |
| #define L1_PREFETCHSIZE (64 + 128 * 13) |
| #endif |
| |
| #if defined(POWER3) || defined(POWER4) || defined(POWER5) |
| #define L1_DUALFETCH |
| #define L1_PREFETCHSIZE (96 + 128 * 12) |
| #endif |
| /* POWER6 additionally overrides L1_PREFETCH with dcbtst. */ |
| #if defined(POWER6) |
| #define L1_DUALFETCH |
| #define L1_PREFETCHSIZE (16 + 128 * 100) |
| #define L1_PREFETCH dcbtst |
| #endif |
| /* Defaults when not set above: dcbt for L1_PREFETCH, dcbtst for L1_PREFETCHW. */ |
| #ifndef L1_PREFETCH |
| #define L1_PREFETCH dcbt |
| #endif |
| |
| #ifndef L1_PREFETCHW |
| #define L1_PREFETCHW dcbtst |
| #endif |
| /* DCBT/DCBTST(REGA, REGB) emit the prefetch mnemonic with the operands in REGB, REGA order. */ |
| #if DCBT_ARG == 0 |
| #define DCBT(REGA, REGB) L1_PREFETCH REGB, REGA |
| #define DCBTST(REGA, REGB) L1_PREFETCHW REGB, REGA |
| #else |
| #define DCBT(REGA, REGB) L1_PREFETCH DCBT_ARG, REGB, REGA |
| #define DCBTST(REGA, REGB) L1_PREFETCHW DCBT_ARG, REGB, REGA |
| #endif |
| |
| /* Fallback prefetch size for CPU types without a specific setting above. */ |
| #ifndef L1_PREFETCHSIZE |
| #define L1_PREFETCHSIZE (96 + 128 * 12) |
| #endif |
| /* Map symbolic register and field names to plain numbers; skipped on Darwin unless NEEDPARAM is defined. */ |
| #if !defined(OS_DARWIN) || defined(NEEDPARAM) |
| #define f0 0 /* f0-f31: floating-point register numbers */ |
| #define f1 1 |
| #define f2 2 |
| #define f3 3 |
| #define f4 4 |
| #define f5 5 |
| #define f6 6 |
| #define f7 7 |
| #define f8 8 |
| #define f9 9 |
| #define f10 10 |
| #define f11 11 |
| #define f12 12 |
| #define f13 13 |
| #define f14 14 |
| #define f15 15 |
| #define f16 16 |
| #define f17 17 |
| #define f18 18 |
| #define f19 19 |
| #define f20 20 |
| #define f21 21 |
| #define f22 22 |
| #define f23 23 |
| #define f24 24 |
| #define f25 25 |
| #define f26 26 |
| #define f27 27 |
| #define f28 28 |
| #define f29 29 |
| #define f30 30 |
| #define f31 31 |
| /* r0-r31: general-purpose register numbers. */ |
| #define r0 0 |
| #define r1 1 |
| #define r2 2 |
| #define r3 3 |
| #define r4 4 |
| #define r5 5 |
| #define r6 6 |
| #define r7 7 |
| #define r8 8 |
| #define r9 9 |
| #define r10 10 |
| #define r11 11 |
| #define r12 12 |
| #define r13 13 |
| #define r14 14 |
| #define r15 15 |
| #define r16 16 |
| #define r17 17 |
| #define r18 18 |
| #define r19 19 |
| #define r20 20 |
| #define r21 21 |
| #define r22 22 |
| #define r23 23 |
| #define r24 24 |
| #define r25 25 |
| #define r26 26 |
| #define r27 27 |
| #define r28 28 |
| #define r29 29 |
| #define r30 30 |
| #define r31 31 |
| /* v0-v31: presumably vector register numbers (no use is visible in this header -- TODO confirm). */ |
| #define v0 0 |
| #define v1 1 |
| #define v2 2 |
| #define v3 3 |
| #define v4 4 |
| #define v5 5 |
| #define v6 6 |
| #define v7 7 |
| #define v8 8 |
| #define v9 9 |
| #define v10 10 |
| #define v11 11 |
| #define v12 12 |
| #define v13 13 |
| #define v14 14 |
| #define v15 15 |
| #define v16 16 |
| #define v17 17 |
| #define v18 18 |
| #define v19 19 |
| #define v20 20 |
| #define v21 21 |
| #define v22 22 |
| #define v23 23 |
| #define v24 24 |
| #define v25 25 |
| #define v26 26 |
| #define v27 27 |
| #define v28 28 |
| #define v29 29 |
| #define v30 30 |
| #define v31 31 |
| /* BO_*: names for the values 0-31; presumably the BO operand of conditional branches -- consult the Power ISA for the exact meaning of each encoding. */ |
| #define BO_dCTR_NZERO_AND_NOT 0 |
| #define BO_dCTR_NZERO_AND_NOT_1 1 |
| #define BO_dCTR_ZERO_AND_NOT 2 |
| #define BO_dCTR_ZERO_AND_NOT_1 3 |
| #define BO_IF_NOT 4 |
| #define BO_IF_NOT_1 5 |
| #define BO_IF_NOT_2 6 |
| #define BO_IF_NOT_3 7 |
| #define BO_dCTR_NZERO_AND 8 |
| #define BO_dCTR_NZERO_AND_1 9 |
| #define BO_dCTR_ZERO_AND 10 |
| #define BO_dCTR_ZERO_AND_1 11 |
| #define BO_IF 12 |
| #define BO_IF_1 13 |
| #define BO_IF_2 14 |
| #define BO_IF_3 15 |
| #define BO_dCTR_NZERO 16 |
| #define BO_dCTR_NZERO_1 17 |
| #define BO_dCTR_ZERO 18 |
| #define BO_dCTR_ZERO_1 19 |
| #define BO_ALWAYS 20 |
| #define BO_ALWAYS_1 21 |
| #define BO_ALWAYS_2 22 |
| #define BO_ALWAYS_3 23 |
| #define BO_dCTR_NZERO_8 24 |
| #define BO_dCTR_NZERO_9 25 |
| #define BO_dCTR_ZERO_8 26 |
| #define BO_dCTR_ZERO_9 27 |
| #define BO_ALWAYS_8 28 |
| #define BO_ALWAYS_9 29 |
| #define BO_ALWAYS_10 30 |
| #define BO_ALWAYS_11 31 |
| /* CRn_*: bit numbers 0-31, four per condition-register field (field * 4 + bit); then the TO_* masks, the CR field numbers in both spellings, and VRsave. */ |
| #define CR0_LT 0 |
| #define CR0_GT 1 |
| #define CR0_EQ 2 |
| #define CR0_SO 3 |
| #define CR1_FX 4 |
| #define CR1_FEX 5 |
| #define CR1_VX 6 |
| #define CR1_OX 7 |
| #define CR2_LT 8 |
| #define CR2_GT 9 |
| #define CR2_EQ 10 |
| #define CR2_SO 11 |
| #define CR3_LT 12 |
| #define CR3_GT 13 |
| #define CR3_EQ 14 |
| #define CR3_SO 15 |
| #define CR4_LT 16 |
| #define CR4_GT 17 |
| #define CR4_EQ 18 |
| #define CR4_SO 19 |
| #define CR5_LT 20 |
| #define CR5_GT 21 |
| #define CR5_EQ 22 |
| #define CR5_SO 23 |
| #define CR6_LT 24 |
| #define CR6_GT 25 |
| #define CR6_EQ 26 |
| #define CR6_SO 27 |
| #define CR7_LT 28 |
| #define CR7_GT 29 |
| #define CR7_EQ 30 |
| #define CR7_SO 31 |
| #define TO_LT 16 |
| #define TO_GT 8 |
| #define TO_EQ 4 |
| #define TO_LLT 2 |
| #define TO_LGT 1 |
| #define CR0 0 |
| #define CR1 1 |
| #define CR2 2 |
| #define CR3 3 |
| #define CR4 4 |
| #define CR5 5 |
| #define CR6 6 |
| #define CR7 7 |
| #define cr0 0 |
| #define cr1 1 |
| #define cr2 2 |
| #define cr3 3 |
| #define cr4 4 |
| #define cr5 5 |
| #define cr6 6 |
| #define cr7 7 |
| #define VRsave 256 |
| |
| #endif |
| /* CTR is the number 9 (presumably the count register's SPR number -- TODO confirm); SP is an alias for r1. */ |
| #define CTR 9 |
| #define SP r1 |
| /* On 64-bit builds these word-sized mnemonics are redefined to their doubleword forms. */ |
| #ifdef __64BIT__ |
| #define slwi sldi |
| #define cmpwi cmpdi |
| #define srawi sradi |
| #define mullw mulld |
| #endif |
| /* REALNAME is the symbol emitted by PROLOGUE: ASMFNAME when F_INTERFACE is defined, ASMNAME otherwise. */ |
| #ifndef F_INTERFACE |
| #define REALNAME ASMNAME |
| #else |
| #define REALNAME ASMFNAME |
| #endif |
| /* Per-OS PROLOGUE / EPILOGUE / PROFCODE definitions; all of them are skipped when NEEDPARAM is defined. */ |
| #if defined(ASSEMBLER) && !defined(NEEDPARAM) |
| /* Linux. 32-bit: a global function symbol REALNAME in .text. 64-bit: a descriptor for REALNAME in .opd plus the code entry point .REALNAME. */ |
| #ifdef OS_LINUX |
| #ifndef __64BIT__ |
| #define PROLOGUE \ |
| .section .text;\ |
| .align 6;\ |
| .globl REALNAME;\ |
| .type REALNAME, @function;\ |
| REALNAME: |
| #define EPILOGUE .size REALNAME, .-REALNAME |
| #else |
| #define PROLOGUE \ |
| .section .text;\ |
| .align 5;\ |
| .globl REALNAME;\ |
| .section ".opd","aw";\ |
| .align 3;\ |
| REALNAME:;\ |
| .quad .REALNAME, .TOC.@tocbase, 0;\ |
| .previous;\ |
| .size REALNAME, 24;\ |
| .type .REALNAME, @function;\ |
| .globl .REALNAME;\ |
| .REALNAME: |
| #define EPILOGUE \ |
| .long 0 ; \ |
| .byte 0,0,0,1,128,0,0,0 ; \ |
| .size .REALNAME, .-.REALNAME; \ |
| .section .note.GNU-stack,"",@progbits |
| #endif |
| /* PROFCODE with PROFILE: the 32-bit form saves LR at 4(SP), loads the address of .LP3 into r0 and calls _mcount; the 64-bit form spills r3-r10, f3 and f4 into a 256-byte frame around the call to ._mcount. Without PROFILE it expands to nothing. */ |
| #ifdef PROFILE |
| #ifndef __64BIT__ |
| #define PROFCODE ;\ |
| .section ".data";\ |
| .align 2;\ |
| .LP3:;\ |
| .long 0;\ |
| .section ".text";\ |
| mflr r0;\ |
| stw r0, 4(SP);\ |
| lis r12, .LP3@ha;\ |
| la r0, .LP3@l(r12);\ |
| bl _mcount;\ |
| lwz r0, 4(SP);\ |
| mtlr r0 |
| #else |
| #define PROFCODE \ |
| .globl _mcount; \ |
| mflr r0; \ |
| std r0, 16(SP); \ |
| mr r11, SP; \ |
| addi SP, SP, -256; \ |
| std r11, 0(SP); \ |
| std r3, 128(SP); \ |
| std r4, 136(SP); \ |
| std r5, 144(SP); \ |
| std r6, 152(SP); \ |
| std r7, 160(SP); \ |
| std r8, 168(SP); \ |
| std r9, 176(SP); \ |
| std r10, 184(SP); \ |
| stfd f3, 192(SP); \ |
| stfd f4, 200(SP); \ |
| bl ._mcount; \ |
| nop; \ |
| ld r3, 128(SP);\ |
| ld r4, 136(SP);\ |
| ld r5, 144(SP);\ |
| ld r6, 152(SP);\ |
| ld r7, 160(SP);\ |
| ld r8, 168(SP);\ |
| ld r9, 176(SP);\ |
| ld r10, 184(SP);\ |
| lfd f3, 192(SP);\ |
| lfd f4, 200(SP);\ |
| addi SP, SP, 256;\ |
| ld r0, 16(SP);\ |
| mtlr r0 |
| #endif |
| #else |
| #define PROFCODE |
| #endif |
| |
| #endif /* OS_LINUX */ |
| |
| /* |
|  * AIX: PROLOGUE emits a global .REALNAME entry in the .text[PR] csect; |
|  * EPILOGUE places the _section_.text label and stores its address in |
|  * .data[RW] (.long on 32-bit builds, .llong on 64-bit builds). |
|  * PROFCODE is empty on AIX. |
|  * Tested with #ifdef, like the other OS_AIX checks in this header, so an |
|  * OS_AIX macro defined without a value is accepted too. |
|  */ |
| #ifdef OS_AIX |
| #ifndef __64BIT__ |
| #define PROLOGUE \ |
| .machine "any";\ |
| .globl .REALNAME;\ |
| .csect .text[PR],5;\ |
| .REALNAME:; |
| |
| #define EPILOGUE \ |
| _section_.text:;\ |
| .csect .data[RW],4;\ |
| .long _section_.text; |
| |
| #else |
| |
| #define PROLOGUE \ |
| .machine "any";\ |
| .globl .REALNAME;\ |
| .csect .text[PR], 5;\ |
| .REALNAME:; |
| |
| #define EPILOGUE \ |
| _section_.text:;\ |
| .csect .data[RW],4;\ |
| .llong _section_.text; |
| #endif |
| |
| #define PROFCODE |
| |
| #endif /* OS_AIX */ |
| /* Darwin: PROLOGUE is an assembler .macro; the 32-bit and 64-bit versions differ only in the .machine directive (ppc vs ppc64). */ |
| #ifdef OS_DARWIN |
| #ifndef __64BIT__ |
| .macro PROLOGUE |
| .section __TEXT,__text,regular,pure_instructions |
| .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 |
| .machine ppc |
| .text |
| .align 4 |
| .globl REALNAME |
| REALNAME: |
| .endmacro |
| #else |
| .macro PROLOGUE |
| .section __TEXT,__text,regular,pure_instructions |
| .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 |
| .machine ppc64 |
| .text |
| .align 4 |
| .globl REALNAME |
| REALNAME: |
| .endmacro |
| #endif |
| /* Without PROFILE, PROFCODE is empty and EPILOGUE is just .subsections_via_symbols. */ |
| #ifndef PROFILE |
| #define PROFCODE |
| #define EPILOGUE .subsections_via_symbols |
| #else |
| #ifndef __64BIT__ |
| /* 32-bit PROFCODE: save LR at 8(SP), open a 64-byte frame, spill r3-r10, f1 and f2, call the mcount stub with the saved LR in r3, then restore everything. */ |
| .macro PROFCODE |
| mflr r0 |
| stw r0, 8(SP) |
| addi SP, SP, -64 |
| stw SP, 0(SP) |
| stw r3, 12(SP) |
| stw r4, 16(SP) |
| stw r5, 20(SP) |
| stw r6, 24(SP) |
| stw r7, 28(SP) |
| stw r8, 32(SP) |
| stw r9, 36(SP) |
| stw r10, 40(SP) |
| stfd f1, 48(SP) |
| stfd f2, 56(SP) |
| mr r3, r0 |
| bl Lmcount$stub |
| nop |
| lwz r3, 12(SP) |
| lwz r4, 16(SP) |
| lwz r5, 20(SP) |
| lwz r6, 24(SP) |
| lwz r7, 28(SP) |
| lwz r8, 32(SP) |
| lwz r9, 36(SP) |
| lwz r10, 40(SP) |
| lfd f1, 48(SP) |
| lfd f2, 56(SP) |
| addi SP, SP, 64 |
| lwz r0, 8(SP) |
| mtlr r0 |
| .endmacro |
| /* 32-bit EPILOGUE: emits the Lmcount$stub code and the Lmcount$lazy_ptr slot that PROFCODE's call goes through. */ |
| .macro EPILOGUE |
| .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 |
| .align 5 |
| Lmcount$stub: |
| .indirect_symbol mcount |
| mflr r0 |
| bcl 20,31,L00000000001$spb |
| L00000000001$spb: |
| mflr r11 |
| addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb) |
| mtlr r0 |
| lwzu r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11) |
| mtctr r12 |
| bctr |
| .lazy_symbol_pointer |
| Lmcount$lazy_ptr: |
| .indirect_symbol mcount |
| .long dyld_stub_binding_helper |
| .subsections_via_symbols |
| .endmacro |
| |
| #else |
| /* |
|  * 64-bit Darwin profiling hook. Saves LR at 16(SP), opens a 128-byte |
|  * frame, spills r3-r10, f1 and f2, calls the mcount stub with the saved |
|  * LR in r3, then reloads every spilled register and restores SP and LR. |
|  * Every reload uses the same offset as its matching store; f2 lives at |
|  * 96(SP). |
|  */ |
| .macro PROFCODE |
| mflr r0 |
| std r0, 16(SP) |
| addi SP, SP, -128 |
| std SP, 0(SP) |
| std r3, 24(SP) |
| std r4, 32(SP) |
| std r5, 40(SP) |
| std r6, 48(SP) |
| std r7, 56(SP) |
| std r8, 64(SP) |
| std r9, 72(SP) |
| std r10, 80(SP) |
| stfd f1, 88(SP) |
| stfd f2, 96(SP) |
| mr r3, r0 |
| bl Lmcount$stub |
| nop |
| ld r3, 24(SP) |
| ld r4, 32(SP) |
| ld r5, 40(SP) |
| ld r6, 48(SP) |
| ld r7, 56(SP) |
| ld r8, 64(SP) |
| ld r9, 72(SP) |
| ld r10, 80(SP) |
| lfd f1, 88(SP) |
| lfd f2, 96(SP) |
| addi SP, SP, 128 |
| ld r0, 16(SP) |
| mtlr r0 |
| .endmacro |
| /* 64-bit EPILOGUE: emits the Lmcount$stub code and an 8-byte (.quad) Lmcount$lazy_ptr slot, loaded with ld. */ |
| .macro EPILOGUE |
| .data |
| .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 |
| .align 5 |
| Lmcount$stub: |
| .indirect_symbol mcount |
| mflr r0 |
| bcl 20,31,L00000000001$spb |
| L00000000001$spb: |
| mflr r11 |
| addis r11,r11,ha16(Lmcount$lazy_ptr-L00000000001$spb) |
| mtlr r0 |
| ld r12,lo16(Lmcount$lazy_ptr-L00000000001$spb)(r11) |
| mtctr r12 |
| bctr |
| .lazy_symbol_pointer |
| Lmcount$lazy_ptr: |
| .indirect_symbol mcount |
| .quad dyld_stub_binding_helper |
| .subsections_via_symbols |
| .endmacro |
| #endif |
| |
| #endif /* PROFILE */ |
| |
| #endif /* OS_DARWIN */ |
| #endif /* ASSEMBLER && !NEEDPARAM */ |
| |
| #endif /* ASSEMBLER */ |
| /* HALT expands to an mfspr from SPR 1023 into r0 (its purpose is not documented here). */ |
| #define HALT mfspr r0, 1023 |
| /* Linux: PPC440/PPC440FP2 force MAX_CPU_NUMBER to 1; 32-bit non-profiling builds for other CPUs get a fixed START_ADDRESS, all other Linux builds define SEEK_ADDRESS. */ |
| #ifdef OS_LINUX |
| #if defined(PPC440) || defined(PPC440FP2) |
| #undef MAX_CPU_NUMBER |
| #define MAX_CPU_NUMBER 1 |
| #endif |
| #if !defined(__64BIT__) && !defined(PROFILE) && !defined(PPC440) && !defined(PPC440FP2) |
| #define START_ADDRESS (0x0b000000UL) |
| #else |
| #define SEEK_ADDRESS |
| #endif |
| #endif |
| /* AIX: fixed START_ADDRESS on 32-bit builds, SEEK_ADDRESS on 64-bit builds. */ |
| #ifdef OS_AIX |
| #ifndef __64BIT__ |
| #define START_ADDRESS (0xf0000000UL) |
| #else |
| #define SEEK_ADDRESS |
| #endif |
| #endif |
| /* Darwin always defines SEEK_ADDRESS. */ |
| #ifdef OS_DARWIN |
| #define SEEK_ADDRESS |
| #endif |
| /* BUFFER_SIZE is 2 << 20 on PPC440 and 16 << 20 on every other CPU type. */ |
| #if defined(PPC440) |
| #define BUFFER_SIZE ( 2 << 20) |
| #elif defined(PPC440FP2) |
| #define BUFFER_SIZE ( 16 << 20) |
| #else |
| #define BUFFER_SIZE ( 16 << 20) |
| #endif |
| /* PAGESIZE defaults to 4 << 10 unless already defined; HUGE_PAGESIZE is 16 << 20. */ |
| #ifndef PAGESIZE |
| #define PAGESIZE ( 4 << 10) |
| #endif |
| #define HUGE_PAGESIZE (16 << 20) |
| /* BASE_ADDRESS lies BUFFER_SIZE * MAX_CPU_NUMBER below START_ADDRESS; it only expands usefully where START_ADDRESS is defined. */ |
| #define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) |
| /* Fall back to MAP_ANON where MAP_ANONYMOUS is not provided. */ |
| #ifndef MAP_ANONYMOUS |
| #define MAP_ANONYMOUS MAP_ANON |
| #endif |
| #endif /* COMMON_POWER */ |