|
shun-iwasawa |
82a8f5 |
/*
|
|
shun-iwasawa |
82a8f5 |
* MIPS DSPr2 optimizations for libjpeg-turbo
|
|
shun-iwasawa |
82a8f5 |
*
|
|
shun-iwasawa |
82a8f5 |
* Copyright (C) 2013, MIPS Technologies, Inc., California.
|
|
shun-iwasawa |
82a8f5 |
* Copyright (C) 2018, Matthieu Darbois.
|
|
shun-iwasawa |
82a8f5 |
* All Rights Reserved.
|
|
shun-iwasawa |
82a8f5 |
* Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
|
|
shun-iwasawa |
82a8f5 |
* Darko Laus (darko.laus@imgtec.com)
|
|
shun-iwasawa |
82a8f5 |
* This software is provided 'as-is', without any express or implied
|
|
shun-iwasawa |
82a8f5 |
* warranty. In no event will the authors be held liable for any damages
|
|
shun-iwasawa |
82a8f5 |
* arising from the use of this software.
|
|
shun-iwasawa |
82a8f5 |
*
|
|
shun-iwasawa |
82a8f5 |
* Permission is granted to anyone to use this software for any purpose,
|
|
shun-iwasawa |
82a8f5 |
* including commercial applications, and to alter it and redistribute it
|
|
shun-iwasawa |
82a8f5 |
* freely, subject to the following restrictions:
|
|
shun-iwasawa |
82a8f5 |
*
|
|
shun-iwasawa |
82a8f5 |
* 1. The origin of this software must not be misrepresented; you must not
|
|
shun-iwasawa |
82a8f5 |
* claim that you wrote the original software. If you use this software
|
|
shun-iwasawa |
82a8f5 |
* in a product, an acknowledgment in the product documentation would be
|
|
shun-iwasawa |
82a8f5 |
* appreciated but is not required.
|
|
shun-iwasawa |
82a8f5 |
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
shun-iwasawa |
82a8f5 |
* misrepresented as being the original software.
|
|
shun-iwasawa |
82a8f5 |
* 3. This notice may not be removed or altered from any source distribution.
|
|
shun-iwasawa |
82a8f5 |
*/
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * Symbolic names for the general-purpose registers $0-$31.
 * Note that fp and s8 are two names for the same register ($30).
 */
#define zero  $0
#define AT    $1
#define v0    $2
#define v1    $3
#define a0    $4
#define a1    $5
#define a2    $6
#define a3    $7
#define t0    $8
#define t1    $9
#define t2    $10
#define t3    $11
#define t4    $12
#define t5    $13
#define t6    $14
#define t7    $15
#define s0    $16
#define s1    $17
#define s2    $18
#define s3    $19
#define s4    $20
#define s5    $21
#define s6    $22
#define s7    $23
#define t8    $24
#define t9    $25
#define k0    $26
#define k1    $27
#define gp    $28
#define sp    $29
#define fp    $30
#define s8    $30
#define ra    $31
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/* Symbolic names for the floating-point registers $f0-$f31. */
#define f0    $f0
#define f1    $f1
#define f2    $f2
#define f3    $f3
#define f4    $f4
#define f5    $f5
#define f6    $f6
#define f7    $f7
#define f8    $f8
#define f9    $f9
#define f10   $f10
#define f11   $f11
#define f12   $f12
#define f13   $f13
#define f14   $f14
#define f15   $f15
#define f16   $f16
#define f17   $f17
#define f18   $f18
#define f19   $f19
#define f20   $f20
#define f21   $f21
#define f22   $f22
#define f23   $f23
#define f24   $f24
#define f25   $f25
#define f26   $f26
#define f27   $f27
#define f28   $f28
#define f29   $f29
#define f30   $f30
#define f31   $f31
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * HIDDEN_SYMBOL - expands to a ".hidden symbol;" directive when __ELF__ is
 * defined, and to nothing otherwise.
 */
#if defined(__ELF__)
#define HIDDEN_SYMBOL(symbol)  .hidden symbol;
#else
#define HIDDEN_SYMBOL(symbol)
#endif
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * LEAF_MIPS32R2 - open a leaf routine for MIPS32r2
 *
 * Emits, in order: the .globl and (via HIDDEN_SYMBOL) visibility directives,
 * a .align 2, the function .type and .ent markers, the entry label itself,
 * and a zero-sized .frame based on sp with ra as the return register.  The
 * current assembler options are then pushed and the routine body is
 * assembled with arch = mips32r2, noreorder and noat in effect.  The END
 * macro issues the matching ".set pop".
 */
#define LEAF_MIPS32R2(symbol) \
    .globl symbol; \
    HIDDEN_SYMBOL(symbol) \
    .align 2; \
    .type symbol, @function; \
    .ent symbol, 0; \
symbol: \
    .frame sp, 0, ra; \
    .set push; \
    .set arch = mips32r2; \
    .set noreorder; \
    .set noat;
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * LEAF_DSPR2 - open a leaf routine for MIPS DSPr2
 *
 * Same as LEAF_MIPS32R2, followed by ".set dspr2" so that the routine body
 * is assembled with the DSPr2 option enabled.
 */
#define LEAF_DSPR2(symbol) \
    LEAF_MIPS32R2(symbol) \
    .set dspr2;
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * END - close a routine
 *
 * Pops the assembler options saved with ".set push", emits the .end marker
 * for the routine and records its size as the distance from its label to
 * the current location.
 */
#define END(function) \
    .set pop; \
    .end function; \
    .size function, .-function
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * Assembly-time check that the stack offset is large enough for
 * storing/restoring regs_num registers to/from the stack.  Storing the
 * registers takes regs_num * 4 bytes.  Since the MIPS ABI allows usage of
 * the first 16 bytes of the stack frame (this space is reserved for the
 * input arguments of the function, which are already held in a0-a3), those
 * 16 bytes are counted as available, so the stack offset only has to
 * satisfy
 *
 *     stack_offset >= regs_num * 4 - 16
 *
 * Assembly is aborted with an error otherwise.
 */
.macro CHECK_STACK_OFFSET regs_num, stack_offset
    .if \stack_offset < (\regs_num * 4 - 16)
    .error "Stack offset too small."
    .endif
.endm
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * Saves a set of registers on the stack.  The maximum number of registers
 * that can be saved is limited to 14 (a0-a3, v0-v1 and s0-s7).
 *
 * stack_offset is the number of bytes subtracted from the stack pointer (sp)
 * before the registers are stored, in order to provide enough space on the
 * stack.  It must be non-negative and a multiple of 4, and it must be big
 * enough, as described by the CHECK_STACK_OFFSET macro.  r1 is mandatory;
 * r2-r14 are optional and are skipped when left at their default of 0.
 * Register rN is stored at byte offset (N - 1) * 4 from the adjusted sp.
 *
 * This macro is intended to be used in combination with the
 * RESTORE_REGS_FROM_STACK macro.  Example:
 *     SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *     RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
                          r2 = 0, r3 = 0, r4 = 0, \
                          r5 = 0, r6 = 0, r7 = 0, \
                          r8 = 0, r9 = 0, r10 = 0, \
                          r11 = 0, r12 = 0, r13 = 0, \
                          r14 = 0
    /* Reject a negative offset or one that is not a multiple of 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
    .error "Stack offset must be non-negative and a multiple of 4."
    .endif
    /* Make room on the stack before anything is stored. */
    .if \stack_offset != 0
    addiu           sp, sp, -\stack_offset
    .endif
    /*
     * The first four slots (offsets 0-12) are stored without a size check;
     * CHECK_STACK_OFFSET treats 16 bytes as always available.
     */
    sw              \r1, 0(sp)
    .if \r2 != 0
    sw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    sw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    sw              \r4, 12(sp)
    .endif
    /* From the fifth register on, the offset is validated for each slot. */
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    sw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    sw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    sw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    sw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    sw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    sw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    sw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    sw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    sw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    sw              \r14, 52(sp)
    .endif
.endm
|
|
shun-iwasawa |
82a8f5 |
|
|
shun-iwasawa |
82a8f5 |
/*
 * Restores a set of registers from the stack.  The maximum number of
 * registers that can be restored is limited to 14 (a0-a3, v0-v1 and s0-s7).
 *
 * stack_offset is the number of bytes added to the stack pointer (sp) after
 * the registers have been restored.  It must be non-negative and a multiple
 * of 4, and it must be big enough, as described by the CHECK_STACK_OFFSET
 * macro.  r1 is mandatory; r2-r14 are optional and are skipped when left at
 * their default of 0.  Register rN is loaded from byte offset (N - 1) * 4
 * from sp.
 *
 * This macro is intended to be used in combination with the
 * SAVE_REGS_ON_STACK macro, with the same arguments.  Example:
 *     SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *     RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
                               r2 = 0, r3 = 0, r4 = 0, \
                               r5 = 0, r6 = 0, r7 = 0, \
                               r8 = 0, r9 = 0, r10 = 0, \
                               r11 = 0, r12 = 0, r13 = 0, \
                               r14 = 0
    /* Reject a negative offset or one that is not a multiple of 4. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
    .error "Stack offset must be non-negative and a multiple of 4."
    .endif
    /*
     * The first four slots (offsets 0-12) are loaded without a size check;
     * CHECK_STACK_OFFSET treats 16 bytes as always available.
     */
    lw              \r1, 0(sp)
    .if \r2 != 0
    lw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    lw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    lw              \r4, 12(sp)
    .endif
    /* From the fifth register on, the offset is validated for each slot. */
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    lw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    lw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    lw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    lw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    lw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    lw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    lw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    lw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    lw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    lw              \r14, 52(sp)
    .endif
    /* Release the stack space only after every register has been reloaded. */
    .if \stack_offset != 0
    addiu           sp, sp, \stack_offset
    .endif
.endm
|