/*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* 1. Redistributions of source code must retain the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer. */ /* */ /* 2. Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials */ /* provided with the distribution. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* */ /* The views and conclusions contained in the software and */ /* documentation are those of the authors and should not be */ /* interpreted as representing official policies, either expressed */ /* or implied, of The University of Texas at Austin. 
*/
/*********************************************************************/

/*
 * Index-of-extreme kernel (Alpha assembly, run through the C preprocessor).
 *
 * Leaves in $0 the 1-based position of the first element of the strided
 * vector X that compares equal to the largest element of X (the smallest
 * when USE_MIN is defined).  Elements are compared as loaded; no absolute
 * value is taken.  $0 is 0 when N <= 0 or INCX <= 0.
 *
 * Inputs (register assignments come from the #defines below):
 *   N    ($16)  element count
 *   X    ($17)  address of the first element
 *   INCX ($18)  stride as passed by the caller; rewritten in place by
 *               SXADDQ before use.  SXADDQ, LD and SIZE are defined outside
 *               this file -- presumably SXADDQ turns an element stride into
 *               a byte stride and LD/SIZE select the precision; TODO confirm
 *               in common.h.
 *
 * Working registers:
 *   XX   ($19)  copy of the original X, walked by the second pass
 *   $1          loop counter;  $2, $3  argument checks
 *   pass 1:  $f0, $f1, $f10-$f15  eight running extremes
 *            $f20-$f27            loaded elements
 *            $f16-$f19            compare results
 *   pass 2:  $f0                  the extreme found by pass 1
 *            $f10-$f17            loaded elements
 *            $f20-$f27, $f29      compare results
 *
 * Structure:
 *   pass 1 ($L12/$L13/$L16)  finds the extreme value, eight elements per
 *                            iteration plus a one-at-a-time remainder loop.
 *   pass 2 ($L22/$L23/$L40)  rescans from XX and stops at the first element
 *                            equal to that value, counting positions in $0.
 */

#define ASSEMBLER
#include "common.h"
#include "version.h"

#define N	$16
#define X	$17
#define INCX	$18
#define XX	$19

/*
 * CMPLT(a, b), dst sets dst non-zero when b should replace the running
 * extreme a: a < b for the maximum search, b < a when USE_MIN is defined.
 */
#ifndef USE_MIN
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif

/* Not referenced directly anywhere in this file. */
#define STACKSIZE 8 * 8

	PROLOGUE
	PROFCODE

	clr	$0			/* result index = 0 */
	mov	X, XX			/* keep the start address for pass 2 */
	.align 4

	cmplt	$31, N, $2		/* $2 = (0 < N) */
	cmplt	$31, INCX, $3		/* $3 = (0 < INCX) */
	SXADDQ	INCX, $31, INCX		/* rescale INCX in place (macro from outside this file) */
	and	$2, $3, $2		/* $2 = both arguments are positive */

	sra	N, 3, $1		/* $1 = N / 8 = number of full 8-element blocks */
	fclr	$f0
	unop
	beq	$2, $End	# if (n <= 0) or (incx <= 0) return
	.align 4

	LD	$f0, 0 * SIZE(X)	/* seed the running extreme with element 0 */
	unop
	unop
	ble	$1, $L15		/* fewer than 8 elements: remainder loop only */
	.align 4

/*
 * Pass 1 preload.  All eight running extremes start as element 0.
 * $f20 is a copy of $f0 and stands in for element 0 of the first block,
 * so only seven further elements are loaded here.  X ends up pointing
 * at the first element of the second block.
 */
	fmov	$f0, $f1
	addq	X, INCX, X
	fmov	$f0, $f10
	lda	$1, -1($1)		/* one block is consumed by this preload */

	LD	$f21, 0 * SIZE(X)
	fmov	$f0, $f11
	addq	X, INCX, X
	fmov	$f0, $f12

	LD	$f22, 0 * SIZE(X)
	fmov	$f0, $f13
	addq	X, INCX, X
	fmov	$f0, $f14

	LD	$f23, 0 * SIZE(X)
	fmov	$f0, $f15
	addq	X, INCX, X
	fmov	$f0, $f20

	LD	$f24, 0 * SIZE(X)
	addq	X, INCX, X
	LD	$f25, 0 * SIZE(X)
	addq	X, INCX, X
	LD	$f26, 0 * SIZE(X)
	addq	X, INCX, X
	LD	$f27, 0 * SIZE(X)
	addq	X, INCX, X

	/* Start the first four compares; their results are consumed in $L12 or $L13. */
	CMPLT($f0, $f20), $f16
	CMPLT($f1, $f21), $f17
	CMPLT($f10, $f22), $f18
	CMPLT($f11, $f23), $f19

	ble	$1, $L13		/* only one full block: go straight to the drain */
	.align 4

/*
 * Pass 1 main loop, one 8-element block per iteration.
 * Each four-instruction group:
 *   1. fcmovne  commits a compare issued four groups earlier,
 *   2. LD       refills that data register with the next element,
 *   3. CMPLT    starts the compare for another running extreme whose
 *               data register was loaded earlier,
 *   4. addq     steps X to the next element.
 * The instruction order is part of the pipelining; do not reorder.
 */
$L12:
	fcmovne	$f16, $f20, $f0
	LD	$f20, 0 * SIZE(X)
	CMPLT($f12, $f24), $f16
	addq	X, INCX, X

	fcmovne	$f17, $f21, $f1
	LD	$f21, 0 * SIZE(X)
	CMPLT($f13, $f25), $f17
	addq	X, INCX, X

	fcmovne	$f18, $f22, $f10
	LD	$f22, 0 * SIZE(X)
	CMPLT($f14, $f26), $f18
	addq	X, INCX, X

	fcmovne	$f19, $f23, $f11
	LD	$f23, 0 * SIZE(X)
	CMPLT($f15, $f27), $f19
	addq	X, INCX, X

	fcmovne	$f16, $f24, $f12
	LD	$f24, 0 * SIZE(X)
	CMPLT($f0, $f20), $f16
	addq	X, INCX, X

	fcmovne	$f17, $f25, $f13
	LD	$f25, 0 * SIZE(X)
	CMPLT($f1, $f21), $f17
	addq	X, INCX, X

	fcmovne	$f18, $f26, $f14
	LD	$f26, 0 * SIZE(X)
	CMPLT($f10, $f22), $f18
	addq	X, INCX, X

	fcmovne	$f19, $f27, $f15
	LD	$f27, 0 * SIZE(X)
	CMPLT($f11, $f23), $f19
	lda	$1, -1($1)	# i --

	addq	X, INCX, X
	unop
	unop
	bgt	$1,$L12
	.align 4

/*
 * Pass 1 drain.  Commits the compares still in flight for the last loaded
 * block, then folds the eight running extremes pairwise
 * (8 -> 4 -> 2 -> 1) so that $f0 holds the overall extreme of all full
 * blocks.  The first fold compares are interleaved with the last commits.
 */
$L13:
	fcmovne	$f16, $f20, $f0
	CMPLT($f12, $f24), $f16
	fcmovne	$f17, $f21, $f1
	CMPLT($f13, $f25), $f17

	fcmovne	$f18, $f22, $f10
	CMPLT($f14, $f26), $f18
	fcmovne	$f19, $f23, $f11
	CMPLT($f15, $f27), $f19

	fcmovne	$f16, $f24, $f12
	CMPLT($f0, $f1), $f16		/* fold: $f0 with $f1 */
	fcmovne	$f17, $f25, $f13
	CMPLT($f10, $f11), $f17		/* fold: $f10 with $f11 */

	fcmovne	$f18, $f26, $f14
	CMPLT($f12, $f13), $f18		/* fold: $f12 with $f13 */
	fcmovne	$f19, $f27, $f15
	CMPLT($f14, $f15), $f19		/* fold: $f14 with $f15 */

	fcmovne	$f16, $f1, $f0
	fcmovne	$f17, $f11, $f10
	fcmovne	$f18, $f13, $f12
	fcmovne	$f19, $f15, $f14

	CMPLT($f0, $f10), $f16
	CMPLT($f12, $f14), $f17

	fcmovne	$f16, $f10, $f0
	fcmovne	$f17, $f14, $f12

	CMPLT($f0, $f12), $f16
	fcmovne	$f16, $f12, $f0		/* $f0 = extreme over all full blocks */
	.align 4

/*
 * Pass 1 remainder: the last N & 7 elements, one per iteration.
 * When reached directly from the "ble $1, $L15" above, X still points at
 * element 0, so element 0 is compared against itself once more.
 */
$L15:
	and	N, 7, $1		/* $1 = N mod 8 */
	unop
	unop
	ble	$1, $L20
	.align 4

$L16:
	LD	$f20, 0 * SIZE(X)
	addq	X, INCX, X

	CMPLT($f0, $f20), $f16
	fcmovne	$f16, $f20, $f0
	lda	$1, -1($1)	# i --
	bgt	$1, $L16
	.align 4

/*
 * Pass 2: $f0 now holds the extreme.  Rescan from the saved start address
 * XX; $0 is incremented once per element examined, immediately before the
 * branch that tests that element, so it is the 1-based position on exit.
 */
$L20:
	sra	N, 3, $1		/* $1 = number of full 8-element blocks */
	ble	$1, $L40		/* fewer than 8 elements: scan one at a time */
	.align 4

	/* Preload the first block into $f10-$f17. */
	LD	$f10, 0 * SIZE(XX)
	addq	XX, INCX, XX
	LD	$f11, 0 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f12, 0 * SIZE(XX)
	addq	XX, INCX, XX
	LD	$f13, 0 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f14, 0 * SIZE(XX)
	addq	XX, INCX, XX
	LD	$f15, 0 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f16, 0 * SIZE(XX)
	addq	XX, INCX, XX
	LD	$f17, 0 * SIZE(XX)
	addq	XX, INCX, XX

	/* Equality tests for the first four elements; results in $f20-$f23. */
	cmpteq	$f0, $f10, $f20
	cmpteq	$f0, $f11, $f21
	cmpteq	$f0, $f12, $f22
	cmpteq	$f0, $f13, $f23

	lda	$1, -1($1)		/* one block is consumed by this preload */
	ble	$1, $L23		/* only one full block: go to the drain */
	.align 4

/*
 * Pass 2 main loop.  Each five-instruction group loads one element of the
 * NEXT block, starts an equality test for an element loaded earlier, bumps
 * the index, steps XX, and exits through fbne if the element whose turn it
 * is matched.  The fbne order follows element order, which is what makes
 * $0 the position of the first match.
 */
$L22:
	LD	$f10, 0 * SIZE(XX)
	cmpteq	$f0, $f14, $f24
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f20, $End

	LD	$f11, 0 * SIZE(XX)
	cmpteq	$f0, $f15, $f25
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f21, $End

	LD	$f12, 0 * SIZE(XX)
	cmpteq	$f0, $f16, $f26
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f22, $End

	LD	$f13, 0 * SIZE(XX)
	cmpteq	$f0, $f17, $f27
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f23, $End

	LD	$f14, 0 * SIZE(XX)
	cmpteq	$f0, $f10, $f20
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f24, $End

	LD	$f15, 0 * SIZE(XX)
	cmpteq	$f0, $f11, $f21
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f25, $End

	/* $f26 was computed from the old $f16 above, so $f16 may be reloaded here. */
	LD	$f16, 0 * SIZE(XX)
	lda	$1, -1($1)	# i --
	cmpteq	$f0, $f12, $f22
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f26, $End

	LD	$f17, 0 * SIZE(XX)
	cmpteq	$f0, $f13, $f23
	lda	$0, 1($0)
	addq	XX, INCX, XX
	fbne	$f27, $End

	bgt	$1, $L22
	.align 4

/*
 * Pass 2 drain: test the eight elements of the last loaded block without
 * loading any more.  If none of them matches, control falls through into
 * the single-element scan at $L40.
 */
$L23:
	lda	$0, 1($0)
	cmpteq	$f0, $f14, $f24
	unop
	fbne	$f20, $End

	lda	$0, 1($0)
	cmpteq	$f0, $f15, $f25
	unop
	fbne	$f21, $End

	lda	$0, 1($0)
	cmpteq	$f0, $f16, $f26
	unop
	fbne	$f22, $End

	lda	$0, 1($0)
	cmpteq	$f0, $f17, $f27
	unop
	fbne	$f23, $End

	lda	$0, 1($0)
	fbne	$f24, $End
	lda	$0, 1($0)
	fbne	$f25, $End
	lda	$0, 1($0)
	fbne	$f26, $End
	lda	$0, 1($0)
	fbne	$f27, $End
	.align 4

/*
 * Pass 2 tail: one element per iteration, starting at XX.
 * This loop has no element counter; its only exit is the fbne below.
 * NOTE(review): it therefore relies on some remaining element comparing
 * equal to $f0.  $f0 is always a copy of a loaded element, so that
 * presumably holds for ordinary values; what happens when cmpteq never
 * reports a match (for example NaN data) is not visible here -- confirm
 * before relying on it, since the scan would continue past the vector.
 */
$L40:
	LD	$f20, 0 * SIZE(XX)
	addq	XX, INCX, XX

	cmpteq	$f0, $f20, $f29

	lda	$0, 1($0)
	fbne	$f29, $End
	br	$31, $L40
	.align 4

/* Common exit: $0 holds the result (0 if the arguments were rejected). */
$End:
	ret

	EPILOGUE