kusano fc6ab3
/*
kusano fc6ab3
 * inffast.S is a hand tuned assembler version of:
kusano fc6ab3
 *
kusano fc6ab3
 * inffast.c -- fast decoding
kusano fc6ab3
 * Copyright (C) 1995-2003 Mark Adler
kusano fc6ab3
 * For conditions of distribution and use, see copyright notice in zlib.h
kusano fc6ab3
 *
kusano fc6ab3
 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
kusano fc6ab3
 * Please use the copyright conditions above.
kusano fc6ab3
 *
kusano fc6ab3
 * This version (Jan-23-2003) of inflate_fast was coded and tested under
kusano fc6ab3
 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution.  On that
kusano fc6ab3
 * machine, I found that gzip style archives decompressed about 20% faster than
kusano fc6ab3
 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version.  Your results will
kusano fc6ab3
 * depend on how large of a buffer is used for z_stream.next_in & next_out
kusano fc6ab3
 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
kusano fc6ab3
 * stream processing I/O and crc32/adler32.  In my case, this routine used
kusano fc6ab3
 * 70% of the cpu time and crc32 used 20%.
kusano fc6ab3
 *
kusano fc6ab3
 * I am confident that this version will work in the general case, but I have
kusano fc6ab3
 * not tested a wide variety of datasets or a wide variety of platforms.
kusano fc6ab3
 *
kusano fc6ab3
 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
kusano fc6ab3
 * It should be a runtime flag instead of compile time flag...
kusano fc6ab3
 *
kusano fc6ab3
 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
kusano fc6ab3
 * With -DUSE_MMX, only MMX code is compiled.  With -DNO_MMX, only non-MMX code
kusano fc6ab3
 * is compiled.  Without either option, runtime detection is enabled.  Runtime
kusano fc6ab3
 * detection should work on all modern cpus and the recommended algorithm (flip
kusano fc6ab3
 * ID bit on eflags and then use the cpuid instruction) is used in many
kusano fc6ab3
 * multimedia applications.  Tested under win2k with gcc-2.95 and gas-2.12
kusano fc6ab3
 * distributed with cygwin3.  Compiling with gcc-2.95 -c inffast.S -o
kusano fc6ab3
 * inffast.obj generates a COFF object which can then be linked with MSVC++
kusano fc6ab3
 * compiled code.  Tested under FreeBSD 4.7 with gcc-2.95.
kusano fc6ab3
 *
kusano fc6ab3
 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
kusano fc6ab3
 * slower than compiler generated code).  Adjusted cpuid check to use the MMX
kusano fc6ab3
 * code only for Pentiums < P4 until I have more data on the P4.  Speed
kusano fc6ab3
 * improvement is only about 15% on the Athlon when compared with code generated
kusano fc6ab3
 * with MSVC++.  Not sure yet, but I think the P4 will also be slower using the
kusano fc6ab3
 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
kusano fc6ab3
 * have less latency than MMX ops.  Added code to buffer the last 11 bytes of
kusano fc6ab3
 * the input stream since the MMX code grabs bits in chunks of 32, which
kusano fc6ab3
 * differs from the inffast.c algorithm.  I don't think there would have been
kusano fc6ab3
 * read overruns where a page boundary was crossed (a segfault), but there
kusano fc6ab3
 * could have been overruns when next_in ends on unaligned memory (uninitialized
kusano fc6ab3
 * memory read).
kusano fc6ab3
 *
kusano fc6ab3
 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX.  I created a C
kusano fc6ab3
 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
kusano fc6ab3
 * structure offsets which are hard coded in this file.  This was last tested
kusano fc6ab3
 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
kusano fc6ab3
 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
kusano fc6ab3
 * http://www.charm.net/~christop/zlib/
kusano fc6ab3
 */
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * if you have underscore linking problems (_inflate_fast undefined), try
kusano fc6ab3
 * using -DGAS_COFF
kusano fc6ab3
 */
kusano fc6ab3
/*
 * Object-format selection.
 *
 * The builder may predefine GAS_COFF or GAS_ELF on the command line.  When
 * neither is given, GAS_COFF is chosen if WIN32 or __CYGWIN__ is defined and
 * GAS_ELF is chosen otherwise.
 */
#if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
kusano fc6ab3
kusano fc6ab3
#if defined( WIN32 ) || defined( __CYGWIN__ )
kusano fc6ab3
#define GAS_COFF /* windows object format */
kusano fc6ab3
#else
kusano fc6ab3
#define GAS_ELF
kusano fc6ab3
#endif
kusano fc6ab3
kusano fc6ab3
#endif /* ! GAS_COFF && ! GAS_ELF */
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/*
 * For COFF builds the two exported names are rewritten by the preprocessor,
 * so every later use of inflate_fast / inflate_fast_use_mmx in this file
 * (labels, .globl, .def, memory operands) refers to the underscore-prefixed
 * symbol.
 */
#if defined( GAS_COFF )
kusano fc6ab3
kusano fc6ab3
/* coff externals have underscores */
kusano fc6ab3
#define inflate_fast _inflate_fast
kusano fc6ab3
#define inflate_fast_use_mmx _inflate_fast_use_mmx
kusano fc6ab3
kusano fc6ab3
#endif /* GAS_COFF */
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/* Source file name recorded in the object file. */
.file "inffast.S"
kusano fc6ab3
kusano fc6ab3
/* Export the entry point (renamed to _inflate_fast when GAS_COFF is set). */
.globl inflate_fast
kusano fc6ab3
kusano fc6ab3
/*
 * Error message strings.  They are emitted into .text, each one preceded by
 * an .align whose padding bytes are 0.  .string appends a terminating NUL.
 *
 * NOTE(review): the code that references these labels is outside the part of
 * the file visible here -- presumably the error exits store one of these
 * addresses in strm->msg (see msg_strm); confirm against the rest of the file.
 */
.text
kusano fc6ab3
.align 4,0
kusano fc6ab3
.L_invalid_literal_length_code_msg:
kusano fc6ab3
.string "invalid literal/length code"
kusano fc6ab3
kusano fc6ab3
.align 4,0
kusano fc6ab3
.L_invalid_distance_code_msg:
kusano fc6ab3
.string "invalid distance code"
kusano fc6ab3
kusano fc6ab3
.align 4,0
kusano fc6ab3
.L_invalid_distance_too_far_msg:
kusano fc6ab3
.string "invalid distance too far back"
kusano fc6ab3
kusano fc6ab3
/*
 * Bit-mask lookup table: 33 entries of 32 bits each, for N = 0 .. 32.
 *
 * The table is assembled unless NO_MMX is defined, i.e. for USE_MMX builds
 * and for builds with run-time MMX detection.  The non-MMX decode loop does
 * not read it; it builds each mask on the fly with a shift and a decrement.
 *
 * NOTE(review): the consumer of this table is outside the part of the file
 * visible here -- presumably the MMX decode path; confirm.
 */
#if ! defined( NO_MMX )
kusano fc6ab3
.align 4,0
kusano fc6ab3
.L_mask: /* mask[N] = ( 1 << N ) - 1 */
kusano fc6ab3
.long 0
kusano fc6ab3
.long 1
kusano fc6ab3
.long 3
kusano fc6ab3
.long 7
kusano fc6ab3
.long 15
kusano fc6ab3
.long 31
kusano fc6ab3
.long 63
kusano fc6ab3
.long 127
kusano fc6ab3
.long 255
kusano fc6ab3
.long 511
kusano fc6ab3
.long 1023
kusano fc6ab3
.long 2047
kusano fc6ab3
.long 4095
kusano fc6ab3
.long 8191
kusano fc6ab3
.long 16383
kusano fc6ab3
.long 32767
kusano fc6ab3
.long 65535
kusano fc6ab3
.long 131071
kusano fc6ab3
.long 262143
kusano fc6ab3
.long 524287
kusano fc6ab3
.long 1048575
kusano fc6ab3
.long 2097151
kusano fc6ab3
.long 4194303
kusano fc6ab3
.long 8388607
kusano fc6ab3
.long 16777215
kusano fc6ab3
.long 33554431
kusano fc6ab3
.long 67108863
kusano fc6ab3
.long 134217727
kusano fc6ab3
.long 268435455
kusano fc6ab3
.long 536870911
kusano fc6ab3
.long 1073741823
kusano fc6ab3
.long 2147483647
kusano fc6ab3
.long 4294967295
kusano fc6ab3
#endif /* NO_MMX */
kusano fc6ab3
kusano fc6ab3
.text
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * struct z_stream offsets, in zlib.h
kusano fc6ab3
 */
kusano fc6ab3
/*
 * Byte displacements of z_stream members, used as disp(%reg) with the strm
 * pointer.  These are hard coded (see the Mar-13-2003 note in the file
 * header), so they must be re-checked whenever the z_stream layout or the
 * target ABI changes.
 *
 * NOTE(review): the 4-byte spacing suggests a 32-bit layout with 4-byte
 * pointers and ints; offsets 8 and 20 are skipped -- presumably total_in and
 * total_out; confirm against zlib.h.
 */
#define next_in_strm   0   /* strm->next_in */
kusano fc6ab3
#define avail_in_strm  4   /* strm->avail_in */
kusano fc6ab3
#define next_out_strm  12  /* strm->next_out */
kusano fc6ab3
#define avail_out_strm 16  /* strm->avail_out */
kusano fc6ab3
#define msg_strm       24  /* strm->msg */
kusano fc6ab3
#define state_strm     28  /* strm->state */
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * struct inflate_state offsets, in inflate.h
kusano fc6ab3
 */
kusano fc6ab3
/*
 * Byte displacements of inflate_state members, used as disp(%reg) with the
 * state pointer loaded from strm->state.  Like the z_stream offsets these are
 * hard coded and must track the struct definition in inflate.h.
 *
 * NOTE(review): the file header says this was last tested with zlib-1.2.0;
 * verify every value against the inflate.h actually being built with.
 */
#define mode_state     0   /* state->mode */
kusano fc6ab3
#define wsize_state    32  /* state->wsize */
kusano fc6ab3
#define write_state    40  /* state->write */
kusano fc6ab3
#define window_state   44  /* state->window */
kusano fc6ab3
#define hold_state     48  /* state->hold */
kusano fc6ab3
#define bits_state     52  /* state->bits */
kusano fc6ab3
#define lencode_state  68  /* state->lencode */
kusano fc6ab3
#define distcode_state 72  /* state->distcode */
kusano fc6ab3
#define lenbits_state  76  /* state->lenbits */
kusano fc6ab3
#define distbits_state 80  /* state->distbits */
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * inflate_fast's activation record
kusano fc6ab3
 */
kusano fc6ab3
/*
 * All three values are displacements from %esp as it stands after the
 * prologue of inflate_fast.  The prologue pushes %edi, %esi, %ebp, %ebx and
 * eflags (5 * 4 = 20 bytes) and then subtracts local_var_size; together with
 * the 4-byte return address that puts the first argument at
 * local_var_size + 24.  Changing the set of pushed registers or
 * local_var_size requires updating strm_sp and start_sp by hand.
 */
#define local_var_size 64 /* how much local space for vars */
kusano fc6ab3
#define strm_sp        88 /* first arg: z_stream * (local_var_size + 24) */
kusano fc6ab3
#define start_sp       92 /* second arg: unsigned int (local_var_size + 28) */
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * offsets for local vars on stack
kusano fc6ab3
 */
kusano fc6ab3
/*
 * Layout of the local_var_size (64) byte local area, as displacements from
 * %esp after the prologue.  Every slot is 4 bytes except buf, which spans
 * offsets 28..39; the highest slot (out) ends at offset 63.
 *
 * These are plain preprocessor macros with very common names (in, out, end,
 * len, write, ...), so any later use of one of these words as a bare token in
 * this file is replaced by its number.
 */
#define out            60  /* unsigned char* */
kusano fc6ab3
#define window         56  /* unsigned char* */
kusano fc6ab3
#define wsize          52  /* unsigned int */
kusano fc6ab3
#define write          48  /* unsigned int */
kusano fc6ab3
#define in             44  /* unsigned char* */
kusano fc6ab3
#define beg            40  /* unsigned char* */
kusano fc6ab3
#define buf            28  /* char[ 12 ] */
kusano fc6ab3
#define len            24  /* unsigned int */
kusano fc6ab3
#define last           20  /* unsigned char* */
kusano fc6ab3
#define end            16  /* unsigned char* */
kusano fc6ab3
#define dcode          12  /* code* */
kusano fc6ab3
#define lcode           8  /* code* */
kusano fc6ab3
#define dmask           4  /* unsigned int */
kusano fc6ab3
#define lmask           0  /* unsigned int */
kusano fc6ab3
kusano fc6ab3
/*
kusano fc6ab3
 * typedef enum inflate_mode consts, in inflate.h
kusano fc6ab3
 */
kusano fc6ab3
/*
 * Numeric values copied by hand from the inflate_mode enum; they must be kept
 * in step with inflate.h.
 *
 * NOTE(review): neither constant is used in the part of the file visible
 * here -- presumably the exit paths store them into state->mode (mode_state)
 * on end-of-block and on error; confirm against the rest of the file.
 */
#define INFLATE_MODE_TYPE 11  /* state->mode flags enum-ed in inflate.h */
kusano fc6ab3
#define INFLATE_MODE_BAD  26
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/*
 * Run-time MMX selection, enabled when neither USE_MMX nor NO_MMX is defined.
 *
 * inflate_fast_use_mmx is an exported 32-bit variable in .data that starts
 * out as CHECK_MMX.  The .L_check_mmx code in inflate_fast compares it with
 * DO_USE_MMX: equal selects the MMX path, any larger value selects the
 * non-MMX loop, and any smaller value runs the CPUID probe, which then
 * overwrites the variable with DO_USE_MMX or DONT_USE_MMX so later calls skip
 * the probe.  Because the symbol is global, other code can preset the value.
 */
#if ! defined( USE_MMX ) && ! defined( NO_MMX )
kusano fc6ab3
kusano fc6ab3
#define RUN_TIME_MMX
kusano fc6ab3
kusano fc6ab3
#define CHECK_MMX    1
kusano fc6ab3
#define DO_USE_MMX   2
kusano fc6ab3
#define DONT_USE_MMX 3
kusano fc6ab3
kusano fc6ab3
.globl inflate_fast_use_mmx
kusano fc6ab3
kusano fc6ab3
.data
kusano fc6ab3
kusano fc6ab3
.align 4,0
kusano fc6ab3
inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
kusano fc6ab3
.long CHECK_MMX
kusano fc6ab3
kusano fc6ab3
#if defined( GAS_ELF )
kusano fc6ab3
/* elf info */
/* Mark the symbol as a 4-byte data object in the ELF symbol table. */
kusano fc6ab3
.type   inflate_fast_use_mmx,@object
kusano fc6ab3
.size   inflate_fast_use_mmx,4
kusano fc6ab3
#endif
kusano fc6ab3
kusano fc6ab3
#endif /* RUN_TIME_MMX */
kusano fc6ab3
kusano fc6ab3
/*
 * COFF builds only: emit a symbol-table debug entry describing inflate_fast
 * (which the preprocessor has already renamed to _inflate_fast) as an
 * external function.
 */
#if defined( GAS_COFF )
kusano fc6ab3
/* coff info: scl 2 = extern, type 32 = function */
kusano fc6ab3
.def inflate_fast; .scl 2; .type 32; .endef
kusano fc6ab3
#endif
kusano fc6ab3
kusano fc6ab3
.text
kusano fc6ab3
kusano fc6ab3
.align 32,0x90
kusano fc6ab3
inflate_fast:
kusano fc6ab3
        pushl   %edi
kusano fc6ab3
        pushl   %esi
kusano fc6ab3
        pushl   %ebp
kusano fc6ab3
        pushl   %ebx
kusano fc6ab3
        pushf   /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
kusano fc6ab3
        subl    $local_var_size, %esp
kusano fc6ab3
        cld
kusano fc6ab3
kusano fc6ab3
#define strm_r  %esi
kusano fc6ab3
#define state_r %edi
kusano fc6ab3
kusano fc6ab3
        movl    strm_sp(%esp), strm_r
kusano fc6ab3
        movl    state_strm(strm_r), state_r
kusano fc6ab3
kusano fc6ab3
        /* in = strm->next_in;
kusano fc6ab3
         * out = strm->next_out;
kusano fc6ab3
         * last = in + strm->avail_in - 11;
kusano fc6ab3
         * beg = out - (start - strm->avail_out);
kusano fc6ab3
         * end = out + (strm->avail_out - 257);
kusano fc6ab3
         */
kusano fc6ab3
        movl    avail_in_strm(strm_r), %edx
kusano fc6ab3
        movl    next_in_strm(strm_r), %eax
kusano fc6ab3
kusano fc6ab3
        addl    %eax, %edx      /* avail_in += next_in */
kusano fc6ab3
        subl    $11, %edx       /* avail_in -= 11 */
kusano fc6ab3
kusano fc6ab3
        movl    %eax, in(%esp)
kusano fc6ab3
        movl    %edx, last(%esp)
kusano fc6ab3
kusano fc6ab3
        movl    start_sp(%esp), %ebp
kusano fc6ab3
        movl    avail_out_strm(strm_r), %ecx
kusano fc6ab3
        movl    next_out_strm(strm_r), %ebx
kusano fc6ab3
kusano fc6ab3
        subl    %ecx, %ebp      /* start -= avail_out */
kusano fc6ab3
        negl    %ebp            /* start = -start */
kusano fc6ab3
        addl    %ebx, %ebp      /* start += next_out */
kusano fc6ab3
kusano fc6ab3
        subl    $257, %ecx      /* avail_out -= 257 */
kusano fc6ab3
        addl    %ebx, %ecx      /* avail_out += out */
kusano fc6ab3
kusano fc6ab3
        movl    %ebx, out(%esp)
kusano fc6ab3
        movl    %ebp, beg(%esp)
kusano fc6ab3
        movl    %ecx, end(%esp)
kusano fc6ab3
kusano fc6ab3
        /* wsize = state->wsize;
kusano fc6ab3
         * write = state->write;
kusano fc6ab3
         * window = state->window;
kusano fc6ab3
         * hold = state->hold;
kusano fc6ab3
         * bits = state->bits;
kusano fc6ab3
         * lcode = state->lencode;
kusano fc6ab3
         * dcode = state->distcode;
kusano fc6ab3
         * lmask = ( 1 << state->lenbits ) - 1;
kusano fc6ab3
         * dmask = ( 1 << state->distbits ) - 1;
kusano fc6ab3
         */
kusano fc6ab3
kusano fc6ab3
        movl    lencode_state(state_r), %eax
kusano fc6ab3
        movl    distcode_state(state_r), %ecx
kusano fc6ab3
kusano fc6ab3
        movl    %eax, lcode(%esp)
kusano fc6ab3
        movl    %ecx, dcode(%esp)
kusano fc6ab3
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        movl    lenbits_state(state_r), %ecx
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        decl    %eax
kusano fc6ab3
        movl    %eax, lmask(%esp)
kusano fc6ab3
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        movl    distbits_state(state_r), %ecx
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        decl    %eax
kusano fc6ab3
        movl    %eax, dmask(%esp)
kusano fc6ab3
kusano fc6ab3
        movl    wsize_state(state_r), %eax
kusano fc6ab3
        movl    write_state(state_r), %ecx
kusano fc6ab3
        movl    window_state(state_r), %edx
kusano fc6ab3
kusano fc6ab3
        movl    %eax, wsize(%esp)
kusano fc6ab3
        movl    %ecx, write(%esp)
kusano fc6ab3
        movl    %edx, window(%esp)
kusano fc6ab3
kusano fc6ab3
        movl    hold_state(state_r), %ebp
kusano fc6ab3
        movl    bits_state(state_r), %ebx
kusano fc6ab3
kusano fc6ab3
#undef strm_r
kusano fc6ab3
#undef state_r
kusano fc6ab3
kusano fc6ab3
#define in_r       %esi
kusano fc6ab3
#define from_r     %esi
kusano fc6ab3
#define out_r      %edi
kusano fc6ab3
kusano fc6ab3
        movl    in(%esp), in_r
kusano fc6ab3
        movl    last(%esp), %ecx
kusano fc6ab3
        cmpl    in_r, %ecx
kusano fc6ab3
        ja      .L_align_long           /* if in < last */
kusano fc6ab3
kusano fc6ab3
        addl    $11, %ecx               /* ecx = &in[ avail_in ] */
kusano fc6ab3
        subl    in_r, %ecx              /* ecx = avail_in */
kusano fc6ab3
        movl    $12, %eax
kusano fc6ab3
        subl    %ecx, %eax              /* eax = 12 - avail_in */
kusano fc6ab3
        leal    buf(%esp), %edi
kusano fc6ab3
        rep     movsb                   /* memcpy( buf, in, avail_in ) */
kusano fc6ab3
        movl    %eax, %ecx
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        rep     stosb         /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
kusano fc6ab3
        leal    buf(%esp), in_r         /* in = buf */
kusano fc6ab3
        movl    in_r, last(%esp)        /* last = in, do just one iteration */
kusano fc6ab3
        jmp     .L_is_aligned
kusano fc6ab3
kusano fc6ab3
        /* align in_r on long boundary */
kusano fc6ab3
.L_align_long:
kusano fc6ab3
        testl   $3, in_r
kusano fc6ab3
        jz      .L_is_aligned
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        movb    (in_r), %al
kusano fc6ab3
        incl    in_r
kusano fc6ab3
        movl    %ebx, %ecx
kusano fc6ab3
        addl    $8, %ebx
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        orl     %eax, %ebp
kusano fc6ab3
        jmp     .L_align_long
kusano fc6ab3
kusano fc6ab3
.L_is_aligned:
kusano fc6ab3
        movl    out(%esp), out_r
kusano fc6ab3
kusano fc6ab3
#if defined( NO_MMX )
kusano fc6ab3
        jmp     .L_do_loop
kusano fc6ab3
#endif
kusano fc6ab3
kusano fc6ab3
#if defined( USE_MMX )
kusano fc6ab3
        jmp     .L_init_mmx
kusano fc6ab3
#endif
kusano fc6ab3
kusano fc6ab3
/*** Runtime MMX check ***/
kusano fc6ab3
kusano fc6ab3
#if defined( RUN_TIME_MMX )
kusano fc6ab3
.L_check_mmx:
kusano fc6ab3
        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
kusano fc6ab3
        je      .L_init_mmx
kusano fc6ab3
        ja      .L_do_loop /* > 2 */
kusano fc6ab3
kusano fc6ab3
        pushl   %eax
kusano fc6ab3
        pushl   %ebx
kusano fc6ab3
        pushl   %ecx
kusano fc6ab3
        pushl   %edx
kusano fc6ab3
        pushf
kusano fc6ab3
        movl    (%esp), %eax      /* copy eflags to eax */
kusano fc6ab3
        xorl    $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
kusano fc6ab3
                                   * to see if cpu supports cpuid...
kusano fc6ab3
                                   * ID bit method not supported by NexGen but
kusano fc6ab3
                                   * bios may load a cpuid instruction and
kusano fc6ab3
                                   * cpuid may be disabled on Cyrix 5-6x86 */
kusano fc6ab3
        popf
kusano fc6ab3
        pushf
kusano fc6ab3
        popl    %edx              /* copy new eflags to edx */
kusano fc6ab3
        xorl    %eax, %edx        /* test if ID bit is flipped */
kusano fc6ab3
        jz      .L_dont_use_mmx   /* not flipped if zero */
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        cpuid
kusano fc6ab3
        cmpl    $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
kusano fc6ab3
        jne     .L_dont_use_mmx
kusano fc6ab3
        cmpl    $0x6c65746e, %ecx
kusano fc6ab3
        jne     .L_dont_use_mmx
kusano fc6ab3
        cmpl    $0x49656e69, %edx
kusano fc6ab3
        jne     .L_dont_use_mmx
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        cpuid                     /* get cpu features */
kusano fc6ab3
        shrl    $8, %eax
kusano fc6ab3
        andl    $15, %eax
kusano fc6ab3
        cmpl    $6, %eax          /* check for Pentium family, is 0xf for P4 */
kusano fc6ab3
        jne     .L_dont_use_mmx
kusano fc6ab3
        testl   $0x800000, %edx   /* test if MMX feature is set (bit 23) */
kusano fc6ab3
        jnz     .L_use_mmx
kusano fc6ab3
        jmp     .L_dont_use_mmx
kusano fc6ab3
.L_use_mmx:
kusano fc6ab3
        movl    $DO_USE_MMX, inflate_fast_use_mmx
kusano fc6ab3
        jmp     .L_check_mmx_pop
kusano fc6ab3
.L_dont_use_mmx:
kusano fc6ab3
        movl    $DONT_USE_MMX, inflate_fast_use_mmx
kusano fc6ab3
.L_check_mmx_pop:
kusano fc6ab3
        popl    %edx
kusano fc6ab3
        popl    %ecx
kusano fc6ab3
        popl    %ebx
kusano fc6ab3
        popl    %eax
kusano fc6ab3
        jmp     .L_check_mmx
kusano fc6ab3
#endif
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/*** Non-MMX code ***/
kusano fc6ab3
kusano fc6ab3
#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
kusano fc6ab3
kusano fc6ab3
#define hold_r     %ebp
kusano fc6ab3
#define bits_r     %bl
kusano fc6ab3
#define bitslong_r %ebx
kusano fc6ab3
kusano fc6ab3
.align 32,0x90
kusano fc6ab3
.L_while_test:
kusano fc6ab3
        /* while (in < last && out < end)
kusano fc6ab3
         */
kusano fc6ab3
        cmpl    out_r, end(%esp)
kusano fc6ab3
        jbe     .L_break_loop           /* if (out >= end) */
kusano fc6ab3
kusano fc6ab3
        cmpl    in_r, last(%esp)
kusano fc6ab3
        jbe     .L_break_loop
kusano fc6ab3
kusano fc6ab3
.L_do_loop:
kusano fc6ab3
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
kusano fc6ab3
         *
kusano fc6ab3
         * do {
kusano fc6ab3
         *   if (bits < 15) {
kusano fc6ab3
         *     hold |= *((unsigned short *)in)++ << bits;
kusano fc6ab3
         *     bits += 16
kusano fc6ab3
         *   }
kusano fc6ab3
         *   this = lcode[hold & lmask]
kusano fc6ab3
         */
kusano fc6ab3
        cmpb    $15, bits_r
kusano fc6ab3
        ja      .L_get_length_code      /* if (15 < bits) */
kusano fc6ab3
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        lodsw                           /* al = *(ushort *)in++ */
kusano fc6ab3
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
kusano fc6ab3
        addb    $16, bits_r             /* bits += 16 */
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
kusano fc6ab3
kusano fc6ab3
.L_get_length_code:
kusano fc6ab3
        movl    lmask(%esp), %edx       /* edx = lmask */
kusano fc6ab3
        movl    lcode(%esp), %ecx       /* ecx = lcode */
kusano fc6ab3
        andl    hold_r, %edx            /* edx &= hold */
kusano fc6ab3
        movl    (%ecx,%edx,4), %eax     /* eax = lcode[hold & lmask] */
kusano fc6ab3
kusano fc6ab3
.L_dolen:
kusano fc6ab3
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
kusano fc6ab3
         *
kusano fc6ab3
         * dolen:
kusano fc6ab3
         *    bits -= this.bits;
kusano fc6ab3
         *    hold >>= this.bits
kusano fc6ab3
         */
kusano fc6ab3
        movb    %ah, %cl                /* cl = this.bits */
kusano fc6ab3
        subb    %ah, bits_r             /* bits -= this.bits */
kusano fc6ab3
        shrl    %cl, hold_r             /* hold >>= this.bits */
kusano fc6ab3
kusano fc6ab3
        /* check if op is a literal
kusano fc6ab3
         * if (op == 0) {
kusano fc6ab3
         *    PUP(out) = this.val;
kusano fc6ab3
         *  }
kusano fc6ab3
         */
kusano fc6ab3
        testb   %al, %al
kusano fc6ab3
        jnz     .L_test_for_length_base /* if (op != 0) 45.7% */
kusano fc6ab3
kusano fc6ab3
        shrl    $16, %eax               /* output this.val char */
kusano fc6ab3
        stosb
kusano fc6ab3
        jmp     .L_while_test
kusano fc6ab3
kusano fc6ab3
.L_test_for_length_base:
kusano fc6ab3
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
kusano fc6ab3
         *
kusano fc6ab3
         * else if (op & 16) {
kusano fc6ab3
         *   len = this.val
kusano fc6ab3
         *   op &= 15
kusano fc6ab3
         *   if (op) {
kusano fc6ab3
         *     if (op > bits) {
kusano fc6ab3
         *       hold |= *((unsigned short *)in)++ << bits;
kusano fc6ab3
         *       bits += 16
kusano fc6ab3
         *     }
kusano fc6ab3
         *     len += hold & mask[op];
kusano fc6ab3
         *     bits -= op;
kusano fc6ab3
         *     hold >>= op;
kusano fc6ab3
         *   }
kusano fc6ab3
         */
kusano fc6ab3
#define len_r %edx
kusano fc6ab3
        movl    %eax, len_r             /* len = this */
kusano fc6ab3
        shrl    $16, len_r              /* len = this.val */
kusano fc6ab3
        movb    %al, %cl
kusano fc6ab3
kusano fc6ab3
        testb   $16, %al
kusano fc6ab3
        jz      .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
kusano fc6ab3
        andb    $15, %cl                /* op &= 15 */
kusano fc6ab3
        jz      .L_save_len             /* if (!op) */
kusano fc6ab3
        cmpb    %cl, bits_r
kusano fc6ab3
        jae     .L_add_bits_to_len      /* if (op <= bits) */
kusano fc6ab3
kusano fc6ab3
        movb    %cl, %ch                /* stash op in ch, freeing cl */
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        lodsw                           /* al = *(ushort *)in++ */
kusano fc6ab3
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
kusano fc6ab3
        addb    $16, bits_r             /* bits += 16 */
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
kusano fc6ab3
        movb    %ch, %cl                /* move op back to ecx */
kusano fc6ab3
kusano fc6ab3
.L_add_bits_to_len:
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        decl    %eax
kusano fc6ab3
        subb    %cl, bits_r
kusano fc6ab3
        andl    hold_r, %eax            /* eax &= hold */
kusano fc6ab3
        shrl    %cl, hold_r
kusano fc6ab3
        addl    %eax, len_r             /* len += hold & mask[op] */
kusano fc6ab3
kusano fc6ab3
.L_save_len:
kusano fc6ab3
        movl    len_r, len(%esp)        /* save len */
kusano fc6ab3
#undef  len_r
kusano fc6ab3
kusano fc6ab3
.L_decode_distance:
kusano fc6ab3
        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
kusano fc6ab3
         *
kusano fc6ab3
         *   if (bits < 15) {
kusano fc6ab3
         *     hold |= *((unsigned short *)in)++ << bits;
kusano fc6ab3
         *     bits += 16
kusano fc6ab3
         *   }
kusano fc6ab3
         *   this = dcode[hold & dmask];
kusano fc6ab3
         * dodist:
kusano fc6ab3
         *   bits -= this.bits;
kusano fc6ab3
         *   hold >>= this.bits;
kusano fc6ab3
         *   op = this.op;
kusano fc6ab3
         */
kusano fc6ab3
kusano fc6ab3
        cmpb    $15, bits_r
kusano fc6ab3
        ja      .L_get_distance_code    /* if (15 < bits) */
kusano fc6ab3
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        lodsw                           /* al = *(ushort *)in++ */
kusano fc6ab3
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
kusano fc6ab3
        addb    $16, bits_r             /* bits += 16 */
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
kusano fc6ab3
kusano fc6ab3
.L_get_distance_code:
kusano fc6ab3
        movl    dmask(%esp), %edx       /* edx = dmask */
kusano fc6ab3
        movl    dcode(%esp), %ecx       /* ecx = dcode */
kusano fc6ab3
        andl    hold_r, %edx            /* edx &= hold */
kusano fc6ab3
        movl    (%ecx,%edx,4), %eax     /* eax = dcode[hold & dmask] */
kusano fc6ab3
kusano fc6ab3
#define dist_r %edx
kusano fc6ab3
.L_dodist:
kusano fc6ab3
        movl    %eax, dist_r            /* dist = this */
kusano fc6ab3
        shrl    $16, dist_r             /* dist = this.val */
kusano fc6ab3
        movb    %ah, %cl
kusano fc6ab3
        subb    %ah, bits_r             /* bits -= this.bits */
kusano fc6ab3
        shrl    %cl, hold_r             /* hold >>= this.bits */
kusano fc6ab3
kusano fc6ab3
        /* if (op & 16) {
kusano fc6ab3
         *   dist = this.val
kusano fc6ab3
         *   op &= 15
kusano fc6ab3
         *   if (op > bits) {
kusano fc6ab3
         *     hold |= *((unsigned short *)in)++ << bits;
kusano fc6ab3
         *     bits += 16
kusano fc6ab3
         *   }
kusano fc6ab3
         *   dist += hold & mask[op];
kusano fc6ab3
         *   bits -= op;
kusano fc6ab3
         *   hold >>= op;
kusano fc6ab3
         */
kusano fc6ab3
        movb    %al, %cl                /* cl = this.op */
kusano fc6ab3
kusano fc6ab3
        testb   $16, %al                /* if ((op & 16) == 0) */
kusano fc6ab3
        jz      .L_test_for_second_level_dist
kusano fc6ab3
        andb    $15, %cl                /* op &= 15 */
kusano fc6ab3
        jz      .L_check_dist_one
kusano fc6ab3
        cmpb    %cl, bits_r
kusano fc6ab3
        jae     .L_add_bits_to_dist     /* if (op <= bits) 97.6% */
kusano fc6ab3
kusano fc6ab3
        movb    %cl, %ch                /* stash op in ch, freeing cl */
kusano fc6ab3
        xorl    %eax, %eax
kusano fc6ab3
        lodsw                           /* al = *(ushort *)in++ */
kusano fc6ab3
        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
kusano fc6ab3
        addb    $16, bits_r             /* bits += 16 */
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
kusano fc6ab3
        movb    %ch, %cl                /* move op back to ecx */
kusano fc6ab3
kusano fc6ab3
.L_add_bits_to_dist:
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        decl    %eax                    /* (1 << op) - 1 */
kusano fc6ab3
        subb    %cl, bits_r
kusano fc6ab3
        andl    hold_r, %eax            /* eax &= hold */
kusano fc6ab3
        shrl    %cl, hold_r
kusano fc6ab3
        addl    %eax, dist_r            /* dist += hold & ((1 << op) - 1) */
kusano fc6ab3
        jmp     .L_check_window
kusano fc6ab3
kusano fc6ab3
.L_check_window:
kusano fc6ab3
        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
kusano fc6ab3
         *       %ecx = nbytes
kusano fc6ab3
         *
kusano fc6ab3
         * nbytes = out - beg;
kusano fc6ab3
         * if (dist <= nbytes) {
kusano fc6ab3
         *   from = out - dist;
kusano fc6ab3
         *   do {
kusano fc6ab3
         *     PUP(out) = PUP(from);
kusano fc6ab3
         *   } while (--len > 0) {
kusano fc6ab3
         * }
kusano fc6ab3
         */
kusano fc6ab3
kusano fc6ab3
        movl    in_r, in(%esp)          /* save in so from can use it's reg */
kusano fc6ab3
        movl    out_r, %eax
kusano fc6ab3
        subl    beg(%esp), %eax         /* nbytes = out - beg */
kusano fc6ab3
kusano fc6ab3
        cmpl    dist_r, %eax
kusano fc6ab3
        jb      .L_clip_window          /* if (dist > nbytes) 4.2% */
kusano fc6ab3
kusano fc6ab3
        movl    len(%esp), %ecx
kusano fc6ab3
        movl    out_r, from_r
kusano fc6ab3
        subl    dist_r, from_r          /* from = out - dist */
kusano fc6ab3
kusano fc6ab3
        subl    $3, %ecx
kusano fc6ab3
        movb    (from_r), %al
kusano fc6ab3
        movb    %al, (out_r)
kusano fc6ab3
        movb    1(from_r), %al
kusano fc6ab3
        movb    2(from_r), %dl
kusano fc6ab3
        addl    $3, from_r
kusano fc6ab3
        movb    %al, 1(out_r)
kusano fc6ab3
        movb    %dl, 2(out_r)
kusano fc6ab3
        addl    $3, out_r
kusano fc6ab3
        rep     movsb
kusano fc6ab3
kusano fc6ab3
        movl    in(%esp), in_r          /* move in back to %esi, toss from */
kusano fc6ab3
        jmp     .L_while_test
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
kusano fc6ab3
.L_check_dist_one:
kusano fc6ab3
        cmpl    $1, dist_r
kusano fc6ab3
        jne     .L_check_window
kusano fc6ab3
        cmpl    out_r, beg(%esp)
kusano fc6ab3
        je      .L_check_window
kusano fc6ab3
kusano fc6ab3
        decl    out_r
kusano fc6ab3
        movl    len(%esp), %ecx
kusano fc6ab3
        movb    (out_r), %al
kusano fc6ab3
        subl    $3, %ecx
kusano fc6ab3
kusano fc6ab3
        movb    %al, 1(out_r)
kusano fc6ab3
        movb    %al, 2(out_r)
kusano fc6ab3
        movb    %al, 3(out_r)
kusano fc6ab3
        addl    $4, out_r
kusano fc6ab3
        rep     stosb
kusano fc6ab3
kusano fc6ab3
        jmp     .L_while_test
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
kusano fc6ab3
.L_test_for_second_level_length:
kusano fc6ab3
        /* else if ((op & 64) == 0) {
kusano fc6ab3
         *   this = lcode[this.val + (hold & mask[op])];
kusano fc6ab3
         * }
kusano fc6ab3
         */
kusano fc6ab3
        testb   $64, %al
kusano fc6ab3
        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
kusano fc6ab3
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        decl    %eax
kusano fc6ab3
        andl    hold_r, %eax            /* eax &= hold */
kusano fc6ab3
        addl    %edx, %eax              /* eax += this.val */
kusano fc6ab3
        movl    lcode(%esp), %edx       /* edx = lcode */
kusano fc6ab3
        movl    (%edx,%eax,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
kusano fc6ab3
        jmp     .L_dolen
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
kusano fc6ab3
.L_test_for_second_level_dist:
kusano fc6ab3
        /* else if ((op & 64) == 0) {
kusano fc6ab3
         *   this = dcode[this.val + (hold & mask[op])];
kusano fc6ab3
         * }
kusano fc6ab3
         */
kusano fc6ab3
        testb   $64, %al
kusano fc6ab3
        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
kusano fc6ab3
kusano fc6ab3
        movl    $1, %eax
kusano fc6ab3
        shll    %cl, %eax
kusano fc6ab3
        decl    %eax
kusano fc6ab3
        andl    hold_r, %eax            /* eax &= hold */
kusano fc6ab3
        addl    %edx, %eax              /* eax += this.val */
kusano fc6ab3
        movl    dcode(%esp), %edx       /* edx = dcode */
kusano fc6ab3
        movl    (%edx,%eax,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
kusano fc6ab3
        jmp     .L_dodist
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
.L_clip_window:
        /* The match reaches back into the sliding window (non-MMX path).
         *
         * regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
         *       %ecx = nbytes
         *
         * On entry %eax carries the byte count computed by the code that
         * jumps here; it is moved into nbytes_r first.
         *
         * else {
         *   if (dist > wsize) {
         *     invalid distance
         *   }
         *   from = window;
         *   nbytes = dist - nbytes;
         *   if (write == 0) {
         *     from += wsize - nbytes;
         */
#define nbytes_r %ecx
        movl    %eax, nbytes_r
        movl    wsize(%esp), %eax       /* prepare for dist compare */
        negl    nbytes_r                /* nbytes = -nbytes */
        movl    window(%esp), from_r    /* from = window */

        cmpl    dist_r, %eax
        jb      .L_invalid_distance_too_far /* if (dist > wsize) */

        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
        cmpl    $0, write(%esp)
        jne     .L_wrap_around_window   /* if (write != 0) */

        subl    nbytes_r, %eax
        addl    %eax, from_r            /* from += wsize - nbytes */

        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
         *       %ecx = nbytes, %eax = len
         *
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       do {
         *         PUP(out) = PUP(from);
         *       } while (--nbytes);
         *       from = out - dist;
         *     }
         *   }
         */
#define len_r %eax
        movl    len(%esp), len_r
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1             /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy nbytes bytes: from -> out */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1
kusano fc6ab3
kusano fc6ab3
.L_wrap_around_window:
        /* Window copy when write != 0: the source may wrap past the end of
         * the window (non-MMX path).
         *
         * regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
         *       %ecx = nbytes, %eax = write, %eax = len
         *       (%eax holds write first and is reused for len afterwards)
         *
         *   else if (write < nbytes) {
         *     from += wsize + write - nbytes;
         *     nbytes -= write;
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       do {
         *         PUP(out) = PUP(from);
         *       } while (--nbytes);
         *       from = window;
         *       nbytes = write;
         *       if (nbytes < len) {
         *         len -= nbytes;
         *         do {
         *           PUP(out) = PUP(from);
         *         } while(--nbytes);
         *         from = out - dist;
         *       }
         *     }
         *   }
         */
#define write_r %eax
        movl    write(%esp), write_r
        cmpl    write_r, nbytes_r
        jbe     .L_contiguous_in_window /* if (write >= nbytes) */

        addl    wsize(%esp), from_r
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
        subl    write_r, nbytes_r       /* nbytes -= write */
#undef write_r

        movl    len(%esp), len_r
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1             /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy the tail end of the window */
        movl    window(%esp), from_r    /* from = window */
        movl    write(%esp), nbytes_r   /* nbytes = write */
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1             /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy from the start of the window */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1
kusano fc6ab3
kusano fc6ab3
.L_contiguous_in_window:
        /* Window copy when the source bytes are contiguous in the window
         * (non-MMX path).  Reached from .L_wrap_around_window, which has
         * already loaded write into %eax.
         *
         * regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
         *       %ecx = nbytes, %eax = write, %eax = len
         *       (%eax holds write first and is reused for len afterwards)
         *
         *   else {
         *     from += write - nbytes;
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       do {
         *         PUP(out) = PUP(from);
         *       } while (--nbytes);
         *       from = out - dist;
         *     }
         *   }
         */
#define write_r %eax
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += write - nbytes */
#undef write_r

        movl    len(%esp), len_r
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1             /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy nbytes bytes: from -> out */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        /* falls through into .L_do_copy1 */
kusano fc6ab3
kusano fc6ab3
.L_do_copy1:
        /* Copy the remaining len bytes of the match, then resume the main
         * loop (non-MMX path).
         *
         * regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
         *       %eax = len
         *       (%esi is from during the copy and is reloaded with in after)
         *
         *     while (len > 0) {
         *       PUP(out) = PUP(from);
         *       len--;
         *     }
         *   }
         * } while (in < last && out < end);
         */
#undef nbytes_r
#define in_r %esi
        movl    len_r, %ecx
        rep     movsb

        movl    in(%esp), in_r          /* move in back to %esi, toss from */
        jmp     .L_while_test

#undef len_r
#undef dist_r
kusano fc6ab3
kusano fc6ab3
#endif /* NO_MMX || RUN_TIME_MMX */
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/*** MMX code ***/
kusano fc6ab3
kusano fc6ab3
#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
kusano fc6ab3
kusano fc6ab3
.align 32,0x90
.L_init_mmx:
        /* Set up the MMX decode loop: hold moves from %ebp into hold_mm,
         * the bit count moves from %ebx into %ebp, and the length/distance
         * masks are loaded into MMX registers, each with a backup copy.
         */
        emms

#undef  bits_r
#undef  bitslong_r
#define bitslong_r %ebp
#define hold_mm    %mm0
        movd    %ebp, hold_mm           /* hold_mm = hold */
        movl    %ebx, bitslong_r        /* bits now lives in %ebp */

#define used_mm   %mm1
#define dmask2_mm %mm2
#define lmask2_mm %mm3
#define lmask_mm  %mm4
#define dmask_mm  %mm5
#define tmp_mm    %mm6

        movd    lmask(%esp), lmask_mm
        movq    lmask_mm, lmask2_mm     /* lmask2_mm = pristine copy of lmask */
        movd    dmask(%esp), dmask_mm
        movq    dmask_mm, dmask2_mm     /* dmask2_mm = pristine copy of dmask */
        pxor    used_mm, used_mm        /* no bits consumed yet */
        movl    lcode(%esp), %ebx       /* ebx = lcode */
        jmp     .L_do_loop_mmx
kusano fc6ab3
kusano fc6ab3
.align 32,0x90
.L_while_test_mmx:
        /* Loop condition of the MMX decode loop:
         *
         * while (in < last && out < end)
         *
         * Both compares are unsigned; either failing leaves the loop.
         */
        cmpl    out_r, end(%esp)
        jbe     .L_break_loop           /* if (out >= end) */

        cmpl    in_r, last(%esp)
        jbe     .L_break_loop           /* if (in >= last) */
        /* falls through into .L_do_loop_mmx */
kusano fc6ab3
kusano fc6ab3
.L_do_loop_mmx:
        /* Top of the MMX decode loop: drop the bits consumed by the previous
         * step, refill hold_mm with 32 more input bits unless more than 32
         * are already buffered, then fetch the literal/length table entry.
         */
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */

        cmpl    $32, bitslong_r
        ja      .L_get_length_code_mmx  /* if (32 < bits) */

        movd    bitslong_r, tmp_mm
        movd    (in_r), %mm7
        addl    $4, in_r
        psllq   tmp_mm, %mm7
        addl    $32, bitslong_r
        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */

.L_get_length_code_mmx:
        pand    hold_mm, lmask_mm
        movd    lmask_mm, %eax
        movq    lmask2_mm, lmask_mm     /* restore lmask_mm from its copy */
        movl    (%ebx,%eax,4), %eax     /* eax = lcode[hold & lmask] */
        /* falls through into .L_dolen_mmx */
kusano fc6ab3
kusano fc6ab3
.L_dolen_mmx:
        /* Handle a literal/length table entry held in %eax.  As read below:
         * %al = op, %ah = bits, upper 16 bits = val.  When op == 0 the entry
         * is a literal, which is stored to the output.
         */
        movzbl  %ah, %ecx               /* ecx = this.bits */
        movd    %ecx, used_mm           /* shifted out of hold_mm later */
        subl    %ecx, bitslong_r        /* bits -= this.bits */

        testb   %al, %al
        jnz     .L_test_for_length_base_mmx /* if (op != 0) 45.7% */

        shrl    $16, %eax               /* output this.val char */
        stosb
        jmp     .L_while_test_mmx
kusano fc6ab3
kusano fc6ab3
.L_test_for_length_base_mmx:
        /* Entry is not a literal.  If (op & 16) is set it is a length base:
         * len = this.val, plus (op & 15) extra bits taken from hold.
         * Otherwise it is handled as a second-level / end-of-block entry.
         */
#define len_r  %edx
        movl    %eax, len_r             /* len = this */
        shrl    $16, len_r              /* len = this.val */

        testb   $16, %al
        jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
        andl    $15, %eax               /* op &= 15 */
        jz      .L_decode_distance_mmx  /* if (!op) */

        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    %eax, used_mm           /* extra bits are consumed next */
        movd    hold_mm, %ecx
        subl    %eax, bitslong_r        /* bits -= op */
        andl    .L_mask(,%eax,4), %ecx
        addl    %ecx, len_r             /* len += hold & mask[op] */
        /* falls through into .L_decode_distance_mmx */
kusano fc6ab3
kusano fc6ab3
.L_decode_distance_mmx:
        /* Start decoding the distance: drop the bits consumed so far, refill
         * hold_mm with 32 more input bits unless more than 32 are already
         * buffered, then fetch the distance table entry.
         */
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */

        cmpl    $32, bitslong_r
        ja      .L_get_dist_code_mmx    /* if (32 < bits) */

        movd    bitslong_r, tmp_mm
        movd    (in_r), %mm7
        addl    $4, in_r
        psllq   tmp_mm, %mm7
        addl    $32, bitslong_r
        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */

.L_get_dist_code_mmx:
        movl    dcode(%esp), %ebx       /* ebx = dcode */
        pand    hold_mm, dmask_mm
        movd    dmask_mm, %eax
        movq    dmask2_mm, dmask_mm     /* restore dmask_mm from its copy */
        movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
        /* falls through into .L_dodist_mmx */
kusano fc6ab3
kusano fc6ab3
.L_dodist_mmx:
        /* Handle a distance table entry held in %eax.  As read below:
         * %al = op, %ah = bits, upper 16 bits = val.  From here on %ebx
         * holds dist instead of a table pointer.
         */
#define dist_r %ebx
        movzbl  %ah, %ecx               /* ecx = this.bits */
        movl    %eax, dist_r
        shrl    $16, dist_r             /* dist  = this.val */
        subl    %ecx, bitslong_r        /* bits -= this.bits */
        movd    %ecx, used_mm

        testb   $16, %al                /* if ((op & 16) == 0) */
        jz      .L_test_for_second_level_dist_mmx
        andl    $15, %eax               /* op &= 15 */
        jz      .L_check_dist_one_mmx   /* no extra bits */

.L_add_bits_to_dist_mmx:
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    %eax, used_mm           /* save bit length of current op */
        movd    hold_mm, %ecx           /* get the next bits on input stream */
        subl    %eax, bitslong_r        /* bits -= op bits */
        andl    .L_mask(,%eax,4), %ecx  /* ecx   = hold & mask[op] */
        addl    %ecx, dist_r            /* dist += hold & mask[op] */
        /* falls through into .L_check_window_mmx */
kusano fc6ab3
kusano fc6ab3
.L_check_window_mmx:
        /* Copy a match of len bytes from dist bytes back.  If dist reaches
         * further back than the bytes written since beg, the source lies in
         * the window and .L_clip_window_mmx takes over.  Otherwise the first
         * three bytes are copied with explicit moves and the remaining
         * len - 3 bytes with rep movsb.
         */
        movl    in_r, in(%esp)          /* save in so from can use its reg */
        movl    out_r, %eax
        subl    beg(%esp), %eax         /* nbytes = out - beg */

        cmpl    dist_r, %eax
        jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */

        movl    len_r, %ecx
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */

        subl    $3, %ecx                /* three bytes are copied by hand */
        movb    (from_r), %al
        movb    %al, (out_r)
        movb    1(from_r), %al
        movb    2(from_r), %dl          /* len (%edx) is already in %ecx */
        addl    $3, from_r
        movb    %al, 1(out_r)
        movb    %dl, 2(out_r)
        addl    $3, out_r
        rep     movsb

        movl    in(%esp), in_r          /* move in back to %esi, toss from */
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
.L_check_dist_one_mmx:
        /* Special case for dist == 1 when out != beg: the match repeats the
         * previously written byte, so it is stored len times instead of
         * copied.  Any other case goes through the general window check.
         * %esi is not touched here, so in needs no save/restore.
         */
        cmpl    $1, dist_r
        jne     .L_check_window_mmx
        cmpl    out_r, beg(%esp)
        je      .L_check_window_mmx

        decl    out_r                   /* step back to the previous byte */
        movl    len_r, %ecx
        movb    (out_r), %al            /* al = byte to repeat */
        subl    $3, %ecx                /* three bytes are stored by hand */

        movb    %al, 1(out_r)
        movb    %al, 2(out_r)
        movb    %al, 3(out_r)
        addl    $4, out_r               /* undo the decl and skip 3 bytes */
        rep     stosb

        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
.L_test_for_second_level_length_mmx:
        /* Second-level literal/length lookup (MMX path).
         *
         * else if ((op & 64) == 0) {
         *   this = lcode[this.val + (hold & mask[op])];
         * }
         *
         * len_r still holds this.val and %ebx holds lcode.
         */
        testb   $64, %al
        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */

        andl    $15, %eax
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ecx
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
        addl    len_r, %ecx             /* ecx += this.val */
        movl    (%ebx,%ecx,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
        jmp     .L_dolen_mmx
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
.L_test_for_second_level_dist_mmx:
        /* Second-level distance lookup (MMX path).
         *
         * else if ((op & 64) == 0) {
         *   this = dcode[this.val + (hold & mask[op])];
         * }
         *
         * dist_r (%ebx) still holds this.val, so dcode is loaded into %eax.
         */
        testb   $64, %al
        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */

        andl    $15, %eax
        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ecx
        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
        movl    dcode(%esp), %eax       /* eax = dcode */
        addl    dist_r, %ecx            /* ecx += this.val */
        movl    (%eax,%ecx,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
        jmp     .L_dodist_mmx
kusano fc6ab3
kusano fc6ab3
.align 16,0x90
.L_clip_window_mmx:
        /* The match reaches back into the sliding window (MMX path).
         * On entry %eax = out - beg (computed in .L_check_window_mmx).
         *
         *   if (dist > wsize) {
         *     invalid distance
         *   }
         *   from = window;
         *   nbytes = dist - nbytes;
         *   if (write == 0) {
         *     from += wsize - nbytes;
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       copy nbytes bytes from the window;
         *       from = out - dist;
         *     }
         *   }
         */
#define nbytes_r %ecx
        movl    %eax, nbytes_r
        movl    wsize(%esp), %eax       /* prepare for dist compare */
        negl    nbytes_r                /* nbytes = -nbytes */
        movl    window(%esp), from_r    /* from = window */

        cmpl    dist_r, %eax
        jb      .L_invalid_distance_too_far /* if (dist > wsize) */

        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
        cmpl    $0, write(%esp)
        jne     .L_wrap_around_window_mmx /* if (write != 0) */

        subl    nbytes_r, %eax
        addl    %eax, from_r            /* from += wsize - nbytes */

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy nbytes bytes: from -> out */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1_mmx
kusano fc6ab3
kusano fc6ab3
.L_wrap_around_window_mmx:
        /* Window copy when write != 0: the source may wrap past the end of
         * the window (MMX path).
         *
         *   else if (write < nbytes) {
         *     from += wsize + write - nbytes;
         *     nbytes -= write;
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       copy nbytes bytes;
         *       from = window;
         *       nbytes = write;
         *       if (nbytes < len) {
         *         len -= nbytes;
         *         copy nbytes bytes;
         *         from = out - dist;
         *       }
         *     }
         *   }
         */
#define write_r %eax
        movl    write(%esp), write_r
        cmpl    write_r, nbytes_r
        jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */

        addl    wsize(%esp), from_r
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
        subl    write_r, nbytes_r       /* nbytes -= write */
#undef write_r

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy the tail end of the window */
        movl    window(%esp), from_r    /* from = window */
        movl    write(%esp), nbytes_r   /* nbytes = write */
        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy from the start of the window */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        jmp     .L_do_copy1_mmx
kusano fc6ab3
kusano fc6ab3
.L_contiguous_in_window_mmx:
        /* Window copy when the source bytes are contiguous in the window
         * (MMX path).  Reached from .L_wrap_around_window_mmx, which has
         * already loaded write into %eax.
         *
         *   else {
         *     from += write - nbytes;
         *     if (nbytes < len) {
         *       len -= nbytes;
         *       copy nbytes bytes;
         *       from = out - dist;
         *     }
         *   }
         */
#define write_r %eax
        addl    write_r, from_r
        subl    nbytes_r, from_r        /* from += write - nbytes */
#undef write_r

        cmpl    nbytes_r, len_r
        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */

        subl    nbytes_r, len_r         /* len -= nbytes */
        rep     movsb                   /* copy nbytes bytes: from -> out */
        movl    out_r, from_r
        subl    dist_r, from_r          /* from = out - dist */
        /* falls through into .L_do_copy1_mmx */
kusano fc6ab3
kusano fc6ab3
.L_do_copy1_mmx:
        /* Copy the remaining len bytes of the match, then restore the
         * registers that were borrowed for from and dist and resume the
         * MMX decode loop.
         */
#undef nbytes_r
#define in_r %esi
        movl    len_r, %ecx
        rep     movsb

        movl    in(%esp), in_r          /* move in back to %esi, toss from */
        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
        jmp     .L_while_test_mmx

#undef hold_r
#undef bitslong_r
kusano fc6ab3
kusano fc6ab3
#endif /* USE_MMX || RUN_TIME_MMX */
kusano fc6ab3
kusano fc6ab3
kusano fc6ab3
/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/
kusano fc6ab3
kusano fc6ab3
.L_invalid_distance_code:
        /* Error exit: hand the message in %ecx and the new mode in %edx to
         * .L_update_stream_state.
         *
         * else {
         *   strm->msg = "invalid distance code";
         *   state->mode = BAD;
         * }
         */
        movl    $.L_invalid_distance_code_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
kusano fc6ab3
kusano fc6ab3
.L_test_for_end_of_block:
        /* Reached with op in %al and (op & 64) set.  Bit 32 marks end of
         * block; anything else is an invalid literal/length code.
         *
         * else if (op & 32) {
         *   state->mode = TYPE;
         *   break;
         * }
         */
        testb   $32, %al
        jz      .L_invalid_literal_length_code  /* if ((op & 32) == 0) */

        movl    $0, %ecx                /* msg = NULL: strm->msg is kept */
        movl    $INFLATE_MODE_TYPE, %edx
        jmp     .L_update_stream_state
kusano fc6ab3
kusano fc6ab3
.L_invalid_literal_length_code:
        /* Error exit: hand the message in %ecx and the new mode in %edx to
         * .L_update_stream_state.
         *
         * else {
         *   strm->msg = "invalid literal/length code";
         *   state->mode = BAD;
         * }
         */
        movl    $.L_invalid_literal_length_code_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
kusano fc6ab3
kusano fc6ab3
.L_invalid_distance_too_far:
        /* Error exit taken from the window-clipping code, where in's
         * register is in use as from; in is reloaded from the stack first.
         *
         * strm->msg = "invalid distance too far back";
         * state->mode = BAD;
         */
        movl    in(%esp), in_r          /* from_r has in's reg, put in back */
        movl    $.L_invalid_distance_too_far_msg, %ecx
        movl    $INFLATE_MODE_BAD, %edx
        jmp     .L_update_stream_state
kusano fc6ab3
kusano fc6ab3
.L_update_stream_state:
        /* Common tail of the exits above.
         * In:  %ecx = message pointer, or 0 to leave strm->msg untouched
         *      %edx = value to store in strm->state->mode
         * Uses %eax as scratch.
         */
        /* set strm->msg = %ecx, strm->state->mode = %edx */
        movl    strm_sp(%esp), %eax
        testl   %ecx, %ecx              /* if (msg != NULL) */
        jz      .L_skip_msg
        movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
.L_skip_msg:
        movl    state_strm(%eax), %eax  /* state = strm->state */
        movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
        jmp     .L_break_loop
kusano fc6ab3
kusano fc6ab3
.align 32,0x90
.L_break_loop:

/*
 * Loop exit: write the decoder state back to strm / strm->state.
 *
 * Regs:
 *
 * bits = %ebp when mmx, and in %ebx when non-mmx
 * hold = hold_mm (%mm0) when mmx, and in %ebp when non-mmx
 * in   = %esi
 * out  = %edi
 */

#if defined( USE_MMX ) || defined( RUN_TIME_MMX )

#if defined( RUN_TIME_MMX )

        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
        jne     .L_update_next_in

#endif /* RUN_TIME_MMX */

        movl    %ebp, %ebx              /* mmx: bring bits back into %ebx */

.L_update_next_in:

#endif

#define strm_r  %eax
#define state_r %edx

        /* len = bits >> 3;
         * in -= len;
         * bits -= len << 3;
         * hold &= (1U << bits) - 1;
         * state->hold = hold;
         * state->bits = bits;
         * strm->next_in = in;
         * strm->next_out = out;
         */
        movl    strm_sp(%esp), strm_r
        movl    %ebx, %ecx
        movl    state_strm(strm_r), state_r
        shrl    $3, %ecx                /* len = bits >> 3 */
        subl    %ecx, in_r              /* in -= len */
        shll    $3, %ecx
        subl    %ecx, %ebx              /* bits -= len << 3 */
        movl    out_r, next_out_strm(strm_r)
        movl    %ebx, bits_state(state_r)
        movl    %ebx, %ecx              /* keep bits in %cl for the mask */

        leal    buf(%esp), %ebx
        cmpl    %ebx, last(%esp)
        jne     .L_buf_not_used         /* if buf != last */

        /* last == buf: input was being read from the on-stack buf, so
         * translate in and last back in terms of strm->next_in.
         */
        subl    %ebx, in_r              /* in -= buf */
        movl    next_in_strm(strm_r), %ebx
        movl    %ebx, last(%esp)        /* last = strm->next_in */
        addl    %ebx, in_r              /* in += strm->next_in */
        movl    avail_in_strm(strm_r), %ebx
        subl    $11, %ebx
        addl    %ebx, last(%esp)    /* last = &strm->next_in[ avail_in - 11 ] */

.L_buf_not_used:
        movl    in_r, next_in_strm(strm_r)

        movl    $1, %ebx
        shll    %cl, %ebx
        decl    %ebx                    /* ebx = (1U << bits) - 1 */

#if defined( USE_MMX ) || defined( RUN_TIME_MMX )

#if defined( RUN_TIME_MMX )

        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
        jne     .L_update_hold

#endif /* RUN_TIME_MMX */

        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
        movd    hold_mm, %ebp           /* mmx: bring hold back into %ebp */

        emms

.L_update_hold:

#endif /* USE_MMX || RUN_TIME_MMX */

        andl    %ebx, %ebp              /* hold &= (1U << bits) - 1 */
        movl    %ebp, hold_state(state_r)

#define last_r %ebx

        /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
        movl    last(%esp), last_r
        cmpl    in_r, last_r
        jbe     .L_last_is_smaller     /* if (in >= last) */

        subl    in_r, last_r           /* last -= in */
        addl    $11, last_r            /* last += 11 */
        movl    last_r, avail_in_strm(strm_r)
        jmp     .L_fixup_out
.L_last_is_smaller:
        subl    last_r, in_r           /* in -= last */
        negl    in_r                   /* in = -in */
        addl    $11, in_r              /* in += 11 */
        movl    in_r, avail_in_strm(strm_r)

#undef last_r
#define end_r %ebx

.L_fixup_out:
        /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
        movl    end(%esp), end_r
        cmpl    out_r, end_r
        jbe     .L_end_is_smaller      /* if (out >= end) */

        subl    out_r, end_r           /* end -= out */
        addl    $257, end_r            /* end += 257 */
        movl    end_r, avail_out_strm(strm_r)
        jmp     .L_done
.L_end_is_smaller:
        subl    end_r, out_r           /* out -= end */
        negl    out_r                  /* out = -out */
        addl    $257, out_r            /* out += 257 */
        movl    out_r, avail_out_strm(strm_r)

#undef end_r
#undef strm_r
#undef state_r
kusano fc6ab3
kusano fc6ab3
.L_done:
        /* Function epilogue: release the local variable area, then restore
         * the flags and %ebx, %ebp, %esi, %edi from the stack.  The matching
         * saves are in the prologue, which lies outside this part of the
         * file -- the pop order here must mirror it.
         */
        addl    $local_var_size, %esp
        popf
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret
kusano fc6ab3
kusano fc6ab3
#if defined( GAS_ELF )
kusano fc6ab3
/* elf info */
kusano fc6ab3
.type inflate_fast,@function
kusano fc6ab3
.size inflate_fast,.-inflate_fast
kusano fc6ab3
#endif