/* inffas8664.c is a hand tuned assembler version of inffast.c - fast decoding
 * version for AMD64 on Windows using Microsoft C compiler
 *
 * Copyright (C) 1995-2003 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 *
 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
 * Please use the copyright conditions above.
 *
 * 2005 - Adaptation to Microsoft C Compiler for AMD64 by Gilles Vollant
 *
 * inffas8664.c calls the function inffas8664fnc in inffasx64.asm
 *  inffasx64.asm is automatically converted from the AMD64 portion of
 *  inffas86.c
 *
 * Dec-29-2003 -- I added AMD64 inflate asm support.  This version is also
 * slightly quicker on x86 systems because, instead of using rep movsb to copy
 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
 * bytes.  I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
 * from http://fedora.linux.duke.edu/fc1_x86_64
 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
 * 1GB ram.  The 64-bit version is about 4% faster than the 32-bit version,
 * when decompressing mozilla-source-1.3.tar.gz.
 *
 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
 * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
 * the moment.  I have successfully compiled and tested this code with gcc2.96,
 * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
 * enabled.  I will attempt to merge the MMX code into this version.  Newer
 * versions of this and inffast.S can be found at
 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
 *
 */

#include <stdio.h>
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
#include "inffast.h"

/* Mark Adler's comments from inffast.c: */

/*
   Decode literal, length, and distance codes and write out the resulting
   literal and match bytes until either not enough input or output is
   available, an end-of-block is encountered, or a data error is encountered.
   When large enough input and output buffers are supplied to inflate(), for
   example, a 16K input buffer and a 64K output buffer, more than 95% of the
   inflate execution time is spent in this routine.

   Entry assumptions:

        state->mode == LEN
        strm->avail_in >= 6
        strm->avail_out >= 258
        start >= strm->avail_out
        state->bits < 8

   On return, state->mode is one of:

        LEN -- ran out of enough output space or enough available input
        TYPE -- reached end of block code, inflate() to interpret next block
        BAD -- error in block data

   Notes:

    - The maximum input bits used by a length/distance pair is 15 bits for the
      length code, 5 bits for the length extra, 15 bits for the distance code,
      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
      Therefore if strm->avail_in >= 6, then there is enough input to avoid
      checking for available input while decoding.

    - The maximum bytes that a single length/distance pair can output is 258
      bytes, which is the maximum length that can be coded.  inflate_fast()
      requires strm->avail_out >= 258 for each loop to avoid checking for
      output space.
 */



    typedef struct inffast_ar {
kusano 7d535a
/* 64   32                               x86  x86_64 */
kusano 7d535a
/* ar offset                              register */
kusano 7d535a
/*  0    0 */ void *esp;                /* esp save */
kusano 7d535a
/*  8    4 */ void *ebp;                /* ebp save */
kusano 7d535a
/* 16    8 */ unsigned char FAR *in;    /* esi rsi  local strm->next_in */
kusano 7d535a
/* 24   12 */ unsigned char FAR *last;  /*     r9   while in < last */
kusano 7d535a
/* 32   16 */ unsigned char FAR *out;   /* edi rdi  local strm->next_out */
kusano 7d535a
/* 40   20 */ unsigned char FAR *beg;   /*          inflate()'s init next_out */
kusano 7d535a
/* 48   24 */ unsigned char FAR *end;   /*     r10  while out < end */
kusano 7d535a
/* 56   28 */ unsigned char FAR *window;/*          size of window, wsize!=0 */
kusano 7d535a
/* 64   32 */ code const FAR *lcode;    /* ebp rbp  local strm->lencode */
kusano 7d535a
/* 72   36 */ code const FAR *dcode;    /*     r11  local strm->distcode */
kusano 7d535a
/* 80   40 */ size_t /*unsigned long */hold;       /* edx rdx  local strm->hold */
kusano 7d535a
/* 88   44 */ unsigned bits;            /* ebx rbx  local strm->bits */
kusano 7d535a
/* 92   48 */ unsigned wsize;           /*          window size */
kusano 7d535a
/* 96   52 */ unsigned write;           /*          window write index */
kusano 7d535a
/*100   56 */ unsigned lmask;           /*     r12  mask for lcode */
kusano 7d535a
/*104   60 */ unsigned dmask;           /*     r13  mask for dcode */
kusano 7d535a
/*108   64 */ unsigned len;             /*     r14  match length */
kusano 7d535a
/*112   68 */ unsigned dist;            /*     r15  match distance */
kusano 7d535a
/*116   72 */ unsigned status;          /*          set when state chng*/
kusano 7d535a
    } type_ar;
kusano 7d535a
#ifdef ASMINF

/*
   inflate_fast() front end for the hand-tuned AMD64 decoder.

   Copies the decoding state from strm and strm->state into a type_ar record,
   pre-loads input bytes into the bit accumulator until the input pointer is
   aligned on half the size of ar.hold, calls inffas8664fnc() on the record,
   and then writes the results back to strm and to the inflate state.

   strm   stream being inflated; next_in, avail_in, next_out and avail_out
          are updated, msg is set on an error status, and hold, bits and
          (on a status change) mode are updated in strm->state
   start  inflate()'s starting value for strm->avail_out

   ar.status as tested after the call:
        0  state->mode is left unchanged
        1  state->mode = TYPE
        2  "invalid literal/length code",   state->mode = BAD
        3  "invalid distance code",         state->mode = BAD
      > 3  "invalid distance too far back", state->mode = BAD
 */
void inflate_fast(z_streamp strm, unsigned start)
{
    struct inflate_state FAR *state;
    type_ar ar;
    void inffas8664fnc(struct inffast_ar * par);

    /* Slack held back from avail_in / avail_out when forming the ar.last and
       ar.end limits below, and added back when the counts are recomputed on
       return. */
#if (defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )) || (defined(_MSC_VER) && defined(_M_AMD64))
#define PAD_AVAIL_IN 6
#define PAD_AVAIL_OUT 258
#else
#define PAD_AVAIL_IN 5
#define PAD_AVAIL_OUT 257
#endif

    /* copy state to local variables */
    state = (struct inflate_state FAR *)strm->state;

    ar.in = strm->next_in;
    ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
    ar.out = strm->next_out;
    ar.beg = ar.out - (start - strm->avail_out);
    ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
    ar.wsize = state->wsize;
    ar.write = state->wnext;
    ar.window = state->window;
    ar.hold = state->hold;
    ar.bits = state->bits;
    ar.lcode = state->lencode;
    ar.dcode = state->distcode;
    ar.lmask = (1U << state->lenbits) - 1;
    ar.dmask = (1U << state->distbits) - 1;

    /* Give status a defined value before it is read below.  The assembly
       routine is expected to overwrite it (not visible from this file);
       0 is the value that leaves state->mode untouched. */
    ar.status = 0;

    /* decode literals and length/distances until end-of-block or not enough
       input data or output space */

    /* align in on 1/2 hold size boundary */
    while (((size_t)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
        /* widen to the type of ar.hold before shifting: unsigned long is
           narrower than size_t on 64-bit Windows */
        ar.hold += (size_t)*ar.in++ << ar.bits;
        ar.bits += 8;
    }

    inffas8664fnc(&ar);

    if (ar.status > 1) {
        if (ar.status == 2)
            strm->msg = "invalid literal/length code";
        else if (ar.status == 3)
            strm->msg = "invalid distance code";
        else
            strm->msg = "invalid distance too far back";
        state->mode = BAD;
    }
    else if ( ar.status == 1 ) {
        state->mode = TYPE;
    }

    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
    ar.len = ar.bits >> 3;              /* whole bytes still in hold */
    ar.in -= ar.len;
    ar.bits -= ar.len << 3;
    ar.hold &= (1U << ar.bits) - 1;     /* keep only the remaining low bits */

    /* update state and return */
    strm->next_in = ar.in;
    strm->next_out = ar.out;
    /* undo the PAD_AVAIL_* bias; in/out may sit on either side of the limit */
    strm->avail_in = (unsigned)(ar.in < ar.last ?
                                PAD_AVAIL_IN + (ar.last - ar.in) :
                                PAD_AVAIL_IN - (ar.in - ar.last));
    strm->avail_out = (unsigned)(ar.out < ar.end ?
                                 PAD_AVAIL_OUT + (ar.end - ar.out) :
                                 PAD_AVAIL_OUT - (ar.out - ar.end));
    state->hold = (unsigned long)ar.hold;
    state->bits = ar.bits;
    return;
}

#endif /* ASMINF */