/* inffas8664.c is a hand tuned assembler version of inffast.c - fast decoding
 * version for AMD64 on Windows using Microsoft C compiler
 *
 * Copyright (C) 1995-2003 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 *
 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
 * Please use the copyright conditions above.
 *
 * 2005 - Adaptation to Microsoft C Compiler for AMD64 by Gilles Vollant
 *
 * inffas8664.c calls function inffas8664fnc in inffasx64.asm
 *  inffasx64.asm is automatically converted from the AMD64 portion of inffas86.c
 *
 * Dec-29-2003 -- I added AMD64 inflate asm support.  This version is also
 * slightly quicker on x86 systems because, instead of using rep movsb to copy
 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
 * bytes.  I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
 * from http://fedora.linux.duke.edu/fc1_x86_64
 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
 * 1GB ram.  The 64-bit version is about 4% faster than the 32-bit version,
 * when decompressing mozilla-source-1.3.tar.gz.
 *
 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
 * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
 * the moment.  I have successfully compiled and tested this code with gcc2.96,
 * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
 * enabled.  I will attempt to merge the MMX code into this version.  Newer
 * versions of this and inffast.S can be found at
 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
 *
 */
349e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project
359e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project#include <stdio.h>
369e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project#include "zutil.h"
379e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project#include "inftrees.h"
389e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project#include "inflate.h"
399e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project#include "inffast.h"
409e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project
/* Mark Adler's comments from inffast.c: */

/*
   Decode literal, length, and distance codes and write out the resulting
   literal and match bytes until either not enough input or output is
   available, an end-of-block is encountered, or a data error is encountered.
   When large enough input and output buffers are supplied to inflate(), for
   example, a 16K input buffer and a 64K output buffer, more than 95% of the
   inflate execution time is spent in this routine.

   Entry assumptions:

        state->mode == LEN
        strm->avail_in >= 6
        strm->avail_out >= 258
        start >= strm->avail_out
        state->bits < 8

   On return, state->mode is one of:

        LEN -- ran out of enough output space or enough available input
        TYPE -- reached end of block code, inflate() to interpret next block
        BAD -- error in block data

   Notes:

    - The maximum input bits used by a length/distance pair is 15 bits for the
      length code, 5 bits for the length extra, 15 bits for the distance code,
      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
      Therefore if strm->avail_in >= 6, then there is enough input to avoid
      checking for available input while decoding.

    - The maximum bytes that a single length/distance pair can output is 258
      bytes, which is the maximum length that can be coded.  inflate_fast()
      requires strm->avail_out >= 258 for each loop to avoid checking for
      output space.
 */
789e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project
799e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project
809e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project
819e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project    typedef struct inffast_ar {
829e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 64   32                               x86  x86_64 */
839e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* ar offset                              register */
849e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*  0    0 */ void *esp;                /* esp save */
859e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*  8    4 */ void *ebp;                /* ebp save */
869e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 16    8 */ unsigned char FAR *in;    /* esi rsi  local strm->next_in */
879e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 24   12 */ unsigned char FAR *last;  /*     r9   while in < last */
889e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 32   16 */ unsigned char FAR *out;   /* edi rdi  local strm->next_out */
899e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 40   20 */ unsigned char FAR *beg;   /*          inflate()'s init next_out */
909e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 48   24 */ unsigned char FAR *end;   /*     r10  while out < end */
919e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 56   28 */ unsigned char FAR *window;/*          size of window, wsize!=0 */
929e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 64   32 */ code const FAR *lcode;    /* ebp rbp  local strm->lencode */
939e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 72   36 */ code const FAR *dcode;    /*     r11  local strm->distcode */
949e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 80   40 */ size_t /*unsigned long */hold;       /* edx rdx  local strm->hold */
959e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 88   44 */ unsigned bits;            /* ebx rbx  local strm->bits */
969e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 92   48 */ unsigned wsize;           /*          window size */
979e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/* 96   52 */ unsigned write;           /*          window write index */
989e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*100   56 */ unsigned lmask;           /*     r12  mask for lcode */
999e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*104   60 */ unsigned dmask;           /*     r13  mask for dcode */
1009e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*108   64 */ unsigned len;             /*     r14  match length */
1019e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*112   68 */ unsigned dist;            /*     r15  match distance */
1029e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project/*116   72 */ unsigned status;          /*          set when state chng*/
1039e38dfa2f95fce609707a0941f10af9a785288deThe Android Open Source Project    } type_ar;
#ifdef ASMINF

/*
 * Margins subtracted from avail_in / avail_out to form the ar.last / ar.end
 * limits, and added back when the remaining counts are recomputed.
 */
#if (defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )) || (defined(_MSC_VER) && defined(_M_AMD64))
#define PAD_AVAIL_IN 6
#define PAD_AVAIL_OUT 258
#else
#define PAD_AVAIL_IN 5
#define PAD_AVAIL_OUT 257
#endif

/* Decoder loop implemented outside this file (see the file header comment). */
void inffas8664fnc(struct inffast_ar *par);

/*
   Marshal the stream and inflate state into a type_ar record, run
   inffas8664fnc() on it, then copy the results back into strm and state.

   strm   stream whose next_in/avail_in/next_out/avail_out are read and
          updated; strm->state is used as the struct inflate_state
   start  inflate()'s starting value for strm->avail_out

   state->mode is left untouched when ar.status is 0, set to TYPE when it is
   1, and set to BAD (with strm->msg naming the error) for any larger value.
 */
void inflate_fast(z_streamp strm, unsigned start)
{
    struct inflate_state FAR *state;
    type_ar ar;

    /* copy state to local variables */
    state = (struct inflate_state FAR *)strm->state;

    ar.in = strm->next_in;
    ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
    ar.out = strm->next_out;
    /* where next_out stood when avail_out was still equal to start */
    ar.beg = ar.out - (start - strm->avail_out);
    ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
    ar.wsize = state->wsize;
    ar.write = state->wnext;
    ar.window = state->window;
    ar.hold = state->hold;
    ar.bits = state->bits;
    ar.lcode = state->lencode;
    ar.dcode = state->distcode;
    ar.lmask = (1U << state->lenbits) - 1;
    ar.dmask = (1U << state->distbits) - 1;

    /* decode literals and length/distances until end-of-block or not enough
       input data or output space */

    /* align in on 1/2 hold size boundary: feed single bytes into the bit
       accumulator until the input address is a multiple of sizeof(hold)/2.
       The byte is widened to the accumulator's own type before shifting so
       the shift is never narrower than ar.hold. */
    while (((size_t)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
        ar.hold += (size_t)*ar.in++ << ar.bits;
        ar.bits += 8;
    }

    /* NOTE(review): ar.status is not written in this file; the external
       routine is expected to store it before returning -- confirm against
       inffasx64.asm. */
    inffas8664fnc(&ar);

    if (ar.status > 1) {
        if (ar.status == 2)
            strm->msg = "invalid literal/length code";
        else if (ar.status == 3)
            strm->msg = "invalid distance code";
        else
            strm->msg = "invalid distance too far back";
        state->mode = BAD;
    }
    else if (ar.status == 1) {
        state->mode = TYPE;
    }

    /* return unused bytes (on entry, bits < 8, so in won't go too far back):
       every whole byte still held in the accumulator is handed back by
       stepping the input pointer backwards, leaving fewer than 8 bits */
    ar.len = ar.bits >> 3;
    ar.in -= ar.len;
    ar.bits -= ar.len << 3;
    ar.hold &= (1U << ar.bits) - 1;

    /* update state and return; the counts are rebuilt from the distance to
       the padded limits, whichever side of the limit the pointer ended on */
    strm->next_in = ar.in;
    strm->next_out = ar.out;
    strm->avail_in = (unsigned)(ar.in < ar.last ?
                                PAD_AVAIL_IN + (ar.last - ar.in) :
                                PAD_AVAIL_IN - (ar.in - ar.last));
    strm->avail_out = (unsigned)(ar.out < ar.end ?
                                 PAD_AVAIL_OUT + (ar.end - ar.out) :
                                 PAD_AVAIL_OUT - (ar.out - ar.end));
    state->hold = (unsigned long)ar.hold;
    state->bits = ar.bits;
}

#endif
187