1/*
2 * inffast.S is a hand tuned assembler version of:
3 *
4 * inffast.c -- fast decoding
5 * Copyright (C) 1995-2003 Mark Adler
6 * For conditions of distribution and use, see copyright notice in zlib.h
7 *
8 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9 * Please use the copyright conditions above.
10 *
11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution.  On that
13 * machine, I found that gzip style archives decompressed about 20% faster than
14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version.  Your results will
15 * depend on how large of a buffer is used for z_stream.next_in & next_out
16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17 * stream processing I/O and crc32/adler32.  In my case, this routine used
18 * 70% of the cpu time and crc32 used 20%.
19 *
20 * I am confident that this version will work in the general case, but I have
21 * not tested a wide variety of datasets or a wide variety of platforms.
22 *
23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24 * It should be a runtime flag instead of compile time flag...
25 *
26 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
27 * With -DUSE_MMX, only MMX code is compiled.  With -DNO_MMX, only non-MMX code
28 * is compiled.  Without either option, runtime detection is enabled.  Runtime
29 * detection should work on all modern cpus and the recommended algorithm (flip
30 * ID bit on eflags and then use the cpuid instruction) is used in many
31 * multimedia applications.  Tested under win2k with gcc-2.95 and gas-2.12
32 * distributed with cygwin3.  Compiling with gcc-2.95 -c inffast.S -o
33 * inffast.obj generates a COFF object which can then be linked with MSVC++
34 * compiled code.  Tested under FreeBSD 4.7 with gcc-2.95.
35 *
36 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
37 * slower than compiler generated code).  Adjusted cpuid check to use the MMX
38 * code only for Pentiums < P4 until I have more data on the P4.  Speed
39 * improvement is only about 15% on the Athlon when compared with code generated
40 * with MSVC++.  Not sure yet, but I think the P4 will also be slower using the
41 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
42 * have less latency than MMX ops.  Added code to buffer the last 11 bytes of
43 * the input stream since the MMX code grabs bits in chunks of 32, which
44 * differs from the inffast.c algorithm.  I don't think there would have been
45 * read overruns where a page boundary was crossed (a segfault), but there
46 * could have been overruns when next_in ends on unaligned memory (uninitialized
47 * memory read).
48 *
49 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX.  I created a C
50 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
51 * structure offsets which are hard coded in this file.  This was last tested
52 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
53 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
54 * http://www.charm.net/~christop/zlib/
55 */
56
57
58/*
59 * if you have underscore linking problems (_inflate_fast undefined), try
60 * using -DGAS_COFF
61 */
/*
 * Choose the object format unless the builder already defined GAS_COFF or
 * GAS_ELF: COFF when WIN32 or __CYGWIN__ is defined, ELF in every other
 * case.
 */
62#if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
63
64#if defined( WIN32 ) || defined( __CYGWIN__ )
65#define GAS_COFF /* windows object format */
66#else
67#define GAS_ELF /* default when neither WIN32 nor __CYGWIN__ is defined */
68#endif
69
70#endif /* ! GAS_COFF && ! GAS_ELF */
71
72
73#if defined( GAS_COFF )
74
75/* coff externals have underscores: rename both exported symbols so that
 * every later use of inflate_fast / inflate_fast_use_mmx in this file
 * refers to the underscore-prefixed name */
76#define inflate_fast _inflate_fast
77#define inflate_fast_use_mmx _inflate_fast_use_mmx
78
79#endif /* GAS_COFF */
80
81
82.file "inffast.S"
83
84.globl inflate_fast
85
86.text
/*
 * Constant data kept in the text section: three error message strings
 * (.string appends the terminating zero byte) and, unless NO_MMX is
 * defined, the bit-mask lookup table .L_mask.  Each item is preceded by
 * .align 4,0, i.e. padding is done with zero bytes.
 */
87.align 4,0
88.L_invalid_literal_length_code_msg:
89.string "invalid literal/length code"
90
91.align 4,0
92.L_invalid_distance_code_msg:
93.string "invalid distance code"
94
95.align 4,0
96.L_invalid_distance_too_far_msg:
97.string "invalid distance too far back"
98
99#if ! defined( NO_MMX )
/*
 * 33 entries of one 32-bit word each, for N = 0..32.  The MMX path indexes
 * the table as .L_mask(,%eax,4) with %eax = op & 15 to isolate the extra
 * bits of a length.  The non-MMX path builds the same mask with
 * shll/decl instead, and the table is not assembled when NO_MMX is defined.
 */
100.align 4,0
101.L_mask: /* mask[N] = ( 1 << N ) - 1 */
102.long 0
103.long 1
104.long 3
105.long 7
106.long 15
107.long 31
108.long 63
109.long 127
110.long 255
111.long 511
112.long 1023
113.long 2047
114.long 4095
115.long 8191
116.long 16383
117.long 32767
118.long 65535
119.long 131071
120.long 262143
121.long 524287
122.long 1048575
123.long 2097151
124.long 4194303
125.long 8388607
126.long 16777215
127.long 33554431
128.long 67108863
129.long 134217727
130.long 268435455
131.long 536870911
132.long 1073741823
133.long 2147483647
134.long 4294967295
135#endif /* NO_MMX */
136
137.text
138
139/*
140 * struct z_stream offsets, in zlib.h
141 */
142#define next_in_strm   0   /* strm->next_in */
143#define avail_in_strm  4   /* strm->avail_in */
144#define next_out_strm  12  /* strm->next_out */
145#define avail_out_strm 16  /* strm->avail_out */
146#define msg_strm       24  /* strm->msg */
147#define state_strm     28  /* strm->state */
148
149/*
150 * struct inflate_state offsets, in inflate.h
151 */
152#define mode_state     0   /* state->mode */
153#define wsize_state    32  /* state->wsize */
154#define write_state    40  /* state->write */
155#define window_state   44  /* state->window */
156#define hold_state     48  /* state->hold */
157#define bits_state     52  /* state->bits */
158#define lencode_state  68  /* state->lencode */
159#define distcode_state 72  /* state->distcode */
160#define lenbits_state  76  /* state->lenbits */
161#define distbits_state 80  /* state->distbits */
162
163/*
164 * inflate_fast's activation record
 *
 * Layout relative to %esp once the prologue (four pushl, one pushf and
 * subl $local_var_size, %esp) has run:
 *
 *    92  start   second argument
 *    88  strm    first argument
 *    84  return address
 *    80  saved %edi
 *    76  saved %esi
 *    72  saved %ebp
 *    68  saved %ebx
 *    64  saved eflags (pushf, must be 32 bits wide)
 *  0-63  local variables, offsets listed below
 *
 * strm_sp and start_sp therefore have to be adjusted whenever
 * local_var_size or the set of registers pushed by the prologue changes.
165 */
166#define local_var_size 64 /* how much local space for vars */
167#define strm_sp        88 /* first arg: z_stream * (local_var_size + 24) */
168#define start_sp       92 /* second arg: unsigned int (local_var_size + 28) */
169
170/*
171 * offsets for local vars on stack
 * (all relative to %esp after the prologue; together they fill 0..63)
172 */
173#define out            60  /* unsigned char* */
174#define window         56  /* unsigned char* */
175#define wsize          52  /* unsigned int */
176#define write          48  /* unsigned int */
177#define in             44  /* unsigned char* */
178#define beg            40  /* unsigned char* */
179#define buf            28  /* char[ 12 ]: zero-padded copy of the input tail,
                            * used when fewer than 12 input bytes are left */
180#define len            24  /* unsigned int */
181#define last           20  /* unsigned char* */
182#define end            16  /* unsigned char* */
183#define dcode          12  /* code* */
184#define lcode           8  /* code* */
185#define dmask           4  /* unsigned int */
186#define lmask           0  /* unsigned int */
187
188/*
189 * typedef enum inflate_mode consts, in inflate.h
190 */
191#define INFLATE_MODE_TYPE 11  /* state->mode flags enum-ed in inflate.h */
192#define INFLATE_MODE_BAD  26
193
194
/*
 * Run-time MMX selection: only built when neither USE_MMX nor NO_MMX is
 * defined.  inflate_fast_use_mmx is an exported 32-bit flag in .data that
 * starts out as CHECK_MMX; the cpuid probe at .L_check_mmx overwrites it
 * with DO_USE_MMX or DONT_USE_MMX using a plain movl store, and the
 * dispatch code treats any value above DO_USE_MMX as "do not use MMX".
 * NOTE(review): because the symbol is global, a caller can presumably
 * preset it to skip the probe -- confirm against the callers.
 */
195#if ! defined( USE_MMX ) && ! defined( NO_MMX )
196
197#define RUN_TIME_MMX
198
199#define CHECK_MMX    1 /* not probed yet: run the cpuid check */
200#define DO_USE_MMX   2 /* take the MMX decoding loop */
201#define DONT_USE_MMX 3 /* take the plain x86 decoding loop */
202
203.globl inflate_fast_use_mmx
204
205.data
206
207.align 4,0
208inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
209.long CHECK_MMX
210
211#if defined( GAS_ELF )
212/* elf info: mark the flag as a 4 byte data object */
213.type   inflate_fast_use_mmx,@object
214.size   inflate_fast_use_mmx,4
215#endif
216
217#endif /* RUN_TIME_MMX */
218
219#if defined( GAS_COFF )
220/* coff info: scl 2 = extern, type 32 = function */
221.def inflate_fast; .scl 2; .type 32; .endef
222#endif
223
224.text
225
226.align 32,0x90
227inflate_fast:
228        pushl   %edi
229        pushl   %esi
230        pushl   %ebp
231        pushl   %ebx
232        pushf   /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
233        subl    $local_var_size, %esp
234        cld
235
236#define strm_r  %esi
237#define state_r %edi
238
239        movl    strm_sp(%esp), strm_r
240        movl    state_strm(strm_r), state_r
241
242        /* in = strm->next_in;
243         * out = strm->next_out;
244         * last = in + strm->avail_in - 11;
245         * beg = out - (start - strm->avail_out);
246         * end = out + (strm->avail_out - 257);
247         */
248        movl    avail_in_strm(strm_r), %edx
249        movl    next_in_strm(strm_r), %eax
250
251        addl    %eax, %edx      /* avail_in += next_in */
252        subl    $11, %edx       /* avail_in -= 11 */
253
254        movl    %eax, in(%esp)
255        movl    %edx, last(%esp)
256
257        movl    start_sp(%esp), %ebp
258        movl    avail_out_strm(strm_r), %ecx
259        movl    next_out_strm(strm_r), %ebx
260
261        subl    %ecx, %ebp      /* start -= avail_out */
262        negl    %ebp            /* start = -start */
263        addl    %ebx, %ebp      /* start += next_out */
264
265        subl    $257, %ecx      /* avail_out -= 257 */
266        addl    %ebx, %ecx      /* avail_out += out */
267
268        movl    %ebx, out(%esp)
269        movl    %ebp, beg(%esp)
270        movl    %ecx, end(%esp)
271
272        /* wsize = state->wsize;
273         * write = state->write;
274         * window = state->window;
275         * hold = state->hold;
276         * bits = state->bits;
277         * lcode = state->lencode;
278         * dcode = state->distcode;
279         * lmask = ( 1 << state->lenbits ) - 1;
280         * dmask = ( 1 << state->distbits ) - 1;
281         */
282
283        movl    lencode_state(state_r), %eax
284        movl    distcode_state(state_r), %ecx
285
286        movl    %eax, lcode(%esp)
287        movl    %ecx, dcode(%esp)
288
289        movl    $1, %eax
290        movl    lenbits_state(state_r), %ecx
291        shll    %cl, %eax
292        decl    %eax
293        movl    %eax, lmask(%esp)
294
295        movl    $1, %eax
296        movl    distbits_state(state_r), %ecx
297        shll    %cl, %eax
298        decl    %eax
299        movl    %eax, dmask(%esp)
300
301        movl    wsize_state(state_r), %eax
302        movl    write_state(state_r), %ecx
303        movl    window_state(state_r), %edx
304
305        movl    %eax, wsize(%esp)
306        movl    %ecx, write(%esp)
307        movl    %edx, window(%esp)
308
309        movl    hold_state(state_r), %ebp
310        movl    bits_state(state_r), %ebx
311
312#undef strm_r
313#undef state_r
314
315#define in_r       %esi
316#define from_r     %esi
317#define out_r      %edi
318
319        movl    in(%esp), in_r
320        movl    last(%esp), %ecx
321        cmpl    in_r, %ecx
322        ja      .L_align_long           /* if in < last */
323
324        addl    $11, %ecx               /* ecx = &in[ avail_in ] */
325        subl    in_r, %ecx              /* ecx = avail_in */
326        movl    $12, %eax
327        subl    %ecx, %eax              /* eax = 12 - avail_in */
328        leal    buf(%esp), %edi
329        rep     movsb                   /* memcpy( buf, in, avail_in ) */
330        movl    %eax, %ecx
331        xorl    %eax, %eax
332        rep     stosb         /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
333        leal    buf(%esp), in_r         /* in = buf */
334        movl    in_r, last(%esp)        /* last = in, do just one iteration */
335        jmp     .L_is_aligned
336
337        /* align in_r on long boundary */
338.L_align_long:
339        testl   $3, in_r
340        jz      .L_is_aligned
341        xorl    %eax, %eax
342        movb    (in_r), %al
343        incl    in_r
344        movl    %ebx, %ecx
345        addl    $8, %ebx
346        shll    %cl, %eax
347        orl     %eax, %ebp
348        jmp     .L_align_long
349
350.L_is_aligned:
351        movl    out(%esp), out_r
352
353#if defined( NO_MMX )
354        jmp     .L_do_loop
355#endif
356
357#if defined( USE_MMX )
358        jmp     .L_init_mmx
359#endif
360
361/*** Runtime MMX check ***/
362
363#if defined( RUN_TIME_MMX )
/*
 * Dispatch on inflate_fast_use_mmx:
 *   == DO_USE_MMX  -> MMX loop
 *   >  DO_USE_MMX  -> plain x86 loop
 *   otherwise      -> probe the cpu, store DO_USE_MMX or DONT_USE_MMX in the
 *                     flag, then jump back here and dispatch again.
 * MMX is selected only for a cpu that supports cpuid, reports the vendor
 * string "GenuineIntel", has family 6 and has feature bit 23 set.
 */
364.L_check_mmx:
365        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
366        je      .L_init_mmx
367        ja      .L_do_loop /* > 2 */
368
369        pushl   %eax              /* cpuid overwrites eax, ebx, ecx and edx; */
370        pushl   %ebx              /* ebx holds bits at this point, so all    */
371        pushl   %ecx              /* four are saved around the probe         */
372        pushl   %edx
373        pushf
374        movl    (%esp), %eax      /* copy eflags to eax */
375        xorl    $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
376                                   * to see if cpu supports cpuid...
377                                   * ID bit method not supported by NexGen but
378                                   * bios may load a cpuid instruction and
379                                   * cpuid may be disabled on Cyrix 5-6x86 */
380        popf                      /* load the modified image into eflags */
381        pushf
382        popl    %edx              /* copy new eflags to edx */
        /* NOTE(review): the toggled eflags value is not restored in this
         * section; presumably the popf matching the prologue's pushf does
         * that -- the epilogue is outside this section, confirm there. */
383        xorl    %eax, %edx        /* test if ID bit is flipped */
384        jz      .L_dont_use_mmx   /* not flipped if zero */
385        xorl    %eax, %eax        /* cpuid leaf 0: vendor string */
386        cpuid
387        cmpl    $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
388        jne     .L_dont_use_mmx   /* ebx = "Genu" */
389        cmpl    $0x6c65746e, %ecx
390        jne     .L_dont_use_mmx   /* ecx = "ntel" */
391        cmpl    $0x49656e69, %edx
392        jne     .L_dont_use_mmx   /* edx = "ineI" */
393        movl    $1, %eax          /* cpuid leaf 1: signature in eax, features in edx */
394        cpuid                     /* get cpu features */
395        shrl    $8, %eax
396        andl    $15, %eax         /* eax = family field, bits 8..11 */
397        cmpl    $6, %eax          /* check for Pentium family, is 0xf for P4 */
398        jne     .L_dont_use_mmx
399        testl   $0x800000, %edx   /* test if MMX feature is set (bit 23) */
400        jnz     .L_use_mmx
401        jmp     .L_dont_use_mmx
402.L_use_mmx:
403        movl    $DO_USE_MMX, inflate_fast_use_mmx
404        jmp     .L_check_mmx_pop
405.L_dont_use_mmx:
406        movl    $DONT_USE_MMX, inflate_fast_use_mmx
407.L_check_mmx_pop:
408        popl    %edx              /* restore in reverse push order */
409        popl    %ecx
410        popl    %ebx
411        popl    %eax
412        jmp     .L_check_mmx      /* flag is now 2 or 3: dispatch again */
413#endif
414
415
416/*** Non-MMX code ***/
417
418#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
419
420#define hold_r     %ebp
421#define bits_r     %bl
422#define bitslong_r %ebx
423
424.align 32,0x90
425.L_while_test:
426        /* while (in < last && out < end)
         *
         * Loop condition of the do/while; the first iteration enters at
         * .L_do_loop and so skips this test.
427         */
428        cmpl    out_r, end(%esp)
429        jbe     .L_break_loop           /* if (out >= end) */
430
431        cmpl    in_r, last(%esp)
432        jbe     .L_break_loop           /* if (in >= last) */
433
434.L_do_loop:
435        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
436         *
437         * do {
438         *   if (bits < 15) {
439         *     hold |= *((unsigned short *)in)++ << bits;
440         *     bits += 16
441         *   }
442         *   this = lcode[hold & lmask]
         *
         * Note: the compare below refills whenever bits <= 15, i.e. also
         * when bits == 15, which differs from the pseudo-code above.
443         */
444        cmpb    $15, bits_r
445        ja      .L_get_length_code      /* if (15 < bits) */
446
447        xorl    %eax, %eax              /* clear the upper half before lodsw */
448        lodsw                           /* ax = *(ushort *)in, in += 2 */
449        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
450        addb    $16, bits_r             /* bits += 16 */
451        shll    %cl, %eax
452        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
453
454.L_get_length_code:
455        movl    lmask(%esp), %edx       /* edx = lmask */
456        movl    lcode(%esp), %ecx       /* ecx = lcode */
457        andl    hold_r, %edx            /* edx &= hold */
458        movl    (%ecx,%edx,4), %eax     /* eax = lcode[hold & lmask] */
459
460.L_dolen:
461        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
462         *
463         * dolen:
464         *    bits -= this.bits;
465         *    hold >>= this.bits
         *
         * The 4 byte table entry is used as: %al = op, %ah = bits,
         * upper 16 bits of %eax = val.
466         */
467        movb    %ah, %cl                /* cl = this.bits */
468        subb    %ah, bits_r             /* bits -= this.bits */
469        shrl    %cl, hold_r             /* hold >>= this.bits */
470
471        /* check if op is a literal
472         * if (op == 0) {
473         *    PUP(out) = this.val;
474         *  }
475         */
476        testb   %al, %al
477        jnz     .L_test_for_length_base /* if (op != 0) 45.7% */
478
479        shrl    $16, %eax               /* output this.val char */
480        stosb                           /* *out++ = al */
481        jmp     .L_while_test
482
483.L_test_for_length_base:
484        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
485         *
486         * else if (op & 16) {
487         *   len = this.val
488         *   op &= 15
489         *   if (op) {
490         *     if (op > bits) {
491         *       hold |= *((unsigned short *)in)++ << bits;
492         *       bits += 16
493         *     }
494         *     len += hold & mask[op];
495         *     bits -= op;
496         *     hold >>= op;
497         *   }
498         */
499#define len_r %edx
500        movl    %eax, len_r             /* len = this */
501        shrl    $16, len_r              /* len = this.val */
502        movb    %al, %cl
503
504        testb   $16, %al
505        jz      .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
506        andb    $15, %cl                /* op &= 15 */
507        jz      .L_save_len             /* if (!op) */
508        cmpb    %cl, bits_r
509        jae     .L_add_bits_to_len      /* if (op <= bits) */
510
511        movb    %cl, %ch                /* stash op in ch, freeing cl */
512        xorl    %eax, %eax
513        lodsw                           /* al = *(ushort *)in++ */
514        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
515        addb    $16, bits_r             /* bits += 16 */
516        shll    %cl, %eax
517        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
518        movb    %ch, %cl                /* move op back to ecx */
519
520.L_add_bits_to_len:
521        movl    $1, %eax
522        shll    %cl, %eax
523        decl    %eax
524        subb    %cl, bits_r
525        andl    hold_r, %eax            /* eax &= hold */
526        shrl    %cl, hold_r
527        addl    %eax, len_r             /* len += hold & mask[op] */
528
529.L_save_len:
530        movl    len_r, len(%esp)        /* save len */
531#undef  len_r
532
533.L_decode_distance:
534        /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
535         *
536         *   if (bits < 15) {
537         *     hold |= *((unsigned short *)in)++ << bits;
538         *     bits += 16
539         *   }
540         *   this = dcode[hold & dmask];
541         * dodist:
542         *   bits -= this.bits;
543         *   hold >>= this.bits;
544         *   op = this.op;
545         */
546
547        cmpb    $15, bits_r
548        ja      .L_get_distance_code    /* if (15 < bits) */
549
550        xorl    %eax, %eax
551        lodsw                           /* al = *(ushort *)in++ */
552        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
553        addb    $16, bits_r             /* bits += 16 */
554        shll    %cl, %eax
555        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
556
557.L_get_distance_code:
558        movl    dmask(%esp), %edx       /* edx = dmask */
559        movl    dcode(%esp), %ecx       /* ecx = dcode */
560        andl    hold_r, %edx            /* edx &= hold */
561        movl    (%ecx,%edx,4), %eax     /* eax = dcode[hold & dmask] */
562
563#define dist_r %edx
564.L_dodist:
565        movl    %eax, dist_r            /* dist = this */
566        shrl    $16, dist_r             /* dist = this.val */
567        movb    %ah, %cl
568        subb    %ah, bits_r             /* bits -= this.bits */
569        shrl    %cl, hold_r             /* hold >>= this.bits */
570
571        /* if (op & 16) {
572         *   dist = this.val
573         *   op &= 15
574         *   if (op > bits) {
575         *     hold |= *((unsigned short *)in)++ << bits;
576         *     bits += 16
577         *   }
578         *   dist += hold & mask[op];
579         *   bits -= op;
580         *   hold >>= op;
581         */
582        movb    %al, %cl                /* cl = this.op */
583
584        testb   $16, %al                /* if ((op & 16) == 0) */
585        jz      .L_test_for_second_level_dist
586        andb    $15, %cl                /* op &= 15 */
587        jz      .L_check_dist_one
588        cmpb    %cl, bits_r
589        jae     .L_add_bits_to_dist     /* if (op <= bits) 97.6% */
590
591        movb    %cl, %ch                /* stash op in ch, freeing cl */
592        xorl    %eax, %eax
593        lodsw                           /* al = *(ushort *)in++ */
594        movb    bits_r, %cl             /* cl = bits, needs it for shifting */
595        addb    $16, bits_r             /* bits += 16 */
596        shll    %cl, %eax
597        orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
598        movb    %ch, %cl                /* move op back to ecx */
599
600.L_add_bits_to_dist:
        /* regs: %cl = op (number of extra distance bits), %edx = dist,
         *       %ebp = hold, %bl = bits
         *
         *   dist += hold & ((1 << op) - 1);
         *   bits -= op;
         *   hold >>= op;
         */
601        movl    $1, %eax
602        shll    %cl, %eax
603        decl    %eax                    /* (1 << op) - 1 */
604        subb    %cl, bits_r             /* bits -= op */
605        andl    hold_r, %eax            /* eax &= hold */
606        shrl    %cl, hold_r             /* hold >>= op */
607        addl    %eax, dist_r            /* dist += hold & ((1 << op) - 1) */
        /* fall through to .L_check_window, which follows immediately */
609
610.L_check_window:
611        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
612         *       %ecx = nbytes
613         *
614         * nbytes = out - beg;
615         * if (dist <= nbytes) {
616         *   from = out - dist;
617         *   do {
618         *     PUP(out) = PUP(from);
619         *   } while (--len > 0) {
620         * }
621         */
622
623        movl    in_r, in(%esp)          /* save in so from can use it's reg */
624        movl    out_r, %eax
625        subl    beg(%esp), %eax         /* nbytes = out - beg */
626
627        cmpl    dist_r, %eax
628        jb      .L_clip_window          /* if (dist > nbytes) 4.2% */
629
630        movl    len(%esp), %ecx
631        movl    out_r, from_r
632        subl    dist_r, from_r          /* from = out - dist */
633
634        subl    $3, %ecx
635        movb    (from_r), %al
636        movb    %al, (out_r)
637        movb    1(from_r), %al
638        movb    2(from_r), %dl
639        addl    $3, from_r
640        movb    %al, 1(out_r)
641        movb    %dl, 2(out_r)
642        addl    $3, out_r
643        rep     movsb
644
645        movl    in(%esp), in_r          /* move in back to %esi, toss from */
646        jmp     .L_while_test
647
648.align 16,0x90
649.L_check_dist_one:
650        cmpl    $1, dist_r
651        jne     .L_check_window
652        cmpl    out_r, beg(%esp)
653        je      .L_check_window
654
655        decl    out_r
656        movl    len(%esp), %ecx
657        movb    (out_r), %al
658        subl    $3, %ecx
659
660        movb    %al, 1(out_r)
661        movb    %al, 2(out_r)
662        movb    %al, 3(out_r)
663        addl    $4, out_r
664        rep     stosb
665
666        jmp     .L_while_test
667
668.align 16,0x90
669.L_test_for_second_level_length:
670        /* else if ((op & 64) == 0) {
671         *   this = lcode[this.val + (hold & mask[op])];
672         * }
673         */
674        testb   $64, %al
675        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
676
677        movl    $1, %eax
678        shll    %cl, %eax
679        decl    %eax
680        andl    hold_r, %eax            /* eax &= hold */
681        addl    %edx, %eax              /* eax += this.val */
682        movl    lcode(%esp), %edx       /* edx = lcode */
683        movl    (%edx,%eax,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
684        jmp     .L_dolen
685
686.align 16,0x90
687.L_test_for_second_level_dist:
688        /* else if ((op & 64) == 0) {
689         *   this = dcode[this.val + (hold & mask[op])];
690         * }
691         */
692        testb   $64, %al
693        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
694
695        movl    $1, %eax
696        shll    %cl, %eax
697        decl    %eax
698        andl    hold_r, %eax            /* eax &= hold */
699        addl    %edx, %eax              /* eax += this.val */
700        movl    dcode(%esp), %edx       /* edx = dcode */
701        movl    (%edx,%eax,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
702        jmp     .L_dodist
703
704.align 16,0x90
705.L_clip_window:
706        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
707         *       %ecx = nbytes
         *
         * On entry nbytes (= out - beg) is still in %eax; it is moved to
         * %ecx below and %eax is reused for wsize and then for len.
         * %esi is free to become from because in was saved to in(%esp)
         * at .L_check_window.
708         *
709         * else {
710         *   if (dist > wsize) {
711         *     invalid distance
712         *   }
713         *   from = window;
714         *   nbytes = dist - nbytes;
715         *   if (write == 0) {
716         *     from += wsize - nbytes;
717         */
718#define nbytes_r %ecx
719        movl    %eax, nbytes_r
720        movl    wsize(%esp), %eax       /* prepare for dist compare */
721        negl    nbytes_r                /* nbytes = -nbytes */
722        movl    window(%esp), from_r    /* from = window */
723
724        cmpl    dist_r, %eax
725        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
726
727        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
728        cmpl    $0, write(%esp)
729        jne     .L_wrap_around_window   /* if (write != 0) */
730
731        subl    nbytes_r, %eax
732        addl    %eax, from_r            /* from += wsize - nbytes */
733
734        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
735         *       %ecx = nbytes, %eax = len
736         *
737         *     if (nbytes < len) {
738         *       len -= nbytes;
739         *       do {
740         *         PUP(out) = PUP(from);
741         *       } while (--nbytes);
742         *       from = out - dist;
743         *     }
744         *   }
745         */
746#define len_r %eax
747        movl    len(%esp), len_r
748        cmpl    nbytes_r, len_r
749        jbe     .L_do_copy1             /* if (nbytes >= len) */
750
751        subl    nbytes_r, len_r         /* len -= nbytes */
752        rep     movsb                   /* copy nbytes bytes out of the window */
753        movl    out_r, from_r
754        subl    dist_r, from_r          /* from = out - dist */
755        jmp     .L_do_copy1             /* copy the remaining len bytes */
765
766.L_wrap_around_window:
767        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
768         *       %ecx = nbytes, %eax = write, %eax = len
769         *
770         *   else if (write < nbytes) {
771         *     from += wsize + write - nbytes;
772         *     nbytes -= write;
773         *     if (nbytes < len) {
774         *       len -= nbytes;
775         *       do {
776         *         PUP(out) = PUP(from);
777         *       } while (--nbytes);
778         *       from = window;
779         *       nbytes = write;
780         *       if (nbytes < len) {
781         *         len -= nbytes;
782         *         do {
783         *           PUP(out) = PUP(from);
784         *         } while(--nbytes);
785         *         from = out - dist;
786         *       }
787         *     }
788         *   }
789         */
790#define write_r %eax
791        movl    write(%esp), write_r
792        cmpl    write_r, nbytes_r
793        jbe     .L_contiguous_in_window /* if (write >= nbytes) */
794
795        addl    wsize(%esp), from_r
796        addl    write_r, from_r
797        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
798        subl    write_r, nbytes_r       /* nbytes -= write */
799#undef write_r
800
801        movl    len(%esp), len_r
802        cmpl    nbytes_r, len_r
803        jbe     .L_do_copy1             /* if (nbytes >= len) */
804
805        subl    nbytes_r, len_r         /* len -= nbytes */
806        rep     movsb
807        movl    window(%esp), from_r    /* from = window */
808        movl    write(%esp), nbytes_r   /* nbytes = write */
809        cmpl    nbytes_r, len_r
810        jbe     .L_do_copy1             /* if (nbytes >= len) */
811
812        subl    nbytes_r, len_r         /* len -= nbytes */
813        rep     movsb
814        movl    out_r, from_r
815        subl    dist_r, from_r          /* from = out - dist */
816        jmp     .L_do_copy1
817
818.L_contiguous_in_window:
819        /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
820         *       %ecx = nbytes, %eax = write, %eax = len
821         *
822         *   else {
823         *     from += write - nbytes;
824         *     if (nbytes < len) {
825         *       len -= nbytes;
826         *       do {
827         *         PUP(out) = PUP(from);
828         *       } while (--nbytes);
829         *       from = out - dist;
830         *     }
831         *   }
832         */
833#define write_r %eax
834        addl    write_r, from_r
835        subl    nbytes_r, from_r        /* from += write - nbytes */
836#undef write_r
837
838        movl    len(%esp), len_r
839        cmpl    nbytes_r, len_r
840        jbe     .L_do_copy1             /* if (nbytes >= len) */
841
842        subl    nbytes_r, len_r         /* len -= nbytes */
843        rep     movsb
844        movl    out_r, from_r
845        subl    dist_r, from_r          /* from = out - dist */
846
847.L_do_copy1:
848        /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
849         *       %eax = len
850         *
851         *     while (len > 0) {
852         *       PUP(out) = PUP(from);
853         *       len--;
854         *     }
855         *   }
856         * } while (in < last && out < end);
857         */
858#undef nbytes_r
859#define in_r %esi
860        movl    len_r, %ecx
861        rep     movsb
862
863        movl    in(%esp), in_r          /* move in back to %esi, toss from */
864        jmp     .L_while_test
865
866#undef len_r
867#undef dist_r
868
869#endif /* NO_MMX || RUN_TIME_MMX */
870
871
872/*** MMX code ***/
873
874#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
875
876.align 32,0x90
877.L_init_mmx:
878        emms
879
880#undef  bits_r
881#undef  bitslong_r
882#define bitslong_r %ebp
883#define hold_mm    %mm0
884        movd    %ebp, hold_mm
885        movl    %ebx, bitslong_r
886
887#define used_mm   %mm1
888#define dmask2_mm %mm2
889#define lmask2_mm %mm3
890#define lmask_mm  %mm4
891#define dmask_mm  %mm5
892#define tmp_mm    %mm6
893
894        movd    lmask(%esp), lmask_mm
895        movq    lmask_mm, lmask2_mm
896        movd    dmask(%esp), dmask_mm
897        movq    dmask_mm, dmask2_mm
898        pxor    used_mm, used_mm
899        movl    lcode(%esp), %ebx       /* ebx = lcode */
900        jmp     .L_do_loop_mmx
901
/*
 * MMX main decode loop.  Each pass drops the bits consumed by the previous
 * step (used_mm), refills hold_mm with 4 input bytes when 32 or fewer bits
 * remain, looks up lcode[hold & lmask] and either stores a literal byte or
 * continues at .L_test_for_length_base_mmx.  %ebx holds lcode here.
 */
902.align 32,0x90
903.L_while_test_mmx:
904        /* while (in < last && out < end)
905         */
906        cmpl    out_r, end(%esp)
907        jbe     .L_break_loop           /* if (out >= end) */
908
909        cmpl    in_r, last(%esp)
910        jbe     .L_break_loop           /* if (in >= last) */
911
912.L_do_loop_mmx:
913        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
914
915        cmpl    $32, bitslong_r
916        ja      .L_get_length_code_mmx  /* if (32 < bits) */
917
        /* refill: append the next 32 input bits above the bits still held */
918        movd    bitslong_r, tmp_mm      /* shift count = bits */
919        movd    (in_r), %mm7            /* mm7 = next 4 input bytes */
920        addl    $4, in_r
921        psllq   tmp_mm, %mm7
922        addl    $32, bitslong_r         /* bits += 32 */
923        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
924
925.L_get_length_code_mmx:
926        pand    hold_mm, lmask_mm       /* lmask_mm = hold & lmask (clobbers mask) */
927        movd    lmask_mm, %eax
928        movq    lmask2_mm, lmask_mm     /* restore lmask from the spare copy */
929        movl    (%ebx,%eax,4), %eax     /* eax = lcode[hold & lmask] */
930
931.L_dolen_mmx:
        /* eax = table entry: al = op, ah = bits, upper 16 bits = val */
932        movzbl  %ah, %ecx               /* ecx = this.bits */
933        movd    %ecx, used_mm           /* shifted out of hold_mm at the next step */
934        subl    %ecx, bitslong_r        /* bits -= this.bits */
935
936        testb   %al, %al
937        jnz     .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
938
939        shrl    $16, %eax               /* output this.val char */
940        stosb                           /* store %al at out, out++ */
941        jmp     .L_while_test_mmx
942
/*
 * Length code.  eax = table entry (al = op).  Takes len from this.val and,
 * when op has bit 16 set, adds (op & 15) extra bits read from hold_mm.
 * Falls through into distance decoding with len in len_r.
 */
943.L_test_for_length_base_mmx:
944#define len_r  %edx
945        movl    %eax, len_r             /* len = this */
946        shrl    $16, len_r              /* len = this.val */
947
948        testb   $16, %al
949        jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
950        andl    $15, %eax               /* op &= 15 */
951        jz      .L_decode_distance_mmx  /* if (!op) */
952
953        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
954        movd    %eax, used_mm           /* the extra bits are dropped at the next step */
955        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
956        subl    %eax, bitslong_r        /* bits -= op */
957        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
958        addl    %ecx, len_r             /* len += hold & mask[op] */
959
/*
 * Distance code.  Refills hold_mm if needed, looks up dcode[hold & dmask]
 * and builds dist in dist_r from this.val plus (op & 15) extra bits.
 * From here on %ebx holds dist instead of lcode; the copy paths reload lcode
 * before returning to the main loop.  Falls through into the window check.
 */
960.L_decode_distance_mmx:
961        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
962
963        cmpl    $32, bitslong_r
964        ja      .L_get_dist_code_mmx    /* if (32 < bits) */
965
        /* refill: append the next 32 input bits above the bits still held */
966        movd    bitslong_r, tmp_mm      /* shift count = bits */
967        movd    (in_r), %mm7            /* mm7 = next 4 input bytes */
968        addl    $4, in_r
969        psllq   tmp_mm, %mm7
970        addl    $32, bitslong_r         /* bits += 32 */
971        por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
972
973.L_get_dist_code_mmx:
974        movl    dcode(%esp), %ebx       /* ebx = dcode */
975        pand    hold_mm, dmask_mm       /* dmask_mm = hold & dmask (clobbers mask) */
976        movd    dmask_mm, %eax
977        movq    dmask2_mm, dmask_mm     /* restore dmask from the spare copy */
978        movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
979
980.L_dodist_mmx:
981#define dist_r %ebx
982        movzbl  %ah, %ecx               /* ecx = this.bits */
983        movl    %eax, dist_r
984        shrl    $16, dist_r             /* dist  = this.val */
985        subl    %ecx, bitslong_r        /* bits -= this.bits */
986        movd    %ecx, used_mm           /* shifted out of hold_mm at the next step */
987
988        testb   $16, %al                /* if ((op & 16) == 0) */
989        jz      .L_test_for_second_level_dist_mmx
990        andl    $15, %eax               /* op &= 15 */
991        jz      .L_check_dist_one_mmx   /* no extra bits: try the dist == 1 shortcut */
992
993.L_add_bits_to_dist_mmx:
994        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
995        movd    %eax, used_mm           /* save bit length of current op */
996        movd    hold_mm, %ecx           /* get the next bits on input stream */
997        subl    %eax, bitslong_r        /* bits -= op bits */
998        andl    .L_mask(,%eax,4), %ecx  /* ecx   = hold & mask[op] */
999        addl    %ecx, dist_r            /* dist += hold & mask[op] */
1000
/*
 * Copy a match of len_r bytes from dist_r bytes back.  If the match starts
 * before beg, control goes to .L_clip_window_mmx; otherwise the bytes are
 * copied from the output buffer itself.  from_r shares its register with
 * in_r, so in is parked in in(%esp) for the duration of the copy.
 */
1001.L_check_window_mmx:
1002        movl    in_r, in(%esp)          /* save in so from can use its reg */
1003        movl    out_r, %eax
1004        subl    beg(%esp), %eax         /* nbytes = out - beg */
1005
1006        cmpl    dist_r, %eax
1007        jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */
1008
1009        movl    len_r, %ecx
1010        movl    out_r, from_r
1011        subl    dist_r, from_r          /* from = out - dist */
1012
        /* The first three bytes are copied with plain moves and the remaining
         * len - 3 with rep movsb.  NOTE(review): this relies on len >= 3,
         * which nothing in this block checks - confirm against the length
         * table.  The %dl write below clobbers len_r, which is already in %ecx.
         */
1013        subl    $3, %ecx
1014        movb    (from_r), %al
1015        movb    %al, (out_r)
1016        movb    1(from_r), %al
1017        movb    2(from_r), %dl
1018        addl    $3, from_r
1019        movb    %al, 1(out_r)
1020        movb    %dl, 2(out_r)
1021        addl    $3, out_r
1022        rep     movsb                   /* copy the remaining len - 3 bytes */
1023
1024        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1025        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1026        jmp     .L_while_test_mmx
1027
/*
 * Shortcut for distance codes without extra bits: when dist == 1 and at
 * least one byte has been written since beg, the match is a run of the
 * previous output byte, so it is produced with byte stores and rep stosb
 * instead of a copy.  Every other case goes to the general window check.
 */
1028.align 16,0x90
1029.L_check_dist_one_mmx:
1030        cmpl    $1, dist_r
1031        jne     .L_check_window_mmx     /* if (dist != 1) */
1032        cmpl    out_r, beg(%esp)
1033        je      .L_check_window_mmx     /* if (out == beg) */
1034
1035        decl    out_r                   /* point at the previous output byte */
1036        movl    len_r, %ecx
1037        movb    (out_r), %al            /* al = byte to repeat */
1038        subl    $3, %ecx                /* three bytes are stored by hand below */
1039
1040        movb    %al, 1(out_r)
1041        movb    %al, 2(out_r)
1042        movb    %al, 3(out_r)
1043        addl    $4, out_r               /* undo the decl and skip the 3 stored bytes */
1044        rep     stosb                   /* store the remaining len - 3 copies of al */
1045
1046        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1047        jmp     .L_while_test_mmx
1048
/*
 * Length entry without bit 16 set.  If bit 64 is set it is either
 * end-of-block or an invalid code; otherwise the entry links to a second
 * level table: index lcode with this.val plus the next (op & 15) bits of
 * hold and decode that entry at .L_dolen_mmx.
 */
1049.align 16,0x90
1050.L_test_for_second_level_length_mmx:
1051        testb   $64, %al
1052        jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
1053
1054        andl    $15, %eax               /* op &= 15 */
1055        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1056        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
1057        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1058        addl    len_r, %ecx             /* ecx += this.val */
1059        movl    (%ebx,%ecx,4), %eax     /* eax = lcode[val + (hold & mask[op])] */
1060        jmp     .L_dolen_mmx
1061
/*
 * Distance entry without bit 16 set.  Bit 64 marks an invalid distance
 * code; otherwise the entry links to a second level table: index dcode with
 * this.val plus the next (op & 15) bits of hold and decode that entry at
 * .L_dodist_mmx.  dist_r (%ebx) holds this.val here, so dcode is reloaded
 * into %eax.
 */
1062.align 16,0x90
1063.L_test_for_second_level_dist_mmx:
1064        testb   $64, %al
1065        jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
1066
1067        andl    $15, %eax               /* op &= 15 */
1068        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1069        movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
1070        andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
1071        movl    dcode(%esp), %eax       /* eax = dcode */
1072        addl    dist_r, %ecx            /* ecx += this.val */
1073        movl    (%eax,%ecx,4), %eax     /* eax = dcode[val + (hold & mask[op])] */
1074        jmp     .L_dodist_mmx
1075
/*
 * The match starts before beg, so its first bytes come from the window.
 *
 * In:  %eax   = out - beg
 *      dist_r = match distance, len_r = match length
 *      in has already been saved to in(%esp); from_r reuses its register.
 * Out: jumps to .L_invalid_distance_too_far if dist > wsize,
 *      to .L_wrap_around_window_mmx if write != 0,
 *      otherwise copies the window part here and leaves from_r/len_r set
 *      up for the final copy at .L_do_copy1_mmx.
 */
1076.align 16,0x90
1077.L_clip_window_mmx:
1078#define nbytes_r %ecx
1079        movl    %eax, nbytes_r
1080        movl    wsize(%esp), %eax       /* prepare for dist compare */
1081        negl    nbytes_r                /* nbytes = -nbytes */
1082        movl    window(%esp), from_r    /* from = window */
1083
1084        cmpl    dist_r, %eax
1085        jb      .L_invalid_distance_too_far /* if (dist > wsize) */
1086
1087        addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
1088        cmpl    $0, write(%esp)
1089        jne     .L_wrap_around_window_mmx /* if (write != 0) */
1090
        /* write == 0: the wanted bytes are the last nbytes of the window */
1091        subl    nbytes_r, %eax
1092        addl    %eax, from_r            /* from += wsize - nbytes */
1093
1094        cmpl    nbytes_r, len_r
1095        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1096
1097        subl    nbytes_r, len_r         /* len -= nbytes */
1098        rep     movsb                   /* copy nbytes bytes out of the window */
1099        movl    out_r, from_r
1100        subl    dist_r, from_r          /* from = out - dist */
1101        jmp     .L_do_copy1_mmx
1111
/*
 * Window copy when write != 0.  On entry from_r = window and
 * nbytes_r = dist - (out - beg).  If write >= nbytes the source is one
 * contiguous run (.L_contiguous_in_window_mmx).  Otherwise the source wraps:
 * copy from the end of the window, then from its start, then finish from
 * the output buffer at .L_do_copy1_mmx.
 */
1112.L_wrap_around_window_mmx:
1113#define write_r %eax
1114        movl    write(%esp), write_r
1115        cmpl    write_r, nbytes_r
1116        jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */
1117
1118        addl    wsize(%esp), from_r
1119        addl    write_r, from_r
1120        subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
1121        subl    write_r, nbytes_r       /* nbytes -= write */
1122#undef write_r
1123
1124        cmpl    nbytes_r, len_r
1125        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1126
1127        subl    nbytes_r, len_r         /* len -= nbytes */
1128        rep     movsb                   /* copy the part at the end of the window */
1129        movl    window(%esp), from_r    /* from = window */
1130        movl    write(%esp), nbytes_r   /* nbytes = write */
1131        cmpl    nbytes_r, len_r
1132        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1133
1134        subl    nbytes_r, len_r         /* len -= nbytes */
1135        rep     movsb                   /* copy the part at the start of the window */
1136        movl    out_r, from_r
1137        subl    dist_r, from_r          /* from = out - dist */
1138        jmp     .L_do_copy1_mmx
1139
/*
 * Window copy when write >= nbytes: the source is contiguous and ends at
 * window + write.  write_r (%eax) still holds write, loaded at
 * .L_wrap_around_window_mmx.  Falls through into .L_do_copy1_mmx.
 */
1140.L_contiguous_in_window_mmx:
1141#define write_r %eax
1142        addl    write_r, from_r
1143        subl    nbytes_r, from_r        /* from += write - nbytes */
1144#undef write_r
1145
1146        cmpl    nbytes_r, len_r
1147        jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
1148
1149        subl    nbytes_r, len_r         /* len -= nbytes */
1150        rep     movsb                   /* copy nbytes bytes out of the window */
1151        movl    out_r, from_r
1152        subl    dist_r, from_r          /* from = out - dist */
1153
/*
 * Final stage of a clipped match: copy the remaining len_r bytes from
 * from_r to out_r, then restore the registers borrowed for the copy
 * (in from in(%esp), lcode into %ebx) and return to the main loop.
 */
1154.L_do_copy1_mmx:
1155#undef nbytes_r
1156#define in_r %esi
1157        movl    len_r, %ecx
1158        rep     movsb                   /* copy len bytes from from_r to out_r */
1159
1160        movl    in(%esp), in_r          /* move in back to %esi, toss from */
1161        movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
1162        jmp     .L_while_test_mmx
1163
1164#undef hold_r
1165#undef bitslong_r
1166
1167#endif /* USE_MMX || RUN_TIME_MMX */
1168
1169
1170/*** USE_MMX, NO_MMX, and RUN_TIME_MMX from here on ***/
1171
/*
 * Error exit: invalid distance code.  Passes the message in %ecx and the
 * new mode in %edx to .L_update_stream_state.
 */
1172.L_invalid_distance_code:
1173        /* else {
1174         *   strm->msg = "invalid distance code";
1175         *   state->mode = BAD;
1176         * }
1177         */
1178        movl    $.L_invalid_distance_code_msg, %ecx     /* ecx = msg */
1179        movl    $INFLATE_MODE_BAD, %edx                 /* edx = mode */
1180        jmp     .L_update_stream_state
1181
/*
 * Length entry with bit 64 set: end-of-block if bit 32 is also set,
 * otherwise an invalid literal/length code.  For end-of-block no message is
 * set (%ecx = NULL) and the new mode is passed in %edx to
 * .L_update_stream_state.
 */
1182.L_test_for_end_of_block:
1183        /* else if (op & 32) {
1184         *   state->mode = TYPE;
1185         *   break;
1186         * }
1187         */
1188        testb   $32, %al
1189        jz      .L_invalid_literal_length_code  /* if ((op & 32) == 0) */
1190
        /* Flags are dead here: .L_update_stream_state re-tests %ecx itself. */
1191        xorl    %ecx, %ecx              /* msg = NULL */
1192        movl    $INFLATE_MODE_TYPE, %edx        /* edx = mode */
1193        jmp     .L_update_stream_state
1194
/*
 * Error exit: invalid literal/length code.  Passes the message in %ecx and
 * the new mode in %edx to .L_update_stream_state.
 */
1195.L_invalid_literal_length_code:
1196        /* else {
1197         *   strm->msg = "invalid literal/length code";
1198         *   state->mode = BAD;
1199         * }
1200         */
1201        movl    $.L_invalid_literal_length_code_msg, %ecx       /* ecx = msg */
1202        movl    $INFLATE_MODE_BAD, %edx                         /* edx = mode */
1203        jmp     .L_update_stream_state
1204
/*
 * Error exit: the match distance is larger than the window.  This is
 * reached while from_r occupies in's register, so in is reloaded from
 * in(%esp) before the common exit code uses it.
 */
1205.L_invalid_distance_too_far:
1206        /* strm->msg = "invalid distance too far back";
1207         * state->mode = BAD;
1208         */
1209        movl    in(%esp), in_r          /* from_r has in's reg, put in back */
1210        movl    $.L_invalid_distance_too_far_msg, %ecx  /* ecx = msg */
1211        movl    $INFLATE_MODE_BAD, %edx                 /* edx = mode */
1212        jmp     .L_update_stream_state
1213
/*
 * Common tail of the error and end-of-block exits.
 * In: %ecx = message pointer, or 0 to leave strm->msg untouched
 *     %edx = value to store in state->mode
 * Clobbers %eax, then leaves the decode loop through .L_break_loop.
 */
1214.L_update_stream_state:
1215        /* set strm->msg = %ecx, strm->state->mode = %edx */
1216        movl    strm_sp(%esp), %eax     /* eax = strm */
1217        testl   %ecx, %ecx              /* if (msg != NULL) */
1218        jz      .L_skip_msg
1219        movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
1220.L_skip_msg:
1221        movl    state_strm(%eax), %eax  /* state = strm->state */
1222        movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
1223        jmp     .L_break_loop
1224
/*
 * Common exit from both decode loops.  In MMX mode the bit count is first
 * moved from %ebp to %ebx so that the code that follows finds it where the
 * non-MMX loop keeps it.
 */
1225.align 32,0x90
1226.L_break_loop:
1227
1228/*
1229 * Regs:
1230 *
1231 * bits = %ebp when mmx, and in %ebx when non-mmx
1232 * hold = hold_mm (%mm0) when mmx, and in %ebp when non-mmx
1233 * in   = %esi
1234 * out  = %edi
1235 */
1236
1237#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1238
1239#if defined( RUN_TIME_MMX )
1240
        /* run-time selection: skip the move when the non-MMX loop was used */
1241        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
1242        jne     .L_update_next_in
1243
1244#endif /* RUN_TIME_MMX */
1245
1246        movl    %ebp, %ebx              /* ebx = bits (mmx keeps bits in %ebp) */
1247
1248.L_update_next_in:
1249
1250#endif
1251
/*
 * Write the decoder state back: return whole unused bytes to the input,
 * store bits/next_out/next_in, and store hold masked down to the remaining
 * bit count.  On entry %ebx = bits for both the MMX and non-MMX paths.
 */
1252#define strm_r  %eax
1253#define state_r %edx
1254
1255        /* len = bits >> 3;
1256         * in -= len;
1257         * bits -= len << 3;
1258         * hold &= (1U << bits) - 1;
1259         * state->hold = hold;
1260         * state->bits = bits;
1261         * strm->next_in = in;
1262         * strm->next_out = out;
1263         */
1264        movl    strm_sp(%esp), strm_r
1265        movl    %ebx, %ecx
1266        movl    state_strm(strm_r), state_r
1267        shrl    $3, %ecx                /* ecx = len = bits >> 3 */
1268        subl    %ecx, in_r              /* in -= len */
1269        shll    $3, %ecx
1270        subl    %ecx, %ebx              /* bits -= len << 3 */
1271        movl    out_r, next_out_strm(strm_r)    /* strm->next_out = out */
1272        movl    %ebx, bits_state(state_r)       /* state->bits = bits */
1273        movl    %ebx, %ecx              /* keep bits in %cl for the mask shift below */
1274
        /* If last points at the local buf, in is an offset into buf: rebase
         * in onto strm->next_in and recompute last from the stream.
         * NOTE(review): buf is set up earlier in this function, outside this
         * view - presumably a padded copy of a short input; confirm.
         */
1275        leal    buf(%esp), %ebx
1276        cmpl    %ebx, last(%esp)
1277        jne     .L_buf_not_used         /* if buf != last */
1278
1279        subl    %ebx, in_r              /* in -= buf */
1280        movl    next_in_strm(strm_r), %ebx
1281        movl    %ebx, last(%esp)        /* last = strm->next_in */
1282        addl    %ebx, in_r              /* in += strm->next_in */
1283        movl    avail_in_strm(strm_r), %ebx
1284        subl    $11, %ebx
1285        addl    %ebx, last(%esp)    /* last = &strm->next_in[ avail_in - 11 ] */
1286
1287.L_buf_not_used:
1288        movl    in_r, next_in_strm(strm_r)      /* strm->next_in = in */
1289
1290        movl    $1, %ebx
1291        shll    %cl, %ebx
1292        decl    %ebx                    /* ebx = (1U << bits) - 1 */
1293
1294#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1295
1296#if defined( RUN_TIME_MMX )
1297
        /* run-time selection: in non-MMX mode hold is already in %ebp */
1298        cmpl    $DO_USE_MMX, inflate_fast_use_mmx
1299        jne     .L_update_hold
1300
1301#endif /* RUN_TIME_MMX */
1302
1303        psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
1304        movd    hold_mm, %ebp           /* ebp = low 32 bits of hold */
1305
1306        emms                            /* leave MMX state empty on exit */
1307
1308.L_update_hold:
1309
1310#endif /* USE_MMX || RUN_TIME_MMX */
1311
1312        andl    %ebx, %ebp              /* hold &= (1U << bits) - 1 */
1313        movl    %ebp, hold_state(state_r)       /* state->hold = hold */
1314
/*
 * Recompute strm->avail_in from in and last.  Both branches store
 * 11 + (last - in); they differ only in which register carries the result,
 * so the subtraction never has to go through zero.
 */
1315#define last_r %ebx
1316
1317        /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
1318        movl    last(%esp), last_r
1319        cmpl    in_r, last_r
1320        jbe     .L_last_is_smaller     /* if (in >= last) */
1321
1322        subl    in_r, last_r           /* last -= in */
1323        addl    $11, last_r            /* last += 11 */
1324        movl    last_r, avail_in_strm(strm_r)
1325        jmp     .L_fixup_out
1326.L_last_is_smaller:
1327        subl    last_r, in_r           /* in -= last */
1328        negl    in_r                   /* in = -in */
1329        addl    $11, in_r              /* in += 11 */
1330        movl    in_r, avail_in_strm(strm_r)
1331
1332#undef last_r
/*
 * Recompute strm->avail_out from out and end, mirroring the avail_in
 * fix-up: both branches store 257 + (end - out).
 */
1333#define end_r %ebx
1334
1335.L_fixup_out:
1336        /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
1337        movl    end(%esp), end_r
1338        cmpl    out_r, end_r
1339        jbe     .L_end_is_smaller      /* if (out >= end) */
1340
1341        subl    out_r, end_r           /* end -= out */
1342        addl    $257, end_r            /* end += 257 */
1343        movl    end_r, avail_out_strm(strm_r)
1344        jmp     .L_done
1345.L_end_is_smaller:
1346        subl    end_r, out_r           /* out -= end */
1347        negl    out_r                  /* out = -out */
1348        addl    $257, out_r            /* out += 257 */
1349        movl    out_r, avail_out_strm(strm_r)
1350
1351#undef end_r
1352#undef strm_r
1353#undef state_r
1354
/*
 * Function epilogue: release the local variable area, restore the flags and
 * the four saved registers, and return.
 * NOTE(review): the matching pushes are in the prologue, outside this view -
 * confirm the pop order mirrors it.
 */
1355.L_done:
1356        addl    $local_var_size, %esp   /* drop the local variable area */
1357        popf                            /* restore saved flags */
1358        popl    %ebx
1359        popl    %ebp
1360        popl    %esi
1361        popl    %edi
1362        ret
1363
1364#if defined( GAS_ELF )
1365/* elf info */
1366.type inflate_fast,@function
1367.size inflate_fast,.-inflate_fast
1368#endif
1369