1;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
2; *
3; * inffas32.asm is derived from inffas86.c, with translation of assembly code
4; *
5; * Copyright (C) 1995-2003 Mark Adler
6; * For conditions of distribution and use, see copyright notice in zlib.h
7; *
8; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9; * Please use the copyright conditions above.
10; *
11; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
13; * the moment.  I have successfully compiled and tested this code with gcc2.96,
14; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
15; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16; * enabled.  I will attempt to merge the MMX code into this version.  Newer
17; * versions of this and inffast.S can be found at
18; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
19; *
20; * 2005 : modification by Gilles Vollant
21; */
22; For Visual C++ 4.x and higher and ML 6.x and higher
23;   ml.exe is in directory \MASM611C of Win95 DDK
24;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
26;
27;
28;   compile with command line option
29;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
30
31;   if you define NO_GUNZIP (see inflate.h), compile with
32;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
33
34
35; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head
37;        in inflate_state in inflate.h)
; zlib1222sup: extra byte offset that is added to every inflate_state field
; offset defined in this file except mode_state (wsize_state .. distbits_state).
38zlib1222sup      equ    8
39
40
; Values that _inflate_fast stores into state->mode:
;   INFLATE_MODE_TYPE - stored when an end-of-block code is decoded
;   INFLATE_MODE_BAD  - stored, together with an error message, on invalid data
; The (11, 26) pair is used when GUNZIP is defined or NO_GUNZIP is not defined;
; the (3, 17) pair is used only when NO_GUNZIP is defined and GUNZIP is not.
; NOTE(review): presumably these mirror the inflate_mode enum of inflate.h for
; the matching build configuration - confirm against that header.
41IFDEF GUNZIP
42  INFLATE_MODE_TYPE    equ 11
43  INFLATE_MODE_BAD     equ 26
44ELSE
45  IFNDEF NO_GUNZIP
46    INFLATE_MODE_TYPE    equ 11
47    INFLATE_MODE_BAD     equ 26
48  ELSE
49    INFLATE_MODE_TYPE    equ 3
50    INFLATE_MODE_BAD     equ 17
51  ENDIF
52ENDIF
53
54
55; 75 "inffast.S"
56;FILE "inffast.S"
57
58;;;GLOBAL _inflate_fast
59
60;;;SECTION .text
61
62
63
; Instruction-set selection: Pentium-class instructions (cpuid is used by the
; CPU probe) plus the MMX mnemonics used by the MMX decode loop.
64	.586p
65	.mmx
66
67	name	inflate_fast_x86
68	.MODEL	FLAT
69
; Writable data: one dword selecting the decode loop used by _inflate_fast.
;   below 2 (initial value 1) - not probed yet; the first call runs the CPUID probe
;   2                         - use the MMX loop
;   above 2 (probe stores 3)  - use the integer loop
; NOTE(review): no "_DATA ends" appears before _TEXT is opened - confirm that the
; assembler in use accepts the unclosed segment.
70_DATA			segment
71inflate_fast_use_mmx:
72	dd	1
73
74
; Code segment.  It starts with read-only data: an identification string, the
; NUL-terminated error messages whose addresses _inflate_fast stores into the
; stream's message field, and the bit-mask table used by the MMX path.
75_TEXT			segment
76
77
78
79ALIGN 4
80	db	'Fast decoding Code from Chris Anderson'	; identification text; not referenced by the code
81	db	0
82
83ALIGN 4
84invalid_literal_length_code_msg:
85	db	'invalid literal/length code'
86	db	0
87
88ALIGN 4
89invalid_distance_code_msg:
90	db	'invalid distance code'
91	db	0
92
93ALIGN 4
94invalid_distance_too_far_msg:
95	db	'invalid distance too far back'
96	db	0
97
98
99ALIGN 4
; inflate_fast_mask[n] = 2^n - 1 for n = 0..32 (33 dwords): a mask keeping the
; n low-order bits.  Indexed as [inflate_fast_mask + n*4] by the MMX path.
100inflate_fast_mask:
101dd	0
102dd	1
103dd	3
104dd	7
105dd	15
106dd	31
107dd	63
108dd	127
109dd	255
110dd	511
111dd	1023
112dd	2047
113dd	4095
114dd	8191
115dd	16383
116dd	32767
117dd	65535
118dd	131071
119dd	262143
120dd	524287
121dd	1048575
122dd	2097151
123dd	4194303
124dd	8388607
125dd	16777215
126dd	33554431
127dd	67108863
128dd	134217727
129dd	268435455
130dd	536870911
131dd	1073741823
132dd	2147483647
133dd	4294967295
134
135
; Byte offsets of the inflate_state fields accessed by _inflate_fast, relative
; to the state pointer.  Every offset except mode_state includes zlib1222sup;
; the entries from write_state onward carry a further +4.
; NOTE(review): the reason for the extra +4 is not visible in this file -
; verify all offsets against the inflate_state layout in inflate.h.
136mode_state	 equ	0	;/* state->mode	*/
137wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
138write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
139window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
140hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
141bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
142lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
143distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
144lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
145distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */
146
147
148;;SECTION .text
149; 205 "inffast.S"
150;GLOBAL	inflate_fast_use_mmx
151
152;SECTION .data
153
154
155; GLOBAL inflate_fast_use_mmx:object
156;.size inflate_fast_use_mmx, 4
157; 226 "inffast.S"
158;SECTION .text
159
;-----------------------------------------------------------------------
; _inflate_fast -- hand-tuned 32-bit x86 version of the fast inflate decoding
; loop (see the file header: derived from inffast.c / inffas86.c).
;
; Stack arguments.  The prologue pushes edi, esi, ebp, ebx and EFLAGS and
; reserves 64 bytes, so the return address sits at [esp+84] and:
;   [esp+88]  argument 1: stream pointer ("strm").  Fields used here:
;               +0 input pointer     +4 input byte count
;               +12 output pointer   +16 output byte count
;               +24 message pointer  +28 state pointer
;             (presumably z_stream next_in / avail_in / next_out / avail_out /
;              msg / state - confirm against zlib.h)
;   [esp+92]  argument 2: "start", used only to compute beg (see below)
; The procedure ends with a plain RET, so the caller removes the arguments.
; Preserved: edi, esi, ebp, ebx, EFLAGS.  Clobbered: eax, ecx, edx and, on
; the MMX path, mm0-mm7 (emms is executed before returning).
; Side effect: may write inflate_fast_use_mmx (one-time CPU probe).
;
; Local frame (64 bytes at [esp+0 .. esp+63]):
;   [esp+0]   lmask    = (1 << state->lenbits) - 1
;   [esp+4]   dmask    = (1 << state->distbits) - 1
;   [esp+8]   lencode  table pointer
;   [esp+12]  distcode table pointer
;   [esp+16]  end      = output pointer + output count - 257
;   [esp+20]  last     = input pointer + input count - 11
;   [esp+24]  len      (integer path only)
;   [esp+28]  12-byte scratch input buffer ([esp+28 .. esp+39])
;   [esp+40]  beg      = output pointer - (start - output count)
;   [esp+44]  input pointer saved while esi is used as a copy source
;   [esp+48]  state->write
;   [esp+52]  state->wsize
;   [esp+56]  state->window
;   [esp+60]  initial output pointer
;
; Code table entry (one dword, read as eax = [table + index*4]):
;   al = op, ah = number of bits of the code, bits 16..31 = val
;   op == 0      literal, val is the byte to emit
;   op & 16      length/distance base in val, (op & 15) extra bits follow
;   op & 64      (length table) end-of-block if op & 32, otherwise invalid;
;                (distance table) invalid
;   otherwise    link to a second-level table: index = val + low "op" bits
;
; Register roles, integer loop: esi = in, edi = out, ebp = hold (bit buffer),
;   bl = bits (valid bits in hold), edx = len, then dist.
; Register roles, MMX loop: esi = in, edi = out, mm0 = hold, ebp = bits,
;   mm1 = bits still to be shifted out of mm0, mm3/mm4 = lmask (master /
;   working copy), mm2/mm5 = dmask, edx = len, ebx = lencode pointer or dist.
;
; Compared with the previous revision, two unreachable duplicated instruction
; groups (after the unconditional "jmp L_do_copy1" / "jmp L_do_copy1_mmx") and
; two jumps whose target was the very next label have been removed.
;-----------------------------------------------------------------------
160ALIGN 4
161_inflate_fast proc near
162.FPO (16, 4, 0, 0, 1, 0)
163	push  edi
164	push  esi
165	push  ebp
166	push  ebx
167	pushfd
168	sub  esp,64		; reserve the 64-byte local frame
169	cld			; string instructions move forward
170
171
172
173
174	mov  esi, [esp+88]	; esi = strm
175	mov  edi, [esi+28]	; edi = state pointer
176
177
178
179
180
181
182
183	mov  edx, [esi+4]	; input byte count
184	mov  eax, [esi+0]	; input pointer
185
186	add  edx,eax
187	sub  edx,11		; last = in + count - 11
188
189	mov  [esp+44],eax	; in
190	mov  [esp+20],edx	; last
191
192	mov  ebp, [esp+92]	; start (argument 2)
193	mov  ecx, [esi+16]	; output byte count
194	mov  ebx, [esi+12]	; output pointer
195
196	sub  ebp,ecx
197	neg  ebp
198	add  ebp,ebx		; beg = out - (start - output count)
199
200	sub  ecx,257
201	add  ecx,ebx		; end = out + output count - 257
202
203	mov  [esp+60],ebx	; out
204	mov  [esp+40],ebp	; beg
205	mov  [esp+16],ecx	; end
206; 285 "inffast.S"
207	mov  eax, [edi+lencode_state]
208	mov  ecx, [edi+distcode_state]
209
210	mov  [esp+8],eax
211	mov  [esp+12],ecx
212
213	mov  eax,1
214	mov  ecx, [edi+lenbits_state]
215	shl  eax,cl
216	dec  eax
217	mov  [esp+0],eax	; lmask = (1 << lenbits) - 1
218
219	mov  eax,1
220	mov  ecx, [edi+distbits_state]
221	shl  eax,cl
222	dec  eax
223	mov  [esp+4],eax	; dmask = (1 << distbits) - 1
224
225	mov  eax, [edi+wsize_state]
226	mov  ecx, [edi+write_state]
227	mov  edx, [edi+window_state]
228
229	mov  [esp+52],eax
230	mov  [esp+48],ecx
231	mov  [esp+56],edx
232
233	mov  ebp, [edi+hold_state]	; ebp = hold
234	mov  ebx, [edi+bits_state]	; ebx = bits
235; 321 "inffast.S"
; If last > in, decode straight from the caller's buffer (after aligning in).
; Otherwise copy the remaining input bytes into the 12-byte scratch buffer at
; [esp+28], zero-fill the rest of it, and decode from there with last = buffer.
236	mov  esi, [esp+44]
237	mov  ecx, [esp+20]
238	cmp  ecx,esi
239	ja   L_align_long
240
241	add  ecx,11
242	sub  ecx,esi		; ecx = input bytes remaining (last + 11 - in)
243	mov  eax,12
244	sub  eax,ecx		; eax = bytes of padding needed to fill 12 bytes
245	lea  edi, [esp+28]
246	rep movsb		; copy the remaining input into the scratch buffer
247	mov  ecx,eax
248	xor  eax,eax
249	rep stosb		; zero-fill the rest of the scratch buffer
250	lea  esi, [esp+28]
251	mov  [esp+20],esi	; last = scratch buffer (recognised again at exit)
252	jmp  L_is_aligned
253
254
; Feed single bytes into hold until the input pointer is 4-byte aligned.
255L_align_long:
256	test  esi,3
257	jz   L_is_aligned
258	xor  eax,eax
259	mov  al, [esi]
260	inc  esi
261	mov  ecx,ebx
262	add  ebx,8		; bits += 8
263	shl  eax,cl
264	or  ebp,eax		; hold |= byte << (previous bits)
265	jmp L_align_long
266
267L_is_aligned:
268	mov  edi, [esp+60]	; edi = out
269; 366 "inffast.S"
; Select the decode loop from inflate_fast_use_mmx: 2 -> MMX loop, above 2 ->
; integer loop, otherwise probe the CPU once, store 2 or 3 and re-test.
270L_check_mmx:
271	cmp  dword ptr [inflate_fast_use_mmx],2
272	je   L_init_mmx
273	ja   L_do_loop
274
275	push  eax
276	push  ebx
277	push  ecx
278	push  edx
279	pushfd
280	mov  eax, [esp]		; eax = current EFLAGS image
281	xor  dword ptr [esp],0200000h	; toggle bit 21 (the ID flag) in the image
282
283
284
285
286	popfd
287	pushfd
288	pop  edx
289	xor  edx,eax		; zero if the ID flag could not be changed
290	jz   L_dont_use_mmx	; ... in which case cpuid is unavailable
291	xor  eax,eax
292	cpuid			; leaf 0: vendor string in ebx, edx, ecx
293	cmp  ebx,0756e6547h	; "Genu"
294	jne  L_dont_use_mmx
295	cmp  ecx,06c65746eh	; "ntel"
296	jne  L_dont_use_mmx
297	cmp  edx,049656e69h	; "ineI"
298	jne  L_dont_use_mmx
299	mov  eax,1
300	cpuid			; leaf 1: eax = signature, edx = feature flags
301	shr  eax,8
302	and  eax,15		; family field (signature bits 8..11)
303	cmp  eax,6
304	jne  L_dont_use_mmx	; the MMX loop is enabled for family 6 only
305	test  edx,0800000h	; feature bit 23 = MMX
306	jnz  L_use_mmx
307	jmp  L_dont_use_mmx
308L_use_mmx:
309	mov  dword ptr [inflate_fast_use_mmx],2
310	jmp  L_check_mmx_pop
311L_dont_use_mmx:
312	mov  dword ptr [inflate_fast_use_mmx],3
313L_check_mmx_pop:
314	pop  edx
315	pop  ecx
316	pop  ebx
317	pop  eax
318	jmp  L_check_mmx
319; 426 "inffast.S"
;-----------------------------------------------------------------------
; Integer decode loop
;-----------------------------------------------------------------------
320ALIGN 4
321L_do_loop:
322; 437 "inffast.S"
323	cmp  bl,15
324	ja   L_get_length_code	; more than 15 bits available: no refill
325
326	xor  eax,eax
327	lodsw			; fetch 16 more input bits
328	mov  cl,bl
329	add  bl,16
330	shl  eax,cl
331	or  ebp,eax		; hold |= new bits << (previous bits)
332
333L_get_length_code:
334	mov  edx, [esp+0]
335	mov  ecx, [esp+8]
336	and  edx,ebp
337	mov  eax, [ecx+edx*4]	; eax = lencode[hold & lmask]
338
339L_dolen:
340
341
342
343
344
345
346	mov  cl,ah
347	sub  bl,ah
348	shr  ebp,cl		; consume the code's bits
349
350
351
352
353
354
355	test  al,al
356	jnz   L_test_for_length_base	; op != 0: not a literal
357
358	shr  eax,16
359	stosb			; literal: emit the low byte of val
360
; Loop while out < end and in < last.
361L_while_test:
362
363
364	cmp  [esp+16],edi
365	jbe  L_break_loop
366
367	cmp  [esp+20],esi
368	ja   L_do_loop
369	jmp  L_break_loop
370
371L_test_for_length_base:
372; 502 "inffast.S"
373	mov  edx,eax
374	shr  edx,16		; edx = val (length base)
375	mov  cl,al
376
377	test  al,16
378	jz   L_test_for_second_level_length
379	and  cl,15		; cl = number of extra length bits
380	jz   L_save_len
381	cmp  bl,cl
382	jae  L_add_bits_to_len	; enough bits already in hold
383
384	mov  ch,cl		; keep the extra-bit count across the refill
385	xor  eax,eax
386	lodsw
387	mov  cl,bl
388	add  bl,16
389	shl  eax,cl
390	or  ebp,eax
391	mov  cl,ch
392
393L_add_bits_to_len:
394	mov  eax,1
395	shl  eax,cl
396	dec  eax
397	sub  bl,cl
398	and  eax,ebp
399	shr  ebp,cl
400	add  edx,eax		; len = base + extra bits
401
402L_save_len:
403	mov  [esp+24],edx
404
405
406L_decode_distance:
407; 549 "inffast.S"
408	cmp  bl,15
409	ja   L_get_distance_code
410
411	xor  eax,eax
412	lodsw
413	mov  cl,bl
414	add  bl,16
415	shl  eax,cl
416	or  ebp,eax
417
418L_get_distance_code:
419	mov  edx, [esp+4]
420	mov  ecx, [esp+12]
421	and  edx,ebp
422	mov  eax, [ecx+edx*4]	; eax = distcode[hold & dmask]
423
424
425L_dodist:
426	mov  edx,eax
427	shr  edx,16		; edx = val (distance base)
428	mov  cl,ah
429	sub  bl,ah
430	shr  ebp,cl		; consume the code's bits
431; 584 "inffast.S"
432	mov  cl,al
433
434	test  al,16
435	jz  L_test_for_second_level_dist
436	and  cl,15		; cl = number of extra distance bits
437	jz  L_check_dist_one
438	cmp  bl,cl
439	jae  L_add_bits_to_dist
440
441	mov  ch,cl
442	xor  eax,eax
443	lodsw
444	mov  cl,bl
445	add  bl,16
446	shl  eax,cl
447	or  ebp,eax
448	mov  cl,ch
449
450L_add_bits_to_dist:
451	mov  eax,1
452	shl  eax,cl
453	dec  eax
454	sub  bl,cl
455	and  eax,ebp
456	shr  ebp,cl
457	add  edx,eax		; dist = base + extra bits; falls through
459
; Copy len bytes from out - dist when that lies inside the output produced so
; far (out - beg >= dist); otherwise the data must come from the window.
460L_check_window:
461; 625 "inffast.S"
462	mov  [esp+44],esi	; save in; esi becomes the copy source
463	mov  eax,edi
464	sub  eax, [esp+40]	; eax = out - beg
465
466	cmp  eax,edx
467	jb   L_clip_window
468
469	mov  ecx, [esp+24]
470	mov  esi,edi
471	sub  esi,edx		; esi = out - dist
472
; The first three bytes are copied explicitly and rep movsb copies len - 3.
; NOTE(review): this relies on len >= 3 - confirm the tables guarantee it.
473	sub  ecx,3
474	mov  al, [esi]
475	mov  [edi],al
476	mov  al, [esi+1]
477	mov  dl, [esi+2]
478	add  esi,3
479	mov  [edi+1],al
480	mov  [edi+2],dl
481	add  edi,3
482	rep movsb
483
484	mov  esi, [esp+44]	; restore in
485	jmp  L_while_test
486
; Distance code without extra bits.  When dist == 1 and at least one byte has
; been produced (out != beg), the match is a run of the previous output byte,
; written with rep stosb.  Every other case uses the general copy.
487ALIGN 4
488L_check_dist_one:
489	cmp  edx,1
490	jne  L_check_window
491	cmp  [esp+40],edi
492	je  L_check_window
493
494	dec  edi
495	mov  ecx, [esp+24]
496	mov  al, [edi]		; al = previous output byte
497	sub  ecx,3
498
499	mov  [edi+1],al
500	mov  [edi+2],al
501	mov  [edi+3],al
502	add  edi,4
503	rep stosb
504
505	jmp  L_while_test
506
507ALIGN 4
508L_test_for_second_level_length:
509
510
511
512
513	test  al,64
514	jnz   L_test_for_end_of_block
515
; Second-level lookup: index = val + (hold & ((1 << op) - 1)); cl still holds op.
516	mov  eax,1
517	shl  eax,cl
518	dec  eax
519	and  eax,ebp
520	add  eax,edx
521	mov  edx, [esp+8]
522	mov  eax, [edx+eax*4]
523	jmp  L_dolen
524
525ALIGN 4
526L_test_for_second_level_dist:
527
528
529
530
531	test  al,64
532	jnz   L_invalid_distance_code
533
534	mov  eax,1
535	shl  eax,cl
536	dec  eax
537	and  eax,ebp
538	add  eax,edx
539	mov  edx, [esp+12]
540	mov  eax, [edx+eax*4]
541	jmp  L_dodist
542
; The match starts before beg, so some bytes come from the sliding window.
; On entry eax = out - beg and edx = dist.
543ALIGN 4
544L_clip_window:
545; 721 "inffast.S"
546	mov  ecx,eax
547	mov  eax, [esp+52]	; eax = wsize
548	neg  ecx
549	mov  esi, [esp+56]	; esi = window
550
551	cmp  eax,edx
552	jb   L_invalid_distance_too_far	; wsize < dist
553
554	add  ecx,edx		; ecx = dist - (out - beg): bytes taken from the window
555	cmp  dword ptr [esp+48],0
556	jne  L_wrap_around_window
557
; write == 0: the wanted bytes are the last ecx bytes of the window.
558	sub  eax,ecx
559	add  esi,eax		; esi = window + wsize - ecx
560; 749 "inffast.S"
561	mov  eax, [esp+24]	; eax = len
562	cmp  eax,ecx
563	jbe  L_do_copy1		; the whole match lies inside the window
564
565	sub  eax,ecx
566	rep movsb		; window part
567	mov  esi,edi
568	sub  esi,edx		; the rest comes from out - dist
569	jmp  L_do_copy1
570
; write != 0: the window data may wrap around the end of the window buffer.
580L_wrap_around_window:
581; 793 "inffast.S"
582	mov  eax, [esp+48]	; eax = write
583	cmp  ecx,eax
584	jbe  L_contiguous_in_window
585
586	add  esi, [esp+52]
587	add  esi,eax
588	sub  esi,ecx		; esi = window + wsize + write - ecx
589	sub  ecx,eax		; ecx = bytes to take from the end of the window
590
591
592	mov  eax, [esp+24]
593	cmp  eax,ecx
594	jbe  L_do_copy1
595
596	sub  eax,ecx
597	rep movsb		; tail of the window
598	mov  esi, [esp+56]
599	mov  ecx, [esp+48]	; then up to "write" bytes from the window start
600	cmp  eax,ecx
601	jbe  L_do_copy1
602
603	sub  eax,ecx
604	rep movsb
605	mov  esi,edi
606	sub  esi,edx		; the rest comes from out - dist
607	jmp  L_do_copy1
608
609L_contiguous_in_window:
610; 836 "inffast.S"
611	add  esi,eax
612	sub  esi,ecx		; esi = window + write - ecx
613
614
615	mov  eax, [esp+24]
616	cmp  eax,ecx
617	jbe  L_do_copy1
618
619	sub  eax,ecx
620	rep movsb
621	mov  esi,edi
622	sub  esi,edx
623
; Copy the remaining eax bytes from esi, then resume decoding.
624L_do_copy1:
625; 862 "inffast.S"
626	mov  ecx,eax
627	rep movsb
628
629	mov  esi, [esp+44]	; restore in
630	jmp  L_while_test
631; 878 "inffast.S"
;-----------------------------------------------------------------------
; MMX decode loop
;-----------------------------------------------------------------------
632ALIGN 4
633L_init_mmx:
634	emms
635
636
637
638
639
640	movd mm0,ebp		; mm0 = hold
641	mov  ebp,ebx		; ebp = bits
642; 896 "inffast.S"
643	movd mm4,dword ptr [esp+0]
644	movq mm3,mm4		; mm3 = master copy of lmask
645	movd mm5,dword ptr [esp+4]
646	movq mm2,mm5		; mm2 = master copy of dmask
647	pxor mm1,mm1		; no bits waiting to be shifted out
648	mov  ebx, [esp+8]	; ebx = lencode
649	jmp  L_do_loop_mmx
650
651ALIGN 4
652L_do_loop_mmx:
653	psrlq mm0,mm1		; drop the bits consumed by the previous step
654
655	cmp  ebp,32
656	ja  L_get_length_code_mmx	; more than 32 bits available: no refill
657
658	movd mm6,ebp
659	movd mm7,dword ptr [esi]	; fetch 32 more input bits
660	add  esi,4
661	psllq mm7,mm6
662	add  ebp,32
663	por mm0,mm7		; hold |= new bits << (previous bits)
664
665L_get_length_code_mmx:
666	pand mm4,mm0
667	movd eax,mm4		; eax = hold & lmask
668	movq mm4,mm3		; restore the working mask
669	mov  eax, [ebx+eax*4]	; eax = lencode[index]
670
671L_dolen_mmx:
672	movzx  ecx,ah
673	movd mm1,ecx		; the code's bits are shifted out of mm0 later
674	sub  ebp,ecx
675
676	test  al,al
677	jnz L_test_for_length_base_mmx
678
679	shr  eax,16
680	stosb			; literal: emit the low byte of val
681
; Loop while out < end and in < last.
682L_while_test_mmx:
683
684
685	cmp  [esp+16],edi
686	jbe L_break_loop
687
688	cmp  [esp+20],esi
689	ja L_do_loop_mmx
690	jmp L_break_loop
691
692L_test_for_length_base_mmx:
693
694	mov  edx,eax
695	shr  edx,16		; edx = val (length base)
696
697	test  al,16
698	jz  L_test_for_second_level_length_mmx
699	and  eax,15		; eax = number of extra length bits
700	jz L_decode_distance_mmx
701
702	psrlq mm0,mm1		; drop the code's bits
703	movd mm1,eax		; the extra bits are shifted out later
704	movd ecx,mm0
705	sub  ebp,eax
706	and  ecx, [inflate_fast_mask+eax*4]
707	add  edx,ecx		; len = base + extra bits
708
709L_decode_distance_mmx:
710	psrlq mm0,mm1
711
712	cmp  ebp,32
713	ja L_get_dist_code_mmx
714
715	movd mm6,ebp
716	movd mm7,dword ptr [esi]
717	add  esi,4
718	psllq mm7,mm6
719	add  ebp,32
720	por mm0,mm7
721
722L_get_dist_code_mmx:
723	mov  ebx, [esp+12]	; ebx = distcode
724	pand mm5,mm0
725	movd eax,mm5		; eax = hold & dmask
726	movq mm5,mm2		; restore the working mask
727	mov  eax, [ebx+eax*4]	; eax = distcode[index]
728
729L_dodist_mmx:
730
731	movzx  ecx,ah
732	mov  ebx,eax
733	shr  ebx,16		; ebx = val (distance base)
734	sub  ebp,ecx
735	movd mm1,ecx
736
737	test  al,16
738	jz L_test_for_second_level_dist_mmx
739	and  eax,15		; eax = number of extra distance bits
740	jz L_check_dist_one_mmx
741
742L_add_bits_to_dist_mmx:
743	psrlq mm0,mm1
744	movd mm1,eax
745	movd ecx,mm0
746	sub  ebp,eax
747	and  ecx, [inflate_fast_mask+eax*4]
748	add  ebx,ecx		; dist = base + extra bits
749
; Same copy logic as the integer loop, with dist in ebx and len in edx.
750L_check_window_mmx:
751	mov  [esp+44],esi	; save in
752	mov  eax,edi
753	sub  eax, [esp+40]	; eax = out - beg
754
755	cmp  eax,ebx
756	jb L_clip_window_mmx
757
758	mov  ecx,edx
759	mov  esi,edi
760	sub  esi,ebx		; esi = out - dist
761
762	sub  ecx,3
763	mov  al, [esi]
764	mov  [edi],al
765	mov  al, [esi+1]
766	mov  dl, [esi+2]
767	add  esi,3
768	mov  [edi+1],al
769	mov  [edi+2],dl
770	add  edi,3
771	rep movsb
772
773	mov  esi, [esp+44]	; restore in
774	mov  ebx, [esp+8]	; restore ebx = lencode
775	jmp  L_while_test_mmx
776
777ALIGN 4
778L_check_dist_one_mmx:
779	cmp  ebx,1
780	jne  L_check_window_mmx
781	cmp  [esp+40],edi
782	je   L_check_window_mmx
783
784	dec  edi
785	mov  ecx,edx
786	mov  al, [edi]		; al = previous output byte
787	sub  ecx,3
788
789	mov  [edi+1],al
790	mov  [edi+2],al
791	mov  [edi+3],al
792	add  edi,4
793	rep stosb
794
795	mov  ebx, [esp+8]
796	jmp  L_while_test_mmx
797
798ALIGN 4
799L_test_for_second_level_length_mmx:
800	test  al,64
801	jnz L_test_for_end_of_block
802
; Second-level lookup: index = val + (hold & mask[op & 15]); ebx = lencode.
803	and  eax,15
804	psrlq mm0,mm1
805	movd ecx,mm0
806	and  ecx, [inflate_fast_mask+eax*4]
807	add  ecx,edx
808	mov  eax, [ebx+ecx*4]
809	jmp L_dolen_mmx
810
811ALIGN 4
812L_test_for_second_level_dist_mmx:
813	test  al,64
814	jnz L_invalid_distance_code
815
816	and  eax,15
817	psrlq mm0,mm1
818	movd ecx,mm0
819	and  ecx, [inflate_fast_mask+eax*4]
820	mov  eax, [esp+12]
821	add  ecx,ebx
822	mov  eax, [eax+ecx*4]
823	jmp  L_dodist_mmx
824
; Window copy for the MMX loop.  On entry eax = out - beg, ebx = dist, edx = len.
825ALIGN 4
826L_clip_window_mmx:
827
828	mov  ecx,eax
829	mov  eax, [esp+52]	; eax = wsize
830	neg  ecx
831	mov  esi, [esp+56]	; esi = window
832
833	cmp  eax,ebx
834	jb  L_invalid_distance_too_far	; wsize < dist
835
836	add  ecx,ebx		; ecx = dist - (out - beg): bytes taken from the window
837	cmp  dword ptr [esp+48],0
838	jne  L_wrap_around_window_mmx
839
840	sub  eax,ecx
841	add  esi,eax		; esi = window + wsize - ecx
842
843	cmp  edx,ecx
844	jbe  L_do_copy1_mmx
845
846	sub  edx,ecx
847	rep movsb
848	mov  esi,edi
849	sub  esi,ebx
850	jmp  L_do_copy1_mmx
851
861L_wrap_around_window_mmx:
862
863	mov  eax, [esp+48]	; eax = write
864	cmp  ecx,eax
865	jbe  L_contiguous_in_window_mmx
866
867	add  esi, [esp+52]
868	add  esi,eax
869	sub  esi,ecx		; esi = window + wsize + write - ecx
870	sub  ecx,eax		; ecx = bytes to take from the end of the window
871
872
873	cmp  edx,ecx
874	jbe  L_do_copy1_mmx
875
876	sub  edx,ecx
877	rep movsb
878	mov  esi, [esp+56]
879	mov  ecx, [esp+48]
880	cmp  edx,ecx
881	jbe  L_do_copy1_mmx
882
883	sub  edx,ecx
884	rep movsb
885	mov  esi,edi
886	sub  esi,ebx
887	jmp  L_do_copy1_mmx
888
889L_contiguous_in_window_mmx:
890
891	add  esi,eax
892	sub  esi,ecx		; esi = window + write - ecx
893
894
895	cmp  edx,ecx
896	jbe  L_do_copy1_mmx
897
898	sub  edx,ecx
899	rep movsb
900	mov  esi,edi
901	sub  esi,ebx
902
903L_do_copy1_mmx:
904
905
906	mov  ecx,edx
907	rep movsb
908
909	mov  esi, [esp+44]	; restore in
910	mov  ebx, [esp+8]	; restore ebx = lencode
911	jmp  L_while_test_mmx
912; 1174 "inffast.S"
;-----------------------------------------------------------------------
; Loop exits shared by both loops.  ecx = message pointer (0 for none) and
; edx = value to store into state->mode.
;-----------------------------------------------------------------------
913L_invalid_distance_code:
914
915
916
917
918
919	mov  ecx, invalid_distance_code_msg
920	mov  edx,INFLATE_MODE_BAD
921	jmp  L_update_stream_state
922
923L_test_for_end_of_block:
924
925
926
927
928
929	test  al,32
930	jz  L_invalid_literal_length_code
931
932	mov  ecx,0		; end of block: no message
933	mov  edx,INFLATE_MODE_TYPE
934	jmp  L_update_stream_state
935
936L_invalid_literal_length_code:
937
938
939
940
941
942	mov  ecx, invalid_literal_length_code_msg
943	mov  edx,INFLATE_MODE_BAD
944	jmp  L_update_stream_state
945
946L_invalid_distance_too_far:
947
948
949
950	mov  esi, [esp+44]	; esi was pointing at the window: restore in
951	mov  ecx, invalid_distance_too_far_msg
952	mov  edx,INFLATE_MODE_BAD	; falls through
954
955L_update_stream_state:
956
957	mov  eax, [esp+88]	; eax = strm
958	test  ecx,ecx
959	jz  L_skip_msg
960	mov  [eax+24],ecx	; store the message pointer
961L_skip_msg:
962	mov  eax, [eax+28]
963	mov  [eax+mode_state],edx	; state->mode = edx
964	jmp  L_break_loop
965
; Write the decoder state back to the stream and state structures.
966ALIGN 4
967L_break_loop:
968; 1243 "inffast.S"
969	cmp  dword ptr [inflate_fast_use_mmx],2
970	jne  L_update_next_in
971
972
973
974	mov  ebx,ebp		; MMX loop kept bits in ebp
975
976L_update_next_in:
977; 1266 "inffast.S"
978	mov  eax, [esp+88]	; eax = strm
979	mov  ecx,ebx
980	mov  edx, [eax+28]	; edx = state pointer
981	shr  ecx,3		; whole unused bytes still held in the bit buffer
982	sub  esi,ecx		; give them back to the input
983	shl  ecx,3
984	sub  ebx,ecx		; bits -= 8 * bytes returned
985	mov  [eax+12],edi	; store the output pointer
986	mov  [edx+bits_state],ebx
987	mov  ecx,ebx
988
989	lea  ebx, [esp+28]
990	cmp  [esp+20],ebx	; last == scratch buffer: input was decoded from it
991	jne  L_buf_not_used
992
; Translate the position inside the scratch buffer back into the caller's
; input buffer and recompute last from the stream fields.
993	sub  esi,ebx		; bytes consumed from the scratch buffer
994	mov  ebx, [eax+0]
995	mov  [esp+20],ebx
996	add  esi,ebx		; in = original input pointer + bytes consumed
997	mov  ebx, [eax+4]
998	sub  ebx,11
999	add  [esp+20],ebx	; last = input pointer + input count - 11
1000
1001L_buf_not_used:
1002	mov  [eax+0],esi	; store the input pointer
1003
1004	mov  ebx,1
1005	shl  ebx,cl
1006	dec  ebx		; ebx = (1 << bits) - 1
1007
1008
1009
1010
1011
1012	cmp  dword ptr [inflate_fast_use_mmx],2
1013	jne  L_update_hold
1014
1015
1016
1017	psrlq mm0,mm1		; drop the bits still waiting to be shifted out
1018	movd ebp,mm0		; ebp = low 32 bits of hold
1019
1020	emms
1021
1022L_update_hold:
1023
1024
1025
1026	and  ebp,ebx		; keep only the valid bits
1027	mov  [edx+hold_state],ebp
1028
1029
1030
1031
; Input count = 11 + (last - in).  Both branches produce that value; they
; differ only in which pointer is subtracted from which.
1032	mov  ebx, [esp+20]
1033	cmp  ebx,esi
1034	jbe  L_last_is_smaller
1035
1036	sub  ebx,esi
1037	add  ebx,11
1038	mov  [eax+4],ebx
1039	jmp  L_fixup_out
1040L_last_is_smaller:
1041	sub  esi,ebx
1042	neg  esi
1043	add  esi,11
1044	mov  [eax+4],esi
1045
1046
1047
1048
; Output count = 257 + (end - out), computed the same way.
1049L_fixup_out:
1050
1051	mov  ebx, [esp+16]
1052	cmp  ebx,edi
1053	jbe  L_end_is_smaller
1054
1055	sub  ebx,edi
1056	add  ebx,257
1057	mov  [eax+16],ebx
1058	jmp  L_done
1059L_end_is_smaller:
1060	sub  edi,ebx
1061	neg  edi
1062	add  edi,257
1063	mov  [eax+16],edi
1064
1065
1066
1067
1068
; Release the frame and restore EFLAGS and the saved registers.
1069L_done:
1070	add  esp,64
1071	popfd
1072	pop  ebx
1073	pop  ebp
1074	pop  esi
1075	pop  edi
1076	ret
1077_inflate_fast endp
1078
; Close the code segment and end the module.  No entry point is named on the
; END directive.
; NOTE(review): _DATA is opened earlier in the file and no matching
; "_DATA ends" is visible - confirm the assembler accepts this.
1079_TEXT	ends
1080end
1081