/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function; 	\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifndef USE_AS_STRNCMP
# define STR1		4
# define STR2		STR1+4
# define RETURN		ret

# define UPDATE_STRNCMP_COUNTER
#else
# define STR1		8
# define STR2		STR1+4
# define CNT		STR2+4
# define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
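/* In the strncmp build, RETURN restores the saved %ebp before returning.
   The CFI_PUSH that follows the ret never executes; it only re-issues the
   unwind annotations so that every later expansion of RETURN in this
   function still describes a frame with %ebp saved on the stack.  */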

# define UPDATE_STRNCMP_COUNTER				\
	/* %ebp holds the remaining byte count and %esi is set	\
	   to 16 - %ecx, the number of bytes of this block that	\
	   were just verified; if the length limit falls inside	\
	   that range the strings compare equal up to the limit. */ \
	mov	$16, %esi;				\
	sub	%ecx, %esi;				\
	cmpl	%esi, %ebp;				\
	jbe	L(more8byteseq);			\
	sub	%esi, %ebp
#endif

#ifndef STRCMP
# define STRCMP strcmp
#endif

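/* What follows is an SSSE3 strcmp for i386; building with USE_AS_STRNCMP
   defined (and STRCMP set accordingly) produces strncmp instead.  The core
   trick, used throughout, is to compare 16 bytes at a time: one pcmpeqb
   finds NUL bytes in one string, another compares the two blocks, and
   psubb/pmovmskb combine the results into a 16-bit mask that is 0xffff
   exactly when all 16 bytes match and none of them terminates the string.
   A rough C model of one such step is sketched below; the helper name is
   invented for illustration and is not part of this file's build.

	#include <emmintrin.h>		// SSE2 intrinsics

	// Returns 0 when the 16-byte blocks match and contain no NUL in s1.
	static int block_differs_or_ends(const char *s1, const char *s2)
	{
	    __m128i a    = _mm_loadu_si128((const __m128i *)s1);
	    __m128i b    = _mm_loadu_si128((const __m128i *)s2);
	    __m128i zero = _mm_setzero_si128();
	    __m128i nul  = _mm_cmpeq_epi8(a, zero);	// 0xff where s1 byte == 0
	    __m128i eq   = _mm_cmpeq_epi8(a, b);	// 0xff where bytes are equal
	    // high bit survives only where the byte is equal and not a NUL
	    int ok = _mm_movemask_epi8(_mm_sub_epi8(eq, nul));
	    return ok - 0xffff;			// 0 <=> whole block is fine
	}
 */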
114	.section .text.ssse3,"ax",@progbits
115ENTRY (STRCMP)
116#ifdef USE_AS_STRNCMP
117	PUSH	(%ebp)
118#endif
119	movl	STR1(%esp), %edx
120	movl	STR2(%esp), %eax
121#ifdef USE_AS_STRNCMP
122	movl	CNT(%esp), %ebp
123	cmpl	$16, %ebp
124	jb	L(less16bytes_sncmp)
125	jmp	L(more16bytes)
126#endif
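/* Plain strcmp build only: the first eight bytes are checked one at a time
   before switching to 16-byte SSE compares.  The strncmp build branched
   around this block above, to L(less16bytes_sncmp) or L(more16bytes).  */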
127
128	movzbl	(%eax), %ecx
129	cmpb	%cl, (%edx)
130	jne	L(neq)
131	cmpl	$0, %ecx
132	je	L(eq)
133
134	movzbl	1(%eax), %ecx
135	cmpb	%cl, 1(%edx)
136	jne	L(neq)
137	cmpl	$0, %ecx
138	je	L(eq)
139
140	movzbl	2(%eax), %ecx
141	cmpb	%cl, 2(%edx)
142	jne	L(neq)
143	cmpl	$0, %ecx
144	je	L(eq)
145
146	movzbl	3(%eax), %ecx
147	cmpb	%cl, 3(%edx)
148	jne	L(neq)
149	cmpl	$0, %ecx
150	je	L(eq)
151
152	movzbl	4(%eax), %ecx
153	cmpb	%cl, 4(%edx)
154	jne	L(neq)
155	cmpl	$0, %ecx
156	je	L(eq)
157
158	movzbl	5(%eax), %ecx
159	cmpb	%cl, 5(%edx)
160	jne	L(neq)
161	cmpl	$0, %ecx
162	je	L(eq)
163
164	movzbl	6(%eax), %ecx
165	cmpb	%cl, 6(%edx)
166	jne	L(neq)
167	cmpl	$0, %ecx
168	je	L(eq)
169
170	movzbl	7(%eax), %ecx
171	cmpb	%cl, 7(%edx)
172	jne	L(neq)
173	cmpl	$0, %ecx
174	je	L(eq)
175
176	add	$8, %edx
177	add	$8, %eax
178#ifdef USE_AS_STRNCMP
179	cmpl	$8, %ebp
180	lea	-8(%ebp), %ebp
181	je	L(eq)
182L(more16bytes):
183#endif
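/* If either pointer is within 16 bytes of the end of a page, an unaligned
   16-byte load could fault past the string, so take the careful aligned
   path at L(crosspage) instead of the quick probe below.  */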
184	movl	%edx, %ecx
185	and	$0xfff, %ecx
186	cmpl	$0xff0, %ecx
187	ja	L(crosspage)
188	mov	%eax, %ecx
189	and	$0xfff, %ecx
190	cmpl	$0xff0, %ecx
191	ja	L(crosspage)
192	pxor	%xmm0, %xmm0
193	movlpd	(%eax), %xmm1
194	movlpd	(%edx), %xmm2
195	movhpd	8(%eax), %xmm1
196	movhpd	8(%edx), %xmm2
197	pcmpeqb	%xmm1, %xmm0
198	pcmpeqb	%xmm2, %xmm1
199	psubb	%xmm0, %xmm1
200	pmovmskb %xmm1, %ecx
201	sub	$0xffff, %ecx
202	jnz	L(less16bytes)
203#ifdef USE_AS_STRNCMP
204	cmpl	$16, %ebp
205	lea	-16(%ebp), %ebp
206	jbe	L(eq)
207#endif
208	add	$16, %eax
209	add	$16, %edx
210
211L(crosspage):
212
213	PUSH	(%ebx)
214	PUSH	(%edi)
215	PUSH	(%esi)
216#ifdef USE_AS_STRNCMP
217	cfi_remember_state
218#endif
219
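/* Aligned/cross-page path.  %ecx and %edi receive the low four address bits
   of %eax and %edx, and both pointers are rounded down to 16-byte alignment.
   %ebx remembers the shift amount in its low bits and gets bit 0x20 set if
   the two operands are swapped here, so the comparison order can be restored
   at L(less32bytes)/L(ret2) before the final byte is examined.  */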
220	movl	%edx, %edi
221	movl	%eax, %ecx
222	and	$0xf, %ecx
223	and	$0xf, %edi
224	xor	%ecx, %eax
225	xor	%edi, %edx
226	xor	%ebx, %ebx
227	cmpl	%edi, %ecx
228	je	L(ashr_0)
229	ja	L(bigger)
230	or	$0x20, %ebx
231	xchg	%edx, %eax
232	xchg	%ecx, %edi
233L(bigger):
234	lea	15(%edi), %edi
235	sub	%ecx, %edi
236	cmpl	$8, %edi
237	jle	L(ashr_less_8)
238	cmpl	$14, %edi
239	je	L(ashr_15)
240	cmpl	$13, %edi
241	je	L(ashr_14)
242	cmpl	$12, %edi
243	je	L(ashr_13)
244	cmpl	$11, %edi
245	je	L(ashr_12)
246	cmpl	$10, %edi
247	je	L(ashr_11)
248	cmpl	$9, %edi
249	je	L(ashr_10)
250L(ashr_less_8):
251	je	L(ashr_9)
252	cmpl	$7, %edi
253	je	L(ashr_8)
254	cmpl	$6, %edi
255	je	L(ashr_7)
256	cmpl	$5, %edi
257	je	L(ashr_6)
258	cmpl	$4, %edi
259	je	L(ashr_5)
260	cmpl	$3, %edi
261	je	L(ashr_4)
262	cmpl	$2, %edi
263	je	L(ashr_3)
264	cmpl	$1, %edi
265	je	L(ashr_2)
266	cmpl	$0, %edi
267	je	L(ashr_1)
268
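/* Dispatch on the alignment difference.  After the possible swap above
   %ecx >= %edi, and %edi now holds 15 + (%edx & 15) - (%eax & 15), a value
   k in 0..14 that selects the loop ashr_(k+1).  Note that the je at
   L(ashr_less_8) still consumes the flags of the earlier cmpl $8, %edi.
   Each ashr_N loop rebuilds string2's bytes with palignr $N from two
   adjacent aligned blocks, as described in the tables below.  */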
/*
 * The following cases are handled by ashr_0:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(0~15)                n(0~15)           15(15 + n - n)         ashr_0
 */
274	.p2align 4
275L(ashr_0):
276	mov	$0xffff, %esi
277	movdqa	(%eax), %xmm1
278	pxor	%xmm0, %xmm0
279	pcmpeqb	%xmm1, %xmm0
280	pcmpeqb	(%edx), %xmm1
281	psubb	%xmm0, %xmm1
282	pmovmskb %xmm1, %edi
283	shr	%cl, %esi
284	shr	%cl, %edi
285	sub	%edi, %esi
286	mov	%ecx, %edi
287	jne	L(less32bytes)
288	UPDATE_STRNCMP_COUNTER
289	mov	$0x10, %ebx
290	mov	$0x10, %ecx
291	pxor	%xmm0, %xmm0
292	.p2align 4
293L(loop_ashr_0):
294	movdqa	(%eax, %ecx), %xmm1
295	movdqa	(%edx, %ecx), %xmm2
296
297	pcmpeqb	%xmm1, %xmm0
298	pcmpeqb	%xmm2, %xmm1
299	psubb	%xmm0, %xmm1
300	pmovmskb %xmm1, %esi
301	sub	$0xffff, %esi
302	jnz	L(exit)
303#ifdef USE_AS_STRNCMP
304	cmpl	$16, %ebp
305	lea	-16(%ebp), %ebp
306	jbe	L(more8byteseq)
307#endif
308	add	$16, %ecx
309	jmp	L(loop_ashr_0)
310
/*
 * The following cases are handled by ashr_1:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(15)                  n - 15           0(15 + (n-15) - n)      ashr_1
 */
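/* In each ashr_N loop %xmm3 keeps the previously loaded aligned 16-byte
   block of string2 and %xmm2 the next one; palignr $N concatenates them and
   rebuilds the 16 unaligned bytes of string2 that line up with the aligned
   block just loaded from string1.  A minimal C sketch of that merge step,
   with invented names and assuming the shift amount 1 used by this loop:

	#include <tmmintrin.h>		// SSSE3: _mm_alignr_epi8

	static __m128i merge_blocks_shift1(__m128i prev, __m128i cur)
	{
	    // same effect as: palignr $1, %xmm3 (prev), %xmm2 (cur)
	    return _mm_alignr_epi8(cur, prev, 1);
	}

   %xmm4 preserves the unshifted block so it can become the new %xmm3 on the
   next iteration, and %edi counts up towards the next page boundary so that
   L(nibble_ashr_1) can look for a terminator before reading past it.  */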
316	.p2align 4
317L(ashr_1):
318	mov	$0xffff, %esi
319	pxor	%xmm0, %xmm0
320	movdqa	(%edx), %xmm2
321	movdqa	(%eax), %xmm1
322	pcmpeqb	%xmm1, %xmm0
323	pslldq	$15, %xmm2
324	pcmpeqb	%xmm1, %xmm2
325	psubb	%xmm0, %xmm2
326	pmovmskb %xmm2, %edi
327	shr	%cl, %esi
328	shr	%cl, %edi
329	sub	%edi, %esi
330	lea	-15(%ecx), %edi
331	jnz	L(less32bytes)
332
333	UPDATE_STRNCMP_COUNTER
334
335	movdqa	(%edx), %xmm3
336	pxor	%xmm0, %xmm0
337	mov	$16, %ecx
338	or	$1, %ebx
339	lea	1(%edx), %edi
340	and	$0xfff, %edi
341	sub	$0x1000, %edi
342
343	.p2align 4
344L(loop_ashr_1):
345	add	$16, %edi
346	jg	L(nibble_ashr_1)
347
348L(gobble_ashr_1):
349	movdqa	(%eax, %ecx), %xmm1
350	movdqa	(%edx, %ecx), %xmm2
351	movdqa	%xmm2, %xmm4
352
353	palignr	$1, %xmm3, %xmm2
354
355	pcmpeqb	%xmm1, %xmm0
356	pcmpeqb	%xmm2, %xmm1
357	psubb	%xmm0, %xmm1
358	pmovmskb %xmm1, %esi
359	sub	$0xffff, %esi
360	jnz	L(exit)
361#ifdef USE_AS_STRNCMP
362	cmpl	$16, %ebp
363	lea	-16(%ebp), %ebp
364	jbe	L(more8byteseq)
365#endif
366
367	add	$16, %ecx
368	movdqa	%xmm4, %xmm3
369
370	add	$16, %edi
371	jg	L(nibble_ashr_1)
372
373	movdqa	(%eax, %ecx), %xmm1
374	movdqa	(%edx, %ecx), %xmm2
375	movdqa	%xmm2, %xmm4
376
377	palignr	$1, %xmm3, %xmm2
378
379	pcmpeqb	%xmm1, %xmm0
380	pcmpeqb	%xmm2, %xmm1
381	psubb	%xmm0, %xmm1
382	pmovmskb %xmm1, %esi
383	sub	$0xffff, %esi
384	jnz	L(exit)
385
386#ifdef USE_AS_STRNCMP
387	cmpl	$16, %ebp
388	lea	-16(%ebp), %ebp
389	jbe	L(more8byteseq)
390#endif
391	add	$16, %ecx
392	movdqa	%xmm4, %xmm3
393	jmp	L(loop_ashr_1)
394
395	.p2align 4
396L(nibble_ashr_1):
397	pcmpeqb	%xmm3, %xmm0
398	pmovmskb %xmm0, %esi
399	test	$0xfffe, %esi
400	jnz	L(ashr_1_exittail)
401
402#ifdef USE_AS_STRNCMP
403	cmpl	$15, %ebp
404	jbe	L(ashr_1_exittail)
405#endif
406	pxor	%xmm0, %xmm0
407	sub	$0x1000, %edi
408	jmp	L(gobble_ashr_1)
409
410	.p2align 4
411L(ashr_1_exittail):
412	movdqa	(%eax, %ecx), %xmm1
413	psrldq	$1, %xmm0
414	psrldq	$1, %xmm3
415	jmp	L(aftertail)
416
/*
 * The following cases are handled by ashr_2:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(14~15)               n - 14           1(15 + (n-14) - n)      ashr_2
 */
422	.p2align 4
423L(ashr_2):
424	mov	$0xffff, %esi
425	pxor	%xmm0, %xmm0
426	movdqa	(%edx), %xmm2
427	movdqa	(%eax), %xmm1
428	pcmpeqb	%xmm1, %xmm0
429	pslldq	$14, %xmm2
430	pcmpeqb	%xmm1, %xmm2
431	psubb	%xmm0, %xmm2
432	pmovmskb %xmm2, %edi
433	shr	%cl, %esi
434	shr	%cl, %edi
435	sub	%edi, %esi
436	lea	-14(%ecx), %edi
437	jnz	L(less32bytes)
438
439	UPDATE_STRNCMP_COUNTER
440
441	movdqa	(%edx), %xmm3
442	pxor	%xmm0, %xmm0
443	mov	$16, %ecx
444	or	$2, %ebx
445	lea	2(%edx), %edi
446	and	$0xfff, %edi
447	sub	$0x1000, %edi
448
449	.p2align 4
450L(loop_ashr_2):
451	add	$16, %edi
452	jg	L(nibble_ashr_2)
453
454L(gobble_ashr_2):
455	movdqa	(%eax, %ecx), %xmm1
456	movdqa	(%edx, %ecx), %xmm2
457	movdqa	%xmm2, %xmm4
458
459	palignr	$2, %xmm3, %xmm2
460
461	pcmpeqb	%xmm1, %xmm0
462	pcmpeqb	%xmm2, %xmm1
463	psubb	%xmm0, %xmm1
464	pmovmskb %xmm1, %esi
465	sub	$0xffff, %esi
466	jnz	L(exit)
467
468#ifdef USE_AS_STRNCMP
469	cmpl	$16, %ebp
470	lea	-16(%ebp), %ebp
471	jbe	L(more8byteseq)
472#endif
473	add	$16, %ecx
474	movdqa	%xmm4, %xmm3
475
476	add	$16, %edi
477	jg	L(nibble_ashr_2)
478
479	movdqa	(%eax, %ecx), %xmm1
480	movdqa	(%edx, %ecx), %xmm2
481	movdqa	%xmm2, %xmm4
482
483	palignr	$2, %xmm3, %xmm2
484
485	pcmpeqb	%xmm1, %xmm0
486	pcmpeqb	%xmm2, %xmm1
487	psubb	%xmm0, %xmm1
488	pmovmskb %xmm1, %esi
489	sub	$0xffff, %esi
490	jnz	L(exit)
491
492#ifdef USE_AS_STRNCMP
493	cmpl	$16, %ebp
494	lea	-16(%ebp), %ebp
495	jbe	L(more8byteseq)
496#endif
497	add	$16, %ecx
498	movdqa	%xmm4, %xmm3
499	jmp	L(loop_ashr_2)
500
501	.p2align 4
502L(nibble_ashr_2):
503	pcmpeqb	%xmm3, %xmm0
504	pmovmskb %xmm0, %esi
505	test	$0xfffc, %esi
506	jnz	L(ashr_2_exittail)
507
508#ifdef USE_AS_STRNCMP
509	cmpl	$14, %ebp
510	jbe	L(ashr_2_exittail)
511#endif
512
513	pxor	%xmm0, %xmm0
514	sub	$0x1000, %edi
515	jmp	L(gobble_ashr_2)
516
517	.p2align 4
518L(ashr_2_exittail):
519	movdqa	(%eax, %ecx), %xmm1
520	psrldq	$2, %xmm0
521	psrldq	$2, %xmm3
522	jmp	L(aftertail)
523
/*
 * The following cases are handled by ashr_3:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(13~15)               n - 13           2(15 + (n-13) - n)      ashr_3
 */
529	.p2align 4
530L(ashr_3):
531	mov	$0xffff, %esi
532	pxor	%xmm0, %xmm0
533	movdqa	(%edx), %xmm2
534	movdqa	(%eax), %xmm1
535	pcmpeqb	%xmm1, %xmm0
536	pslldq	$13, %xmm2
537	pcmpeqb	%xmm1, %xmm2
538	psubb	%xmm0, %xmm2
539	pmovmskb %xmm2, %edi
540	shr	%cl, %esi
541	shr	%cl, %edi
542	sub	%edi, %esi
543	lea	-13(%ecx), %edi
544	jnz	L(less32bytes)
545
546	UPDATE_STRNCMP_COUNTER
547
548	movdqa	(%edx), %xmm3
549	pxor	%xmm0, %xmm0
550	mov	$16, %ecx
551	or	$3, %ebx
552	lea	3(%edx), %edi
553	and	$0xfff, %edi
554	sub	$0x1000, %edi
555
556	.p2align 4
557L(loop_ashr_3):
558	add	$16, %edi
559	jg	L(nibble_ashr_3)
560
561L(gobble_ashr_3):
562	movdqa	(%eax, %ecx), %xmm1
563	movdqa	(%edx, %ecx), %xmm2
564	movdqa	%xmm2, %xmm4
565
566	palignr	$3, %xmm3, %xmm2
567
568	pcmpeqb	%xmm1, %xmm0
569	pcmpeqb	%xmm2, %xmm1
570	psubb	%xmm0, %xmm1
571	pmovmskb %xmm1, %esi
572	sub	$0xffff, %esi
573	jnz	L(exit)
574
575#ifdef USE_AS_STRNCMP
576	cmpl	$16, %ebp
577	lea	-16(%ebp), %ebp
578	jbe	L(more8byteseq)
579#endif
580	add	$16, %ecx
581	movdqa	%xmm4, %xmm3
582
583	add	$16, %edi
584	jg	L(nibble_ashr_3)
585
586	movdqa	(%eax, %ecx), %xmm1
587	movdqa	(%edx, %ecx), %xmm2
588	movdqa	%xmm2, %xmm4
589
590	palignr	$3, %xmm3, %xmm2
591
592	pcmpeqb	%xmm1, %xmm0
593	pcmpeqb	%xmm2, %xmm1
594	psubb	%xmm0, %xmm1
595	pmovmskb %xmm1, %esi
596	sub	$0xffff, %esi
597	jnz	L(exit)
598
599#ifdef USE_AS_STRNCMP
600	cmpl	$16, %ebp
601	lea	-16(%ebp), %ebp
602	jbe	L(more8byteseq)
603#endif
604	add	$16, %ecx
605	movdqa	%xmm4, %xmm3
606	jmp	L(loop_ashr_3)
607
608	.p2align 4
609L(nibble_ashr_3):
610	pcmpeqb	%xmm3, %xmm0
611	pmovmskb %xmm0, %esi
612	test	$0xfff8, %esi
613	jnz	L(ashr_3_exittail)
614
615#ifdef USE_AS_STRNCMP
616	cmpl	$13, %ebp
617	jbe	L(ashr_3_exittail)
618#endif
619	pxor	%xmm0, %xmm0
620	sub	$0x1000, %edi
621	jmp	L(gobble_ashr_3)
622
623	.p2align 4
624L(ashr_3_exittail):
625	movdqa	(%eax, %ecx), %xmm1
626	psrldq	$3, %xmm0
627	psrldq	$3, %xmm3
628	jmp	L(aftertail)
629
/*
 * The following cases are handled by ashr_4:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(12~15)               n - 12           3(15 + (n-12) - n)      ashr_4
 */
635	.p2align 4
636L(ashr_4):
637	mov	$0xffff, %esi
638	pxor	%xmm0, %xmm0
639	movdqa	(%edx), %xmm2
640	movdqa	(%eax), %xmm1
641	pcmpeqb	%xmm1, %xmm0
642	pslldq	$12, %xmm2
643	pcmpeqb	%xmm1, %xmm2
644	psubb	%xmm0, %xmm2
645	pmovmskb %xmm2, %edi
646	shr	%cl, %esi
647	shr	%cl, %edi
648	sub	%edi, %esi
649	lea	-12(%ecx), %edi
650	jnz	L(less32bytes)
651
652	UPDATE_STRNCMP_COUNTER
653
654	movdqa	(%edx), %xmm3
655	pxor	%xmm0, %xmm0
656	mov	$16, %ecx
657	or	$4, %ebx
658	lea	4(%edx), %edi
659	and	$0xfff, %edi
660	sub	$0x1000, %edi
661
662	.p2align 4
663L(loop_ashr_4):
664	add	$16, %edi
665	jg	L(nibble_ashr_4)
666
667L(gobble_ashr_4):
668	movdqa	(%eax, %ecx), %xmm1
669	movdqa	(%edx, %ecx), %xmm2
670	movdqa	%xmm2, %xmm4
671
672	palignr	$4, %xmm3, %xmm2
673
674	pcmpeqb	%xmm1, %xmm0
675	pcmpeqb	%xmm2, %xmm1
676	psubb	%xmm0, %xmm1
677	pmovmskb %xmm1, %esi
678	sub	$0xffff, %esi
679	jnz	L(exit)
680
681#ifdef USE_AS_STRNCMP
682	cmpl	$16, %ebp
683	lea	-16(%ebp), %ebp
684	jbe	L(more8byteseq)
685#endif
686
687	add	$16, %ecx
688	movdqa	%xmm4, %xmm3
689
690	add	$16, %edi
691	jg	L(nibble_ashr_4)
692
693	movdqa	(%eax, %ecx), %xmm1
694	movdqa	(%edx, %ecx), %xmm2
695	movdqa	%xmm2, %xmm4
696
697	palignr	$4, %xmm3, %xmm2
698
699	pcmpeqb	%xmm1, %xmm0
700	pcmpeqb	%xmm2, %xmm1
701	psubb	%xmm0, %xmm1
702	pmovmskb %xmm1, %esi
703	sub	$0xffff, %esi
704	jnz	L(exit)
705
706#ifdef USE_AS_STRNCMP
707	cmpl	$16, %ebp
708	lea	-16(%ebp), %ebp
709	jbe	L(more8byteseq)
710#endif
711
712	add	$16, %ecx
713	movdqa	%xmm4, %xmm3
714	jmp	L(loop_ashr_4)
715
716	.p2align 4
717L(nibble_ashr_4):
718	pcmpeqb	%xmm3, %xmm0
719	pmovmskb %xmm0, %esi
720	test	$0xfff0, %esi
721	jnz	L(ashr_4_exittail)
722
723#ifdef USE_AS_STRNCMP
724	cmpl	$12, %ebp
725	jbe	L(ashr_4_exittail)
726#endif
727
728	pxor	%xmm0, %xmm0
729	sub	$0x1000, %edi
730	jmp	L(gobble_ashr_4)
731
732	.p2align 4
733L(ashr_4_exittail):
734	movdqa	(%eax, %ecx), %xmm1
735	psrldq	$4, %xmm0
736	psrldq	$4, %xmm3
737	jmp	L(aftertail)
738
/*
 * The following cases are handled by ashr_5:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(11~15)               n - 11           4(15 + (n-11) - n)      ashr_5
 */
744	.p2align 4
745L(ashr_5):
746	mov	$0xffff, %esi
747	pxor	%xmm0, %xmm0
748	movdqa	(%edx), %xmm2
749	movdqa	(%eax), %xmm1
750	pcmpeqb	%xmm1, %xmm0
751	pslldq	$11, %xmm2
752	pcmpeqb	%xmm1, %xmm2
753	psubb	%xmm0, %xmm2
754	pmovmskb %xmm2, %edi
755	shr	%cl, %esi
756	shr	%cl, %edi
757	sub	%edi, %esi
758	lea	-11(%ecx), %edi
759	jnz	L(less32bytes)
760
761	UPDATE_STRNCMP_COUNTER
762
763	movdqa	(%edx), %xmm3
764	pxor	%xmm0, %xmm0
765	mov	$16, %ecx
766	or	$5, %ebx
767	lea	5(%edx), %edi
768	and	$0xfff, %edi
769	sub	$0x1000, %edi
770
771	.p2align 4
772L(loop_ashr_5):
773	add	$16, %edi
774	jg	L(nibble_ashr_5)
775
776L(gobble_ashr_5):
777	movdqa	(%eax, %ecx), %xmm1
778	movdqa	(%edx, %ecx), %xmm2
779	movdqa	%xmm2, %xmm4
780
781	palignr	$5, %xmm3, %xmm2
782
783	pcmpeqb	%xmm1, %xmm0
784	pcmpeqb	%xmm2, %xmm1
785	psubb	%xmm0, %xmm1
786	pmovmskb %xmm1, %esi
787	sub	$0xffff, %esi
788	jnz	L(exit)
789
790#ifdef USE_AS_STRNCMP
791	cmpl	$16, %ebp
792	lea	-16(%ebp), %ebp
793	jbe	L(more8byteseq)
794#endif
795	add	$16, %ecx
796	movdqa	%xmm4, %xmm3
797
798	add	$16, %edi
799	jg	L(nibble_ashr_5)
800
801	movdqa	(%eax, %ecx), %xmm1
802	movdqa	(%edx, %ecx), %xmm2
803	movdqa	%xmm2, %xmm4
804
805	palignr	$5, %xmm3, %xmm2
806
807	pcmpeqb	%xmm1, %xmm0
808	pcmpeqb	%xmm2, %xmm1
809	psubb	%xmm0, %xmm1
810	pmovmskb %xmm1, %esi
811	sub	$0xffff, %esi
812	jnz	L(exit)
813
814#ifdef USE_AS_STRNCMP
815	cmpl	$16, %ebp
816	lea	-16(%ebp), %ebp
817	jbe	L(more8byteseq)
818#endif
819	add	$16, %ecx
820	movdqa	%xmm4, %xmm3
821	jmp	L(loop_ashr_5)
822
823	.p2align 4
824L(nibble_ashr_5):
825	pcmpeqb	%xmm3, %xmm0
826	pmovmskb %xmm0, %esi
827	test	$0xffe0, %esi
828	jnz	L(ashr_5_exittail)
829
830#ifdef USE_AS_STRNCMP
831	cmpl	$11, %ebp
832	jbe	L(ashr_5_exittail)
833#endif
834	pxor	%xmm0, %xmm0
835	sub	$0x1000, %edi
836	jmp	L(gobble_ashr_5)
837
838	.p2align 4
839L(ashr_5_exittail):
840	movdqa	(%eax, %ecx), %xmm1
841	psrldq	$5, %xmm0
842	psrldq	$5, %xmm3
843	jmp	L(aftertail)
844
/*
 * The following cases are handled by ashr_6:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(10~15)               n - 10           5(15 + (n-10) - n)      ashr_6
 */
850
851	.p2align 4
852L(ashr_6):
853	mov	$0xffff, %esi
854	pxor	%xmm0, %xmm0
855	movdqa	(%edx), %xmm2
856	movdqa	(%eax), %xmm1
857	pcmpeqb	%xmm1, %xmm0
858	pslldq	$10, %xmm2
859	pcmpeqb	%xmm1, %xmm2
860	psubb	%xmm0, %xmm2
861	pmovmskb %xmm2, %edi
862	shr	%cl, %esi
863	shr	%cl, %edi
864	sub	%edi, %esi
865	lea	-10(%ecx), %edi
866	jnz	L(less32bytes)
867
868	UPDATE_STRNCMP_COUNTER
869
870	movdqa	(%edx), %xmm3
871	pxor	%xmm0, %xmm0
872	mov	$16, %ecx
873	or	$6, %ebx
874	lea	6(%edx), %edi
875	and	$0xfff, %edi
876	sub	$0x1000, %edi
877
878	.p2align 4
879L(loop_ashr_6):
880	add	$16, %edi
881	jg	L(nibble_ashr_6)
882
883L(gobble_ashr_6):
884	movdqa	(%eax, %ecx), %xmm1
885	movdqa	(%edx, %ecx), %xmm2
886	movdqa	%xmm2, %xmm4
887
888	palignr	$6, %xmm3, %xmm2
889
890	pcmpeqb	%xmm1, %xmm0
891	pcmpeqb	%xmm2, %xmm1
892	psubb	%xmm0, %xmm1
893	pmovmskb %xmm1, %esi
894	sub	$0xffff, %esi
895	jnz	L(exit)
896
897#ifdef USE_AS_STRNCMP
898	cmpl	$16, %ebp
899	lea	-16(%ebp), %ebp
900	jbe	L(more8byteseq)
901#endif
902
903	add	$16, %ecx
904	movdqa	%xmm4, %xmm3
905
906	add	$16, %edi
907	jg	L(nibble_ashr_6)
908
909	movdqa	(%eax, %ecx), %xmm1
910	movdqa	(%edx, %ecx), %xmm2
911	movdqa	%xmm2, %xmm4
912
913	palignr	$6, %xmm3, %xmm2
914
915	pcmpeqb	%xmm1, %xmm0
916	pcmpeqb	%xmm2, %xmm1
917	psubb	%xmm0, %xmm1
918	pmovmskb %xmm1, %esi
919	sub	$0xffff, %esi
920	jnz	L(exit)
921#ifdef USE_AS_STRNCMP
922	cmpl	$16, %ebp
923	lea	-16(%ebp), %ebp
924	jbe	L(more8byteseq)
925#endif
926
927	add	$16, %ecx
928	movdqa	%xmm4, %xmm3
929	jmp	L(loop_ashr_6)
930
931	.p2align 4
932L(nibble_ashr_6):
933	pcmpeqb	%xmm3, %xmm0
934	pmovmskb %xmm0, %esi
935	test	$0xffc0, %esi
936	jnz	L(ashr_6_exittail)
937
938#ifdef USE_AS_STRNCMP
939	cmpl	$10, %ebp
940	jbe	L(ashr_6_exittail)
941#endif
942	pxor	%xmm0, %xmm0
943	sub	$0x1000, %edi
944	jmp	L(gobble_ashr_6)
945
946	.p2align 4
947L(ashr_6_exittail):
948	movdqa	(%eax, %ecx), %xmm1
949	psrldq	$6, %xmm0
950	psrldq	$6, %xmm3
951	jmp	L(aftertail)
952
/*
 * The following cases are handled by ashr_7:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(9~15)                n - 9            6(15 + (n-9) - n)       ashr_7
 */
958
959	.p2align 4
960L(ashr_7):
961	mov	$0xffff, %esi
962	pxor	%xmm0, %xmm0
963	movdqa	(%edx), %xmm2
964	movdqa	(%eax), %xmm1
965	pcmpeqb	%xmm1, %xmm0
966	pslldq	$9, %xmm2
967	pcmpeqb	%xmm1, %xmm2
968	psubb	%xmm0, %xmm2
969	pmovmskb %xmm2, %edi
970	shr	%cl, %esi
971	shr	%cl, %edi
972	sub	%edi, %esi
973	lea	-9(%ecx), %edi
974	jnz	L(less32bytes)
975
976	UPDATE_STRNCMP_COUNTER
977
978	movdqa	(%edx), %xmm3
979	pxor	%xmm0, %xmm0
980	mov	$16, %ecx
981	or	$7, %ebx
982	lea	8(%edx), %edi
983	and	$0xfff, %edi
984	sub	$0x1000, %edi
985
986	.p2align 4
987L(loop_ashr_7):
988	add	$16, %edi
989	jg	L(nibble_ashr_7)
990
991L(gobble_ashr_7):
992	movdqa	(%eax, %ecx), %xmm1
993	movdqa	(%edx, %ecx), %xmm2
994	movdqa	%xmm2, %xmm4
995
996	palignr	$7, %xmm3, %xmm2
997
998	pcmpeqb	%xmm1, %xmm0
999	pcmpeqb	%xmm2, %xmm1
1000	psubb	%xmm0, %xmm1
1001	pmovmskb %xmm1, %esi
1002	sub	$0xffff, %esi
1003	jnz	L(exit)
1004
1005#ifdef USE_AS_STRNCMP
1006	cmpl	$16, %ebp
1007	lea	-16(%ebp), %ebp
1008	jbe	L(more8byteseq)
1009#endif
1010
1011	add	$16, %ecx
1012	movdqa	%xmm4, %xmm3
1013
1014	add	$16, %edi
1015	jg	L(nibble_ashr_7)
1016
1017	movdqa	(%eax, %ecx), %xmm1
1018	movdqa	(%edx, %ecx), %xmm2
1019	movdqa	%xmm2, %xmm4
1020
1021	palignr	$7, %xmm3, %xmm2
1022
1023	pcmpeqb	%xmm1, %xmm0
1024	pcmpeqb	%xmm2, %xmm1
1025	psubb	%xmm0, %xmm1
1026	pmovmskb %xmm1, %esi
1027	sub	$0xffff, %esi
1028	jnz	L(exit)
1029
1030#ifdef USE_AS_STRNCMP
1031	cmpl	$16, %ebp
1032	lea	-16(%ebp), %ebp
1033	jbe	L(more8byteseq)
1034#endif
1035
1036	add	$16, %ecx
1037	movdqa	%xmm4, %xmm3
1038	jmp	L(loop_ashr_7)
1039
1040	.p2align 4
1041L(nibble_ashr_7):
1042	pcmpeqb	%xmm3, %xmm0
1043	pmovmskb %xmm0, %esi
1044	test	$0xff80, %esi
1045	jnz	L(ashr_7_exittail)
1046
1047#ifdef USE_AS_STRNCMP
1048	cmpl	$9, %ebp
1049	jbe	L(ashr_7_exittail)
1050#endif
	pxor	%xmm0, %xmm0
1053	sub	$0x1000, %edi
1054	jmp	L(gobble_ashr_7)
1055
1056	.p2align 4
1057L(ashr_7_exittail):
1058	movdqa	(%eax, %ecx), %xmm1
1059	psrldq	$7, %xmm0
1060	psrldq	$7, %xmm3
1061	jmp	L(aftertail)
1062
/*
 * The following cases are handled by ashr_8:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(8~15)                n - 8            7(15 + (n-8) - n)       ashr_8
 */
1068	.p2align 4
1069L(ashr_8):
1070	mov	$0xffff, %esi
1071	pxor	%xmm0, %xmm0
1072	movdqa	(%edx), %xmm2
1073	movdqa	(%eax), %xmm1
1074	pcmpeqb	%xmm1, %xmm0
1075	pslldq	$8, %xmm2
1076	pcmpeqb	%xmm1, %xmm2
1077	psubb	%xmm0, %xmm2
1078	pmovmskb %xmm2, %edi
1079	shr	%cl, %esi
1080	shr	%cl, %edi
1081	sub	%edi, %esi
1082	lea	-8(%ecx), %edi
1083	jnz	L(less32bytes)
1084
1085	UPDATE_STRNCMP_COUNTER
1086
1087	movdqa	(%edx), %xmm3
1088	pxor	%xmm0, %xmm0
1089	mov	$16, %ecx
1090	or	$8, %ebx
1091	lea	8(%edx), %edi
1092	and	$0xfff, %edi
1093	sub	$0x1000, %edi
1094
1095	.p2align 4
1096L(loop_ashr_8):
1097	add	$16, %edi
1098	jg	L(nibble_ashr_8)
1099
1100L(gobble_ashr_8):
1101	movdqa	(%eax, %ecx), %xmm1
1102	movdqa	(%edx, %ecx), %xmm2
1103	movdqa	%xmm2, %xmm4
1104
1105	palignr	$8, %xmm3, %xmm2
1106
1107	pcmpeqb	%xmm1, %xmm0
1108	pcmpeqb	%xmm2, %xmm1
1109	psubb	%xmm0, %xmm1
1110	pmovmskb %xmm1, %esi
1111	sub	$0xffff, %esi
1112	jnz	L(exit)
1113
1114#ifdef USE_AS_STRNCMP
1115	cmpl	$16, %ebp
1116	lea	-16(%ebp), %ebp
1117	jbe	L(more8byteseq)
1118#endif
1119	add	$16, %ecx
1120	movdqa	%xmm4, %xmm3
1121
1122	add	$16, %edi
1123	jg	L(nibble_ashr_8)
1124
1125	movdqa	(%eax, %ecx), %xmm1
1126	movdqa	(%edx, %ecx), %xmm2
1127	movdqa	%xmm2, %xmm4
1128
1129	palignr	$8, %xmm3, %xmm2
1130
1131	pcmpeqb	%xmm1, %xmm0
1132	pcmpeqb	%xmm2, %xmm1
1133	psubb	%xmm0, %xmm1
1134	pmovmskb %xmm1, %esi
1135	sub	$0xffff, %esi
1136	jnz	L(exit)
1137
1138#ifdef USE_AS_STRNCMP
1139	cmpl	$16, %ebp
1140	lea	-16(%ebp), %ebp
1141	jbe	L(more8byteseq)
1142#endif
1143	add	$16, %ecx
1144	movdqa	%xmm4, %xmm3
1145	jmp	L(loop_ashr_8)
1146
1147	.p2align 4
1148L(nibble_ashr_8):
1149	pcmpeqb	%xmm3, %xmm0
1150	pmovmskb %xmm0, %esi
1151	test	$0xff00, %esi
1152	jnz	L(ashr_8_exittail)
1153
1154#ifdef USE_AS_STRNCMP
1155	cmpl	$8, %ebp
1156	jbe	L(ashr_8_exittail)
1157#endif
	pxor	%xmm0, %xmm0
1160	sub	$0x1000, %edi
1161	jmp	L(gobble_ashr_8)
1162
1163	.p2align 4
1164L(ashr_8_exittail):
1165	movdqa	(%eax, %ecx), %xmm1
1166	psrldq	$8, %xmm0
1167	psrldq	$8, %xmm3
1168	jmp	L(aftertail)
1169
/*
 * The following cases are handled by ashr_9:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(7~15)                n - 7            8(15 + (n-7) - n)       ashr_9
 */
1175	.p2align 4
1176L(ashr_9):
1177	mov	$0xffff, %esi
1178	pxor	%xmm0, %xmm0
1179	movdqa	(%edx), %xmm2
1180	movdqa	(%eax), %xmm1
1181	pcmpeqb	%xmm1, %xmm0
1182	pslldq	$7, %xmm2
1183	pcmpeqb	%xmm1, %xmm2
1184	psubb	%xmm0, %xmm2
1185	pmovmskb %xmm2, %edi
1186	shr	%cl, %esi
1187	shr	%cl, %edi
1188	sub	%edi, %esi
1189	lea	-7(%ecx), %edi
1190	jnz	L(less32bytes)
1191
1192	UPDATE_STRNCMP_COUNTER
1193
1194	movdqa	(%edx), %xmm3
1195	pxor	%xmm0, %xmm0
1196	mov	$16, %ecx
1197	or	$9, %ebx
1198	lea	9(%edx), %edi
1199	and	$0xfff, %edi
1200	sub	$0x1000, %edi
1201
1202	.p2align 4
1203L(loop_ashr_9):
1204	add	$16, %edi
1205	jg	L(nibble_ashr_9)
1206
1207L(gobble_ashr_9):
1208	movdqa	(%eax, %ecx), %xmm1
1209	movdqa	(%edx, %ecx), %xmm2
1210	movdqa	%xmm2, %xmm4
1211
1212	palignr	$9, %xmm3, %xmm2
1213
1214	pcmpeqb	%xmm1, %xmm0
1215	pcmpeqb	%xmm2, %xmm1
1216	psubb	%xmm0, %xmm1
1217	pmovmskb %xmm1, %esi
1218	sub	$0xffff, %esi
1219	jnz	L(exit)
1220
1221#ifdef USE_AS_STRNCMP
1222	cmpl	$16, %ebp
1223	lea	-16(%ebp), %ebp
1224	jbe	L(more8byteseq)
1225#endif
1226	add	$16, %ecx
1227	movdqa	%xmm4, %xmm3
1228
1229	add	$16, %edi
1230	jg	L(nibble_ashr_9)
1231
1232	movdqa	(%eax, %ecx), %xmm1
1233	movdqa	(%edx, %ecx), %xmm2
1234	movdqa	%xmm2, %xmm4
1235
1236	palignr	$9, %xmm3, %xmm2
1237
1238	pcmpeqb	%xmm1, %xmm0
1239	pcmpeqb	%xmm2, %xmm1
1240	psubb	%xmm0, %xmm1
1241	pmovmskb %xmm1, %esi
1242	sub	$0xffff, %esi
1243	jnz	L(exit)
1244
1245#ifdef USE_AS_STRNCMP
1246	cmpl	$16, %ebp
1247	lea	-16(%ebp), %ebp
1248	jbe	L(more8byteseq)
1249#endif
1250	add	$16, %ecx
1251	movdqa	%xmm4, %xmm3
1252	jmp	L(loop_ashr_9)
1253
1254	.p2align 4
1255L(nibble_ashr_9):
1256	pcmpeqb	%xmm3, %xmm0
1257	pmovmskb %xmm0, %esi
1258	test	$0xfe00, %esi
1259	jnz	L(ashr_9_exittail)
1260
1261#ifdef USE_AS_STRNCMP
1262	cmpl	$7, %ebp
1263	jbe	L(ashr_9_exittail)
1264#endif
1265	pxor	%xmm0, %xmm0
1266	sub	$0x1000, %edi
1267	jmp	L(gobble_ashr_9)
1268
1269	.p2align 4
1270L(ashr_9_exittail):
1271	movdqa	(%eax, %ecx), %xmm1
1272	psrldq	$9, %xmm0
1273	psrldq	$9, %xmm3
1274	jmp	L(aftertail)
1275
/*
 * The following cases are handled by ashr_10:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(6~15)                n - 6            9(15 + (n-6) - n)       ashr_10
 */
1281	.p2align 4
1282L(ashr_10):
1283	mov	$0xffff, %esi
1284	pxor	%xmm0, %xmm0
1285	movdqa	(%edx), %xmm2
1286	movdqa	(%eax), %xmm1
1287	pcmpeqb	%xmm1, %xmm0
1288	pslldq	$6, %xmm2
1289	pcmpeqb	%xmm1, %xmm2
1290	psubb	%xmm0, %xmm2
1291	pmovmskb %xmm2, %edi
1292	shr	%cl, %esi
1293	shr	%cl, %edi
1294	sub	%edi, %esi
1295	lea	-6(%ecx), %edi
1296	jnz	L(less32bytes)
1297
1298	UPDATE_STRNCMP_COUNTER
1299
1300	movdqa	(%edx), %xmm3
1301	pxor	%xmm0, %xmm0
1302	mov	$16, %ecx
1303	or	$10, %ebx
1304	lea	10(%edx), %edi
1305	and	$0xfff, %edi
1306	sub	$0x1000, %edi
1307
1308	.p2align 4
1309L(loop_ashr_10):
1310	add	$16, %edi
1311	jg	L(nibble_ashr_10)
1312
1313L(gobble_ashr_10):
1314	movdqa	(%eax, %ecx), %xmm1
1315	movdqa	(%edx, %ecx), %xmm2
1316	movdqa	%xmm2, %xmm4
1317
1318	palignr	$10, %xmm3, %xmm2
1319
1320	pcmpeqb	%xmm1, %xmm0
1321	pcmpeqb	%xmm2, %xmm1
1322	psubb	%xmm0, %xmm1
1323	pmovmskb %xmm1, %esi
1324	sub	$0xffff, %esi
1325	jnz	L(exit)
1326
1327#ifdef USE_AS_STRNCMP
1328	cmpl	$16, %ebp
1329	lea	-16(%ebp), %ebp
1330	jbe	L(more8byteseq)
1331#endif
1332	add	$16, %ecx
1333	movdqa	%xmm4, %xmm3
1334
1335	add	$16, %edi
1336	jg	L(nibble_ashr_10)
1337
1338	movdqa	(%eax, %ecx), %xmm1
1339	movdqa	(%edx, %ecx), %xmm2
1340	movdqa	%xmm2, %xmm4
1341
1342	palignr	$10, %xmm3, %xmm2
1343
1344	pcmpeqb	%xmm1, %xmm0
1345	pcmpeqb	%xmm2, %xmm1
1346	psubb	%xmm0, %xmm1
1347	pmovmskb %xmm1, %esi
1348	sub	$0xffff, %esi
1349	jnz	L(exit)
1350
1351#ifdef USE_AS_STRNCMP
1352	cmpl	$16, %ebp
1353	lea	-16(%ebp), %ebp
1354	jbe	L(more8byteseq)
1355#endif
1356	add	$16, %ecx
1357	movdqa	%xmm4, %xmm3
1358	jmp	L(loop_ashr_10)
1359
1360	.p2align 4
1361L(nibble_ashr_10):
1362	pcmpeqb	%xmm3, %xmm0
1363	pmovmskb %xmm0, %esi
1364	test	$0xfc00, %esi
1365	jnz	L(ashr_10_exittail)
1366
1367#ifdef USE_AS_STRNCMP
1368	cmpl	$6, %ebp
1369	jbe	L(ashr_10_exittail)
1370#endif
1371	pxor	%xmm0, %xmm0
1372	sub	$0x1000, %edi
1373	jmp	L(gobble_ashr_10)
1374
1375	.p2align 4
1376L(ashr_10_exittail):
1377	movdqa	(%eax, %ecx), %xmm1
1378	psrldq	$10, %xmm0
1379	psrldq	$10, %xmm3
1380	jmp	L(aftertail)
1381
/*
 * The following cases are handled by ashr_11:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(5~15)                n - 5            10(15 + (n-5) - n)      ashr_11
 */
1387	.p2align 4
1388L(ashr_11):
1389	mov	$0xffff, %esi
1390	pxor	%xmm0, %xmm0
1391	movdqa	(%edx), %xmm2
1392	movdqa	(%eax), %xmm1
1393	pcmpeqb	%xmm1, %xmm0
1394	pslldq	$5, %xmm2
1395	pcmpeqb	%xmm1, %xmm2
1396	psubb	%xmm0, %xmm2
1397	pmovmskb %xmm2, %edi
1398	shr	%cl, %esi
1399	shr	%cl, %edi
1400	sub	%edi, %esi
1401	lea	-5(%ecx), %edi
1402	jnz	L(less32bytes)
1403
1404	UPDATE_STRNCMP_COUNTER
1405
1406	movdqa	(%edx), %xmm3
1407	pxor	%xmm0, %xmm0
1408	mov	$16, %ecx
1409	or	$11, %ebx
1410	lea	11(%edx), %edi
1411	and	$0xfff, %edi
1412	sub	$0x1000, %edi
1413
1414	.p2align 4
1415L(loop_ashr_11):
1416	add	$16, %edi
1417	jg	L(nibble_ashr_11)
1418
1419L(gobble_ashr_11):
1420	movdqa	(%eax, %ecx), %xmm1
1421	movdqa	(%edx, %ecx), %xmm2
1422	movdqa	%xmm2, %xmm4
1423
1424	palignr	$11, %xmm3, %xmm2
1425
1426	pcmpeqb	%xmm1, %xmm0
1427	pcmpeqb	%xmm2, %xmm1
1428	psubb	%xmm0, %xmm1
1429	pmovmskb %xmm1, %esi
1430	sub	$0xffff, %esi
1431	jnz	L(exit)
1432
1433#ifdef USE_AS_STRNCMP
1434	cmpl	$16, %ebp
1435	lea	-16(%ebp), %ebp
1436	jbe	L(more8byteseq)
1437#endif
1438	add	$16, %ecx
1439	movdqa	%xmm4, %xmm3
1440
1441	add	$16, %edi
1442	jg	L(nibble_ashr_11)
1443
1444	movdqa	(%eax, %ecx), %xmm1
1445	movdqa	(%edx, %ecx), %xmm2
1446	movdqa	%xmm2, %xmm4
1447
1448	palignr	$11, %xmm3, %xmm2
1449
1450	pcmpeqb	%xmm1, %xmm0
1451	pcmpeqb	%xmm2, %xmm1
1452	psubb	%xmm0, %xmm1
1453	pmovmskb %xmm1, %esi
1454	sub	$0xffff, %esi
1455	jnz	L(exit)
1456
1457#ifdef USE_AS_STRNCMP
1458	cmpl	$16, %ebp
1459	lea	-16(%ebp), %ebp
1460	jbe	L(more8byteseq)
1461#endif
1462	add	$16, %ecx
1463	movdqa	%xmm4, %xmm3
1464	jmp	L(loop_ashr_11)
1465
1466	.p2align 4
1467L(nibble_ashr_11):
1468	pcmpeqb	%xmm3, %xmm0
1469	pmovmskb %xmm0, %esi
1470	test	$0xf800, %esi
1471	jnz	L(ashr_11_exittail)
1472
1473#ifdef USE_AS_STRNCMP
1474	cmpl	$5, %ebp
1475	jbe	L(ashr_11_exittail)
1476#endif
1477	pxor	%xmm0, %xmm0
1478	sub	$0x1000, %edi
1479	jmp	L(gobble_ashr_11)
1480
1481	.p2align 4
1482L(ashr_11_exittail):
1483	movdqa	(%eax, %ecx), %xmm1
1484	psrldq	$11, %xmm0
1485	psrldq	$11, %xmm3
1486	jmp	L(aftertail)
1487
/*
 * The following cases are handled by ashr_12:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(4~15)                n - 4            11(15 + (n-4) - n)      ashr_12
 */
1493	.p2align 4
1494L(ashr_12):
1495	mov	$0xffff, %esi
1496	pxor	%xmm0, %xmm0
1497	movdqa	(%edx), %xmm2
1498	movdqa	(%eax), %xmm1
1499	pcmpeqb	%xmm1, %xmm0
1500	pslldq	$4, %xmm2
1501	pcmpeqb	%xmm1, %xmm2
1502	psubb	%xmm0, %xmm2
1503	pmovmskb %xmm2, %edi
1504	shr	%cl, %esi
1505	shr	%cl, %edi
1506	sub	%edi, %esi
1507	lea	-4(%ecx), %edi
1508	jnz	L(less32bytes)
1509
1510	UPDATE_STRNCMP_COUNTER
1511
1512	movdqa	(%edx), %xmm3
1513	pxor	%xmm0, %xmm0
1514	mov	$16, %ecx
1515	or	$12, %ebx
1516	lea	12(%edx), %edi
1517	and	$0xfff, %edi
1518	sub	$0x1000, %edi
1519
1520	.p2align 4
1521L(loop_ashr_12):
1522	add	$16, %edi
1523	jg	L(nibble_ashr_12)
1524
1525L(gobble_ashr_12):
1526	movdqa	(%eax, %ecx), %xmm1
1527	movdqa	(%edx, %ecx), %xmm2
1528	movdqa	%xmm2, %xmm4
1529
1530	palignr	$12, %xmm3, %xmm2
1531
1532	pcmpeqb	%xmm1, %xmm0
1533	pcmpeqb	%xmm2, %xmm1
1534	psubb	%xmm0, %xmm1
1535	pmovmskb %xmm1, %esi
1536	sub	$0xffff, %esi
1537	jnz	L(exit)
1538
1539#ifdef USE_AS_STRNCMP
1540	cmpl	$16, %ebp
1541	lea	-16(%ebp), %ebp
1542	jbe	L(more8byteseq)
1543#endif
1544
1545	add	$16, %ecx
1546	movdqa	%xmm4, %xmm3
1547
1548	add	$16, %edi
1549	jg	L(nibble_ashr_12)
1550
1551	movdqa	(%eax, %ecx), %xmm1
1552	movdqa	(%edx, %ecx), %xmm2
1553	movdqa	%xmm2, %xmm4
1554
1555	palignr	$12, %xmm3, %xmm2
1556
1557	pcmpeqb	%xmm1, %xmm0
1558	pcmpeqb	%xmm2, %xmm1
1559	psubb	%xmm0, %xmm1
1560	pmovmskb %xmm1, %esi
1561	sub	$0xffff, %esi
1562	jnz	L(exit)
1563
1564#ifdef USE_AS_STRNCMP
1565	cmpl	$16, %ebp
1566	lea	-16(%ebp), %ebp
1567	jbe	L(more8byteseq)
1568#endif
1569	add	$16, %ecx
1570	movdqa	%xmm4, %xmm3
1571	jmp	L(loop_ashr_12)
1572
1573	.p2align 4
1574L(nibble_ashr_12):
1575	pcmpeqb	%xmm3, %xmm0
1576	pmovmskb %xmm0, %esi
1577	test	$0xf000, %esi
1578	jnz	L(ashr_12_exittail)
1579
1580#ifdef USE_AS_STRNCMP
1581	cmpl	$4, %ebp
1582	jbe	L(ashr_12_exittail)
1583#endif
1584	pxor	%xmm0, %xmm0
1585	sub	$0x1000, %edi
1586	jmp	L(gobble_ashr_12)
1587
1588	.p2align 4
1589L(ashr_12_exittail):
1590	movdqa	(%eax, %ecx), %xmm1
1591	psrldq	$12, %xmm0
1592	psrldq	$12, %xmm3
1593	jmp	L(aftertail)
1594
/*
 * The following cases are handled by ashr_13:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(3~15)                n - 3            12(15 + (n-3) - n)      ashr_13
 */
1600	.p2align 4
1601L(ashr_13):
1602	mov	$0xffff, %esi
1603	pxor	%xmm0, %xmm0
1604	movdqa	(%edx), %xmm2
1605	movdqa	(%eax), %xmm1
1606	pcmpeqb	%xmm1, %xmm0
1607	pslldq	$3, %xmm2
1608	pcmpeqb	%xmm1, %xmm2
1609	psubb	%xmm0, %xmm2
1610	pmovmskb %xmm2, %edi
1611	shr	%cl, %esi
1612	shr	%cl, %edi
1613	sub	%edi, %esi
1614	lea	-3(%ecx), %edi
1615	jnz	L(less32bytes)
1616
1617	UPDATE_STRNCMP_COUNTER
1618
1619	movdqa	(%edx), %xmm3
1620	pxor	%xmm0, %xmm0
1621	mov	$16, %ecx
1622	or	$13, %ebx
1623	lea	13(%edx), %edi
1624	and	$0xfff, %edi
1625	sub	$0x1000, %edi
1626
1627	.p2align 4
1628L(loop_ashr_13):
1629	add	$16, %edi
1630	jg	L(nibble_ashr_13)
1631
1632L(gobble_ashr_13):
1633	movdqa	(%eax, %ecx), %xmm1
1634	movdqa	(%edx, %ecx), %xmm2
1635	movdqa	%xmm2, %xmm4
1636
1637	palignr	$13, %xmm3, %xmm2
1638
1639	pcmpeqb	%xmm1, %xmm0
1640	pcmpeqb	%xmm2, %xmm1
1641	psubb	%xmm0, %xmm1
1642	pmovmskb %xmm1, %esi
1643	sub	$0xffff, %esi
1644	jnz	L(exit)
1645
1646#ifdef USE_AS_STRNCMP
1647	cmpl	$16, %ebp
1648	lea	-16(%ebp), %ebp
1649	jbe	L(more8byteseq)
1650#endif
1651	add	$16, %ecx
1652	movdqa	%xmm4, %xmm3
1653
1654	add	$16, %edi
1655	jg	L(nibble_ashr_13)
1656
1657	movdqa	(%eax, %ecx), %xmm1
1658	movdqa	(%edx, %ecx), %xmm2
1659	movdqa	%xmm2, %xmm4
1660
1661	palignr	$13, %xmm3, %xmm2
1662
1663	pcmpeqb	%xmm1, %xmm0
1664	pcmpeqb	%xmm2, %xmm1
1665	psubb	%xmm0, %xmm1
1666	pmovmskb %xmm1, %esi
1667	sub	$0xffff, %esi
1668	jnz	L(exit)
1669
1670#ifdef USE_AS_STRNCMP
1671	cmpl	$16, %ebp
1672	lea	-16(%ebp), %ebp
1673	jbe	L(more8byteseq)
1674#endif
1675	add	$16, %ecx
1676	movdqa	%xmm4, %xmm3
1677	jmp	L(loop_ashr_13)
1678
1679	.p2align 4
1680L(nibble_ashr_13):
1681	pcmpeqb	%xmm3, %xmm0
1682	pmovmskb %xmm0, %esi
1683	test	$0xe000, %esi
1684	jnz	L(ashr_13_exittail)
1685
1686#ifdef USE_AS_STRNCMP
1687	cmpl	$3, %ebp
1688	jbe	L(ashr_13_exittail)
1689#endif
1690	pxor	%xmm0, %xmm0
1691	sub	$0x1000, %edi
1692	jmp	L(gobble_ashr_13)
1693
1694	.p2align 4
1695L(ashr_13_exittail):
1696	movdqa	(%eax, %ecx), %xmm1
1697	psrldq	$13, %xmm0
1698	psrldq	$13, %xmm3
1699	jmp	L(aftertail)
1700
/*
 * The following cases are handled by ashr_14:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(2~15)                n - 2            13(15 + (n-2) - n)      ashr_14
 */
1706	.p2align 4
1707L(ashr_14):
1708	mov	$0xffff, %esi
1709	pxor	%xmm0, %xmm0
1710	movdqa	(%edx), %xmm2
1711	movdqa	(%eax), %xmm1
1712	pcmpeqb	%xmm1, %xmm0
1713	pslldq	$2, %xmm2
1714	pcmpeqb	%xmm1, %xmm2
1715	psubb	%xmm0, %xmm2
1716	pmovmskb %xmm2, %edi
1717	shr	%cl, %esi
1718	shr	%cl, %edi
1719	sub	%edi, %esi
1720	lea	-2(%ecx), %edi
1721	jnz	L(less32bytes)
1722
1723	UPDATE_STRNCMP_COUNTER
1724
1725	movdqa	(%edx), %xmm3
1726	pxor	%xmm0, %xmm0
1727	mov	$16, %ecx
1728	or	$14, %ebx
1729	lea	14(%edx), %edi
1730	and	$0xfff, %edi
1731	sub	$0x1000, %edi
1732
1733	.p2align 4
1734L(loop_ashr_14):
1735	add	$16, %edi
1736	jg	L(nibble_ashr_14)
1737
1738L(gobble_ashr_14):
1739	movdqa	(%eax, %ecx), %xmm1
1740	movdqa	(%edx, %ecx), %xmm2
1741	movdqa	%xmm2, %xmm4
1742
1743	palignr	$14, %xmm3, %xmm2
1744
1745	pcmpeqb	%xmm1, %xmm0
1746	pcmpeqb	%xmm2, %xmm1
1747	psubb	%xmm0, %xmm1
1748	pmovmskb %xmm1, %esi
1749	sub	$0xffff, %esi
1750	jnz	L(exit)
1751
1752#ifdef USE_AS_STRNCMP
1753	cmpl	$16, %ebp
1754	lea	-16(%ebp), %ebp
1755	jbe	L(more8byteseq)
1756#endif
1757	add	$16, %ecx
1758	movdqa	%xmm4, %xmm3
1759
1760	add	$16, %edi
1761	jg	L(nibble_ashr_14)
1762
1763	movdqa	(%eax, %ecx), %xmm1
1764	movdqa	(%edx, %ecx), %xmm2
1765	movdqa	%xmm2, %xmm4
1766
1767	palignr	$14, %xmm3, %xmm2
1768
1769	pcmpeqb	%xmm1, %xmm0
1770	pcmpeqb	%xmm2, %xmm1
1771	psubb	%xmm0, %xmm1
1772	pmovmskb %xmm1, %esi
1773	sub	$0xffff, %esi
1774	jnz	L(exit)
1775
1776#ifdef USE_AS_STRNCMP
1777	cmpl	$16, %ebp
1778	lea	-16(%ebp), %ebp
1779	jbe	L(more8byteseq)
1780#endif
1781	add	$16, %ecx
1782	movdqa	%xmm4, %xmm3
1783	jmp	L(loop_ashr_14)
1784
1785	.p2align 4
1786L(nibble_ashr_14):
1787	pcmpeqb	%xmm3, %xmm0
1788	pmovmskb %xmm0, %esi
1789	test	$0xc000, %esi
1790	jnz	L(ashr_14_exittail)
1791
1792#ifdef USE_AS_STRNCMP
1793	cmpl	$2, %ebp
1794	jbe	L(ashr_14_exittail)
1795#endif
1796	pxor	%xmm0, %xmm0
1797	sub	$0x1000, %edi
1798	jmp	L(gobble_ashr_14)
1799
1800	.p2align 4
1801L(ashr_14_exittail):
1802	movdqa	(%eax, %ecx), %xmm1
1803	psrldq	$14, %xmm0
1804	psrldq	$14, %xmm3
1805	jmp	L(aftertail)
1806
/*
 * The following cases are handled by ashr_15:
 *  ecx (offset of %eax)   edi (offset of %edx)   relative offset   corresponding case
 *        n(1~15)                n - 1            14(15 + (n-1) - n)      ashr_15
 */
1812
1813	.p2align 4
1814L(ashr_15):
1815	mov	$0xffff, %esi
1816	pxor	%xmm0, %xmm0
1817	movdqa	(%edx), %xmm2
1818	movdqa	(%eax), %xmm1
1819	pcmpeqb	%xmm1, %xmm0
1820	pslldq	$1, %xmm2
1821	pcmpeqb	%xmm1, %xmm2
1822	psubb	%xmm0, %xmm2
1823	pmovmskb %xmm2, %edi
1824	shr	%cl, %esi
1825	shr	%cl, %edi
1826	sub	%edi, %esi
1827	lea	-1(%ecx), %edi
1828	jnz	L(less32bytes)
1829
1830	UPDATE_STRNCMP_COUNTER
1831
1832	movdqa	(%edx), %xmm3
1833	pxor	%xmm0, %xmm0
1834	mov	$16, %ecx
1835	or	$15, %ebx
1836	lea	15(%edx), %edi
1837	and	$0xfff, %edi
1838	sub	$0x1000, %edi
1839
1840	.p2align 4
1841L(loop_ashr_15):
1842	add	$16, %edi
1843	jg	L(nibble_ashr_15)
1844
1845L(gobble_ashr_15):
1846	movdqa	(%eax, %ecx), %xmm1
1847	movdqa	(%edx, %ecx), %xmm2
1848	movdqa	%xmm2, %xmm4
1849
1850	palignr	$15, %xmm3, %xmm2
1851
1852	pcmpeqb	%xmm1, %xmm0
1853	pcmpeqb	%xmm2, %xmm1
1854	psubb	%xmm0, %xmm1
1855	pmovmskb %xmm1, %esi
1856	sub	$0xffff, %esi
1857	jnz	L(exit)
1858
1859#ifdef USE_AS_STRNCMP
1860	cmpl	$16, %ebp
1861	lea	-16(%ebp), %ebp
1862	jbe	L(more8byteseq)
1863#endif
1864	add	$16, %ecx
1865	movdqa	%xmm4, %xmm3
1866
1867	add	$16, %edi
1868	jg	L(nibble_ashr_15)
1869
1870	movdqa	(%eax, %ecx), %xmm1
1871	movdqa	(%edx, %ecx), %xmm2
1872	movdqa	%xmm2, %xmm4
1873
1874	palignr	$15, %xmm3, %xmm2
1875
1876	pcmpeqb	%xmm1, %xmm0
1877	pcmpeqb	%xmm2, %xmm1
1878	psubb	%xmm0, %xmm1
1879	pmovmskb %xmm1, %esi
1880	sub	$0xffff, %esi
1881	jnz	L(exit)
1882
1883#ifdef USE_AS_STRNCMP
1884	cmpl	$16, %ebp
1885	lea	-16(%ebp), %ebp
1886	jbe	L(more8byteseq)
1887#endif
1888	add	$16, %ecx
1889	movdqa	%xmm4, %xmm3
1890	jmp	L(loop_ashr_15)
1891
1892	.p2align 4
1893L(nibble_ashr_15):
1894	pcmpeqb	%xmm3, %xmm0
1895	pmovmskb %xmm0, %esi
1896	test	$0x8000, %esi
1897	jnz	L(ashr_15_exittail)
1898
1899#ifdef USE_AS_STRNCMP
1900	cmpl	$1, %ebp
1901	jbe	L(ashr_15_exittail)
1902#endif
1903	pxor	%xmm0, %xmm0
1904	sub	$0x1000, %edi
1905	jmp	L(gobble_ashr_15)
1906
1907	.p2align 4
1908L(ashr_15_exittail):
1909	movdqa	(%eax, %ecx), %xmm1
1910	psrldq	$15, %xmm0
1911	psrldq	$15, %xmm3
1912	jmp	L(aftertail)
1913
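/* L(aftertail) finishes the block that the ashr_*_exittail paths prepared.
   L(exit) converts the loop state into byte offsets: the low bits of %ebx
   hold the shift amount and %ecx the loop offset, giving
   %edi = (%ebx & 0x1f) + %ecx - 16 for the string in %edx.  L(less32bytes)
   adds these offsets to the two pointers, undoes the operand swap from
   L(crosspage) when bit 0x20 of %ebx is set, and lets the byte mask (moved
   into %ecx at L(ret2)) select the first differing byte.  */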
1914	.p2align 4
1915L(aftertail):
1916	pcmpeqb	%xmm3, %xmm1
1917	psubb	%xmm0, %xmm1
1918	pmovmskb %xmm1, %esi
1919	not	%esi
1920L(exit):
1921	mov	%ebx, %edi
1922	and	$0x1f, %edi
1923	lea	-16(%edi, %ecx), %edi
1924L(less32bytes):
1925	add	%edi, %edx
1926	add	%ecx, %eax
1927	test	$0x20, %ebx
1928	jz	L(ret2)
1929	xchg	%eax, %edx
1930
1931	.p2align 4
1932L(ret2):
1933	mov	%esi, %ecx
1934	POP	(%esi)
1935	POP	(%edi)
1936	POP	(%ebx)
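/* On entry %ecx holds a bit-per-byte mask whose lowest set bit marks the
   first position at which the strings differ or terminate: %cl covers
   bytes 0-7 and %ch covers bytes 8-15, which is why L(2next_8_bytes)
   advances both pointers by eight before re-testing.  */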
1937L(less16bytes):
1938	test	%cl, %cl
1939	jz	L(2next_8_bytes)
1940
1941	test	$0x01, %cl
1942	jnz	L(Byte0)
1943
1944	test	$0x02, %cl
1945	jnz	L(Byte1)
1946
1947	test	$0x04, %cl
1948	jnz	L(Byte2)
1949
1950	test	$0x08, %cl
1951	jnz	L(Byte3)
1952
1953	test	$0x10, %cl
1954	jnz	L(Byte4)
1955
1956	test	$0x20, %cl
1957	jnz	L(Byte5)
1958
1959	test	$0x40, %cl
1960	jnz	L(Byte6)
1961#ifdef USE_AS_STRNCMP
1962	cmpl	$7, %ebp
1963	jbe	L(eq)
1964#endif
1965
1966	movzbl	7(%eax), %ecx
1967	movzbl	7(%edx), %eax
1968
1969	sub	%ecx, %eax
1970	RETURN
1971
1972	.p2align 4
1973L(Byte0):
1974#ifdef USE_AS_STRNCMP
1975	cmpl	$0, %ebp
1976	jbe	L(eq)
1977#endif
1978	movzbl	(%eax), %ecx
1979	movzbl	(%edx), %eax
1980
1981	sub	%ecx, %eax
1982	RETURN
1983
1984	.p2align 4
1985L(Byte1):
1986#ifdef USE_AS_STRNCMP
1987	cmpl	$1, %ebp
1988	jbe	L(eq)
1989#endif
1990	movzbl	1(%eax), %ecx
1991	movzbl	1(%edx), %eax
1992
1993	sub	%ecx, %eax
1994	RETURN
1995
1996	.p2align 4
1997L(Byte2):
1998#ifdef USE_AS_STRNCMP
1999	cmpl	$2, %ebp
2000	jbe	L(eq)
2001#endif
2002	movzbl	2(%eax), %ecx
2003	movzbl	2(%edx), %eax
2004
2005	sub	%ecx, %eax
2006	RETURN
2007
2008	.p2align 4
2009L(Byte3):
2010#ifdef USE_AS_STRNCMP
2011	cmpl	$3, %ebp
2012	jbe	L(eq)
2013#endif
2014	movzbl	3(%eax), %ecx
2015	movzbl	3(%edx), %eax
2016
2017	sub	%ecx, %eax
2018	RETURN
2019
2020	.p2align 4
2021L(Byte4):
2022#ifdef USE_AS_STRNCMP
2023	cmpl	$4, %ebp
2024	jbe	L(eq)
2025#endif
2026	movzbl	4(%eax), %ecx
2027	movzbl	4(%edx), %eax
2028
2029	sub	%ecx, %eax
2030	RETURN
2031
2032	.p2align 4
2033L(Byte5):
2034#ifdef USE_AS_STRNCMP
2035	cmpl	$5, %ebp
2036	jbe	L(eq)
2037#endif
2038	movzbl	5(%eax), %ecx
2039	movzbl	5(%edx), %eax
2040
2041	sub	%ecx, %eax
2042	RETURN
2043
2044	.p2align 4
2045L(Byte6):
2046#ifdef USE_AS_STRNCMP
2047	cmpl	$6, %ebp
2048	jbe	L(eq)
2049#endif
2050	movzbl	6(%eax), %ecx
2051	movzbl	6(%edx), %eax
2052
2053	sub	%ecx, %eax
2054	RETURN
2055
2056	.p2align 4
2057L(2next_8_bytes):
2058	add	$8, %eax
2059	add	$8, %edx
2060#ifdef USE_AS_STRNCMP
2061	cmpl	$8, %ebp
2062	lea	-8(%ebp), %ebp
2063	jbe	L(eq)
2064#endif
2065
2066	test	$0x01, %ch
2067	jnz	L(Byte0)
2068
2069	test	$0x02, %ch
2070	jnz	L(Byte1)
2071
2072	test	$0x04, %ch
2073	jnz	L(Byte2)
2074
2075	test	$0x08, %ch
2076	jnz	L(Byte3)
2077
2078	test	$0x10, %ch
2079	jnz	L(Byte4)
2080
2081	test	$0x20, %ch
2082	jnz	L(Byte5)
2083
2084	test	$0x40, %ch
2085	jnz	L(Byte6)
2086
2087#ifdef USE_AS_STRNCMP
2088	cmpl	$7, %ebp
2089	jbe	L(eq)
2090#endif
2091	movzbl	7(%eax), %ecx
2092	movzbl	7(%edx), %eax
2093
2094	sub	%ecx, %eax
2095	RETURN
2096
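/* L(neq) is entered with the flags of the preceding cmpb still live:
   return 1 when the byte from string1 (via %edx) is the larger one,
   otherwise -1.  */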
2097	.p2align 4
2098L(neq):
2099	mov	$1, %eax
2100	ja	L(neq_bigger)
2101	neg	%eax
2102L(neq_bigger):
2103	RETURN
2104
2105#ifdef USE_AS_STRNCMP
2106	cfi_restore_state
2107	.p2align 4
2108L(more8byteseq):
2109	POP	(%esi)
2110	POP	(%edi)
2111	POP	(%ebx)
2112#endif
2113
2114L(eq):
2115
2116#ifdef USE_AS_STRNCMP
2117	POP	(%ebp)
2118#endif
2119	xorl	%eax, %eax
2120	ret
2121
2122#ifdef USE_AS_STRNCMP
2123	CFI_PUSH (%ebp)
2124
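/* strncmp only: byte-by-byte comparison for counts below 16.  Each step
   stops on a mismatch (L(neq)), on a terminator, or once %ebp bytes have
   been compared (both of the latter fall through to L(eq) and return 0).  */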
2125	.p2align 4
2126L(less16bytes_sncmp):
2127	test	%ebp, %ebp
2128	jz	L(eq)
2129
2130	movzbl	(%eax), %ecx
2131	cmpb	%cl, (%edx)
2132	jne	L(neq)
2133	test	%cl, %cl
2134	je	L(eq)
2135
2136	cmpl	$1, %ebp
2137	je	L(eq)
2138
2139	movzbl	1(%eax), %ecx
2140	cmpb	%cl, 1(%edx)
2141	jne	L(neq)
2142	test	%cl, %cl
2143	je	L(eq)
2144
2145	cmpl	$2, %ebp
2146	je	L(eq)
2147
2148	movzbl	2(%eax), %ecx
2149	cmpb	%cl, 2(%edx)
2150	jne	L(neq)
2151	test	%cl, %cl
2152	je	L(eq)
2153
2154	cmpl	$3, %ebp
2155	je	L(eq)
2156
2157	movzbl	3(%eax), %ecx
2158	cmpb	%cl, 3(%edx)
2159	jne	L(neq)
2160	test	%cl, %cl
2161	je	L(eq)
2162
2163	cmpl	$4, %ebp
2164	je	L(eq)
2165
2166	movzbl	4(%eax), %ecx
2167	cmpb	%cl, 4(%edx)
2168	jne	L(neq)
2169	test	%cl, %cl
2170	je	L(eq)
2171
2172	cmpl	$5, %ebp
2173	je	L(eq)
2174
2175	movzbl	5(%eax), %ecx
2176	cmpb	%cl, 5(%edx)
2177	jne	L(neq)
2178	test	%cl, %cl
2179	je	L(eq)
2180
2181	cmpl	$6, %ebp
2182	je	L(eq)
2183
2184	movzbl	6(%eax), %ecx
2185	cmpb	%cl, 6(%edx)
2186	jne	L(neq)
2187	test	%cl, %cl
2188	je	L(eq)
2189
2190	cmpl	$7, %ebp
2191	je	L(eq)
2192
2193	movzbl	7(%eax), %ecx
2194	cmpb	%cl, 7(%edx)
2195	jne	L(neq)
2196	test	%cl, %cl
2197	je	L(eq)
2198
2199
2200	cmpl	$8, %ebp
2201	je	L(eq)
2202
2203	movzbl	8(%eax), %ecx
2204	cmpb	%cl, 8(%edx)
2205	jne	L(neq)
2206	test	%cl, %cl
2207	je	L(eq)
2208
2209	cmpl	$9, %ebp
2210	je	L(eq)
2211
2212	movzbl	9(%eax), %ecx
2213	cmpb	%cl, 9(%edx)
2214	jne	L(neq)
2215	test	%cl, %cl
2216	je	L(eq)
2217
2218	cmpl	$10, %ebp
2219	je	L(eq)
2220
2221	movzbl	10(%eax), %ecx
2222	cmpb	%cl, 10(%edx)
2223	jne	L(neq)
2224	test	%cl, %cl
2225	je	L(eq)
2226
2227	cmpl	$11, %ebp
2228	je	L(eq)
2229
2230	movzbl	11(%eax), %ecx
2231	cmpb	%cl, 11(%edx)
2232	jne	L(neq)
2233	test	%cl, %cl
2234	je	L(eq)
2235
2236
2237	cmpl	$12, %ebp
2238	je	L(eq)
2239
2240	movzbl	12(%eax), %ecx
2241	cmpb	%cl, 12(%edx)
2242	jne	L(neq)
2243	test	%cl, %cl
2244	je	L(eq)
2245
2246	cmpl	$13, %ebp
2247	je	L(eq)
2248
2249	movzbl	13(%eax), %ecx
2250	cmpb	%cl, 13(%edx)
2251	jne	L(neq)
2252	test	%cl, %cl
2253	je	L(eq)
2254
2255	cmpl	$14, %ebp
2256	je	L(eq)
2257
2258	movzbl	14(%eax), %ecx
2259	cmpb	%cl, 14(%edx)
2260	jne	L(neq)
2261	test	%cl, %cl
2262	je	L(eq)
2263
2264	cmpl	$15, %ebp
2265	je	L(eq)
2266
2267	movzbl	15(%eax), %ecx
2268	cmpb	%cl, 15(%edx)
2269	jne	L(neq)
2270	test	%cl, %cl
2271	je	L(eq)
2272
2273	POP	(%ebp)
2274	xor	%eax, %eax
2275	ret
2276#endif
2277
2278END (STRCMP)
2279