ssse3-strcmp-latest.S revision 832a86eaba56dcf8066e4b96df12738a9dff7053
1/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef L
32# define L(label)	.L##label
33#endif
34
35#ifndef cfi_startproc
36# define cfi_startproc			.cfi_startproc
37#endif
38
39#ifndef cfi_endproc
40# define cfi_endproc			.cfi_endproc
41#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
48# define cfi_restore(reg)		.cfi_restore reg
49#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
53#endif
54
55#ifndef cfi_remember_state
56# define cfi_remember_state		.cfi_remember_state
57#endif
58
59#ifndef cfi_restore_state
60# define cfi_restore_state		.cfi_restore_state
61#endif
62
63#ifndef ENTRY
64# define ENTRY(name)			\
65	.type name,  @function; 	\
66	.globl name;			\
67	.p2align 4;			\
68name:					\
69	cfi_startproc
70#endif
71
72#ifndef END
73# define END(name)			\
74	cfi_endproc;			\
75	.size name, .-name
76#endif
77
78#define CFI_PUSH(REG)						\
79  cfi_adjust_cfa_offset (4);					\
80  cfi_rel_offset (REG, 0)
81
82#define CFI_POP(REG)						\
83  cfi_adjust_cfa_offset (-4);					\
84  cfi_restore (REG)
85
86#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
87#define POP(REG)	popl REG; CFI_POP (REG)
88
89#ifndef USE_AS_STRNCMP
90# define STR1		4
91# define STR2		STR1+4
92# define RETURN		ret
93
94# define UPDATE_STRNCMP_COUNTER
95#else
96# define STR1		8
97# define STR2		STR1+4
98# define CNT		STR2+4
99# define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
100
101# define UPDATE_STRNCMP_COUNTER				\
102	/* number of bytes left to compare */		\
103	mov	$16, %esi;				\
104	sub	%ecx, %esi;				\
105	cmp	%esi, %ebp;				\
106	jbe	L(more8byteseq);			\
107	sub	%esi, %ebp
108#endif
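/*
 * Calling-convention note (a reference sketch, not part of the build):
 * under the cdecl layout assumed by the STR1/STR2/CNT offsets above, the
 * first string pointer is loaded into %edx and the second into %eax; the
 * strncmp build additionally keeps the byte budget in %ebp, and
 * UPDATE_STRNCMP_COUNTER deducts the bytes already verified while reaching
 * 16-byte alignment (16 - %ecx), returning 0 early if the budget is
 * exhausted within those bytes.  The routine implements the usual
 * semantics, of which only the sign of the result is significant:
 *
 *	// Reference model only; the name is illustrative.
 *	int strcmp_ref(const char *s1, const char *s2)	// s1 -> %edx, s2 -> %eax
 *	{
 *		const unsigned char *p1 = (const unsigned char *)s1;
 *		const unsigned char *p2 = (const unsigned char *)s2;
 *		while (*p1 != '\0' && *p1 == *p2)
 *			p1++, p2++;
 *		return *p1 - *p2;	// sign of the first differing byte pair
 *	}
 */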
109
110	.section .text.ssse3,"ax",@progbits
111ENTRY (ssse3_strcmp_latest)
112#ifdef USE_AS_STRNCMP
113	PUSH	(%ebp)
114#endif
115	movl	STR1(%esp), %edx
116	movl	STR2(%esp), %eax
117#ifdef USE_AS_STRNCMP
118	movl	CNT(%esp), %ebp
119	cmp	$16, %ebp
120	jb	L(less16bytes_sncmp)
121	jmp	L(more16bytes)
122#endif
123
124	movzbl	(%eax), %ecx
125	cmpb	%cl, (%edx)
126	jne	L(neq)
127	cmpl	$0, %ecx
128	je	L(eq)
129
130	movzbl	1(%eax), %ecx
131	cmpb	%cl, 1(%edx)
132	jne	L(neq)
133	cmpl	$0, %ecx
134	je	L(eq)
135
136	movzbl	2(%eax), %ecx
137	cmpb	%cl, 2(%edx)
138	jne	L(neq)
139	cmpl	$0, %ecx
140	je	L(eq)
141
142	movzbl	3(%eax), %ecx
143	cmpb	%cl, 3(%edx)
144	jne	L(neq)
145	cmpl	$0, %ecx
146	je	L(eq)
147
148	movzbl	4(%eax), %ecx
149	cmpb	%cl, 4(%edx)
150	jne	L(neq)
151	cmpl	$0, %ecx
152	je	L(eq)
153
154	movzbl	5(%eax), %ecx
155	cmpb	%cl, 5(%edx)
156	jne	L(neq)
157	cmpl	$0, %ecx
158	je	L(eq)
159
160	movzbl	6(%eax), %ecx
161	cmpb	%cl, 6(%edx)
162	jne	L(neq)
163	cmpl	$0, %ecx
164	je	L(eq)
165
166	movzbl	7(%eax), %ecx
167	cmpb	%cl, 7(%edx)
168	jne	L(neq)
169	cmpl	$0, %ecx
170	je	L(eq)
171
172	add	$8, %edx
173	add	$8, %eax
174#ifdef USE_AS_STRNCMP
175	cmp	$8, %ebp
176	lea	-8(%ebp), %ebp
177	je	L(eq)
178L(more16bytes):
179#endif
180	movl	%edx, %ecx
181	and	$0xfff, %ecx
182	cmp	$0xff0, %ecx
183	ja	L(crosspage)
184	mov	%eax, %ecx
185	and	$0xfff, %ecx
186	cmp	$0xff0, %ecx
187	ja	L(crosspage)
188	pxor	%xmm0, %xmm0
189	movlpd	(%eax), %xmm1
190	movlpd	(%edx), %xmm2
191	movhpd	8(%eax), %xmm1
192	movhpd	8(%edx), %xmm2
193	pcmpeqb	%xmm1, %xmm0
194	pcmpeqb	%xmm2, %xmm1
195	psubb	%xmm0, %xmm1
196	pmovmskb %xmm1, %ecx
197	sub	$0xffff, %ecx
198	jnz	L(less16bytes)
199#ifdef USE_AS_STRNCMP
200	cmp	$16, %ebp
201	lea	-16(%ebp), %ebp
202	jbe	L(eq)
203#endif
204	add	$16, %eax
205	add	$16, %edx
206
207L(crosspage):
208
209	PUSH	(%ebx)
210	PUSH	(%edi)
211	PUSH	(%esi)
212#ifdef USE_AS_STRNCMP
213	cfi_remember_state
214#endif
215
216	movl	%edx, %edi
217	movl	%eax, %ecx
218	and	$0xf, %ecx
219	and	$0xf, %edi
220	xor	%ecx, %eax
221	xor	%edi, %edx
222	xor	%ebx, %ebx
223	cmp	%edi, %ecx
224	je	L(ashr_0)
225	ja	L(bigger)
226	or	$0x20, %ebx
227	xchg	%edx, %eax
228	xchg	%ecx, %edi
229L(bigger):
230	lea	15(%edi), %edi
231	sub	%ecx, %edi
232	cmp	$8, %edi
233	jle	L(ashr_less_8)
234	cmp	$14, %edi
235	je	L(ashr_15)
236	cmp	$13, %edi
237	je	L(ashr_14)
238	cmp	$12, %edi
239	je	L(ashr_13)
240	cmp	$11, %edi
241	je	L(ashr_12)
242	cmp	$10, %edi
243	je	L(ashr_11)
244	cmp	$9, %edi
245	je	L(ashr_10)
246L(ashr_less_8):
247	je	L(ashr_9)	/* reached via the jle above: flags are from "cmp $8, %edi", so equal means %edi == 8 */
248	cmp	$7, %edi
249	je	L(ashr_8)
250	cmp	$6, %edi
251	je	L(ashr_7)
252	cmp	$5, %edi
253	je	L(ashr_6)
254	cmp	$4, %edi
255	je	L(ashr_5)
256	cmp	$3, %edi
257	je	L(ashr_4)
258	cmp	$2, %edi
259	je	L(ashr_3)
260	cmp	$1, %edi
261	je	L(ashr_2)
262	cmp	$0, %edi
263	je	L(ashr_1)
264
265/*
266 * The following cases will be handled by ashr_0
267 *  ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
268 *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
269 */
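/*
 * The loops below test one 16-byte block per iteration with a
 * pcmpeqb/pcmpeqb/psubb/pmovmskb sequence.  A per-byte C model of that
 * sequence (a sketch for explanation only; the helper name is made up):
 *
 *	// Returns a 16-bit mask; 0xffff means "all 16 byte pairs are equal
 *	// and none of them is NUL", i.e. keep looping.
 *	unsigned block16_mask(const unsigned char a[16], const unsigned char b[16])
 *	{
 *		unsigned mask = 0;
 *		for (int i = 0; i < 16; i++) {
 *			unsigned char eq_zero = (a[i] == 0)    ? 0xff : 0x00;	// pcmpeqb against zeroed xmm0
 *			unsigned char eq_pair = (a[i] == b[i]) ? 0xff : 0x00;	// pcmpeqb of the two blocks
 *			unsigned char merged  = (unsigned char)(eq_pair - eq_zero);	// psubb
 *			mask |= (unsigned)(merged >> 7) << i;			// pmovmskb takes bit 7
 *		}
 *		return mask;
 *	}
 *
 * The assembly then does "sub $0xffff, ...; jnz", so the lowest clear bit
 * of the mask marks either the first mismatch or the terminator.
 */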
270	.p2align 4
271L(ashr_0):
272	mov	$0xffff, %esi
273	movdqa	(%eax), %xmm1
274	pxor	%xmm0, %xmm0
275	pcmpeqb	%xmm1, %xmm0
276	pcmpeqb	(%edx), %xmm1
277	psubb	%xmm0, %xmm1
278	pmovmskb %xmm1, %edi
279	shr	%cl, %esi
280	shr	%cl, %edi
281	sub	%edi, %esi
282	mov	%ecx, %edi
283	jne	L(less32bytes)
284	UPDATE_STRNCMP_COUNTER
285	mov	$0x10, %ebx
286	mov	$0x10, %ecx
287	pxor	%xmm0, %xmm0
288	.p2align 4
289L(loop_ashr_0):
290	movdqa	(%eax, %ecx), %xmm1
291	movdqa	(%edx, %ecx), %xmm2
292
293	pcmpeqb	%xmm1, %xmm0
294	pcmpeqb	%xmm2, %xmm1
295	psubb	%xmm0, %xmm1
296	pmovmskb %xmm1, %esi
297	sub	$0xffff, %esi
298	jnz	L(exit)
299#ifdef USE_AS_STRNCMP
300	cmp	$16, %ebp
301	lea	-16(%ebp), %ebp
302	jbe	L(more8byteseq)
303#endif
304	add	$16, %ecx
305	jmp	L(loop_ashr_0)
306
307/*
308 * The following cases will be handled by ashr_1
309 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
310 *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
311 */
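/*
 * ashr_1 through ashr_15 share one idea: keep the %eax loads 16-byte
 * aligned and rebuild the matching, unaligned window of the %edx string
 * from two aligned loads with palignr.  A C model of that merge (a sketch
 * only; "n" is the per-case shift: 1 for ashr_1, 2 for ashr_2, and so on):
 *
 *	// prev is the previously loaded aligned block (xmm3), cur the fresh
 *	// one (xmm2/xmm4); out is what "palignr $n" leaves in xmm2 for the
 *	// pcmpeqb comparison against the aligned %eax block.
 *	void merge_window(unsigned char out[16], const unsigned char prev[16],
 *			  const unsigned char cur[16], int n)
 *	{
 *		for (int i = 0; i < 16; i++)
 *			out[i] = (i + n < 16) ? prev[i + n] : cur[i + n - 16];
 *	}
 *
 * %edi is seeded with ((%edx + n) & 0xfff) - 0x1000 and advanced by 16 per
 * block, so it goes positive before an aligned %edx load could step onto a
 * page not yet proven readable; the nibble_ashr paths then look for a
 * terminator in the tail of xmm3 before that load is allowed to happen.
 */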
312	.p2align 4
313L(ashr_1):
314	mov	$0xffff, %esi
315	pxor	%xmm0, %xmm0
316	movdqa	(%edx), %xmm2
317	movdqa	(%eax), %xmm1
318	pcmpeqb	%xmm1, %xmm0
319	pslldq	$15, %xmm2
320	pcmpeqb	%xmm1, %xmm2
321	psubb	%xmm0, %xmm2
322	pmovmskb %xmm2, %edi
323	shr	%cl, %esi
324	shr	%cl, %edi
325	sub	%edi, %esi
326	lea	-15(%ecx), %edi
327	jnz	L(less32bytes)
328
329	UPDATE_STRNCMP_COUNTER
330
331	movdqa	(%edx), %xmm3
332	pxor	%xmm0, %xmm0
333	mov	$16, %ecx
334	or	$1, %ebx
335	lea	1(%edx), %edi
336	and	$0xfff, %edi
337	sub	$0x1000, %edi
338
339	.p2align 4
340L(loop_ashr_1):
341	add	$16, %edi
342	jg	L(nibble_ashr_1)
343
344L(gobble_ashr_1):
345	movdqa	(%eax, %ecx), %xmm1
346	movdqa	(%edx, %ecx), %xmm2
347	movdqa	%xmm2, %xmm4
348
349	palignr	$1, %xmm3, %xmm2
350
351	pcmpeqb	%xmm1, %xmm0
352	pcmpeqb	%xmm2, %xmm1
353	psubb	%xmm0, %xmm1
354	pmovmskb %xmm1, %esi
355	sub	$0xffff, %esi
356	jnz	L(exit)
357#ifdef USE_AS_STRNCMP
358	cmp	$16, %ebp
359	lea	-16(%ebp), %ebp
360	jbe	L(more8byteseq)
361#endif
362
363	add	$16, %ecx
364	movdqa	%xmm4, %xmm3
365
366	add	$16, %edi
367	jg	L(nibble_ashr_1)
368
369	movdqa	(%eax, %ecx), %xmm1
370	movdqa	(%edx, %ecx), %xmm2
371	movdqa	%xmm2, %xmm4
372
373	palignr	$1, %xmm3, %xmm2
374
375	pcmpeqb	%xmm1, %xmm0
376	pcmpeqb	%xmm2, %xmm1
377	psubb	%xmm0, %xmm1
378	pmovmskb %xmm1, %esi
379	sub	$0xffff, %esi
380	jnz	L(exit)
381
382#ifdef USE_AS_STRNCMP
383	cmp	$16, %ebp
384	lea	-16(%ebp), %ebp
385	jbe	L(more8byteseq)
386#endif
387	add	$16, %ecx
388	movdqa	%xmm4, %xmm3
389	jmp	L(loop_ashr_1)
390
391	.p2align 4
392L(nibble_ashr_1):
393	pcmpeqb	%xmm3, %xmm0
394	pmovmskb %xmm0, %esi
395	test	$0xfffe, %esi
396	jnz	L(ashr_1_exittail)
397
398#ifdef USE_AS_STRNCMP
399	cmp	$15, %ebp
400	jbe	L(ashr_1_exittail)
401#endif
402	pxor	%xmm0, %xmm0
403	sub	$0x1000, %edi
404	jmp	L(gobble_ashr_1)
405
406	.p2align 4
407L(ashr_1_exittail):
408	movdqa	(%eax, %ecx), %xmm1
409	psrldq	$1, %xmm0
410	psrldq	$1, %xmm3
411	jmp	L(aftertail)
412
413/*
414 * The following cases will be handled by ashr_2
415 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
416 *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
417 */
418	.p2align 4
419L(ashr_2):
420	mov	$0xffff, %esi
421	pxor	%xmm0, %xmm0
422	movdqa	(%edx), %xmm2
423	movdqa	(%eax), %xmm1
424	pcmpeqb	%xmm1, %xmm0
425	pslldq	$14, %xmm2
426	pcmpeqb	%xmm1, %xmm2
427	psubb	%xmm0, %xmm2
428	pmovmskb %xmm2, %edi
429	shr	%cl, %esi
430	shr	%cl, %edi
431	sub	%edi, %esi
432	lea	-14(%ecx), %edi
433	jnz	L(less32bytes)
434
435	UPDATE_STRNCMP_COUNTER
436
437	movdqa	(%edx), %xmm3
438	pxor	%xmm0, %xmm0
439	mov	$16, %ecx
440	or	$2, %ebx
441	lea	2(%edx), %edi
442	and	$0xfff, %edi
443	sub	$0x1000, %edi
444
445	.p2align 4
446L(loop_ashr_2):
447	add	$16, %edi
448	jg	L(nibble_ashr_2)
449
450L(gobble_ashr_2):
451	movdqa	(%eax, %ecx), %xmm1
452	movdqa	(%edx, %ecx), %xmm2
453	movdqa	%xmm2, %xmm4
454
455	palignr	$2, %xmm3, %xmm2
456
457	pcmpeqb	%xmm1, %xmm0
458	pcmpeqb	%xmm2, %xmm1
459	psubb	%xmm0, %xmm1
460	pmovmskb %xmm1, %esi
461	sub	$0xffff, %esi
462	jnz	L(exit)
463
464#ifdef USE_AS_STRNCMP
465	cmp	$16, %ebp
466	lea	-16(%ebp), %ebp
467	jbe	L(more8byteseq)
468#endif
469	add	$16, %ecx
470	movdqa	%xmm4, %xmm3
471
472	add	$16, %edi
473	jg	L(nibble_ashr_2)
474
475	movdqa	(%eax, %ecx), %xmm1
476	movdqa	(%edx, %ecx), %xmm2
477	movdqa	%xmm2, %xmm4
478
479	palignr	$2, %xmm3, %xmm2
480
481	pcmpeqb	%xmm1, %xmm0
482	pcmpeqb	%xmm2, %xmm1
483	psubb	%xmm0, %xmm1
484	pmovmskb %xmm1, %esi
485	sub	$0xffff, %esi
486	jnz	L(exit)
487
488#ifdef USE_AS_STRNCMP
489	cmp	$16, %ebp
490	lea	-16(%ebp), %ebp
491	jbe	L(more8byteseq)
492#endif
493	add	$16, %ecx
494	movdqa	%xmm4, %xmm3
495	jmp	L(loop_ashr_2)
496
497	.p2align 4
498L(nibble_ashr_2):
499	pcmpeqb	%xmm3, %xmm0
500	pmovmskb %xmm0, %esi
501	test	$0xfffc, %esi
502	jnz	L(ashr_2_exittail)
503
504#ifdef USE_AS_STRNCMP
505	cmp	$14, %ebp
506	jbe	L(ashr_2_exittail)
507#endif
508
509	pxor	%xmm0, %xmm0
510	sub	$0x1000, %edi
511	jmp	L(gobble_ashr_2)
512
513	.p2align 4
514L(ashr_2_exittail):
515	movdqa	(%eax, %ecx), %xmm1
516	psrldq	$2, %xmm0
517	psrldq	$2, %xmm3
518	jmp	L(aftertail)
519
520/*
521 * The following cases will be handled by ashr_3
522 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
523 *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
524 */
525	.p2align 4
526L(ashr_3):
527	mov	$0xffff, %esi
528	pxor	%xmm0, %xmm0
529	movdqa	(%edx), %xmm2
530	movdqa	(%eax), %xmm1
531	pcmpeqb	%xmm1, %xmm0
532	pslldq	$13, %xmm2
533	pcmpeqb	%xmm1, %xmm2
534	psubb	%xmm0, %xmm2
535	pmovmskb %xmm2, %edi
536	shr	%cl, %esi
537	shr	%cl, %edi
538	sub	%edi, %esi
539	lea	-13(%ecx), %edi
540	jnz	L(less32bytes)
541
542	UPDATE_STRNCMP_COUNTER
543
544	movdqa	(%edx), %xmm3
545	pxor	%xmm0, %xmm0
546	mov	$16, %ecx
547	or	$3, %ebx
548	lea	3(%edx), %edi
549	and	$0xfff, %edi
550	sub	$0x1000, %edi
551
552	.p2align 4
553L(loop_ashr_3):
554	add	$16, %edi
555	jg	L(nibble_ashr_3)
556
557L(gobble_ashr_3):
558	movdqa	(%eax, %ecx), %xmm1
559	movdqa	(%edx, %ecx), %xmm2
560	movdqa	%xmm2, %xmm4
561
562	palignr	$3, %xmm3, %xmm2
563
564	pcmpeqb	%xmm1, %xmm0
565	pcmpeqb	%xmm2, %xmm1
566	psubb	%xmm0, %xmm1
567	pmovmskb %xmm1, %esi
568	sub	$0xffff, %esi
569	jnz	L(exit)
570
571#ifdef USE_AS_STRNCMP
572	cmp	$16, %ebp
573	lea	-16(%ebp), %ebp
574	jbe	L(more8byteseq)
575#endif
576	add	$16, %ecx
577	movdqa	%xmm4, %xmm3
578
579	add	$16, %edi
580	jg	L(nibble_ashr_3)
581
582	movdqa	(%eax, %ecx), %xmm1
583	movdqa	(%edx, %ecx), %xmm2
584	movdqa	%xmm2, %xmm4
585
586	palignr	$3, %xmm3, %xmm2
587
588	pcmpeqb	%xmm1, %xmm0
589	pcmpeqb	%xmm2, %xmm1
590	psubb	%xmm0, %xmm1
591	pmovmskb %xmm1, %esi
592	sub	$0xffff, %esi
593	jnz	L(exit)
594
595#ifdef USE_AS_STRNCMP
596	cmp	$16, %ebp
597	lea	-16(%ebp), %ebp
598	jbe	L(more8byteseq)
599#endif
600	add	$16, %ecx
601	movdqa	%xmm4, %xmm3
602	jmp	L(loop_ashr_3)
603
604	.p2align 4
605L(nibble_ashr_3):
606	pcmpeqb	%xmm3, %xmm0
607	pmovmskb %xmm0, %esi
608	test	$0xfff8, %esi
609	jnz	L(ashr_3_exittail)
610
611#ifdef USE_AS_STRNCMP
612	cmp	$13, %ebp
613	jbe	L(ashr_3_exittail)
614#endif
615	pxor	%xmm0, %xmm0
616	sub	$0x1000, %edi
617	jmp	L(gobble_ashr_3)
618
619	.p2align 4
620L(ashr_3_exittail):
621	movdqa	(%eax, %ecx), %xmm1
622	psrldq	$3, %xmm0
623	psrldq	$3, %xmm3
624	jmp	L(aftertail)
625
626/*
627 * The following cases will be handled by ashr_4
628 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
629 *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
630 */
631	.p2align 4
632L(ashr_4):
633	mov	$0xffff, %esi
634	pxor	%xmm0, %xmm0
635	movdqa	(%edx), %xmm2
636	movdqa	(%eax), %xmm1
637	pcmpeqb	%xmm1, %xmm0
638	pslldq	$12, %xmm2
639	pcmpeqb	%xmm1, %xmm2
640	psubb	%xmm0, %xmm2
641	pmovmskb %xmm2, %edi
642	shr	%cl, %esi
643	shr	%cl, %edi
644	sub	%edi, %esi
645	lea	-12(%ecx), %edi
646	jnz	L(less32bytes)
647
648	UPDATE_STRNCMP_COUNTER
649
650	movdqa	(%edx), %xmm3
651	pxor	%xmm0, %xmm0
652	mov	$16, %ecx
653	or	$4, %ebx
654	lea	4(%edx), %edi
655	and	$0xfff, %edi
656	sub	$0x1000, %edi
657
658	.p2align 4
659L(loop_ashr_4):
660	add	$16, %edi
661	jg	L(nibble_ashr_4)
662
663L(gobble_ashr_4):
664	movdqa	(%eax, %ecx), %xmm1
665	movdqa	(%edx, %ecx), %xmm2
666	movdqa	%xmm2, %xmm4
667
668	palignr	$4, %xmm3, %xmm2
669
670	pcmpeqb	%xmm1, %xmm0
671	pcmpeqb	%xmm2, %xmm1
672	psubb	%xmm0, %xmm1
673	pmovmskb %xmm1, %esi
674	sub	$0xffff, %esi
675	jnz	L(exit)
676
677#ifdef USE_AS_STRNCMP
678	cmp	$16, %ebp
679	lea	-16(%ebp), %ebp
680	jbe	L(more8byteseq)
681#endif
682
683	add	$16, %ecx
684	movdqa	%xmm4, %xmm3
685
686	add	$16, %edi
687	jg	L(nibble_ashr_4)
688
689	movdqa	(%eax, %ecx), %xmm1
690	movdqa	(%edx, %ecx), %xmm2
691	movdqa	%xmm2, %xmm4
692
693	palignr	$4, %xmm3, %xmm2
694
695	pcmpeqb	%xmm1, %xmm0
696	pcmpeqb	%xmm2, %xmm1
697	psubb	%xmm0, %xmm1
698	pmovmskb %xmm1, %esi
699	sub	$0xffff, %esi
700	jnz	L(exit)
701
702#ifdef USE_AS_STRNCMP
703	cmp	$16, %ebp
704	lea	-16(%ebp), %ebp
705	jbe	L(more8byteseq)
706#endif
707
708	add	$16, %ecx
709	movdqa	%xmm4, %xmm3
710	jmp	L(loop_ashr_4)
711
712	.p2align 4
713L(nibble_ashr_4):
714	pcmpeqb	%xmm3, %xmm0
715	pmovmskb %xmm0, %esi
716	test	$0xfff0, %esi
717	jnz	L(ashr_4_exittail)
718
719#ifdef USE_AS_STRNCMP
720	cmp	$12, %ebp
721	jbe	L(ashr_4_exittail)
722#endif
723
724	pxor	%xmm0, %xmm0
725	sub	$0x1000, %edi
726	jmp	L(gobble_ashr_4)
727
728	.p2align 4
729L(ashr_4_exittail):
730	movdqa	(%eax, %ecx), %xmm1
731	psrldq	$4, %xmm0
732	psrldq	$4, %xmm3
733	jmp	L(aftertail)
734
735/*
736 * The following cases will be handled by ashr_5
737 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
738 *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
739 */
740	.p2align 4
741L(ashr_5):
742	mov	$0xffff, %esi
743	pxor	%xmm0, %xmm0
744	movdqa	(%edx), %xmm2
745	movdqa	(%eax), %xmm1
746	pcmpeqb	%xmm1, %xmm0
747	pslldq	$11, %xmm2
748	pcmpeqb	%xmm1, %xmm2
749	psubb	%xmm0, %xmm2
750	pmovmskb %xmm2, %edi
751	shr	%cl, %esi
752	shr	%cl, %edi
753	sub	%edi, %esi
754	lea	-11(%ecx), %edi
755	jnz	L(less32bytes)
756
757	UPDATE_STRNCMP_COUNTER
758
759	movdqa	(%edx), %xmm3
760	pxor	%xmm0, %xmm0
761	mov	$16, %ecx
762	or	$5, %ebx
763	lea	5(%edx), %edi
764	and	$0xfff, %edi
765	sub	$0x1000, %edi
766
767	.p2align 4
768L(loop_ashr_5):
769	add	$16, %edi
770	jg	L(nibble_ashr_5)
771
772L(gobble_ashr_5):
773	movdqa	(%eax, %ecx), %xmm1
774	movdqa	(%edx, %ecx), %xmm2
775	movdqa	%xmm2, %xmm4
776
777	palignr	$5, %xmm3, %xmm2
778
779	pcmpeqb	%xmm1, %xmm0
780	pcmpeqb	%xmm2, %xmm1
781	psubb	%xmm0, %xmm1
782	pmovmskb %xmm1, %esi
783	sub	$0xffff, %esi
784	jnz	L(exit)
785
786#ifdef USE_AS_STRNCMP
787	cmp	$16, %ebp
788	lea	-16(%ebp), %ebp
789	jbe	L(more8byteseq)
790#endif
791	add	$16, %ecx
792	movdqa	%xmm4, %xmm3
793
794	add	$16, %edi
795	jg	L(nibble_ashr_5)
796
797	movdqa	(%eax, %ecx), %xmm1
798	movdqa	(%edx, %ecx), %xmm2
799	movdqa	%xmm2, %xmm4
800
801	palignr	$5, %xmm3, %xmm2
802
803	pcmpeqb	%xmm1, %xmm0
804	pcmpeqb	%xmm2, %xmm1
805	psubb	%xmm0, %xmm1
806	pmovmskb %xmm1, %esi
807	sub	$0xffff, %esi
808	jnz	L(exit)
809
810#ifdef USE_AS_STRNCMP
811	cmp	$16, %ebp
812	lea	-16(%ebp), %ebp
813	jbe	L(more8byteseq)
814#endif
815	add	$16, %ecx
816	movdqa	%xmm4, %xmm3
817	jmp	L(loop_ashr_5)
818
819	.p2align 4
820L(nibble_ashr_5):
821	pcmpeqb	%xmm3, %xmm0
822	pmovmskb %xmm0, %esi
823	test	$0xffe0, %esi
824	jnz	L(ashr_5_exittail)
825
826#ifdef USE_AS_STRNCMP
827	cmp	$11, %ebp
828	jbe	L(ashr_5_exittail)
829#endif
830	pxor	%xmm0, %xmm0
831	sub	$0x1000, %edi
832	jmp	L(gobble_ashr_5)
833
834	.p2align 4
835L(ashr_5_exittail):
836	movdqa	(%eax, %ecx), %xmm1
837	psrldq	$5, %xmm0
838	psrldq	$5, %xmm3
839	jmp	L(aftertail)
840
841/*
842 * The following cases will be handled by ashr_6
843 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
844 *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
845 */
846
847	.p2align 4
848L(ashr_6):
849	mov	$0xffff, %esi
850	pxor	%xmm0, %xmm0
851	movdqa	(%edx), %xmm2
852	movdqa	(%eax), %xmm1
853	pcmpeqb	%xmm1, %xmm0
854	pslldq	$10, %xmm2
855	pcmpeqb	%xmm1, %xmm2
856	psubb	%xmm0, %xmm2
857	pmovmskb %xmm2, %edi
858	shr	%cl, %esi
859	shr	%cl, %edi
860	sub	%edi, %esi
861	lea	-10(%ecx), %edi
862	jnz	L(less32bytes)
863
864	UPDATE_STRNCMP_COUNTER
865
866	movdqa	(%edx), %xmm3
867	pxor	%xmm0, %xmm0
868	mov	$16, %ecx
869	or	$6, %ebx
870	lea	6(%edx), %edi
871	and	$0xfff, %edi
872	sub	$0x1000, %edi
873
874	.p2align 4
875L(loop_ashr_6):
876	add	$16, %edi
877	jg	L(nibble_ashr_6)
878
879L(gobble_ashr_6):
880	movdqa	(%eax, %ecx), %xmm1
881	movdqa	(%edx, %ecx), %xmm2
882	movdqa	%xmm2, %xmm4
883
884	palignr	$6, %xmm3, %xmm2
885
886	pcmpeqb	%xmm1, %xmm0
887	pcmpeqb	%xmm2, %xmm1
888	psubb	%xmm0, %xmm1
889	pmovmskb %xmm1, %esi
890	sub	$0xffff, %esi
891	jnz	L(exit)
892
893#ifdef USE_AS_STRNCMP
894	cmp	$16, %ebp
895	lea	-16(%ebp), %ebp
896	jbe	L(more8byteseq)
897#endif
898
899	add	$16, %ecx
900	movdqa	%xmm4, %xmm3
901
902	add	$16, %edi
903	jg	L(nibble_ashr_6)
904
905	movdqa	(%eax, %ecx), %xmm1
906	movdqa	(%edx, %ecx), %xmm2
907	movdqa	%xmm2, %xmm4
908
909	palignr	$6, %xmm3, %xmm2
910
911	pcmpeqb	%xmm1, %xmm0
912	pcmpeqb	%xmm2, %xmm1
913	psubb	%xmm0, %xmm1
914	pmovmskb %xmm1, %esi
915	sub	$0xffff, %esi
916	jnz	L(exit)
917#ifdef USE_AS_STRNCMP
918	cmp	$16, %ebp
919	lea	-16(%ebp), %ebp
920	jbe	L(more8byteseq)
921#endif
922
923	add	$16, %ecx
924	movdqa	%xmm4, %xmm3
925	jmp	L(loop_ashr_6)
926
927	.p2align 4
928L(nibble_ashr_6):
929	pcmpeqb	%xmm3, %xmm0
930	pmovmskb %xmm0, %esi
931	test	$0xffc0, %esi
932	jnz	L(ashr_6_exittail)
933
934#ifdef USE_AS_STRNCMP
935	cmp	$10, %ebp
936	jbe	L(ashr_6_exittail)
937#endif
938	pxor	%xmm0, %xmm0
939	sub	$0x1000, %edi
940	jmp	L(gobble_ashr_6)
941
942	.p2align 4
943L(ashr_6_exittail):
944	movdqa	(%eax, %ecx), %xmm1
945	psrldq	$6, %xmm0
946	psrldq	$6, %xmm3
947	jmp	L(aftertail)
948
949/*
950 * The following cases will be handled by ashr_7
951 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
952 *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
953 */
954
955	.p2align 4
956L(ashr_7):
957	mov	$0xffff, %esi
958	pxor	%xmm0, %xmm0
959	movdqa	(%edx), %xmm2
960	movdqa	(%eax), %xmm1
961	pcmpeqb	%xmm1, %xmm0
962	pslldq	$9, %xmm2
963	pcmpeqb	%xmm1, %xmm2
964	psubb	%xmm0, %xmm2
965	pmovmskb %xmm2, %edi
966	shr	%cl, %esi
967	shr	%cl, %edi
968	sub	%edi, %esi
969	lea	-9(%ecx), %edi
970	jnz	L(less32bytes)
971
972	UPDATE_STRNCMP_COUNTER
973
974	movdqa	(%edx), %xmm3
975	pxor	%xmm0, %xmm0
976	mov	$16, %ecx
977	or	$7, %ebx
978	lea	7(%edx), %edi
979	and	$0xfff, %edi
980	sub	$0x1000, %edi
981
982	.p2align 4
983L(loop_ashr_7):
984	add	$16, %edi
985	jg	L(nibble_ashr_7)
986
987L(gobble_ashr_7):
988	movdqa	(%eax, %ecx), %xmm1
989	movdqa	(%edx, %ecx), %xmm2
990	movdqa	%xmm2, %xmm4
991
992	palignr	$7, %xmm3, %xmm2
993
994	pcmpeqb	%xmm1, %xmm0
995	pcmpeqb	%xmm2, %xmm1
996	psubb	%xmm0, %xmm1
997	pmovmskb %xmm1, %esi
998	sub	$0xffff, %esi
999	jnz	L(exit)
1000
1001#ifdef USE_AS_STRNCMP
1002	cmp	$16, %ebp
1003	lea	-16(%ebp), %ebp
1004	jbe	L(more8byteseq)
1005#endif
1006
1007	add	$16, %ecx
1008	movdqa	%xmm4, %xmm3
1009
1010	add	$16, %edi
1011	jg	L(nibble_ashr_7)
1012
1013	movdqa	(%eax, %ecx), %xmm1
1014	movdqa	(%edx, %ecx), %xmm2
1015	movdqa	%xmm2, %xmm4
1016
1017	palignr	$7, %xmm3, %xmm2
1018
1019	pcmpeqb	%xmm1, %xmm0
1020	pcmpeqb	%xmm2, %xmm1
1021	psubb	%xmm0, %xmm1
1022	pmovmskb %xmm1, %esi
1023	sub	$0xffff, %esi
1024	jnz	L(exit)
1025
1026#ifdef USE_AS_STRNCMP
1027	cmp	$16, %ebp
1028	lea	-16(%ebp), %ebp
1029	jbe	L(more8byteseq)
1030#endif
1031
1032	add	$16, %ecx
1033	movdqa	%xmm4, %xmm3
1034	jmp	L(loop_ashr_7)
1035
1036	.p2align 4
1037L(nibble_ashr_7):
1038	pcmpeqb	%xmm3, %xmm0
1039	pmovmskb %xmm0, %esi
1040	test	$0xff80, %esi
1041	jnz	L(ashr_7_exittail)
1042
1043#ifdef USE_AS_STRNCMP
1044	cmp	$9, %ebp
1045	jbe	L(ashr_7_exittail)
1046#endif
1047	pxor	%xmm0, %xmm0
1049	sub	$0x1000, %edi
1050	jmp	L(gobble_ashr_7)
1051
1052	.p2align 4
1053L(ashr_7_exittail):
1054	movdqa	(%eax, %ecx), %xmm1
1055	psrldq	$7, %xmm0
1056	psrldq	$7, %xmm3
1057	jmp	L(aftertail)
1058
1059/*
1060 * The following cases will be handled by ashr_8
1061 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1062 *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
1063 */
1064	.p2align 4
1065L(ashr_8):
1066	mov	$0xffff, %esi
1067	pxor	%xmm0, %xmm0
1068	movdqa	(%edx), %xmm2
1069	movdqa	(%eax), %xmm1
1070	pcmpeqb	%xmm1, %xmm0
1071	pslldq	$8, %xmm2
1072	pcmpeqb	%xmm1, %xmm2
1073	psubb	%xmm0, %xmm2
1074	pmovmskb %xmm2, %edi
1075	shr	%cl, %esi
1076	shr	%cl, %edi
1077	sub	%edi, %esi
1078	lea	-8(%ecx), %edi
1079	jnz	L(less32bytes)
1080
1081	UPDATE_STRNCMP_COUNTER
1082
1083	movdqa	(%edx), %xmm3
1084	pxor	%xmm0, %xmm0
1085	mov	$16, %ecx
1086	or	$8, %ebx
1087	lea	8(%edx), %edi
1088	and	$0xfff, %edi
1089	sub	$0x1000, %edi
1090
1091	.p2align 4
1092L(loop_ashr_8):
1093	add	$16, %edi
1094	jg	L(nibble_ashr_8)
1095
1096L(gobble_ashr_8):
1097	movdqa	(%eax, %ecx), %xmm1
1098	movdqa	(%edx, %ecx), %xmm2
1099	movdqa	%xmm2, %xmm4
1100
1101	palignr	$8, %xmm3, %xmm2
1102
1103	pcmpeqb	%xmm1, %xmm0
1104	pcmpeqb	%xmm2, %xmm1
1105	psubb	%xmm0, %xmm1
1106	pmovmskb %xmm1, %esi
1107	sub	$0xffff, %esi
1108	jnz	L(exit)
1109
1110#ifdef USE_AS_STRNCMP
1111	cmp	$16, %ebp
1112	lea	-16(%ebp), %ebp
1113	jbe	L(more8byteseq)
1114#endif
1115	add	$16, %ecx
1116	movdqa	%xmm4, %xmm3
1117
1118	add	$16, %edi
1119	jg	L(nibble_ashr_8)
1120
1121	movdqa	(%eax, %ecx), %xmm1
1122	movdqa	(%edx, %ecx), %xmm2
1123	movdqa	%xmm2, %xmm4
1124
1125	palignr	$8, %xmm3, %xmm2
1126
1127	pcmpeqb	%xmm1, %xmm0
1128	pcmpeqb	%xmm2, %xmm1
1129	psubb	%xmm0, %xmm1
1130	pmovmskb %xmm1, %esi
1131	sub	$0xffff, %esi
1132	jnz	L(exit)
1133
1134#ifdef USE_AS_STRNCMP
1135	cmp	$16, %ebp
1136	lea	-16(%ebp), %ebp
1137	jbe	L(more8byteseq)
1138#endif
1139	add	$16, %ecx
1140	movdqa	%xmm4, %xmm3
1141	jmp	L(loop_ashr_8)
1142
1143	.p2align 4
1144L(nibble_ashr_8):
1145	pcmpeqb	%xmm3, %xmm0
1146	pmovmskb %xmm0, %esi
1147	test	$0xff00, %esi
1148	jnz	L(ashr_8_exittail)
1149
1150#ifdef USE_AS_STRNCMP
1151	cmp	$8, %ebp
1152	jbe	L(ashr_8_exittail)
1153#endif
1154	pxor	%xmm0, %xmm0
1156	sub	$0x1000, %edi
1157	jmp	L(gobble_ashr_8)
1158
1159	.p2align 4
1160L(ashr_8_exittail):
1161	movdqa	(%eax, %ecx), %xmm1
1162	psrldq	$8, %xmm0
1163	psrldq	$8, %xmm3
1164	jmp	L(aftertail)
1165
1166/*
1167 * The following cases will be handled by ashr_9
1168 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1169 *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
1170 */
1171	.p2align 4
1172L(ashr_9):
1173	mov	$0xffff, %esi
1174	pxor	%xmm0, %xmm0
1175	movdqa	(%edx), %xmm2
1176	movdqa	(%eax), %xmm1
1177	pcmpeqb	%xmm1, %xmm0
1178	pslldq	$7, %xmm2
1179	pcmpeqb	%xmm1, %xmm2
1180	psubb	%xmm0, %xmm2
1181	pmovmskb %xmm2, %edi
1182	shr	%cl, %esi
1183	shr	%cl, %edi
1184	sub	%edi, %esi
1185	lea	-7(%ecx), %edi
1186	jnz	L(less32bytes)
1187
1188	UPDATE_STRNCMP_COUNTER
1189
1190	movdqa	(%edx), %xmm3
1191	pxor	%xmm0, %xmm0
1192	mov	$16, %ecx
1193	or	$9, %ebx
1194	lea	9(%edx), %edi
1195	and	$0xfff, %edi
1196	sub	$0x1000, %edi
1197
1198	.p2align 4
1199L(loop_ashr_9):
1200	add	$16, %edi
1201	jg	L(nibble_ashr_9)
1202
1203L(gobble_ashr_9):
1204	movdqa	(%eax, %ecx), %xmm1
1205	movdqa	(%edx, %ecx), %xmm2
1206	movdqa	%xmm2, %xmm4
1207
1208	palignr	$9, %xmm3, %xmm2
1209
1210	pcmpeqb	%xmm1, %xmm0
1211	pcmpeqb	%xmm2, %xmm1
1212	psubb	%xmm0, %xmm1
1213	pmovmskb %xmm1, %esi
1214	sub	$0xffff, %esi
1215	jnz	L(exit)
1216
1217#ifdef USE_AS_STRNCMP
1218	cmp	$16, %ebp
1219	lea	-16(%ebp), %ebp
1220	jbe	L(more8byteseq)
1221#endif
1222	add	$16, %ecx
1223	movdqa	%xmm4, %xmm3
1224
1225	add	$16, %edi
1226	jg	L(nibble_ashr_9)
1227
1228	movdqa	(%eax, %ecx), %xmm1
1229	movdqa	(%edx, %ecx), %xmm2
1230	movdqa	%xmm2, %xmm4
1231
1232	palignr	$9, %xmm3, %xmm2
1233
1234	pcmpeqb	%xmm1, %xmm0
1235	pcmpeqb	%xmm2, %xmm1
1236	psubb	%xmm0, %xmm1
1237	pmovmskb %xmm1, %esi
1238	sub	$0xffff, %esi
1239	jnz	L(exit)
1240
1241#ifdef USE_AS_STRNCMP
1242	cmp	$16, %ebp
1243	lea	-16(%ebp), %ebp
1244	jbe	L(more8byteseq)
1245#endif
1246	add	$16, %ecx
1247	movdqa	%xmm4, %xmm3
1248	jmp	L(loop_ashr_9)
1249
1250	.p2align 4
1251L(nibble_ashr_9):
1252	pcmpeqb	%xmm3, %xmm0
1253	pmovmskb %xmm0, %esi
1254	test	$0xfe00, %esi
1255	jnz	L(ashr_9_exittail)
1256
1257#ifdef USE_AS_STRNCMP
1258	cmp	$7, %ebp
1259	jbe	L(ashr_9_exittail)
1260#endif
1261	pxor	%xmm0, %xmm0
1262	sub	$0x1000, %edi
1263	jmp	L(gobble_ashr_9)
1264
1265	.p2align 4
1266L(ashr_9_exittail):
1267	movdqa	(%eax, %ecx), %xmm1
1268	psrldq	$9, %xmm0
1269	psrldq	$9, %xmm3
1270	jmp	L(aftertail)
1271
1272/*
1273 * The following cases will be handled by ashr_10
1274 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1275 *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
1276 */
1277	.p2align 4
1278L(ashr_10):
1279	mov	$0xffff, %esi
1280	pxor	%xmm0, %xmm0
1281	movdqa	(%edx), %xmm2
1282	movdqa	(%eax), %xmm1
1283	pcmpeqb	%xmm1, %xmm0
1284	pslldq	$6, %xmm2
1285	pcmpeqb	%xmm1, %xmm2
1286	psubb	%xmm0, %xmm2
1287	pmovmskb %xmm2, %edi
1288	shr	%cl, %esi
1289	shr	%cl, %edi
1290	sub	%edi, %esi
1291	lea	-6(%ecx), %edi
1292	jnz	L(less32bytes)
1293
1294	UPDATE_STRNCMP_COUNTER
1295
1296	movdqa	(%edx), %xmm3
1297	pxor	%xmm0, %xmm0
1298	mov	$16, %ecx
1299	or	$10, %ebx
1300	lea	10(%edx), %edi
1301	and	$0xfff, %edi
1302	sub	$0x1000, %edi
1303
1304	.p2align 4
1305L(loop_ashr_10):
1306	add	$16, %edi
1307	jg	L(nibble_ashr_10)
1308
1309L(gobble_ashr_10):
1310	movdqa	(%eax, %ecx), %xmm1
1311	movdqa	(%edx, %ecx), %xmm2
1312	movdqa	%xmm2, %xmm4
1313
1314	palignr	$10, %xmm3, %xmm2
1315
1316	pcmpeqb	%xmm1, %xmm0
1317	pcmpeqb	%xmm2, %xmm1
1318	psubb	%xmm0, %xmm1
1319	pmovmskb %xmm1, %esi
1320	sub	$0xffff, %esi
1321	jnz	L(exit)
1322
1323#ifdef USE_AS_STRNCMP
1324	cmp	$16, %ebp
1325	lea	-16(%ebp), %ebp
1326	jbe	L(more8byteseq)
1327#endif
1328	add	$16, %ecx
1329	movdqa	%xmm4, %xmm3
1330
1331	add	$16, %edi
1332	jg	L(nibble_ashr_10)
1333
1334	movdqa	(%eax, %ecx), %xmm1
1335	movdqa	(%edx, %ecx), %xmm2
1336	movdqa	%xmm2, %xmm4
1337
1338	palignr	$10, %xmm3, %xmm2
1339
1340	pcmpeqb	%xmm1, %xmm0
1341	pcmpeqb	%xmm2, %xmm1
1342	psubb	%xmm0, %xmm1
1343	pmovmskb %xmm1, %esi
1344	sub	$0xffff, %esi
1345	jnz	L(exit)
1346
1347#ifdef USE_AS_STRNCMP
1348	cmp	$16, %ebp
1349	lea	-16(%ebp), %ebp
1350	jbe	L(more8byteseq)
1351#endif
1352	add	$16, %ecx
1353	movdqa	%xmm4, %xmm3
1354	jmp	L(loop_ashr_10)
1355
1356	.p2align 4
1357L(nibble_ashr_10):
1358	pcmpeqb	%xmm3, %xmm0
1359	pmovmskb %xmm0, %esi
1360	test	$0xfc00, %esi
1361	jnz	L(ashr_10_exittail)
1362
1363#ifdef USE_AS_STRNCMP
1364	cmp	$6, %ebp
1365	jbe	L(ashr_10_exittail)
1366#endif
1367	pxor	%xmm0, %xmm0
1368	sub	$0x1000, %edi
1369	jmp	L(gobble_ashr_10)
1370
1371	.p2align 4
1372L(ashr_10_exittail):
1373	movdqa	(%eax, %ecx), %xmm1
1374	psrldq	$10, %xmm0
1375	psrldq	$10, %xmm3
1376	jmp	L(aftertail)
1377
1378/*
1379 * The following cases will be handled by ashr_11
1380 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1381 *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
1382 */
1383	.p2align 4
1384L(ashr_11):
1385	mov	$0xffff, %esi
1386	pxor	%xmm0, %xmm0
1387	movdqa	(%edx), %xmm2
1388	movdqa	(%eax), %xmm1
1389	pcmpeqb	%xmm1, %xmm0
1390	pslldq	$5, %xmm2
1391	pcmpeqb	%xmm1, %xmm2
1392	psubb	%xmm0, %xmm2
1393	pmovmskb %xmm2, %edi
1394	shr	%cl, %esi
1395	shr	%cl, %edi
1396	sub	%edi, %esi
1397	lea	-5(%ecx), %edi
1398	jnz	L(less32bytes)
1399
1400	UPDATE_STRNCMP_COUNTER
1401
1402	movdqa	(%edx), %xmm3
1403	pxor	%xmm0, %xmm0
1404	mov	$16, %ecx
1405	or	$11, %ebx
1406	lea	11(%edx), %edi
1407	and	$0xfff, %edi
1408	sub	$0x1000, %edi
1409
1410	.p2align 4
1411L(loop_ashr_11):
1412	add	$16, %edi
1413	jg	L(nibble_ashr_11)
1414
1415L(gobble_ashr_11):
1416	movdqa	(%eax, %ecx), %xmm1
1417	movdqa	(%edx, %ecx), %xmm2
1418	movdqa	%xmm2, %xmm4
1419
1420	palignr	$11, %xmm3, %xmm2
1421
1422	pcmpeqb	%xmm1, %xmm0
1423	pcmpeqb	%xmm2, %xmm1
1424	psubb	%xmm0, %xmm1
1425	pmovmskb %xmm1, %esi
1426	sub	$0xffff, %esi
1427	jnz	L(exit)
1428
1429#ifdef USE_AS_STRNCMP
1430	cmp	$16, %ebp
1431	lea	-16(%ebp), %ebp
1432	jbe	L(more8byteseq)
1433#endif
1434	add	$16, %ecx
1435	movdqa	%xmm4, %xmm3
1436
1437	add	$16, %edi
1438	jg	L(nibble_ashr_11)
1439
1440	movdqa	(%eax, %ecx), %xmm1
1441	movdqa	(%edx, %ecx), %xmm2
1442	movdqa	%xmm2, %xmm4
1443
1444	palignr	$11, %xmm3, %xmm2
1445
1446	pcmpeqb	%xmm1, %xmm0
1447	pcmpeqb	%xmm2, %xmm1
1448	psubb	%xmm0, %xmm1
1449	pmovmskb %xmm1, %esi
1450	sub	$0xffff, %esi
1451	jnz	L(exit)
1452
1453#ifdef USE_AS_STRNCMP
1454	cmp	$16, %ebp
1455	lea	-16(%ebp), %ebp
1456	jbe	L(more8byteseq)
1457#endif
1458	add	$16, %ecx
1459	movdqa	%xmm4, %xmm3
1460	jmp	L(loop_ashr_11)
1461
1462	.p2align 4
1463L(nibble_ashr_11):
1464	pcmpeqb	%xmm3, %xmm0
1465	pmovmskb %xmm0, %esi
1466	test	$0xf800, %esi
1467	jnz	L(ashr_11_exittail)
1468
1469#ifdef USE_AS_STRNCMP
1470	cmp	$5, %ebp
1471	jbe	L(ashr_11_exittail)
1472#endif
1473	pxor	%xmm0, %xmm0
1474	sub	$0x1000, %edi
1475	jmp	L(gobble_ashr_11)
1476
1477	.p2align 4
1478L(ashr_11_exittail):
1479	movdqa	(%eax, %ecx), %xmm1
1480	psrldq	$11, %xmm0
1481	psrldq	$11, %xmm3
1482	jmp	L(aftertail)
1483
1484/*
1485 * The following cases will be handled by ashr_12
1486 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1487 *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
1488 */
1489	.p2align 4
1490L(ashr_12):
1491	mov	$0xffff, %esi
1492	pxor	%xmm0, %xmm0
1493	movdqa	(%edx), %xmm2
1494	movdqa	(%eax), %xmm1
1495	pcmpeqb	%xmm1, %xmm0
1496	pslldq	$4, %xmm2
1497	pcmpeqb	%xmm1, %xmm2
1498	psubb	%xmm0, %xmm2
1499	pmovmskb %xmm2, %edi
1500	shr	%cl, %esi
1501	shr	%cl, %edi
1502	sub	%edi, %esi
1503	lea	-4(%ecx), %edi
1504	jnz	L(less32bytes)
1505
1506	UPDATE_STRNCMP_COUNTER
1507
1508	movdqa	(%edx), %xmm3
1509	pxor	%xmm0, %xmm0
1510	mov	$16, %ecx
1511	or	$12, %ebx
1512	lea	12(%edx), %edi
1513	and	$0xfff, %edi
1514	sub	$0x1000, %edi
1515
1516	.p2align 4
1517L(loop_ashr_12):
1518	add	$16, %edi
1519	jg	L(nibble_ashr_12)
1520
1521L(gobble_ashr_12):
1522	movdqa	(%eax, %ecx), %xmm1
1523	movdqa	(%edx, %ecx), %xmm2
1524	movdqa	%xmm2, %xmm4
1525
1526	palignr	$12, %xmm3, %xmm2
1527
1528	pcmpeqb	%xmm1, %xmm0
1529	pcmpeqb	%xmm2, %xmm1
1530	psubb	%xmm0, %xmm1
1531	pmovmskb %xmm1, %esi
1532	sub	$0xffff, %esi
1533	jnz	L(exit)
1534
1535#ifdef USE_AS_STRNCMP
1536	cmp	$16, %ebp
1537	lea	-16(%ebp), %ebp
1538	jbe	L(more8byteseq)
1539#endif
1540
1541	add	$16, %ecx
1542	movdqa	%xmm4, %xmm3
1543
1544	add	$16, %edi
1545	jg	L(nibble_ashr_12)
1546
1547	movdqa	(%eax, %ecx), %xmm1
1548	movdqa	(%edx, %ecx), %xmm2
1549	movdqa	%xmm2, %xmm4
1550
1551	palignr	$12, %xmm3, %xmm2
1552
1553	pcmpeqb	%xmm1, %xmm0
1554	pcmpeqb	%xmm2, %xmm1
1555	psubb	%xmm0, %xmm1
1556	pmovmskb %xmm1, %esi
1557	sub	$0xffff, %esi
1558	jnz	L(exit)
1559
1560#ifdef USE_AS_STRNCMP
1561	cmp	$16, %ebp
1562	lea	-16(%ebp), %ebp
1563	jbe	L(more8byteseq)
1564#endif
1565	add	$16, %ecx
1566	movdqa	%xmm4, %xmm3
1567	jmp	L(loop_ashr_12)
1568
1569	.p2align 4
1570L(nibble_ashr_12):
1571	pcmpeqb	%xmm3, %xmm0
1572	pmovmskb %xmm0, %esi
1573	test	$0xf000, %esi
1574	jnz	L(ashr_12_exittail)
1575
1576#ifdef USE_AS_STRNCMP
1577	cmp	$4, %ebp
1578	jbe	L(ashr_12_exittail)
1579#endif
1580	pxor	%xmm0, %xmm0
1581	sub	$0x1000, %edi
1582	jmp	L(gobble_ashr_12)
1583
1584	.p2align 4
1585L(ashr_12_exittail):
1586	movdqa	(%eax, %ecx), %xmm1
1587	psrldq	$12, %xmm0
1588	psrldq	$12, %xmm3
1589	jmp	L(aftertail)
1590
1591/*
1592 * The following cases will be handled by ashr_13
1593 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1594 *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
1595 */
1596	.p2align 4
1597L(ashr_13):
1598	mov	$0xffff, %esi
1599	pxor	%xmm0, %xmm0
1600	movdqa	(%edx), %xmm2
1601	movdqa	(%eax), %xmm1
1602	pcmpeqb	%xmm1, %xmm0
1603	pslldq	$3, %xmm2
1604	pcmpeqb	%xmm1, %xmm2
1605	psubb	%xmm0, %xmm2
1606	pmovmskb %xmm2, %edi
1607	shr	%cl, %esi
1608	shr	%cl, %edi
1609	sub	%edi, %esi
1610	lea	-3(%ecx), %edi
1611	jnz	L(less32bytes)
1612
1613	UPDATE_STRNCMP_COUNTER
1614
1615	movdqa	(%edx), %xmm3
1616	pxor	%xmm0, %xmm0
1617	mov	$16, %ecx
1618	or	$13, %ebx
1619	lea	13(%edx), %edi
1620	and	$0xfff, %edi
1621	sub	$0x1000, %edi
1622
1623	.p2align 4
1624L(loop_ashr_13):
1625	add	$16, %edi
1626	jg	L(nibble_ashr_13)
1627
1628L(gobble_ashr_13):
1629	movdqa	(%eax, %ecx), %xmm1
1630	movdqa	(%edx, %ecx), %xmm2
1631	movdqa	%xmm2, %xmm4
1632
1633	palignr	$13, %xmm3, %xmm2
1634
1635	pcmpeqb	%xmm1, %xmm0
1636	pcmpeqb	%xmm2, %xmm1
1637	psubb	%xmm0, %xmm1
1638	pmovmskb %xmm1, %esi
1639	sub	$0xffff, %esi
1640	jnz	L(exit)
1641
1642#ifdef USE_AS_STRNCMP
1643	cmp	$16, %ebp
1644	lea	-16(%ebp), %ebp
1645	jbe	L(more8byteseq)
1646#endif
1647	add	$16, %ecx
1648	movdqa	%xmm4, %xmm3
1649
1650	add	$16, %edi
1651	jg	L(nibble_ashr_13)
1652
1653	movdqa	(%eax, %ecx), %xmm1
1654	movdqa	(%edx, %ecx), %xmm2
1655	movdqa	%xmm2, %xmm4
1656
1657	palignr	$13, %xmm3, %xmm2
1658
1659	pcmpeqb	%xmm1, %xmm0
1660	pcmpeqb	%xmm2, %xmm1
1661	psubb	%xmm0, %xmm1
1662	pmovmskb %xmm1, %esi
1663	sub	$0xffff, %esi
1664	jnz	L(exit)
1665
1666#ifdef USE_AS_STRNCMP
1667	cmp	$16, %ebp
1668	lea	-16(%ebp), %ebp
1669	jbe	L(more8byteseq)
1670#endif
1671	add	$16, %ecx
1672	movdqa	%xmm4, %xmm3
1673	jmp	L(loop_ashr_13)
1674
1675	.p2align 4
1676L(nibble_ashr_13):
1677	pcmpeqb	%xmm3, %xmm0
1678	pmovmskb %xmm0, %esi
1679	test	$0xe000, %esi
1680	jnz	L(ashr_13_exittail)
1681
1682#ifdef USE_AS_STRNCMP
1683	cmp	$3, %ebp
1684	jbe	L(ashr_13_exittail)
1685#endif
1686	pxor	%xmm0, %xmm0
1687	sub	$0x1000, %edi
1688	jmp	L(gobble_ashr_13)
1689
1690	.p2align 4
1691L(ashr_13_exittail):
1692	movdqa	(%eax, %ecx), %xmm1
1693	psrldq	$13, %xmm0
1694	psrldq	$13, %xmm3
1695	jmp	L(aftertail)
1696
1697/*
1698 * The following cases will be handled by ashr_14
1699 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1700 *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
1701 */
1702	.p2align 4
1703L(ashr_14):
1704	mov	$0xffff, %esi
1705	pxor	%xmm0, %xmm0
1706	movdqa	(%edx), %xmm2
1707	movdqa	(%eax), %xmm1
1708	pcmpeqb	%xmm1, %xmm0
1709	pslldq	$2, %xmm2
1710	pcmpeqb	%xmm1, %xmm2
1711	psubb	%xmm0, %xmm2
1712	pmovmskb %xmm2, %edi
1713	shr	%cl, %esi
1714	shr	%cl, %edi
1715	sub	%edi, %esi
1716	lea	-2(%ecx), %edi
1717	jnz	L(less32bytes)
1718
1719	UPDATE_STRNCMP_COUNTER
1720
1721	movdqa	(%edx), %xmm3
1722	pxor	%xmm0, %xmm0
1723	mov	$16, %ecx
1724	or	$14, %ebx
1725	lea	14(%edx), %edi
1726	and	$0xfff, %edi
1727	sub	$0x1000, %edi
1728
1729	.p2align 4
1730L(loop_ashr_14):
1731	add	$16, %edi
1732	jg	L(nibble_ashr_14)
1733
1734L(gobble_ashr_14):
1735	movdqa	(%eax, %ecx), %xmm1
1736	movdqa	(%edx, %ecx), %xmm2
1737	movdqa	%xmm2, %xmm4
1738
1739	palignr	$14, %xmm3, %xmm2
1740
1741	pcmpeqb	%xmm1, %xmm0
1742	pcmpeqb	%xmm2, %xmm1
1743	psubb	%xmm0, %xmm1
1744	pmovmskb %xmm1, %esi
1745	sub	$0xffff, %esi
1746	jnz	L(exit)
1747
1748#ifdef USE_AS_STRNCMP
1749	cmp	$16, %ebp
1750	lea	-16(%ebp), %ebp
1751	jbe	L(more8byteseq)
1752#endif
1753	add	$16, %ecx
1754	movdqa	%xmm4, %xmm3
1755
1756	add	$16, %edi
1757	jg	L(nibble_ashr_14)
1758
1759	movdqa	(%eax, %ecx), %xmm1
1760	movdqa	(%edx, %ecx), %xmm2
1761	movdqa	%xmm2, %xmm4
1762
1763	palignr	$14, %xmm3, %xmm2
1764
1765	pcmpeqb	%xmm1, %xmm0
1766	pcmpeqb	%xmm2, %xmm1
1767	psubb	%xmm0, %xmm1
1768	pmovmskb %xmm1, %esi
1769	sub	$0xffff, %esi
1770	jnz	L(exit)
1771
1772#ifdef USE_AS_STRNCMP
1773	cmp	$16, %ebp
1774	lea	-16(%ebp), %ebp
1775	jbe	L(more8byteseq)
1776#endif
1777	add	$16, %ecx
1778	movdqa	%xmm4, %xmm3
1779	jmp	L(loop_ashr_14)
1780
1781	.p2align 4
1782L(nibble_ashr_14):
1783	pcmpeqb	%xmm3, %xmm0
1784	pmovmskb %xmm0, %esi
1785	test	$0xc000, %esi
1786	jnz	L(ashr_14_exittail)
1787
1788#ifdef USE_AS_STRNCMP
1789	cmp	$2, %ebp
1790	jbe	L(ashr_14_exittail)
1791#endif
1792	pxor	%xmm0, %xmm0
1793	sub	$0x1000, %edi
1794	jmp	L(gobble_ashr_14)
1795
1796	.p2align 4
1797L(ashr_14_exittail):
1798	movdqa	(%eax, %ecx), %xmm1
1799	psrldq	$14, %xmm0
1800	psrldq	$14, %xmm3
1801	jmp	L(aftertail)
1802
1803/*
1804 * The following cases will be handled by ashr_15
1805 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
1806 *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
1807 */
1808
1809	.p2align 4
1810L(ashr_15):
1811	mov	$0xffff, %esi
1812	pxor	%xmm0, %xmm0
1813	movdqa	(%edx), %xmm2
1814	movdqa	(%eax), %xmm1
1815	pcmpeqb	%xmm1, %xmm0
1816	pslldq	$1, %xmm2
1817	pcmpeqb	%xmm1, %xmm2
1818	psubb	%xmm0, %xmm2
1819	pmovmskb %xmm2, %edi
1820	shr	%cl, %esi
1821	shr	%cl, %edi
1822	sub	%edi, %esi
1823	lea	-1(%ecx), %edi
1824	jnz	L(less32bytes)
1825
1826	UPDATE_STRNCMP_COUNTER
1827
1828	movdqa	(%edx), %xmm3
1829	pxor	%xmm0, %xmm0
1830	mov	$16, %ecx
1831	or	$15, %ebx
1832	lea	15(%edx), %edi
1833	and	$0xfff, %edi
1834	sub	$0x1000, %edi
1835
1836	.p2align 4
1837L(loop_ashr_15):
1838	add	$16, %edi
1839	jg	L(nibble_ashr_15)
1840
1841L(gobble_ashr_15):
1842	movdqa	(%eax, %ecx), %xmm1
1843	movdqa	(%edx, %ecx), %xmm2
1844	movdqa	%xmm2, %xmm4
1845
1846	palignr	$15, %xmm3, %xmm2
1847
1848	pcmpeqb	%xmm1, %xmm0
1849	pcmpeqb	%xmm2, %xmm1
1850	psubb	%xmm0, %xmm1
1851	pmovmskb %xmm1, %esi
1852	sub	$0xffff, %esi
1853	jnz	L(exit)
1854
1855#ifdef USE_AS_STRNCMP
1856	cmp	$16, %ebp
1857	lea	-16(%ebp), %ebp
1858	jbe	L(more8byteseq)
1859#endif
1860	add	$16, %ecx
1861	movdqa	%xmm4, %xmm3
1862
1863	add	$16, %edi
1864	jg	L(nibble_ashr_15)
1865
1866	movdqa	(%eax, %ecx), %xmm1
1867	movdqa	(%edx, %ecx), %xmm2
1868	movdqa	%xmm2, %xmm4
1869
1870	palignr	$15, %xmm3, %xmm2
1871
1872	pcmpeqb	%xmm1, %xmm0
1873	pcmpeqb	%xmm2, %xmm1
1874	psubb	%xmm0, %xmm1
1875	pmovmskb %xmm1, %esi
1876	sub	$0xffff, %esi
1877	jnz	L(exit)
1878
1879#ifdef USE_AS_STRNCMP
1880	cmp	$16, %ebp
1881	lea	-16(%ebp), %ebp
1882	jbe	L(more8byteseq)
1883#endif
1884	add	$16, %ecx
1885	movdqa	%xmm4, %xmm3
1886	jmp	L(loop_ashr_15)
1887
1888	.p2align 4
1889L(nibble_ashr_15):
1890	pcmpeqb	%xmm3, %xmm0
1891	pmovmskb %xmm0, %esi
1892	test	$0x8000, %esi
1893	jnz	L(ashr_15_exittail)
1894
1895#ifdef USE_AS_STRNCMP
1896	cmp	$1, %ebp
1897	jbe	L(ashr_15_exittail)
1898#endif
1899	pxor	%xmm0, %xmm0
1900	sub	$0x1000, %edi
1901	jmp	L(gobble_ashr_15)
1902
1903	.p2align 4
1904L(ashr_15_exittail):
1905	movdqa	(%eax, %ecx), %xmm1
1906	psrldq	$15, %xmm0
1907	psrldq	$15, %xmm3
1908	jmp	L(aftertail)
1909
1910	.p2align 4
1911L(aftertail):
1912	pcmpeqb	%xmm3, %xmm1
1913	psubb	%xmm0, %xmm1
1914	pmovmskb %xmm1, %esi
1915	not	%esi
1916L(exit):
1917	mov	%ebx, %edi
1918	and	$0x1f, %edi
1919	lea	-16(%edi, %ecx), %edi
1920L(less32bytes):
1921	add	%edi, %edx
1922	add	%ecx, %eax
1923	test	$0x20, %ebx
1924	jz	L(ret2)
1925	xchg	%eax, %edx
1926
1927	.p2align 4
1928L(ret2):
1929	mov	%esi, %ecx
1930	POP	(%esi)
1931	POP	(%edi)
1932	POP	(%ebx)
1933L(less16bytes):
1934	test	%cl, %cl
1935	jz	L(2next_8_bytes)
1936
1937	test	$0x01, %cl
1938	jnz	L(Byte0)
1939
1940	test	$0x02, %cl
1941	jnz	L(Byte1)
1942
1943	test	$0x04, %cl
1944	jnz	L(Byte2)
1945
1946	test	$0x08, %cl
1947	jnz	L(Byte3)
1948
1949	test	$0x10, %cl
1950	jnz	L(Byte4)
1951
1952	test	$0x20, %cl
1953	jnz	L(Byte5)
1954
1955	test	$0x40, %cl
1956	jnz	L(Byte6)
1957#ifdef USE_AS_STRNCMP
1958	cmp	$7, %ebp
1959	jbe	L(eq)
1960#endif
1961
1962	movzx	7(%eax), %ecx
1963	movzx	7(%edx), %eax
1964
1965	sub	%ecx, %eax
1966	RETURN
1967
1968	.p2align 4
1969L(Byte0):
1970#ifdef USE_AS_STRNCMP
1971	cmp	$0, %ebp
1972	jbe	L(eq)
1973#endif
1974	movzx	(%eax), %ecx
1975	movzx	(%edx), %eax
1976
1977	sub	%ecx, %eax
1978	RETURN
1979
1980	.p2align 4
1981L(Byte1):
1982#ifdef USE_AS_STRNCMP
1983	cmp	$1, %ebp
1984	jbe	L(eq)
1985#endif
1986	movzx	1(%eax), %ecx
1987	movzx	1(%edx), %eax
1988
1989	sub	%ecx, %eax
1990	RETURN
1991
1992	.p2align 4
1993L(Byte2):
1994#ifdef USE_AS_STRNCMP
1995	cmp	$2, %ebp
1996	jbe	L(eq)
1997#endif
1998	movzx	2(%eax), %ecx
1999	movzx	2(%edx), %eax
2000
2001	sub	%ecx, %eax
2002	RETURN
2003
2004	.p2align 4
2005L(Byte3):
2006#ifdef USE_AS_STRNCMP
2007	cmp	$3, %ebp
2008	jbe	L(eq)
2009#endif
2010	movzx	3(%eax), %ecx
2011	movzx	3(%edx), %eax
2012
2013	sub	%ecx, %eax
2014	RETURN
2015
2016	.p2align 4
2017L(Byte4):
2018#ifdef USE_AS_STRNCMP
2019	cmp	$4, %ebp
2020	jbe	L(eq)
2021#endif
2022	movzx	4(%eax), %ecx
2023	movzx	4(%edx), %eax
2024
2025	sub	%ecx, %eax
2026	RETURN
2027
2028	.p2align 4
2029L(Byte5):
2030#ifdef USE_AS_STRNCMP
2031	cmp	$5, %ebp
2032	jbe	L(eq)
2033#endif
2034	movzx	5(%eax), %ecx
2035	movzx	5(%edx), %eax
2036
2037	sub	%ecx, %eax
2038	RETURN
2039
2040	.p2align 4
2041L(Byte6):
2042#ifdef USE_AS_STRNCMP
2043	cmp	$6, %ebp
2044	jbe	L(eq)
2045#endif
2046	movzx	6(%eax), %ecx
2047	movzx	6(%edx), %eax
2048
2049	sub	%ecx, %eax
2050	RETURN
2051
2052	.p2align 4
2053L(2next_8_bytes):
2054	add	$8, %eax
2055	add	$8, %edx
2056#ifdef USE_AS_STRNCMP
2057	cmp	$8, %ebp
2058	lea	-8(%ebp), %ebp
2059	jbe	L(eq)
2060#endif
2061
2062	test	$0x01, %ch
2063	jnz	L(Byte0)
2064
2065	test	$0x02, %ch
2066	jnz	L(Byte1)
2067
2068	test	$0x04, %ch
2069	jnz	L(Byte2)
2070
2071	test	$0x08, %ch
2072	jnz	L(Byte3)
2073
2074	test	$0x10, %ch
2075	jnz	L(Byte4)
2076
2077	test	$0x20, %ch
2078	jnz	L(Byte5)
2079
2080	test	$0x40, %ch
2081	jnz	L(Byte6)
2082
2083#ifdef USE_AS_STRNCMP
2084	cmp	$7, %ebp
2085	jbe	L(eq)
2086#endif
2087	movzx	7(%eax), %ecx
2088	movzx	7(%edx), %eax
2089
2090	sub	%ecx, %eax
2091	RETURN
2092
2093	.p2align 4
2094L(neq):
2095	mov	$1, %eax
2096	ja	L(neq_bigger)
2097	neg	%eax
2098L(neq_bigger):
2099	RETURN
2100
2101#ifdef USE_AS_STRNCMP
2102	cfi_restore_state
2103	.p2align 4
2104L(more8byteseq):
2105	POP	(%esi)
2106	POP	(%edi)
2107	POP	(%ebx)
2108#endif
2109
2110L(eq):
2111
2112#ifdef USE_AS_STRNCMP
2113	POP	(%ebp)
2114#endif
2115	xorl	%eax, %eax
2116	ret
2117
2118#ifdef USE_AS_STRNCMP
2119	CFI_PUSH (%ebp)
2120
2121	.p2align 4
2122L(less16bytes_sncmp):
2123	test	%ebp, %ebp
2124	jz	L(eq)
2125
2126	movzbl	(%eax), %ecx
2127	cmpb	%cl, (%edx)
2128	jne	L(neq)
2129	test	%cl, %cl
2130	je	L(eq)
2131
2132	cmp	$1, %ebp
2133	je	L(eq)
2134
2135	movzbl	1(%eax), %ecx
2136	cmpb	%cl, 1(%edx)
2137	jne	L(neq)
2138	test	%cl, %cl
2139	je	L(eq)
2140
2141	cmp	$2, %ebp
2142	je	L(eq)
2143
2144	movzbl	2(%eax), %ecx
2145	cmpb	%cl, 2(%edx)
2146	jne	L(neq)
2147	test	%cl, %cl
2148	je	L(eq)
2149
2150	cmp	$3, %ebp
2151	je	L(eq)
2152
2153	movzbl	3(%eax), %ecx
2154	cmpb	%cl, 3(%edx)
2155	jne	L(neq)
2156	test	%cl, %cl
2157	je	L(eq)
2158
2159	cmp	$4, %ebp
2160	je	L(eq)
2161
2162	movzbl	4(%eax), %ecx
2163	cmpb	%cl, 4(%edx)
2164	jne	L(neq)
2165	test	%cl, %cl
2166	je	L(eq)
2167
2168	cmp	$5, %ebp
2169	je	L(eq)
2170
2171	movzbl	5(%eax), %ecx
2172	cmpb	%cl, 5(%edx)
2173	jne	L(neq)
2174	test	%cl, %cl
2175	je	L(eq)
2176
2177	cmp	$6, %ebp
2178	je	L(eq)
2179
2180	movzbl	6(%eax), %ecx
2181	cmpb	%cl, 6(%edx)
2182	jne	L(neq)
2183	test	%cl, %cl
2184	je	L(eq)
2185
2186	cmp	$7, %ebp
2187	je	L(eq)
2188
2189	movzbl	7(%eax), %ecx
2190	cmpb	%cl, 7(%edx)
2191	jne	L(neq)
2192	test	%cl, %cl
2193	je	L(eq)
2194
2195
2196	cmp	$8, %ebp
2197	je	L(eq)
2198
2199	movzbl	8(%eax), %ecx
2200	cmpb	%cl, 8(%edx)
2201	jne	L(neq)
2202	test	%cl, %cl
2203	je	L(eq)
2204
2205	cmp	$9, %ebp
2206	je	L(eq)
2207
2208	movzbl	9(%eax), %ecx
2209	cmpb	%cl, 9(%edx)
2210	jne	L(neq)
2211	test	%cl, %cl
2212	je	L(eq)
2213
2214	cmp	$10, %ebp
2215	je	L(eq)
2216
2217	movzbl	10(%eax), %ecx
2218	cmpb	%cl, 10(%edx)
2219	jne	L(neq)
2220	test	%cl, %cl
2221	je	L(eq)
2222
2223	cmp	$11, %ebp
2224	je	L(eq)
2225
2226	movzbl	11(%eax), %ecx
2227	cmpb	%cl, 11(%edx)
2228	jne	L(neq)
2229	test	%cl, %cl
2230	je	L(eq)
2231
2232
2233	cmp	$12, %ebp
2234	je	L(eq)
2235
2236	movzbl	12(%eax), %ecx
2237	cmpb	%cl, 12(%edx)
2238	jne	L(neq)
2239	test	%cl, %cl
2240	je	L(eq)
2241
2242	cmp	$13, %ebp
2243	je	L(eq)
2244
2245	movzbl	13(%eax), %ecx
2246	cmpb	%cl, 13(%edx)
2247	jne	L(neq)
2248	test	%cl, %cl
2249	je	L(eq)
2250
2251	cmp	$14, %ebp
2252	je	L(eq)
2253
2254	movzbl	14(%eax), %ecx
2255	cmpb	%cl, 14(%edx)
2256	jne	L(neq)
2257	test	%cl, %cl
2258	je	L(eq)
2259
2260	cmp	$15, %ebp
2261	je	L(eq)
2262
2263	movzbl	15(%eax), %ecx
2264	cmpb	%cl, 15(%edx)
2265	jne	L(neq)
2266	test	%cl, %cl
2267	je	L(eq)
2268
2269	POP	(%ebp)
2270	xor	%eax, %eax
2271	ret
2272#endif
2273
2274END (ssse3_strcmp_latest)
2275