/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* L(name) expands to .Lname.  The .L prefix makes the label local to
   the assembler, so the branch targets used in this file are not
   exported as symbols.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers around the assembler's .cfi_* unwind directives.
   Each wrapper is defined here only when it has not already been
   defined before this point.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY (name): declare NAME as a global function symbol, align it
   with .p2align 4, emit the label and open a CFI procedure.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END (name): close the CFI procedure opened by ENTRY and record the
   symbol size as the distance from NAME to this point.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA offset grows
   by 4 and REG is recorded as saved at offset 0 from the current CFA
   base register.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching 4-byte pop: the CFA offset
   shrinks by 4 and REG is marked as restored.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the corresponding unwind annotation.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)

/* ENTRANCE saves %esi then %edi.  RETURN pops them in reverse order
   and returns; the CFI_PUSH pair after the ret re-establishes the
   "both registers pushed" unwind state for whatever code follows the
   macro in the file, because that code is only reached by branches
   taken while the two registers are still on the stack.  */
#define ENTRANCE PUSH(%esi); PUSH(%edi)
#define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);

/* Offsets of the two pointer arguments relative to %esp.  They are
   used before ENTRANCE pushes anything, i.e. while 0(%esp) still
   holds the return address.  */
#define PARMS  4
#define STR1  PARMS
#define STR2  STR1+4

/*
 * wcscmp -- compare two null-terminated strings of 32-bit elements.
 *
 * In:	STR1(%esp) = s1, STR2(%esp) = s2 (pointer arguments on the stack).
 * Out:	%eax = 0 when the strings are equal, 1 when the first differing
 *	element of s1 is greater than that of s2 (signed compare, jg),
 *	-1 otherwise.
 * Uses:	%eax, %ecx, %edx, %xmm0-%xmm5 and the flags.  %esi and %edi
 *	are saved by ENTRANCE and restored by RETURN.
 *
 * Structure:
 *   1. The first four elements are compared one by one with no
 *	registers saved (exits: L(neq) / L(eq)).
 *   2. Both pointers are advanced by 16 bytes; from here on
 *	%edi = s1 cursor, %esi = s2 cursor and %xmm0 = 0.
 *   3. A loop variant L(continue_X_Y) is selected.  X describes %edi
 *	and Y describes %esi: the offset of the pointer inside its
 *	64-byte block, rounded down to 0/16/32/48; "00" means the pointer
 *	is 16-byte aligned.  Each variant compares element by element
 *	exactly those 16-byte chunks in which an unaligned pointer would
 *	otherwise be loaded across a 64-byte boundary, and uses 16-byte
 *	SSE2 compares for the rest.  (Presumably this is to keep 16-byte
 *	loads from straddling a cache line / page; the code itself does
 *	not say.)  The variants are symmetric in the two strings, which
 *	is why e.g. L(continue_32) can reuse L(continue_0_32).
 *
 * Invariants:
 *   - %xmm0 is all zeroes before every "Any null double_word?" compare
 *	and is all zeroes again whenever the following branch is not
 *	taken (no null element was found).
 *   - Every "jne L(nequal)" / "jne L(neq)" is taken with the flags of
 *	a compare computing (s1 element) - (s2 element); the targets
 *	consume those flags with jg after a flag-preserving mov.
 */
	.text
ENTRY (wcscmp)
/*
	* This implementation uses SSE2 instructions to compare up to
	* 16 bytes (four elements) at a time.
*/
	mov	STR1(%esp), %edx	/* edx = s1 */
	mov	STR2(%esp), %eax	/* eax = s2 */

	/* Elements 0..3, one at a time, before any register is saved.  */
	mov	(%eax), %ecx
	cmp	%ecx, (%edx)		/* flags = s1[0] - s2[0] */
	jne	L(neq)
	test	%ecx, %ecx		/* equal; was it the terminator? */
	jz	L(eq)

	mov	4(%eax), %ecx
	cmp	%ecx, 4(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	8(%eax), %ecx
	cmp	%ecx, 8(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	mov	12(%eax), %ecx
	cmp	%ecx, 12(%edx)
	jne	L(neq)
	test	%ecx, %ecx
	jz	L(eq)

	ENTRANCE
	add	$16, %eax		/* skip the four elements already compared */
	add	$16, %edx

	mov	%eax, %esi		/* esi = s2 cursor */
	mov	%edx, %edi		/* edi = s1 cursor */
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch		/* ch = low byte of s2 cursor */
	mov	%dl, %cl		/* cl = low byte of s1 cursor */
	and	$63, %eax		/* eax = offset of esi within its 64-byte block */
	and	$63, %edx		/* edx = offset of edi within its 64-byte block */
	and	$15, %cl
	jz	L(continue_00)		/* edi is 16-byte aligned */
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

/* edi offset in [48,64): now classify esi.  */
L(continue_48):
	and	$15, %ch
	jz	L(continue_48_00)	/* esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_48)
	cmp	$32, %eax
	jb	L(continue_16_48)
	cmp	$48, %eax
	jb	L(continue_32_48)

/* Chunk 0 element by element, chunks 16/32/48 with 16-byte compares.  */
	.p2align 4
L(continue_48_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_48)

/* edi offset in [0,16), not aligned: now classify esi.  */
L(continue_0):
	and	$15, %ch
	jz	L(continue_0_00)	/* esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_0)
	cmp	$32, %eax
	jb	L(continue_0_16)
	cmp	$48, %eax
	jb	L(continue_0_32)

/* Chunks 0 and 48 element by element, chunks 16 and 32 with 16-byte
   compares.  */
	.p2align 4
L(continue_0_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	mov	48(%esi), %ecx
	cmp	%ecx, 48(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	52(%esi), %ecx
	cmp	%ecx, 52(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	56(%esi), %ecx
	cmp	%ecx, 56(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	60(%esi), %ecx
	cmp	%ecx, 60(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_0_48)

/* edi is 16-byte aligned: now classify esi.  */
	.p2align 4
L(continue_00):
	and	$15, %ch
	jz	L(continue_00_00)	/* both cursors are 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_00_0)
	cmp	$32, %eax
	jb	L(continue_00_16)
	cmp	$48, %eax
	jb	L(continue_00_32)

/* Chunk 0: null check through the aligned edi, then element-by-element
   equality; chunks 16/32/48 with 16-byte compares.  */
	.p2align 4
L(continue_00_48):
	pcmpeqd	(%edi), %xmm0		/* null mask of the aligned s1 chunk */
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)	/* terminator inside this chunk */

	cmp	(%esi), %eax		/* flags = s1 element - s2 element */
	jne	L(nequal)

	mov	4(%edi), %eax
	cmp	4(%esi), %eax
	jne	L(nequal)

	mov	8(%edi), %eax
	cmp	8(%esi), %eax
	jne	L(nequal)

	mov	12(%edi), %eax
	cmp	12(%esi), %eax
	jne	L(nequal)

	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	32(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	48(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_48)

/* edi offset in [32,48), not aligned: now classify esi.  The
   L(continue_0_32) target is reused with the roles of the two strings
   swapped.  */
	.p2align 4
L(continue_32):
	and	$15, %ch
	jz	L(continue_32_00)	/* esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_32)
	cmp	$32, %eax
	jb	L(continue_16_32)
	cmp	$48, %eax
	jb	L(continue_32_32)

/* Chunks 0 and 16 element by element, chunks 32 and 48 with 16-byte
   compares.  */
	.p2align 4
L(continue_32_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	16(%esi), %ecx
	cmp	%ecx, 16(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	20(%esi), %ecx
	cmp	%ecx, 20(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	24(%esi), %ecx
	cmp	%ecx, 24(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	28(%esi), %ecx
	cmp	%ecx, 28(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_32_48)

/* edi offset in [16,32), not aligned: now classify esi.  The
   L(continue_0_16) target is reused with the roles of the two strings
   swapped.  */
	.p2align 4
L(continue_16):
	and	$15, %ch
	jz	L(continue_16_00)	/* esi is 16-byte aligned */
	cmp	$16, %eax
	jb	L(continue_0_16)
	cmp	$32, %eax
	jb	L(continue_16_16)
	cmp	$48, %eax
	jb	L(continue_16_32)

/* Chunks 0 and 32 element by element, chunks 16 and 48 with 16-byte
   compares.  */
	.p2align 4
L(continue_16_48):
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	mov	32(%esi), %ecx
	cmp	%ecx, 32(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	36(%esi), %ecx
	cmp	%ecx, 36(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	40(%esi), %ecx
	cmp	%ecx, 40(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	44(%esi), %ecx
	cmp	%ecx, 44(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	movdqu	48(%edi), %xmm1
	movdqu	48(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_16_48)

/* Both cursors 16-byte aligned: aligned loads (movdqa / memory
   operands) for all four chunks.  */
	.p2align 4
L(continue_00_00):
	movdqa	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqa	16(%edi), %xmm3
	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%esi), %xmm3		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm3		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqa	32(%edi), %xmm5
	pcmpeqd	%xmm5, %xmm0		/* Any null double_word? */
	pcmpeqd	32(%esi), %xmm5		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm5		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm5, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	movdqa	48(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	48(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_00)

/* edi aligned, esi offset in [32,48): one 16-byte step brings esi
   into [48,64), then continue with L(continue_00_48).  */
	.p2align 4
L(continue_00_32):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_00_48)

/* edi aligned, esi offset in [16,32): two 16-byte steps, then
   L(continue_00_48).  */
	.p2align 4
L(continue_00_16):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_00_48)

/* edi aligned, esi offset in [0,16) but unaligned: three 16-byte
   steps, then L(continue_00_48).  */
	.p2align 4
L(continue_00_0):
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
	pcmpeqd	32(%edi), %xmm2		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm2		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm2, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_00_48)

/* esi aligned, edi offset in [48,64).  Chunk 0: null check through
   the aligned esi, then element-by-element equality; chunks 16/32/48
   with 16-byte compares.  */
	.p2align 4
L(continue_48_00):
	pcmpeqd	(%esi), %xmm0		/* null mask of the aligned s2 chunk */
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)	/* terminator inside this chunk */

	cmp	(%esi), %eax		/* flags = s1 element - s2 element */
	jne	L(nequal)

	mov	4(%edi), %eax
	cmp	4(%esi), %eax
	jne	L(nequal)

	mov	8(%edi), %eax
	cmp	8(%esi), %eax
	jne	L(nequal)

	mov	12(%edi), %eax
	cmp	12(%esi), %eax
	jne	L(nequal)

	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	32(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	movdqu	48(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	48(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_00)

/* esi aligned, edi offset in [32,48): one 16-byte step, then
   L(continue_48_00).  */
	.p2align 4
L(continue_32_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_00)

/* esi aligned, edi offset in [16,32): two 16-byte steps, then
   L(continue_48_00).  */
	.p2align 4
L(continue_16_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_00)

/* esi aligned, edi offset in [0,16) but unaligned: three 16-byte
   steps, then L(continue_48_00).  */
	.p2align 4
L(continue_0_00):
	movdqu	(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	16(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	32(%esi), %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_00)

/* Both offsets in [32,48): one 16-byte step, then L(continue_48_48).  */
	.p2align 4
L(continue_32_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_48)

/* Both offsets in [16,32): two 16-byte steps, then L(continue_48_48).  */
	.p2align 4
L(continue_16_16):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm3
	movdqu	16(%esi), %xmm4
	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm4, %xmm3		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm3		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_48)

/* Both offsets in [0,16): three 16-byte steps, then L(continue_48_48).  */
	.p2align 4
L(continue_0_0):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm3
	movdqu	16(%esi), %xmm4
	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm4, %xmm3		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm3		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm3, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	movdqu	32(%edi), %xmm1
	movdqu	32(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_48)

/* Offsets in [0,16) and [16,32): two 16-byte steps move them to
   [32,48) and [48,64), then L(continue_32_48).  */
	.p2align 4
L(continue_0_16):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	movdqu	16(%edi), %xmm1
	movdqu	16(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_32_48)

/* Offsets in [0,16) and [32,48): one 16-byte step moves them to
   [16,32) and [48,64), then L(continue_16_48).  */
	.p2align 4
L(continue_0_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_16_48)

/* Offsets in [16,32) and [32,48): one 16-byte step moves them to
   [32,48) and [48,64), then L(continue_32_48).  */
	.p2align 4
L(continue_16_32):
	movdqu	(%edi), %xmm1
	movdqu	(%esi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
	pcmpeqd	%xmm2, %xmm1		/* compare 4 double_words for equality */
	psubb	%xmm0, %xmm1		/* leaves 0xff only where equal and non-null */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* edx == 0 iff all 4 are equal and none is null */
	jnz	L(less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_32_48)

/* Reached from L(continue_00_48) / L(continue_48_00) when the aligned
   chunk at offset 0 contains a terminator; %eax = (%edi) on entry.
   Elements 0..2 are checked for difference and for null; if all three
   are equal and non-null, only a difference in element 3 can still
   change the result, otherwise the strings are equal.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%esi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

/* L(less4_double_words{,_16,_32,_48}): a 16-byte compare at chunk
   offset 0/16/32/48 found a difference or a terminator.  On entry
   %edx = (byte mask) - 0xffff, which is nonzero; its lowest nonzero
   nibble corresponds to the first element that differs or is null
   (the subtraction keeps the trailing zero bits).  %eax is cleared up
   front, so falling through to RETURN after an equal compare returns 0
   (the element was the terminator in both strings).  */
	.p2align 4
L(less4_double_words):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words)	/* elements 0 and 1 are fine */
	and	$15, %dl
	jz	L(second_double_word)		/* element 0 is fine */
	mov	(%esi), %ecx
	cmp	%ecx, (%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word):
	mov	4(%esi), %ecx
	cmp	%ecx, 4(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)		/* element 2 is fine */
	mov	8(%esi), %ecx
	cmp	%ecx, 8(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word):
	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	RETURN

/* Same decoding for the chunk at offset 16.  */
	.p2align 4
L(less4_double_words_16):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	mov	16(%esi), %ecx
	cmp	%ecx, 16(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word_16):
	mov	20(%esi), %ecx
	cmp	%ecx, 20(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	mov	24(%esi), %ecx
	cmp	%ecx, 24(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word_16):
	mov	28(%esi), %ecx
	cmp	%ecx, 28(%edi)
	jne	L(nequal)
	RETURN

/* Same decoding for the chunk at offset 32.  */
	.p2align 4
L(less4_double_words_32):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	mov	32(%esi), %ecx
	cmp	%ecx, 32(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word_32):
	mov	36(%esi), %ecx
	cmp	%ecx, 36(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	mov	40(%esi), %ecx
	cmp	%ecx, 40(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word_32):
	mov	44(%esi), %ecx
	cmp	%ecx, 44(%edi)
	jne	L(nequal)
	RETURN

/* Same decoding for the chunk at offset 48.  */
	.p2align 4
L(less4_double_words_48):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	mov	48(%esi), %ecx
	cmp	%ecx, 48(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(second_double_word_48):
	mov	52(%esi), %ecx
	cmp	%ecx, 52(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	mov	56(%esi), %ecx
	cmp	%ecx, 56(%edi)
	jne	L(nequal)
	RETURN

	.p2align 4
L(fourth_double_word_48):
	mov	60(%esi), %ecx
	cmp	%ecx, 60(%edi)
	jne	L(nequal)
	RETURN

/* Exit for a difference found after ENTRANCE.  The flags still come
   from the compare that branched here (mov leaves them untouched):
   signed "greater" yields 1, anything else -1.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(return)
	neg	%eax
	RETURN

	.p2align 4
L(return):
	RETURN

/* Exit for "terminator reached with no difference" after ENTRANCE.  */
	.p2align 4
L(equal):
	xorl	%eax, %eax
	RETURN

	/* The exits below are only reached from the code before ENTRANCE,
	   so undo the unwind state that the last RETURN re-established.  */
	CFI_POP (%edi)
	CFI_POP (%esi)

/* Exit for a difference among the first four elements; nothing was
   pushed, so a plain ret is used.  Flags come from the branching
   compare, as in L(nequal).  */
	.p2align 4
L(neq):
	mov	$1, %eax
	jg	L(neq_bigger)
	neg	%eax

L(neq_bigger):
	ret

/* Exit for a terminator among the first four elements.  */
	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (wcscmp)
