/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Fallback definitions of the assembler helper macros used below.  Every
   definition is guarded, so one that is already in effect when this point
   is reached takes precedence.  */

/* L(label): spell a label with the ".L" prefix.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers that expand to the .cfi_* directive of the same name.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): mark NAME as a global symbol of type function, align it
   with .p2align 4, emit its label and open a CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and set the size of
   NAME to the distance from its label to this point.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA offset grows by 4
   and REG is recorded as saved at offset 0 from the current CFA register.
   Used on its own (without an instruction) it only changes the unwind
   state that applies to the code that follows.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Inverse of CFI_PUSH: the CFA offset shrinks by 4 and REG is marked as
   holding its original value again.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the matching unwind bookkeeping.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp of the first stack argument once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes of the saved %edi.  */
#define PARMS	8
/* Function prologue: save %edi.  */
#define ENTRANCE	PUSH(%edi);
/* Function epilogue: restore %edi and return.  The trailing CFI_PUSH is
   not an instruction; it puts the unwind state back to "%edi is pushed"
   for whatever code follows the ret in the file.  */
#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);

/* Displacements (relative to %esp) of the two stack arguments.  */
#define STR1  PARMS
#define STR2  STR1+4

	.text
/* char *strrchr (const char *s, int c)

   Returns in %eax the address of the last byte of the NUL-terminated
   string S that equals the low byte of C, or 0 when there is none.  The
   terminator takes part in the search, so C == 0 yields its address.

   Both arguments are read from the stack (STR1 = s, STR2 = c).
   %edi, %esi and %ebx are saved and restored; %eax, %ecx, %edx,
   %xmm0-%xmm3 and the flags are overwritten.

   Register roles:
     %edi   address 16 bytes past the start of the block whose masks are
            currently held in %eax/%ecx
     %xmm1  low byte of C replicated into all 16 byte lanes
     %xmm2  NUL-compare scratch; it is all zero again after every block
            that contains no NUL, so it is never re-cleared
     %eax   match mask: bit i set when byte i of the block equals C
     %ecx   NUL mask: bit i set when byte i of the block is 0
            (held in %edx in the crosscache prologue)
     %ebx   match mask of the most recent block that had a match,
            0 while no match has been seen
     %esi   the value %edi had when %ebx was recorded

   The string is examined 16 bytes at a time, so bytes after the
   terminator can be loaded.  The single unaligned load is only issued
   when S lies at most 48 bytes into its 64-byte-aligned region, so it
   never crosses a 64-byte boundary; every other load is 16-byte aligned.

   A bare CFI_PUSH/CFI_POP that follows a jmp or RETURN is never reached
   as an instruction stream; it only adjusts the unwind state so that it
   describes the stack as seen by the code that comes next in the file.  */
ENTRY (strrchr)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Replicate the low byte of C across %xmm1 (two byte-unpacks fill
	   the low dword, pshufd below copies it to the other three).  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET. */
	and	$63, %ecx
	pshufd	$0, %xmm1, %xmm1
	/* More than 48 bytes into the 64-byte region: an unaligned 16-byte
	   load would run past its end, so take the aligned path.  */
	cmp	$48, %ecx
	ja	L(crosscache)

/* First block, loaded unaligned directly from the start of the string.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %ecx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	/* No match in the first block; if it holds the terminator the
	   search is over.  */
	test	%ecx, %ecx
	jnz	L(return_null)

	/* Continue with aligned blocks.  Rounding down may re-scan some
	   bytes of the first block, which is harmless.  */
	and	$-16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	/* No match recorded so far.  */
	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP    (%esi)
	CFI_POP    (%ebx)

/* The unaligned first block contains a match.  */
	.p2align 4
L(unaligned_match1):
	/* It also contains the terminator: resolve within this block.  */
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Record the match (mask and unaligned end address) before the
	   scan pointer is rounded down for the aligned loop.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

	CFI_POP    (%esi)
	CFI_POP    (%ebx)

	.p2align 4
L(crosscache):
/* Handle unaligned string: load the enclosing aligned block and discard
   the bytes that precede the start of the string.  */
	/* %ecx = offset of the string within its 16-byte block.  */
	and	$15, %ecx
	and	$-16, %edi
	/* A separate zero register is used here, which leaves %xmm2
	   all zero for the loop even if a discarded byte is 0.  */
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx
	jmp	L(loop)

	CFI_POP    (%esi)
	CFI_POP    (%ebx)

/* The first (aligned, shifted) block contains a match.  */
	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* The masks were shifted right by %ecx, so the recorded end
	   address is advanced by the same amount to keep bit i mapping
	   to address (%esi - 16 + i).  */
	mov	%eax, %ebx
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Unrolled four times; each step loads one
   aligned block and leaves the loop as soon as the block contains a match
   or a NUL (%ecx then holds the OR of both masks, %eax the match mask).  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

/* The current block has a match, a NUL, or both.  */
L(matches):
	test	%eax, %eax
	jnz	L(match)
/* End of string reached with no usable match in the current block: fall
   back to the recorded match, if there is one.  */
L(return_value):
	test	%ebx, %ebx
	jz	L(return_null_1)
	mov	%ebx, %eax
	mov	%esi, %edi

	POP	(%ebx)
	POP	(%esi)

	jmp	L(match_case1)

	CFI_PUSH    (%ebx)
	CFI_PUSH    (%esi)

/* Return NULL from inside the loop (with %esi/%ebx still pushed).  */
	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)

	xor	%eax, %eax
	RETURN

	CFI_PUSH    (%ebx)
	CFI_PUSH    (%esi)

/* The current block has at least one match.  */
	.p2align 4
L(match):
	/* %ecx was overwritten by the OR in the loop; fetch the NUL mask
	   again.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
	/* No NUL yet: record this block as the latest match and go on.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

/* The current block has both a match and a NUL.  Locate the lowest set
   bit k of the NUL mask with a ladder of bit tests, then keep only match
   bits 0..k.  If any survive the result lies in this block
   (L(match_case1)); otherwise the recorded match is used
   (L(return_value)).  */
	.p2align 4
L(find_zero):
	test	%cl, %cl
	jz	L(find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(find_zero_8)
	test	$0x01, %cl
	jnz	L(FindZeroExit1)
	test	$0x02, %cl
	jnz	L(FindZeroExit2)
	test	$0x04, %cl
	jnz	L(FindZeroExit3)
	/* First NUL is byte 3.  */
	and	$(1 << 4) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

/* First NUL is in bytes 4..7.  */
	.p2align 4
L(find_zero_8):
	test	$0x10, %cl
	jnz	L(FindZeroExit5)
	test	$0x20, %cl
	jnz	L(FindZeroExit6)
	test	$0x40, %cl
	jnz	L(FindZeroExit7)
	/* First NUL is byte 7.  */
	and	$(1 << 8) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

/* First NUL is in bytes 8..15.  */
	.p2align 4
L(find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(find_zero_high_8)
	test	$0x01, %ch
	jnz	L(FindZeroExit9)
	test	$0x02, %ch
	jnz	L(FindZeroExit10)
	test	$0x04, %ch
	jnz	L(FindZeroExit11)
	/* First NUL is byte 11.  */
	and	$(1 << 12) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

/* First NUL is in bytes 12..15.  */
	.p2align 4
L(find_zero_high_8):
	test	$0x10, %ch
	jnz	L(FindZeroExit13)
	test	$0x20, %ch
	jnz	L(FindZeroExit14)
	test	$0x40, %ch
	jnz	L(FindZeroExit15)
	/* First NUL is byte 15.  */
	and	$(1 << 16) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

/* FindZeroExitN: the first NUL is the Nth byte of the block (1-based), so
   only the low N match bits are kept.  */
	.p2align 4
L(FindZeroExit1):
	and	$1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit2):
	and	$(1 << 2) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit3):
	and	$(1 << 3) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit5):
	and	$(1 << 5) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit6):
	and	$(1 << 6) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit7):
	and	$(1 << 7) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit9):
	and	$(1 << 9) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit10):
	and	$(1 << 10) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit11):
	and	$(1 << 11) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit13):
	and	$(1 << 13) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit14):
	and	$(1 << 14) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp     L(match_case1)

	CFI_PUSH	(%ebx)
	CFI_PUSH	(%esi)

	.p2align 4
L(FindZeroExit15):
	and	$(1 << 15) - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)

/* Produce the result.  On entry %eax is a nonzero 16-bit match mask and
   %edi is the address 16 bytes past the start of the block it describes;
   only %edi is still pushed.  The ladder finds the highest set bit i and
   returns %edi - 16 + i.  */
	.p2align 4
L(match_case1):
	test	%ah, %ah
	jnz	L(match_case1_high)
	mov	%al, %dl
	and	$15 << 4, %dl
	jnz	L(match_case1_8)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x02, %al
	jnz	L(Exit2)
	/* Only bit 0 is set.  */
	lea	-16(%edi), %eax
	RETURN

/* Highest match is in bytes 4..7.  */
	.p2align 4
L(match_case1_8):
	test	$0x80, %al
	jnz	L(Exit8)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x20, %al
	jnz	L(Exit6)
	/* Highest match is byte 4.  */
	lea	-12(%edi), %eax
	RETURN

/* Highest match is in bytes 8..15.  */
	.p2align 4
L(match_case1_high):
	mov	%ah, %dh
	and	$15 << 4, %dh
	jnz	L(match_case1_high_8)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x02, %ah
	jnz	L(Exit10)
	/* Highest match is byte 8.  */
	lea	-8(%edi), %eax
	RETURN

/* Highest match is in bytes 12..15.  */
	.p2align 4
L(match_case1_high_8):
	test	$0x80, %ah
	jnz	L(Exit16)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x20, %ah
	jnz	L(Exit14)
	/* Highest match is byte 12.  */
	lea	-4(%edi), %eax
	RETURN

/* ExitN: the last match is the Nth byte of the block (1-based).  */
	.p2align 4
L(Exit2):
	lea	-15(%edi), %eax
	RETURN

	.p2align 4
L(Exit3):
	lea	-14(%edi), %eax
	RETURN

	.p2align 4
L(Exit4):
	lea	-13(%edi), %eax
	RETURN

	.p2align 4
L(Exit6):
	lea	-11(%edi), %eax
	RETURN

	.p2align 4
L(Exit7):
	lea	-10(%edi), %eax
	RETURN

	.p2align 4
L(Exit8):
	lea	-9(%edi), %eax
	RETURN

	.p2align 4
L(Exit10):
	lea	-7(%edi), %eax
	RETURN

	.p2align 4
L(Exit11):
	lea	-6(%edi), %eax
	RETURN

	.p2align 4
L(Exit12):
	lea	-5(%edi), %eax
	RETURN

	.p2align 4
L(Exit14):
	lea	-3(%edi), %eax
	RETURN

	.p2align 4
L(Exit15):
	lea	-2(%edi), %eax
	RETURN

	.p2align 4
L(Exit16):
	lea	-1(%edi), %eax
	RETURN

/* Return NULL.  Used before %esi/%ebx have been pushed.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

/* Prologue variant of L(find_zero): the first block holds both a match
   and the terminator, and %esi/%ebx have not been pushed.  Match bits
   after the first NUL are dropped; a surviving match goes to
   L(match_case1), otherwise NULL is returned.  */
	.p2align 4
L(prolog_find_zero):
	/* Entry from the crosscache path: compensate %edi for the mask
	   shift and move the NUL mask from %edx into %ecx.  */
	add	%ecx, %edi
	mov     %edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(prolog_find_zero_8)
	test	$0x01, %cl
	jnz	L(PrologFindZeroExit1)
	test	$0x02, %cl
	jnz	L(PrologFindZeroExit2)
	test	$0x04, %cl
	jnz	L(PrologFindZeroExit3)
	/* First NUL is byte 3.  */
	and	$(1 << 4) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

/* First NUL is in bytes 4..7.  */
	.p2align 4
L(prolog_find_zero_8):
	test	$0x10, %cl
	jnz	L(PrologFindZeroExit5)
	test	$0x20, %cl
	jnz	L(PrologFindZeroExit6)
	test	$0x40, %cl
	jnz	L(PrologFindZeroExit7)
	/* First NUL is byte 7.  */
	and	$(1 << 8) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

/* First NUL is in bytes 8..15.  */
	.p2align 4
L(prolog_find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(prolog_find_zero_high_8)
	test	$0x01, %ch
	jnz	L(PrologFindZeroExit9)
	test	$0x02, %ch
	jnz	L(PrologFindZeroExit10)
	test	$0x04, %ch
	jnz	L(PrologFindZeroExit11)
	/* First NUL is byte 11.  */
	and	$(1 << 12) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

/* First NUL is in bytes 12..15.  */
	.p2align 4
L(prolog_find_zero_high_8):
	test	$0x10, %ch
	jnz	L(PrologFindZeroExit13)
	test	$0x20, %ch
	jnz	L(PrologFindZeroExit14)
	test	$0x40, %ch
	jnz	L(PrologFindZeroExit15)
	/* First NUL is byte 15.  */
	and	$(1 << 16) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

/* PrologFindZeroExitN: the first NUL is the Nth byte of the block
   (1-based); keep the low N match bits and return NULL if none is left.  */
	.p2align 4
L(PrologFindZeroExit1):
	and	$1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit2):
	and	$(1 << 2) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit3):
	and	$(1 << 3) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit5):
	and	$(1 << 5) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit6):
	and	$(1 << 6) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit7):
	and	$(1 << 7) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit9):
	and	$(1 << 9) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit10):
	and	$(1 << 10) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit11):
	and	$(1 << 11) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit13):
	and	$(1 << 13) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit14):
	and	$(1 << 14) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit15):
	and	$(1 << 15) - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

END (strrchr)
