/*
 * sse2-strlen-atom.S revision 832a86eaba56dcf8066e4b96df12738a9dff7053
 *
 * sse2_strlen_atom -- length of a NUL-terminated byte string.
 *
 * Target:   32-bit x86 with SSE2, GNU as (AT&T syntax), preprocessed by cpp.
 * In:       4(%esp) = address of the string (single stack argument)
 * Out:      %eax    = number of bytes that precede the first NUL byte
 * Clobbers: %ecx, %edx, %xmm0-%xmm3, EFLAGS
 * Saved:    %esi, %edi, %ebx, %ebp are pushed before the 64-byte loop and
 *           popped before returning from it.
 *
 * Strategy:
 *   1. Bytes 0..15 are tested one at a time, so a short string never
 *      triggers a vector load.
 *   2. The next 16 chunks of 16 bytes, starting at the first 16-byte
 *      boundary above the string start, are tested with pcmpeqb/pmovmskb.
 *   3. Everything after that is scanned 64 aligned bytes per iteration.
 *
 * The vector loads in phases 2 and 3 are always 16-byte aligned.  They may
 * read bytes located after the terminating NUL, but only inside the aligned
 * 16-byte chunk (phase 2) or 64-byte block (phase 3) that holds the NUL.
 */

#define STRLEN sse2_strlen_atom

/* Local label helper: the .L prefix keeps labels out of the symbol table.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers over the assembler's CFI directives.  Each is overridable
   by defining the macro before this point.  */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

/* ENTRY: declare a global function symbol aligned to 16 bytes and open its
   CFI region.  */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END: close the CFI region and record the symbol size.  */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push or pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

/* Push/pop a register and keep the unwind information in sync.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp at function entry; the return
   address occupies 0(%esp).  */
#define PARMS		4
#define	STR		PARMS
/* ENTRANCE expands to nothing and RETURN to a plain ret.  */
#define ENTRANCE
#define RETURN		ret

	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* %edx = s */
	xor	%eax, %eax		/* %eax = 0; exit_tailN adds N to it */

	/* Phase 1: test s[0] .. s[15] individually.  */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)
	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)
	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)
	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/* Phase 2 setup.
	     %eax = first 16-byte boundary strictly above s (the bytes below
	            it were all covered by phase 1),
	     %ecx = s + 16, the bias subtracted again at L(exit).
	   From here on %edx no longer holds s; it receives the byte masks.  */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	/* Phase 2: sixteen aligned 16-byte probes.
	   Each probe compares the chunk at (%eax) against an all-zero
	   register and extracts one mask bit per byte into %edx.  A probe
	   that finds no NUL leaves its xmm register all-zero, so the
	   register can be reused later without clearing it again.
	   lea does not modify flags, so the jnz still acts on the test; at
	   L(exit) %eax therefore points 16 bytes past the matching chunk.  */
	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1		/* zero the register for the next probe */
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)


	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Phase 3 setup: round %eax down to a 64-byte boundary.  This can
	   step back over at most 48 bytes, all of which phase 2 has already
	   shown to be NUL-free, so scanning them again cannot produce a
	   false match.  */
	and	$-0x40, %eax
	PUSH (%esi)
	PUSH (%edi)
	PUSH (%ebx)
	PUSH (%ebp)
	xor	%ebp, %ebp		/* %ebp accumulates the OR of the four masks */

	/* Phase 3: probe four consecutive chunks per iteration.
	   %edx/%esi/%edi/%ebx = masks of chunks 0/1/2/3 of the block.
	   The loop repeats only while %ebp is still zero, so %ebp and
	   %xmm0-%xmm3 are all zero at the top of every iteration.  */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx
	pmovmskb %xmm1, %esi
	pmovmskb %xmm2, %edi
	pmovmskb %xmm3, %ebx
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp		/* ZF is set iff all four masks are zero */
	lea	64(%eax), %eax		/* flags from the final or are preserved */
	jz	L(aligned_64)

	/* A NUL lies in the block that ends at %eax.  Pick the first chunk
	   with a non-zero mask, move that mask into %edx, and rewind %eax so
	   that it points 16 bytes past that chunk, as L(exit) expects.  */
L(48leave):
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* chunk 3: %eax is already 16 past it */
	lea	(%eax), %eax		/* leaves %eax unchanged */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* chunk 2 */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* chunk 1 */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* chunk 0: its mask is already in %edx */
L(aligned_64_exit):
	POP (%ebp)
	POP (%ebx)
	POP (%edi)
	POP (%esi)

	/* Common vector exit.
	     %eax = address of the matching chunk + 16
	     %ecx = s + 16
	     %edx = non-zero 16-bit mask, bit i set iff byte i of the chunk is 0
	   After the subtraction %eax is the offset of the chunk from s; the
	   index of the lowest set mask bit is then added to it.  */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* no NUL in bytes 0..7 of the chunk */
	test	$0x01, %dl
	jnz	L(exit_tail0)

	test	$0x02, %dl
	jnz	L(exit_tail1)

	test	$0x04, %dl
	jnz	L(exit_tail2)

	test	$0x08, %dl
	jnz	L(exit_tail3)

	test	$0x10, %dl
	jnz	L(exit_tail4)

	test	$0x20, %dl
	jnz	L(exit_tail5)

	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax		/* %dl != 0 with bits 0..6 clear: bit 7 */
L(exit_tail0):
	RETURN

	/* The NUL is in bytes 8..15 of the chunk: bias by 8 and decode %dh
	   through the same exit_tailN stubs.  */
L(exit_high):
	add	$8, %eax
	test	$0x01, %dh
	jnz	L(exit_tail0)

	test	$0x02, %dh
	jnz	L(exit_tail1)

	test	$0x04, %dh
	jnz	L(exit_tail2)

	test	$0x08, %dh
	jnz	L(exit_tail3)

	test	$0x10, %dh
	jnz	L(exit_tail4)

	test	$0x20, %dh
	jnz	L(exit_tail5)

	test	$0x40, %dh
	jnz	L(exit_tail6)
	add	$7, %eax		/* only bit 15 of the mask remains */
	RETURN

	/* exit_tailN: add N to %eax and return.  Phase 1 enters these with
	   %eax = 0; the vector exits enter with %eax = chunk offset, plus 8
	   when coming from L(exit_high).  */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
	RETURN

END (STRLEN)

#if defined(__linux__) && defined(__ELF__)
	/* Tell the linker this object does not need an executable stack.  */
	.section .note.GNU-stack,"",@progbits
#endif