1#ifndef _ASM_X86_STRING_32_H
2#define _ASM_X86_STRING_32_H
3
4#ifdef __KERNEL__
5
/*
 * Prototypes of the string routines this header announces.
 *
 * Let gcc decide whether to inline or use the out of line functions: only
 * declarations appear here, each one preceded by the __HAVE_ARCH_* macro
 * that carries the routine's name.
 */

#define __HAVE_ARCH_STRCPY
extern char *strcpy(char *dest, const char *src);

#define __HAVE_ARCH_STRNCPY
extern char *strncpy(char *dest, const char *src, size_t count);

#define __HAVE_ARCH_STRCAT
extern char *strcat(char *dest, const char *src);

#define __HAVE_ARCH_STRNCAT
extern char *strncat(char *dest, const char *src, size_t count);

#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *cs, const char *ct);

#define __HAVE_ARCH_STRNCMP
extern int strncmp(const char *cs, const char *ct, size_t count);

#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *s, int c);

#define __HAVE_ARCH_STRLEN
extern size_t strlen(const char *s);
31
/*
 * __memcpy - copy @n bytes from @from to @to with string instructions
 * @to:   destination buffer
 * @from: source buffer
 * @n:    number of bytes to copy
 *
 * Moves n / 4 dwords with "rep movsl", then reloads the full count, masks
 * it down to n & 3 and moves those remaining bytes with "rep movsb" (the
 * byte copy is skipped when nothing remains).  Returns @to.
 *
 * d0, d1 and d2 are dummy outputs tied to the count, destination and
 * source registers; they exist only so the compiler knows the instructions
 * modify those registers.
 */
static __always_inline void *__memcpy(void *to, const void *from, size_t n)
{
	int d0, d1, d2;
	asm volatile("rep ; movsl\n\t"		/* n / 4 dwords */
		     "movl %4,%%ecx\n\t"	/* %4 is the untouched n */
		     "andl $3,%%ecx\n\t"	/* bytes left over */
		     "jz 1f\n\t"
		     "rep ; movsb\n\t"
		     "1:"
		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
		     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
		     : "memory");
	return to;
}
46
/*
 * __constant_memcpy - memcpy() body for a count known at compile time
 * @to:   destination buffer
 * @from: source buffer
 * @n:    number of bytes to copy
 *
 * This looks ugly, but the compiler can optimize it totally,
 * as the count is constant.
 *
 * Counts 0-6 and 8 are open-coded as plain loads and stores.  Any other
 * count moves n / 4 dwords with "movsl" ("rep" prefixed from 20 bytes up,
 * unrolled below that) and then the n % 4 tail with "movsw"/"movsb".
 * Returns @to.
 */
static __always_inline void *__constant_memcpy(void *to, const void *from,
					       size_t n)
{
	long esi, edi;	/* running source / destination addresses */
	if (!n)
		return to;

	/* The source is only read, so access it through const pointers. */
	switch (n) {
	case 1:
		*(char *)to = *(const char *)from;
		return to;
	case 2:
		*(short *)to = *(const short *)from;
		return to;
	case 4:
		*(int *)to = *(const int *)from;
		return to;
	case 3:
		*(short *)to = *(const short *)from;
		*((char *)to + 2) = *((const char *)from + 2);
		return to;
	case 5:
		*(int *)to = *(const int *)from;
		*((char *)to + 4) = *((const char *)from + 4);
		return to;
	case 6:
		*(int *)to = *(const int *)from;
		*((short *)to + 2) = *((const short *)from + 2);
		return to;
	case 8:
		*(int *)to = *(const int *)from;
		*((int *)to + 1) = *((const int *)from + 1);
		return to;
	}

	esi = (long)from;
	edi = (long)to;
	if (n >= 5 * 4) {
		/* large block: use rep prefix */
		int ecx;
		asm volatile("rep ; movsl"
			     : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
			     : "0" (n / 4), "1" (edi), "2" (esi)
			     : "memory"
		);
	} else {
		/*
		 * small block: don't clobber ecx + smaller code
		 *
		 * Exactly n / 4 of the four tests below are true, so that
		 * many single "movsl" instructions are issued; edi and esi
		 * carry the advanced addresses from one to the next.
		 */
		if (n >= 4 * 4)
			asm volatile("movsl"
				     : "=&D"(edi), "=&S"(esi)
				     : "0"(edi), "1"(esi)
				     : "memory");
		if (n >= 3 * 4)
			asm volatile("movsl"
				     : "=&D"(edi), "=&S"(esi)
				     : "0"(edi), "1"(esi)
				     : "memory");
		if (n >= 2 * 4)
			asm volatile("movsl"
				     : "=&D"(edi), "=&S"(esi)
				     : "0"(edi), "1"(esi)
				     : "memory");
		if (n >= 1 * 4)
			asm volatile("movsl"
				     : "=&D"(edi), "=&S"(esi)
				     : "0"(edi), "1"(esi)
				     : "memory");
	}
	switch (n % 4) {
		/* tail */
	case 0:
		return to;
	case 1:
		asm volatile("movsb"
			     : "=&D"(edi), "=&S"(esi)
			     : "0"(edi), "1"(esi)
			     : "memory");
		return to;
	case 2:
		asm volatile("movsw"
			     : "=&D"(edi), "=&S"(esi)
			     : "0"(edi), "1"(esi)
			     : "memory");
		return to;
	default:
		/* three bytes left: a word, then a byte */
		asm volatile("movsw\n\tmovsb"
			     : "=&D"(edi), "=&S"(esi)
			     : "0"(edi), "1"(esi)
			     : "memory");
		return to;
	}
}
143
/*
 * memcpy() selection: whichever configuration branch is taken below,
 * memcpy ends up defined as a function-like macro.
 */
#define __HAVE_ARCH_MEMCPY

#ifdef CONFIG_X86_USE_3DNOW

#include <asm/mmx.h>

/*
 *	This CPU favours 3DNow strongly (eg AMD Athlon)
 */

/*
 * Constant-count variant: copies shorter than 512 bytes stay with
 * __constant_memcpy(), anything longer is handed to _mmx_memcpy().
 */
static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
{
	if (len < 512)
		return __constant_memcpy(to, from, len);
	return _mmx_memcpy(to, from, len);
}

/*
 * Variable-count variant: same 512 byte threshold, with __memcpy() as
 * the short-copy path.
 */
static inline void *__memcpy3d(void *to, const void *from, size_t len)
{
	if (len < 512)
		return __memcpy(to, from, len);
	return _mmx_memcpy(to, from, len);
}

/* Choose the helper by whether the count is a compile-time constant. */
#define memcpy(t, f, n)				\
	(__builtin_constant_p((n))		\
	 ? __constant_memcpy3d((t), (f), (n))	\
	 : __memcpy3d((t), (f), (n)))

#else /* !CONFIG_X86_USE_3DNOW */

/*
 *	No 3D Now!
 */

#ifndef CONFIG_KMEMCHECK

#if (__GNUC__ >= 4)
/* gcc 4 and later: hand the copy to the compiler builtin. */
#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
#else
/* Older gcc: choose by whether the count is a compile-time constant. */
#define memcpy(t, f, n)				\
	(__builtin_constant_p((n))		\
	 ? __constant_memcpy((t), (f), (n))	\
	 : __memcpy((t), (f), (n)))
#endif
#else /* CONFIG_KMEMCHECK */
/*
 * kmemcheck becomes very happy if we use the REP instructions unconditionally,
 * because it means that we know both memory operands in advance.
 */
#define memcpy(t, f, n) __memcpy((t), (f), (n))
#endif /* CONFIG_KMEMCHECK */

#endif /* CONFIG_X86_USE_3DNOW */
198
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *dest, const void *src, size_t n);

/* memcmp() has no prototype here: it is mapped onto the compiler builtin. */
#define memcmp __builtin_memcmp

#define __HAVE_ARCH_MEMCHR
extern void *memchr(const void *cs, int c, size_t count);
206
/*
 * __memset_generic - fill @count bytes starting at @s with the byte @c
 * @s:     area to fill
 * @c:     fill byte
 * @count: number of bytes to fill
 *
 * One "rep stosb" with @c as the stored value, @s as the destination and
 * @count as the repeat count.  d0 and d1 are dummy outputs tied to the
 * count and destination registers, so the compiler knows they are
 * modified.  Returns @s.
 */
static inline void *__memset_generic(void *s, char c, size_t count)
{
	int d0, d1;
	asm volatile("rep\n\t"
		     "stosb"
		     : "=&c" (d0), "=&D" (d1)
		     : "a" (c), "1" (s), "0" (count)
		     : "memory");
	return s;
}

/*
 * Constant-count fill: currently the same code as the generic one.
 * we might want to write optimized versions of these later
 */
#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
220
/*
 * __constant_c_memset - fill with a 32-bit pattern, count not constant
 * @s:     area to fill
 * @c:     fill pattern; "stosl" stores all 32 bits of it, "stosw" and
 *         "stosb" its low 16 and 8 bits
 * @count: number of bytes to fill
 *
 * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
 * things 32 bits at a time even when we don't know the size of the
 * area at compile-time.
 *
 * count / 4 dwords are stored with "rep stosl"; bit 1 of @count then
 * selects one extra "stosw" and bit 0 one extra "stosb".  d0 and d1 are
 * dummy outputs tied to the count and destination registers.
 * Returns @s.
 */
static __always_inline
void *__constant_c_memset(void *s, unsigned long c, size_t count)
{
	int d0, d1;
	asm volatile("rep ; stosl\n\t"
		     "testb $2,%b3\n\t"		/* %b3: low byte of count */
		     "je 1f\n\t"
		     "stosw\n"
		     "1:\ttestb $1,%b3\n\t"
		     "je 2f\n\t"
		     "stosb\n"
		     "2:"
		     : "=&c" (d0), "=&D" (d1)
		     : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
		     : "memory");
	return s;
}
243
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
extern size_t strnlen(const char *s, size_t count);
/* end of additional stuff */

#define __HAVE_ARCH_STRSTR
extern char *strstr(const char *cs, const char *ct);
251
/*
 * __constant_c_and_count_memset - fill with constant pattern and count
 * @s:       area to fill
 * @pattern: fill pattern
 * @count:   number of bytes to fill
 *
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as we by now know that both pattern and count are constant.
 *
 * Counts 0-4 are plain stores of the low 8 bits, the low 16 bits or the
 * whole of @pattern.  Larger counts store count / 4 dwords with
 * "rep stosl" and finish the count % 4 tail with "stosw"/"stosb".
 * Returns @s.
 */
static __always_inline
void *__constant_c_and_count_memset(void *s, unsigned long pattern,
				    size_t count)
{
	switch (count) {
	case 0:
		return s;
	case 1:
		*(unsigned char *)s = pattern & 0xff;
		return s;
	case 2:
		*(unsigned short *)s = pattern & 0xffff;
		return s;
	case 3:
		*(unsigned short *)s = pattern & 0xffff;
		*((unsigned char *)s + 2) = pattern & 0xff;
		return s;
	case 4:
		*(unsigned long *)s = pattern;
		return s;
	}

/*
 * "rep ; stosl" over the count / 4 whole dwords, followed by whatever tail
 * instructions are passed in as x.  Uses d0, d1 and eax from the block
 * below; d0 and d1 are dummy outputs for the count and destination
 * registers.
 */
#define COMMON(x)							\
	asm volatile("rep ; stosl"					\
		     x							\
		     : "=&c" (d0), "=&D" (d1)				\
		     : "a" (eax), "0" (count/4), "1" ((long)s)	\
		     : "memory")

	{
		int d0, d1;
#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
		/* Workaround for broken gcc 4.0 */
		register unsigned long eax asm("%eax") = pattern;
#else
		unsigned long eax = pattern;
#endif

		/* The tail is 0-3 bytes: nothing, a byte, a word, or both. */
		switch (count % 4) {
		case 0:
			COMMON("");
			return s;
		case 1:
			COMMON("\n\tstosb");
			return s;
		case 2:
			COMMON("\n\tstosw");
			return s;
		default:
			COMMON("\n\tstosw\n\tstosb");
			return s;
		}
	}

#undef COMMON
}
312
/*
 * Fill byte is constant (c is the already replicated pattern): choose by
 * whether the count is a compile-time constant as well.
 */
#define __constant_c_x_memset(s, c, count)			\
	(__builtin_constant_p(count)				\
	 ? __constant_c_and_count_memset((s), (c), (count))	\
	 : __constant_c_memset((s), (c), (count)))

/* Fill byte is not constant: choose by whether the count is constant. */
#define __memset(s, c, count)				\
	(__builtin_constant_p(count)			\
	 ? __constant_count_memset((s), (c), (count))	\
	 : __memset_generic((s), (c), (count)))

#define __HAVE_ARCH_MEMSET
#if (__GNUC__ >= 4)
/* gcc 4 and later: hand the fill to the compiler builtin. */
#define memset(s, c, count) __builtin_memset(s, c, count)
#else
/*
 * Older gcc: a constant fill byte is replicated into all four bytes of a
 * 32-bit pattern (0x01010101 times the byte) for the dword-wide helpers;
 * a variable one goes through __memset().
 */
#define memset(s, c, count)						\
	(__builtin_constant_p(c)					\
	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
				 (count))				\
	 : __memset((s), (c), (count)))
#endif
333
/*
 * find the first occurrence of byte 'c', or 1 past the area if none
 */
#define __HAVE_ARCH_MEMSCAN
extern void *memscan(void *addr, int c, size_t size);
339
340#endif /* __KERNEL__ */
341
342#endif /* _ASM_X86_STRING_32_H */
343