#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
 */

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR				BITOP_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))
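
/*
 * Illustrative sketch (not part of this header): for a compile-time
 * constant nr, the byte-wide locked ops below address the byte holding
 * the bit and build an 8-bit mask.  The equivalent (non-atomic) C is
 * shown here; e.g. nr == 13 selects the byte at offset 13 >> 3 == 1 and
 * the mask 1 << (13 & 7) == 0x20.
 */
#if 0
static void const_mask_example(void *addr)
{
	unsigned int nr = 13;	/* stands in for a compile-time constant */
	unsigned char *byte = (unsigned char *)addr + (nr >> 3);	/* byte 1 */

	*byte |= 1 << (nr & 7);	/* mask 0x20; the real op is LOCK ORB */
}
#endif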

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered.  See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non-x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(unsigned int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "orb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr))
			: "memory");
	} else {
		asm volatile(LOCK_PREFIX "bts %1,%0"
			: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
	}
}
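
/*
 * Illustrative sketch (not part of this header): the flag word and bit
 * names are hypothetical.  Because set_bit() is atomic, two CPUs may set
 * different bits in the same word concurrently without losing updates,
 * unlike a plain read-modify-write.
 */
#if 0
static unsigned long device_flags;		/* hypothetical flag word */
#define DEV_FLAG_READY	0			/* hypothetical bit numbers */
#define DEV_FLAG_ERROR	1

static void mark_ready(void)
{
	set_bit(DEV_FLAG_READY, &device_flags);	/* atomic vs. other CPUs */
}
#endif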

/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered.  However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "andb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)~CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btr %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/*
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit_unlock() is atomic and implies release semantics before the
 * memory operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
	barrier();
	clear_bit(nr, addr);
}
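
/*
 * Illustrative sketch (not part of this header): the canonical bit-lock
 * pairing.  test_and_set_bit_lock() (defined below) acquires the lock,
 * clear_bit_unlock() releases it with the required ordering.  The lock
 * word and bit number are hypothetical; cpu_relax() comes from
 * <asm/processor.h>.
 */
#if 0
static unsigned long lock_word;			/* hypothetical lock storage */
#define MY_LOCK_BIT	0			/* hypothetical bit number */

static void my_lock(void)
{
	while (test_and_set_bit_lock(MY_LOCK_BIT, &lock_word))
		cpu_relax();			/* spin until the bit was 0 */
}

static void my_unlock(void)
{
	clear_bit_unlock(MY_LOCK_BIT, &lock_word);
}
#endif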

static inline void __clear_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

/*
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit_unlock() is non-atomic and implies release semantics before
 * the memory operation. It can be used for an unlock if no other CPUs can
 * concurrently modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
	barrier();
	__clear_bit(nr, addr);
}

#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()
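
/*
 * Illustrative sketch (not part of this header): when clear_bit() is used
 * to publish completion of earlier stores, the barrier macros above supply
 * the ordering that clear_bit() itself does not document.  The state word,
 * bit number, and result pointer are hypothetical.
 */
#if 0
static unsigned long pending;			/* hypothetical state word */
#define WORK_PENDING	0			/* hypothetical bit number */

static void finish_work(int *result)
{
	*result = 42;				/* work done before the flag... */
	smp_mb__before_clear_bit();		/* ...must be visible first */
	clear_bit(WORK_PENDING, &pending);
}
#endif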

/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(int nr, volatile unsigned long *addr)
{
	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(int nr, volatile unsigned long *addr)
{
	if (IS_IMMEDIATE(nr)) {
		asm volatile(LOCK_PREFIX "xorb %1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" ((u8)CONST_MASK(nr)));
	} else {
		asm volatile(LOCK_PREFIX "btc %1,%0"
			: BITOP_ADDR(addr)
			: "Ir" (nr));
	}
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}
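
/*
 * Illustrative sketch (not part of this header): test_and_set_bit() gives
 * a race-free "claim" primitive.  If several CPUs race on the same slot,
 * exactly one of them sees the old value 0.  The bitmap is hypothetical.
 */
#if 0
static unsigned long claimed;			/* hypothetical bitmap */

static int try_claim_slot(int slot)
{
	/* Returns nonzero iff this caller won the slot. */
	return !test_and_set_bit(slot, &claimed);
}
#endif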

/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(int nr, volatile unsigned long *addr)
{
	return test_and_set_bit(nr, addr);
}

/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm("bts %2,%1\n\t"
	    "sbb %0,%0"
	    : "=r" (oldbit), ADDR
	    : "Ir" (nr));
	return oldbit;
}

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}
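
/*
 * Illustrative sketch (not part of this header): consuming a pending-event
 * flag with test_and_clear_bit() guarantees each event is handled exactly
 * once, even if the consumer races with a producer's set_bit().  The event
 * word and bit name are hypothetical.
 */
#if 0
static unsigned long events;			/* hypothetical event word */
#define EV_RX		0			/* hypothetical bit number */

static void handle_events(void)
{
	if (test_and_clear_bit(EV_RX, &events)) {
		/* process the event; runs at most once per set_bit() */
	}
}
#endif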

/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btr %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr));
	return oldbit;
}

/* WARNING: non-atomic and it can be reordered! */
static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile("btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR
		     : "Ir" (nr) : "memory");

	return oldbit;
}

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
{
	int oldbit;

	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

	return oldbit;
}

static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr % BITS_PER_LONG)) &
		(addr[nr / BITS_PER_LONG])) != 0;
}

static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
{
	int oldbit;

	asm volatile("bt %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit)
		     : "m" (*(unsigned long *)addr), "Ir" (nr));

	return oldbit;
}

#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static int test_bit(int nr, const volatile unsigned long *addr);
#endif

#define test_bit(nr, addr)			\
	(__builtin_constant_p((nr))		\
	 ? constant_test_bit((nr), (addr))	\
	 : variable_test_bit((nr), (addr)))
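
/*
 * Illustrative sketch (not part of this header): test_bit() dispatches at
 * compile time, so a constant bit number becomes a plain C load-and-mask
 * while a variable bit number uses the BT instruction.
 */
#if 0
static int test_bit_example(const volatile unsigned long *addr, int nr)
{
	int a = test_bit(5, addr);	/* constant: constant_test_bit() */
	int b = test_bit(nr, addr);	/* variable: variable_test_bit() */

	return a && b;
}
#endif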

/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
	asm("bsf %1,%0"
		: "=r" (word)
		: "rm" (word));
	return word;
}
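
/*
 * Illustrative sketch (not part of this header): the usual pattern for
 * walking every set bit in a word, clearing the lowest one each round.
 * The loop guard matters because __ffs(0) is undefined.
 */
#if 0
static void for_each_set_bit_example(unsigned long word)
{
	while (word) {
		unsigned long bit = __ffs(word);	/* lowest set bit */

		/* handle bit 'bit' here */
		word &= word - 1;			/* clear that bit */
	}
}
#endif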

/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
	asm("bsf %1,%0"
		: "=r" (word)
		: "r" (~word));
	return word;
}

/**
 * __fls - find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}

#undef ADDR

#ifdef __KERNEL__
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says
	 * the dest reg is undefined if x==0, but Intel's CPU architects have
	 * stated that in that case its value is left unchanged, except that
	 * the top 32 bits are cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	long tmp = -1;
	asm("bsfl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (tmp));
#elif defined(CONFIG_X86_CMOV)
	asm("bsfl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "r" (-1));
#else
	asm("bsfl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
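
/*
 * Illustrative sketch (not part of this header): ffs() is 1-based and
 * returns 0 for 0, matching the libc convention rather than __ffs().
 * BUG_ON() here is only to spell out the expected values.
 */
#if 0
static void ffs_examples(void)
{
	BUG_ON(ffs(0) != 0);		/* no bit set */
	BUG_ON(ffs(1) != 1);		/* bit 0 -> position 1 */
	BUG_ON(ffs(0x10) != 5);		/* bit 4 -> position 5 */
	BUG_ON(__ffs(0x10) != 4);	/* 0-based counterpart */
}
#endif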

/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says
	 * the dest reg is undefined if x==0, but Intel's CPU architects have
	 * stated that in that case its value is left unchanged, except that
	 * the top 32 bits are cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	long tmp = -1;
	asm("bsrl %1,%0"
	    : "=r" (r)
	    : "rm" (x), "0" (tmp));
#elif defined(CONFIG_X86_CMOV)
	asm("bsrl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "rm" (-1));
#else
	asm("bsrl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
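
/*
 * Illustrative sketch (not part of this header): fls() gives a quick
 * integer log2 (fls(x) - 1 is floor(log2(x)) for x > 0), which makes it
 * handy for rounding a size up to the next power of two.
 */
#if 0
static unsigned int roundup_pow_of_two_example(unsigned int x)
{
	if (x <= 1)
		return 1;
	return 1U << fls(x - 1);	/* e.g. x == 33 -> 1 << 6 == 64 */
}
#endif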

/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
	long bitpos = -1;
	/*
	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says
	 * the dest reg is undefined if x==0, but Intel's CPU architects have
	 * stated that in that case its value is left unchanged.
	 */
	asm("bsrq %1,%0"
	    : "+r" (bitpos)
	    : "rm" (x));
	return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif

#include <asm-generic/bitops/find.h>

#include <asm-generic/bitops/sched.h>

#define ARCH_HAS_FAST_MULTIPLIER 1

#include <asm/arch_hweight.h>

#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */