Intrin.h revision 0cc83d15e8dd51b04d3927efe1740df600491840
1/* ===-------- Intrin.h ---------------------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24/* Only include this if we're compiling for the windows platform. */
25#ifndef _MSC_VER
26#include_next <Intrin.h>
27#else
28
29#ifndef __INTRIN_H
30#define __INTRIN_H
31
32/* First include the standard intrinsics. */
33#include <x86intrin.h>
34
35#ifdef __cplusplus
36extern "C" {
37#endif
38
39/* And the random ones that aren't in those files. */
40__m64 _m_from_float(float);
41__m64 _m_from_int(int _l);
42void _m_prefetch(void *);
43float _m_to_float(__m64);
44int _m_to_int(__m64 _M);
45
46/* Other assorted instruction intrinsics. */
47void __addfsbyte(unsigned long, unsigned char);
48void __addfsdword(unsigned long, unsigned long);
49void __addfsword(unsigned long, unsigned short);
50void __code_seg(const char *);
51void __cpuid(int[4], int);
52void __cpuidex(int[4], int, int);
53void __debugbreak(void);
54__int64 __emul(int, int);
55unsigned __int64 __emulu(unsigned int, unsigned int);
56void __cdecl __fastfail(unsigned int);
57unsigned int __getcallerseflags(void);
58void __halt(void);
59unsigned char __inbyte(unsigned short);
60void __inbytestring(unsigned short, unsigned char *, unsigned long);
61void __incfsbyte(unsigned long);
62void __incfsdword(unsigned long);
63void __incfsword(unsigned long);
64unsigned long __indword(unsigned short);
65void __indwordstring(unsigned short, unsigned long *, unsigned long);
66void __int2c(void);
67void __invlpg(void *);
68unsigned short __inword(unsigned short);
69void __inwordstring(unsigned short, unsigned short *, unsigned long);
70void __lidt(void *);
71unsigned __int64 __ll_lshift(unsigned __int64, int);
72__int64 __ll_rshift(__int64, int);
73void __llwpcb(void *);
74unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
75void __lwpval32(unsigned int, unsigned int, unsigned int);
76unsigned int __lzcnt(unsigned int);
77unsigned short __lzcnt16(unsigned short);
78void __movsb(unsigned char *, unsigned char const *, size_t);
79void __movsd(unsigned long *, unsigned long const *, size_t);
80void __movsw(unsigned short *, unsigned short const *, size_t);
81void __nop(void);
82void __nvreg_restore_fence(void);
83void __nvreg_save_fence(void);
84void __outbyte(unsigned short, unsigned char);
85void __outbytestring(unsigned short, unsigned char *, unsigned long);
86void __outdword(unsigned short, unsigned long);
87void __outdwordstring(unsigned short, unsigned long *, unsigned long);
88void __outword(unsigned short, unsigned short);
89void __outwordstring(unsigned short, unsigned short *, unsigned long);
90static __inline__
91unsigned int __popcnt(unsigned int);
92static __inline__
93unsigned short __popcnt16(unsigned short);
94unsigned __int64 __rdtsc(void);
95unsigned __int64 __rdtscp(unsigned int *);
96unsigned long __readcr0(void);
97unsigned long __readcr2(void);
98unsigned long __readcr3(void);
99unsigned long __readcr5(void);
100unsigned long __readcr8(void);
101unsigned int __readdr(unsigned int);
102unsigned int __readeflags(void);
103unsigned char __readfsbyte(unsigned long);
104unsigned long __readfsdword(unsigned long);
105unsigned __int64 __readfsqword(unsigned long);
106unsigned short __readfsword(unsigned long);
107unsigned __int64 __readmsr(unsigned long);
108unsigned __int64 __readpmc(unsigned long);
109unsigned long __segmentlimit(unsigned long);
110void __sidt(void *);
111void *__slwpcb(void);
112void __stosb(unsigned char *, unsigned char, size_t);
113void __stosd(unsigned long *, unsigned long, size_t);
114void __stosw(unsigned short *, unsigned short, size_t);
115void __svm_clgi(void);
116void __svm_invlpga(void *, int);
117void __svm_skinit(int);
118void __svm_stgi(void);
119void __svm_vmload(size_t);
120void __svm_vmrun(size_t);
121void __svm_vmsave(size_t);
122void __ud2(void);
123unsigned __int64 __ull_rshift(unsigned __int64, int);
124void __vmx_off(void);
125void __vmx_vmptrst(unsigned __int64 *);
126void __wbinvd(void);
127void __writecr0(unsigned int);
128void __writecr3(unsigned int);
129void __writecr4(unsigned int);
130void __writecr8(unsigned int);
131void __writedr(unsigned int, unsigned int);
132void __writeeflags(unsigned int);
133void __writefsbyte(unsigned long, unsigned char);
134void __writefsdword(unsigned long, unsigned long);
135void __writefsqword(unsigned long, unsigned __int64);
136void __writefsword(unsigned long, unsigned short);
137void __writemsr(unsigned long, unsigned __int64);
138static __inline__
139void *_AddressOfReturnAddress(void);
140unsigned int _andn_u32(unsigned int, unsigned int);
141unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
142unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
143unsigned int _bextri_u32(unsigned int, unsigned int);
144static __inline__
145unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
146static __inline__
147unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
148static __inline__
149unsigned char _bittest(long const *, long);
150static __inline__
151unsigned char _bittestandcomplement(long *, long);
152static __inline__
153unsigned char _bittestandreset(long *, long);
154static __inline__
155unsigned char _bittestandset(long *, long);
156unsigned int _blcfill_u32(unsigned int);
157unsigned int _blci_u32(unsigned int);
158unsigned int _blcic_u32(unsigned int);
159unsigned int _blcmsk_u32(unsigned int);
160unsigned int _blcs_u32(unsigned int);
161unsigned int _blsfill_u32(unsigned int);
162unsigned int _blsi_u32(unsigned int);
163unsigned int _blsic_u32(unsigned int);
164unsigned int _blsmsk_u32(unsigned int);
165unsigned int _blsmsk_u32(unsigned int);
166unsigned int _blsr_u32(unsigned int);
167unsigned int _blsr_u32(unsigned int);
168unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
169unsigned long __cdecl _byteswap_ulong(unsigned long);
170unsigned short __cdecl _byteswap_ushort(unsigned short);
171unsigned _bzhi_u32(unsigned int, unsigned int);
172void __cdecl _disable(void);
173void __cdecl _enable(void);
174void __cdecl _fxrstor(void const *);
175void __cdecl _fxsave(void *);
176long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
177static __inline__
178long _InterlockedAnd(long volatile *_Value, long _Mask);
179static __inline__
180short _InterlockedAnd16(short volatile *_Value, short _Mask);
181static __inline__
182char _InterlockedAnd8(char volatile *_Value, char _Mask);
183unsigned char _interlockedbittestandreset(long volatile *, long);
184unsigned char _interlockedbittestandset(long volatile *, long);
185static __inline__
186long __cdecl _InterlockedCompareExchange(long volatile *_Destination,
187                                         long _Exchange, long _Comparand);
188long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
189long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
190static __inline__
191short _InterlockedCompareExchange16(short volatile *_Destination,
192                                    short _Exchange, short _Comparand);
193static __inline__
194__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination,
195                                      __int64 _Exchange, __int64 _Comparand);
196__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64,
197                                                 __int64);
198__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
199                                                 __int64);
200static __inline__
201char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange,
202                                  char _Comparand);
203void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
204                                                    void *);
205void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
206                                                    void *);
207static __inline__
208long __cdecl _InterlockedDecrement(long volatile *_Addend);
209static __inline__
210short _InterlockedDecrement16(short volatile *_Addend);
211static __inline__
212long __cdecl _InterlockedExchange(long volatile *_Target, long _Value);
213static __inline__
214short _InterlockedExchange16(short volatile *_Target, short _Value);
215static __inline__
216char _InterlockedExchange8(char volatile *_Target, char _Value);
217static __inline__
218long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value);
219long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
220long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
221static __inline__
222char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value);
223static __inline__
224long __cdecl _InterlockedIncrement(long volatile *_Addend);
225static __inline__
226short _InterlockedIncrement16(short volatile *_Addend);
227static __inline__
228long _InterlockedOr(long volatile *_Value, long _Mask);
229static __inline__
230short _InterlockedOr16(short volatile *_Value, short _Mask);
231static __inline__
232char _InterlockedOr8(char volatile *_Value, char _Mask);
233static __inline__
234long _InterlockedXor(long volatile *_Value, long _Mask);
235static __inline__
236short _InterlockedXor16(short volatile *_Value, short _Mask);
237static __inline__
238char _InterlockedXor8(char volatile *_Value, char _Mask);
239void __cdecl _invpcid(unsigned int, void *);
240static __inline__
241unsigned long __cdecl _lrotl(unsigned long, int);
242static __inline__
243unsigned long __cdecl _lrotr(unsigned long, int);
244static __inline__
245unsigned int _lzcnt_u32(unsigned int);
246void _ReadBarrier(void);
247void _ReadWriteBarrier(void);
248static __inline__
249void *_ReturnAddress(void);
250unsigned int _rorx_u32(unsigned int, const unsigned int);
251int __cdecl _rdrand16_step(unsigned short *);
252int __cdecl _rdrand32_step(unsigned int *);
253static __inline__
254unsigned int __cdecl _rotl(unsigned int _Value, int _Shift);
255static __inline__
256unsigned short _rotl16(unsigned short _Value, unsigned char _Shift);
257static __inline__
258unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift);
259static __inline__
260unsigned char _rotl8(unsigned char _Value, unsigned char _Shift);
261static __inline__
262unsigned int __cdecl _rotr(unsigned int _Value, int _Shift);
263static __inline__
264unsigned short _rotr16(unsigned short _Value, unsigned char _Shift);
265static __inline__
266unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift);
267static __inline__
268unsigned char _rotr8(unsigned char _Value, unsigned char _Shift);
269int _sarx_i32(int, unsigned int);
270
271/* FIXME: Need definition for jmp_buf.
272   int __cdecl _setjmp(jmp_buf); */
273
274unsigned int _shlx_u32(unsigned int, unsigned int);
275unsigned int _shrx_u32(unsigned int, unsigned int);
276void _Store_HLERelease(long volatile *, long);
277void _Store64_HLERelease(__int64 volatile *, __int64);
278void _StorePointer_HLERelease(void *volatile *, void *);
279unsigned int _t1mskc_u32(unsigned int);
280unsigned int _tzcnt_u32(unsigned int);
281unsigned int _tzcnt_u32(unsigned int);
282unsigned int _tzmsk_u32(unsigned int);
283void _WriteBarrier(void);
284void _xabort(const unsigned int imm);
285unsigned __int32 xbegin(void);
286void _xend(void);
287unsigned __int64 __cdecl _xgetbv(unsigned int);
288void __cdecl _xrstor(void const *, unsigned __int64);
289void __cdecl _xsave(void *, unsigned __int64);
290void __cdecl _xsaveopt(void *, unsigned __int64);
291void __cdecl _xsetbv(unsigned int, unsigned __int64);
292unsigned char _xtest(void);
293
294/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
295#ifdef __x86_64__
296void __addgsbyte(unsigned long, unsigned char);
297void __addgsdword(unsigned long, unsigned long);
298void __addgsqword(unsigned long, unsigned __int64);
299void __addgsword(unsigned long, unsigned short);
300void __faststorefence(void);
301void __incgsbyte(unsigned long);
302void __incgsdword(unsigned long);
303void __incgsqword(unsigned long);
304void __incgsword(unsigned long);
305unsigned __int64 __popcnt64(unsigned __int64);
306unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
307                                unsigned __int64 _HighPart,
308                                unsigned char _Shift);
309unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
310                                 unsigned __int64 _HighPart,
311                                 unsigned char _Shift);
312void __stosq(unsigned __int64 *, unsigned __int64, size_t);
313unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
314unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
315unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
316static __inline__
317unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
318static __inline__
319unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
320static __inline__
321unsigned char _bittest64(__int64 const *, __int64);
322static __inline__
323unsigned char _bittestandcomplement64(__int64 *, __int64);
324static __inline__
325unsigned char _bittestandreset64(__int64 *, __int64);
326static __inline__
327unsigned char _bittestandset64(__int64 *, __int64);
328unsigned __int64 _blcfill_u64(unsigned __int64);
329unsigned __int64 _blci_u64(unsigned __int64);
330unsigned __int64 _blcic_u64(unsigned __int64);
331unsigned __int64 _blcmsk_u64(unsigned __int64);
332unsigned __int64 _blcs_u64(unsigned __int64);
333unsigned __int64 _blsfill_u64(unsigned __int64);
334unsigned __int64 _blsi_u64(unsigned __int64);
335unsigned __int64 _blsic_u64(unsigned __int64);
336unsigned __int64 _blmsk_u64(unsigned __int64);
337unsigned __int64 _blsr_u64(unsigned __int64);
338unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
339unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int);
340void __cdecl _fxrstor64(void const *);
341void __cdecl _fxsave64(void *);
342long _InterlockedAnd_np(long volatile *_Value, long _Mask);
343short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
344__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
345char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
346unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
347unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
348long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
349                                    long _Comparand);
350unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
351                                             __int64 _ExchangeHigh,
352                                             __int64 _ExchangeLow,
353                                             __int64 *_CompareandResult);
354unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
355                                                __int64 _ExchangeHigh,
356                                                __int64 _ExchangeLow,
357                                                __int64 *_ComparandResult);
358short _InterlockedCompareExchange16_np(short volatile *_Destination,
359                                       short _Exchange, short _Comparand);
360__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
361                                         __int64 _Exchange, __int64 _Comparand);
362void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
363                                            void *_Exchange, void *_Comparand);
364long _InterlockedOr_np(long volatile *_Value, long _Mask);
365short _InterlockedOr16_np(short volatile *_Value, short _Mask);
366__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);
367char _InterlockedOr8_np(char volatile *_Value, char _Mask);
368long _InterlockedXor_np(long volatile *_Value, long _Mask);
369short _InterlockedXor16_np(short volatile *_Value, short _Mask);
370__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);
371char _InterlockedXor8_np(char volatile *_Value, char _Mask);
372unsigned __int64 _lzcnt_u64(unsigned __int64);
373__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand,
374                __int64 *_HighProduct);
375unsigned int __cdecl _readfsbase_u32(void);
376unsigned __int64 __cdecl _readfsbase_u64(void);
377unsigned int __cdecl _readgsbase_u32(void);
378unsigned __int64 __cdecl _readgsbase_u64(void);
379unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);
380unsigned __int64 _tzcnt_u64(unsigned __int64);
381unsigned __int64 _tzmsk_u64(unsigned __int64);
382unsigned __int64 _umul128(unsigned __int64 _Multiplier,
383                          unsigned __int64 _Multiplicand,
384                          unsigned __int64 *_HighProduct);
385void __cdecl _writefsbase_u32(unsigned int);
386void _cdecl _writefsbase_u64(unsigned __int64);
387void __cdecl _writegsbase_u32(unsigned int);
388void __cdecl _writegsbase_u64(unsigned __int64);
389void __cdecl _xrstor64(void const *, unsigned __int64);
390void __cdecl _xsave64(void *, unsigned __int64);
391void __cdecl _xsaveopt64(void *, unsigned __int64);
392
393#endif /* __x86_64__ */
394
395/*----------------------------------------------------------------------------*\
396|* Bit Twiddling
397\*----------------------------------------------------------------------------*/
/* Rotate the 8-bit _Value left by _Shift positions; only the low three bits
 * of _Shift are used, and a count of zero returns _Value unchanged. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_rotl8(unsigned char _Value, unsigned char _Shift) {
  const unsigned int count = _Shift & 0x7u;
  if (count == 0)
    return _Value;
  /* Bits pushed past bit 7 are dropped by the conversion to unsigned char. */
  return (unsigned char)((_Value << count) | (_Value >> (8u - count)));
}
/* Rotate the 8-bit _Value right by _Shift positions; only the low three bits
 * of _Shift are used, and a count of zero returns _Value unchanged. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_rotr8(unsigned char _Value, unsigned char _Shift) {
  const unsigned int count = _Shift & 0x7u;
  if (count == 0)
    return _Value;
  /* Bits pushed past bit 7 are dropped by the conversion to unsigned char. */
  return (unsigned char)((_Value >> count) | (_Value << (8u - count)));
}
/* Rotate the 16-bit _Value left by _Shift positions; only the low four bits
 * of _Shift are used, and a count of zero returns _Value unchanged. */
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
_rotl16(unsigned short _Value, unsigned char _Shift) {
  const unsigned int count = _Shift & 0xfu;
  if (count == 0)
    return _Value;
  /* Bits pushed past bit 15 are dropped by the conversion to unsigned short. */
  return (unsigned short)((_Value << count) | (_Value >> (16u - count)));
}
/* Rotate the 16-bit _Value right by _Shift positions; only the low four bits
 * of _Shift are used, and a count of zero returns _Value unchanged. */
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
_rotr16(unsigned short _Value, unsigned char _Shift) {
  const unsigned int count = _Shift & 0xfu;
  if (count == 0)
    return _Value;
  /* Bits pushed past bit 15 are dropped by the conversion to unsigned short. */
  return (unsigned short)((_Value >> count) | (_Value << (16u - count)));
}
/* Rotate the 32-bit _Value left by _Shift positions; only the low five bits
 * of _Shift are used, and a count of zero returns _Value unchanged. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_rotl(unsigned int _Value, int _Shift) {
  const unsigned int count = (unsigned int)_Shift & 0x1fu;
  if (count == 0)
    return _Value;
  return (_Value << count) | (_Value >> (32u - count));
}
/* Rotate the 32-bit _Value right by _Shift positions; only the low five bits
 * of _Shift are used, and a count of zero returns _Value unchanged. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_rotr(unsigned int _Value, int _Shift) {
  const unsigned int count = (unsigned int)_Shift & 0x1fu;
  if (count == 0)
    return _Value;
  return (_Value >> count) | (_Value << (32u - count));
}
/* Rotate _Value left by _Shift positions across the full width of
 * unsigned long.  The width comes from sizeof rather than a fixed 32, so the
 * result is still a true rotation where long is wider than 32 bits; with a
 * 32-bit long the behaviour is unchanged.  The count is reduced modulo the
 * width, and a reduced count of zero returns _Value unchanged. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
_lrotl(unsigned long _Value, int _Shift) {
  const unsigned int width =
      (unsigned int)(sizeof(unsigned long) * __CHAR_BIT__);
  const unsigned int count = (unsigned int)_Shift & (width - 1u);
  if (count == 0)
    return _Value;
  return (_Value << count) | (_Value >> (width - count));
}
/* Rotate _Value right by _Shift positions across the full width of
 * unsigned long.  The width comes from sizeof rather than a fixed 32, so the
 * result is still a true rotation where long is wider than 32 bits; with a
 * 32-bit long the behaviour is unchanged.  The count is reduced modulo the
 * width, and a reduced count of zero returns _Value unchanged. */
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
_lrotr(unsigned long _Value, int _Shift) {
  const unsigned int width =
      (unsigned int)(sizeof(unsigned long) * __CHAR_BIT__);
  const unsigned int count = (unsigned int)_Shift & (width - 1u);
  if (count == 0)
    return _Value;
  return (_Value >> count) | (_Value << (width - count));
}
438static
439__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
440_rotl64(unsigned __int64 _Value, int _Shift) {
441  _Shift &= 0x3f;
442  return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
443}
444static
445__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
446_rotr64(unsigned __int64 _Value, int _Shift) {
447  _Shift &= 0x3f;
448  return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
449}
450/*----------------------------------------------------------------------------*\
451|* Bit Counting and Testing
452\*----------------------------------------------------------------------------*/
/* Find the lowest set bit of _Mask.  On success stores its zero-based
 * position in *_Index and returns 1; when _Mask is zero returns 0 and leaves
 * *_Index untouched. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanForward(unsigned long *_Index, unsigned long _Mask) {
  unsigned char found = 0;
  if (_Mask != 0) {
    *_Index = __builtin_ctzl(_Mask);
    found = 1;
  }
  return found;
}
/* Find the highest set bit of _Mask.  On success stores its zero-based
 * position in *_Index and returns 1; when _Mask is zero returns 0 and leaves
 * *_Index untouched. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
  if (!_Mask)
    return 0;
  /* __builtin_clzl counts leading zeros over the full width of unsigned
     long, so the top bit index is taken from sizeof rather than assumed to
     be 31. */
  *_Index = (sizeof(_Mask) * __CHAR_BIT__ - 1) -
            (unsigned long)__builtin_clzl(_Mask);
  return 1;
}
/* Count the leading zero bits of the 32-bit value a; returns 32 when a is
 * zero. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_lzcnt_u32(unsigned int a) {
  if (!a)
    return 32;
  /* Use the unsigned int builtin: the unsigned long variant would count the
     extra high bits wherever long is wider than int. */
  return (unsigned int)__builtin_clz(a);
}
/* Return the number of set bits in the 16-bit value. */
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__popcnt16(unsigned short value) {
  const int bits = __builtin_popcount((int)value);
  return (unsigned short)bits;
}
/* Return the number of set bits in the 32-bit value. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__popcnt(unsigned int value) {
  const int bits = __builtin_popcount(value);
  return (unsigned int)bits;
}
/* Return bit number b of the long pointed to by a (1 if set, 0 if clear). */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittest(long const *a, long b) {
  const long shifted = *a >> b;
  return (unsigned char)(shifted & 1);
}
/* Flip bit number b of *a and return the bit's previous value (0 or 1).
 * The mask is built as an unsigned long so the top bit of a long can be
 * addressed without overflowing an int shift or sign-extending the mask. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandcomplement(long *a, long b) {
  const unsigned long old = (unsigned long)*a;
  const unsigned long bit = 1UL << b;
  *a = (long)(old ^ bit);
  return (unsigned char)((old >> b) & 1UL);
}
/* Clear bit number b of *a and return the bit's previous value (0 or 1).
 * The mask is built as an unsigned long so that clearing one bit never
 * disturbs other bits of a long wider than int. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandreset(long *a, long b) {
  const unsigned long old = (unsigned long)*a;
  const unsigned long bit = 1UL << b;
  *a = (long)(old & ~bit);
  return (unsigned char)((old >> b) & 1UL);
}
/* Set bit number b of *a and return the bit's previous value (0 or 1).
 * The mask is built as an unsigned long so the top bit of a long can be
 * addressed without overflowing an int shift or sign-extending the mask. */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandset(long *a, long b) {
  const unsigned long old = (unsigned long)*a;
  const unsigned long bit = 1UL << b;
  *a = (long)(old | bit);
  return (unsigned char)((old >> b) & 1UL);
}
503#ifdef __x86_64__
504static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
505_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
506  if (!_Mask)
507    return 0;
508  *_Index = __builtin_ctzll(_Mask);
509  return 1;
510}
511static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
512_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
513  if (!_Mask)
514    return 0;
515  *_Index = 63 - __builtin_clzll(_Mask);
516  return 1;
517}
518static
519__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
520_lzcnt_u64(unsigned __int64 a) {
521  if (!a)
522    return 64;
523  return __builtin_clzll(a);
524}
525static __inline__
526unsigned __int64 __attribute__((__always_inline__, __nodebug__))
527 __popcnt64(unsigned __int64 value) {
528  return __builtin_popcountll(value);
529}
530static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
531_bittest64(__int64 const *a, __int64 b) {
532  return (*a >> b) & 1;
533}
534static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
535_bittestandcomplement64(__int64 *a, __int64 b) {
536  unsigned char x = (*a >> b) & 1;
537  *a = *a ^ (1ll << b);
538  return x;
539}
540static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
541_bittestandreset64(__int64 *a, __int64 b) {
542  unsigned char x = (*a >> b) & 1;
543  *a = *a & ~(1ll << b);
544  return x;
545}
546static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
547_bittestandset64(__int64 *a, __int64 b) {
548  unsigned char x = (*a >> b) & 1;
549  *a = *a | (1ll << b);
550  return x;
551}
552#endif
553/*----------------------------------------------------------------------------*\
554|* Interlocked Exchange Add
555\*----------------------------------------------------------------------------*/
/* Atomically add _Value to *_Addend and return the value *_Addend held
 * before the addition.  Sequentially consistent ordering is requested so the
 * operation also acts as a full barrier for the compiler. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
/* Atomically add _Value to *_Addend and return the value *_Addend held
 * before the addition.  Sequentially consistent ordering is requested so the
 * operation also acts as a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
/* Atomically add _Value to *_Addend and return the value *_Addend held
 * before the addition.  Sequentially consistent ordering is requested so the
 * operation also acts as a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd(long volatile *_Addend, long _Value) {
  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
}
568#ifdef __x86_64__
569static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
570_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
571  return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
572}
573#endif
574/*----------------------------------------------------------------------------*\
575|* Interlocked Exchange Sub
576\*----------------------------------------------------------------------------*/
/* Atomically subtract _Value from *_Subend and return the value *_Subend
 * held before the subtraction.  Sequentially consistent ordering is requested
 * so the operation also acts as a full barrier for the compiler. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
/* Atomically subtract _Value from *_Subend and return the value *_Subend
 * held before the subtraction.  Sequentially consistent ordering is requested
 * so the operation also acts as a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
/* Atomically subtract _Value from *_Subend and return the value *_Subend
 * held before the subtraction.  Sequentially consistent ordering is requested
 * so the operation also acts as a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
}
589#ifdef __x86_64__
590static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
591_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
592  return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
593}
594#endif
595/*----------------------------------------------------------------------------*\
596|* Interlocked Increment
597\*----------------------------------------------------------------------------*/
/* Atomically increment the 16-bit *_Value and return the incremented value.
 * Operates on short, matching the prototype declared earlier in this header.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement16(short volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
/* Atomically increment *_Value and return the incremented value.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement(long volatile *_Value) {
  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
606#ifdef __x86_64__
607static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
608_InterlockedIncrement64(__int64 volatile *_Value) {
609  return __atomic_add_fetch(_Value, 1, 0);
610}
611#endif
612/*----------------------------------------------------------------------------*\
613|* Interlocked Decrement
614\*----------------------------------------------------------------------------*/
/* Atomically decrement the 16-bit *_Value and return the decremented value.
 * Operates on short, matching the prototype declared earlier in this header.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement16(short volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
/* Atomically decrement *_Value and return the decremented value.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement(long volatile *_Value) {
  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
}
623#ifdef __x86_64__
624static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
625_InterlockedDecrement64(__int64 volatile *_Value) {
626  return __atomic_sub_fetch(_Value, 1, 0);
627}
628#endif
629/*----------------------------------------------------------------------------*\
630|* Interlocked And
631\*----------------------------------------------------------------------------*/
/* Atomically replace *_Value with (*_Value & _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd8(char volatile *_Value, char _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
/* Atomically replace *_Value with (*_Value & _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd16(short volatile *_Value, short _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
/* Atomically replace *_Value with (*_Value & _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd(long volatile *_Value, long _Mask) {
  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
644#ifdef __x86_64__
645static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
646_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
647  return __atomic_and_fetch(_Value, _Mask, 0);
648}
649#endif
650/*----------------------------------------------------------------------------*\
651|* Interlocked Or
652\*----------------------------------------------------------------------------*/
/* Atomically replace *_Value with (*_Value | _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedOr8(char volatile *_Value, char _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
/* Atomically replace *_Value with (*_Value | _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedOr16(short volatile *_Value, short _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
/* Atomically replace *_Value with (*_Value | _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedOr(long volatile *_Value, long _Mask) {
  return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
665#ifdef __x86_64__
666static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
667_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
668  return __atomic_or_fetch(_Value, _Mask, 0);
669}
670#endif
671/*----------------------------------------------------------------------------*\
672|* Interlocked Xor
673\*----------------------------------------------------------------------------*/
/* Atomically replace *_Value with (*_Value ^ _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedXor8(char volatile *_Value, char _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
/* Atomically replace *_Value with (*_Value ^ _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedXor16(short volatile *_Value, short _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
/* Atomically replace *_Value with (*_Value ^ _Mask) and return the value
 * *_Value held before the operation, as the MSVC intrinsic of this name does.
 * Sequentially consistent ordering is requested so the operation also acts as
 * a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedXor(long volatile *_Value, long _Mask) {
  return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
686#ifdef __x86_64__
687static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
688_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
689  return __atomic_xor_fetch(_Value, _Mask, 0);
690}
691#endif
692/*----------------------------------------------------------------------------*\
693|* Interlocked Exchange
694\*----------------------------------------------------------------------------*/
/* Atomically store _Value into *_Target and return the value *_Target held
 * before the store.  Sequentially consistent ordering is requested so the
 * operation also acts as a full barrier for the compiler. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange8(char volatile *_Target, char _Value) {
  return __atomic_exchange_n(_Target, _Value, __ATOMIC_SEQ_CST);
}
/* Atomically store _Value into *_Target and return the value *_Target held
 * before the store.  Sequentially consistent ordering is requested so the
 * operation also acts as a full barrier for the compiler. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange16(short volatile *_Target, short _Value) {
  return __atomic_exchange_n(_Target, _Value, __ATOMIC_SEQ_CST);
}
/* Atomically store _Value into *_Target and return the value *_Target held
 * before the store.  Sequentially consistent ordering is requested so the
 * operation also acts as a full barrier for the compiler. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange(long volatile *_Target, long _Value) {
  return __atomic_exchange_n(_Target, _Value, __ATOMIC_SEQ_CST);
}
710#ifdef __x86_64__
711static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
712_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
713  __atomic_exchange(_Target, &_Value, &_Value, 0);
714  return _Value;
715}
716#endif
717/*----------------------------------------------------------------------------*\
718|* Interlocked Compare Exchange
719\*----------------------------------------------------------------------------*/
/* Atomically compare *_Destination with _Comparand and, if they are equal,
 * store _Exchange.  Returns the value *_Destination held before the call
 * whether or not the store happened.  Sequentially consistent ordering is
 * requested for both the success and the failure path. */
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange8(char volatile *_Destination,
                             char _Exchange, char _Comparand) {
  /* On failure the builtin writes the observed value into _Comparand; on
     success _Comparand already equals it. */
  __atomic_compare_exchange_n(_Destination, &_Comparand, _Exchange, 0,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
/* Atomically compare *_Destination with _Comparand and, if they are equal,
 * store _Exchange.  Returns the value *_Destination held before the call
 * whether or not the store happened.  Sequentially consistent ordering is
 * requested for both the success and the failure path. */
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange16(short volatile *_Destination,
                              short _Exchange, short _Comparand) {
  /* On failure the builtin writes the observed value into _Comparand; on
     success _Comparand already equals it. */
  __atomic_compare_exchange_n(_Destination, &_Comparand, _Exchange, 0,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
/* Atomically compare *_Destination with _Comparand and, if they are equal,
 * store _Exchange.  Returns the value *_Destination held before the call
 * whether or not the store happened.  Sequentially consistent ordering is
 * requested for both the success and the failure path. */
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange(long volatile *_Destination,
                            long _Exchange, long _Comparand) {
  /* On failure the builtin writes the observed value into _Comparand; on
     success _Comparand already equals it. */
  __atomic_compare_exchange_n(_Destination, &_Comparand, _Exchange, 0,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return _Comparand;
}
738#ifdef __x86_64__
739static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
740_InterlockedCompareExchange64(__int64 volatile *_Destination,
741                              __int64 _Exchange, __int64 _Comparand) {
742  __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
743  return _Comparand;
744}
745#endif
746/*----------------------------------------------------------------------------*\
747|* Misc
748\*----------------------------------------------------------------------------*/
/* Return the current frame address advanced by one pointer size.  Because
 * the function is always inlined, the frame is that of the calling function.
 * NOTE(review): presumably this is the slot holding the return address when
 * a frame pointer is in use -- confirm per target. */
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
_AddressOfReturnAddress(void) {
  char *frame = (char *)__builtin_frame_address(0);
  return (void *)(frame + sizeof(void *));
}
/* Return the address the current function will return to.  Because the
 * function is always inlined, "current" means the calling function. */
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
_ReturnAddress(void) {
  void *caller = __builtin_return_address(0);
  return caller;
}
757
758#ifdef __cplusplus
759}
760#endif
761
762#endif /* __INTRIN_H */
763#endif /* _MSC_VER */
764