1/* ===-------- Intrin.h ---------------------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24/* Only include this if we're compiling for the windows platform. */ 25#ifndef _MSC_VER 26#include_next <Intrin.h> 27#else 28 29#ifndef __INTRIN_H 30#define __INTRIN_H 31 32/* First include the standard intrinsics. */ 33#if defined(__i386__) || defined(__x86_64__) 34#include <x86intrin.h> 35#endif 36 37/* For the definition of jmp_buf. */ 38#if __STDC_HOSTED__ 39#include <setjmp.h> 40#endif 41 42#ifdef __cplusplus 43extern "C" { 44#endif 45 46#if defined(__MMX__) 47/* And the random ones that aren't in those files. */ 48__m64 _m_from_float(float); 49__m64 _m_from_int(int _l); 50void _m_prefetch(void *); 51float _m_to_float(__m64); 52int _m_to_int(__m64 _M); 53#endif 54 55/* Other assorted instruction intrinsics. */ 56void __addfsbyte(unsigned long, unsigned char); 57void __addfsdword(unsigned long, unsigned long); 58void __addfsword(unsigned long, unsigned short); 59void __code_seg(const char *); 60static __inline__ 61void __cpuid(int[4], int); 62static __inline__ 63void __cpuidex(int[4], int, int); 64void __debugbreak(void); 65__int64 __emul(int, int); 66unsigned __int64 __emulu(unsigned int, unsigned int); 67void __cdecl __fastfail(unsigned int); 68unsigned int __getcallerseflags(void); 69static __inline__ 70void __halt(void); 71unsigned char __inbyte(unsigned short); 72void __inbytestring(unsigned short, unsigned char *, unsigned long); 73void __incfsbyte(unsigned long); 74void __incfsdword(unsigned long); 75void __incfsword(unsigned long); 76unsigned long __indword(unsigned short); 77void __indwordstring(unsigned short, unsigned long *, unsigned long); 78void __int2c(void); 79void __invlpg(void *); 80unsigned short __inword(unsigned short); 81void __inwordstring(unsigned short, unsigned short *, unsigned long); 82void __lidt(void *); 83unsigned __int64 __ll_lshift(unsigned __int64, int); 84__int64 __ll_rshift(__int64, int); 85void __llwpcb(void *); 86unsigned char __lwpins32(unsigned int, unsigned int, unsigned int); 87void __lwpval32(unsigned int, unsigned int, unsigned int); 88unsigned int __lzcnt(unsigned int); 89unsigned short __lzcnt16(unsigned short); 90static __inline__ 91void __movsb(unsigned char *, unsigned char const *, size_t); 92static __inline__ 93void __movsd(unsigned long *, unsigned long const *, size_t); 94static __inline__ 95void __movsw(unsigned short *, unsigned short const *, size_t); 96void __nop(void); 97void __nvreg_restore_fence(void); 98void __nvreg_save_fence(void); 99void __outbyte(unsigned short, unsigned char); 100void __outbytestring(unsigned short, unsigned char *, unsigned long); 101void __outdword(unsigned short, unsigned long); 102void __outdwordstring(unsigned short, unsigned long *, unsigned long); 103void __outword(unsigned short, unsigned short); 104void __outwordstring(unsigned short, unsigned short *, unsigned long); 105static __inline__ 106unsigned int __popcnt(unsigned int); 107static __inline__ 108unsigned short __popcnt16(unsigned short); 109unsigned long __readcr0(void); 110unsigned long __readcr2(void); 111static __inline__ 112unsigned long __readcr3(void); 113unsigned long __readcr4(void); 114unsigned long __readcr8(void); 115unsigned int __readdr(unsigned int); 116#ifdef __i386__ 117static __inline__ 118unsigned char __readfsbyte(unsigned long); 119static __inline__ 120unsigned long __readfsdword(unsigned long); 121static __inline__ 122unsigned __int64 __readfsqword(unsigned long); 123static __inline__ 124unsigned short __readfsword(unsigned long); 125#endif 126static __inline__ 127unsigned __int64 __readmsr(unsigned long); 128unsigned __int64 __readpmc(unsigned long); 129unsigned long __segmentlimit(unsigned long); 130void __sidt(void *); 131void *__slwpcb(void); 132static __inline__ 133void __stosb(unsigned char *, unsigned char, size_t); 134static __inline__ 135void __stosd(unsigned long *, unsigned long, size_t); 136static __inline__ 137void __stosw(unsigned short *, unsigned short, size_t); 138void __svm_clgi(void); 139void __svm_invlpga(void *, int); 140void __svm_skinit(int); 141void __svm_stgi(void); 142void __svm_vmload(size_t); 143void __svm_vmrun(size_t); 144void __svm_vmsave(size_t); 145void __ud2(void); 146unsigned __int64 __ull_rshift(unsigned __int64, int); 147void __vmx_off(void); 148void __vmx_vmptrst(unsigned __int64 *); 149void __wbinvd(void); 150void __writecr0(unsigned int); 151static __inline__ 152void __writecr3(unsigned int); 153void __writecr4(unsigned int); 154void __writecr8(unsigned int); 155void __writedr(unsigned int, unsigned int); 156void __writefsbyte(unsigned long, unsigned char); 157void __writefsdword(unsigned long, unsigned long); 158void __writefsqword(unsigned long, unsigned __int64); 159void __writefsword(unsigned long, unsigned short); 160void __writemsr(unsigned long, unsigned __int64); 161static __inline__ 162void *_AddressOfReturnAddress(void); 163unsigned int _andn_u32(unsigned int, unsigned int); 164unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); 165unsigned int _bextri_u32(unsigned int, unsigned int); 166static __inline__ 167unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); 168static __inline__ 169unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); 170static __inline__ 171unsigned char _bittest(long const *, long); 172static __inline__ 173unsigned char _bittestandcomplement(long *, long); 174static __inline__ 175unsigned char _bittestandreset(long *, long); 176static __inline__ 177unsigned char _bittestandset(long *, long); 178unsigned int _blcfill_u32(unsigned int); 179unsigned int _blci_u32(unsigned int); 180unsigned int _blcic_u32(unsigned int); 181unsigned int _blcmsk_u32(unsigned int); 182unsigned int _blcs_u32(unsigned int); 183unsigned int _blsfill_u32(unsigned int); 184unsigned int _blsi_u32(unsigned int); 185unsigned int _blsic_u32(unsigned int); 186unsigned int _blsmsk_u32(unsigned int); 187unsigned int _blsr_u32(unsigned int); 188unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); 189unsigned long __cdecl _byteswap_ulong(unsigned long); 190unsigned short __cdecl _byteswap_ushort(unsigned short); 191unsigned _bzhi_u32(unsigned int, unsigned int); 192void __cdecl _disable(void); 193void __cdecl _enable(void); 194void __cdecl _fxrstor(void const *); 195void __cdecl _fxsave(void *); 196long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); 197static __inline__ 198long _InterlockedAnd(long volatile *_Value, long _Mask); 199static __inline__ 200short _InterlockedAnd16(short volatile *_Value, short _Mask); 201static __inline__ 202char _InterlockedAnd8(char volatile *_Value, char _Mask); 203unsigned char _interlockedbittestandreset(long volatile *, long); 204static __inline__ 205unsigned char _interlockedbittestandset(long volatile *, long); 206static __inline__ 207long __cdecl _InterlockedCompareExchange(long volatile *_Destination, 208 long _Exchange, long _Comparand); 209long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); 210long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); 211static __inline__ 212short _InterlockedCompareExchange16(short volatile *_Destination, 213 short _Exchange, short _Comparand); 214static __inline__ 215__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination, 216 __int64 _Exchange, __int64 _Comparand); 217__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, 218 __int64); 219__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, 220 __int64); 221static __inline__ 222char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, 223 char _Comparand); 224void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, 225 void *); 226void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, 227 void *); 228static __inline__ 229long __cdecl _InterlockedDecrement(long volatile *_Addend); 230static __inline__ 231short _InterlockedDecrement16(short volatile *_Addend); 232long _InterlockedExchange(long volatile *_Target, long _Value); 233static __inline__ 234short _InterlockedExchange16(short volatile *_Target, short _Value); 235static __inline__ 236char _InterlockedExchange8(char volatile *_Target, char _Value); 237static __inline__ 238long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value); 239long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); 240long _InterlockedExchangeAdd_HLERelease(long volatile *, long); 241static __inline__ 242short _InterlockedExchangeAdd16(short volatile *_Addend, short _Value); 243__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); 244__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); 245static __inline__ 246char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value); 247static __inline__ 248long __cdecl _InterlockedIncrement(long volatile *_Addend); 249static __inline__ 250short _InterlockedIncrement16(short volatile *_Addend); 251static __inline__ 252long _InterlockedOr(long volatile *_Value, long _Mask); 253static __inline__ 254short _InterlockedOr16(short volatile *_Value, short _Mask); 255static __inline__ 256char _InterlockedOr8(char volatile *_Value, char _Mask); 257static __inline__ 258long _InterlockedXor(long volatile *_Value, long _Mask); 259static __inline__ 260short _InterlockedXor16(short volatile *_Value, short _Mask); 261static __inline__ 262char _InterlockedXor8(char volatile *_Value, char _Mask); 263void __cdecl _invpcid(unsigned int, void *); 264static __inline__ 265unsigned long __cdecl _lrotl(unsigned long, int); 266static __inline__ 267unsigned long __cdecl _lrotr(unsigned long, int); 268static __inline__ 269unsigned int _lzcnt_u32(unsigned int); 270static __inline__ 271void _ReadBarrier(void); 272static __inline__ 273void _ReadWriteBarrier(void); 274static __inline__ 275void *_ReturnAddress(void); 276unsigned int _rorx_u32(unsigned int, const unsigned int); 277int __cdecl _rdrand16_step(unsigned short *); 278int __cdecl _rdrand32_step(unsigned int *); 279static __inline__ 280unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); 281static __inline__ 282unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); 283static __inline__ 284unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); 285static __inline__ 286unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); 287static __inline__ 288unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); 289static __inline__ 290unsigned short _rotr16(unsigned short _Value, unsigned char _Shift); 291static __inline__ 292unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); 293static __inline__ 294unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); 295int _sarx_i32(int, unsigned int); 296#if __STDC_HOSTED__ 297int __cdecl _setjmp(jmp_buf); 298#endif 299unsigned int _shlx_u32(unsigned int, unsigned int); 300unsigned int _shrx_u32(unsigned int, unsigned int); 301void _Store_HLERelease(long volatile *, long); 302void _Store64_HLERelease(__int64 volatile *, __int64); 303void _StorePointer_HLERelease(void *volatile *, void *); 304unsigned int _t1mskc_u32(unsigned int); 305unsigned int _tzcnt_u32(unsigned int); 306unsigned int _tzmsk_u32(unsigned int); 307static __inline__ 308void _WriteBarrier(void); 309void _xabort(const unsigned int imm); 310unsigned __int32 xbegin(void); 311void _xend(void); 312static __inline__ 313unsigned __int64 __cdecl _xgetbv(unsigned int); 314void __cdecl _xrstor(void const *, unsigned __int64); 315void __cdecl _xsave(void *, unsigned __int64); 316void __cdecl _xsaveopt(void *, unsigned __int64); 317void __cdecl _xsetbv(unsigned int, unsigned __int64); 318unsigned char _xtest(void); 319 320/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */ 321#ifdef __x86_64__ 322void __addgsbyte(unsigned long, unsigned char); 323void __addgsdword(unsigned long, unsigned long); 324void __addgsqword(unsigned long, unsigned __int64); 325void __addgsword(unsigned long, unsigned short); 326static __inline__ 327void __faststorefence(void); 328void __incgsbyte(unsigned long); 329void __incgsdword(unsigned long); 330void __incgsqword(unsigned long); 331void __incgsword(unsigned long); 332unsigned char __lwpins64(unsigned __int64, unsigned int, unsigned int); 333void __lwpval64(unsigned __int64, unsigned int, unsigned int); 334unsigned __int64 __lzcnt64(unsigned __int64); 335static __inline__ 336void __movsq(unsigned long long *, unsigned long long const *, size_t); 337__int64 __mulh(__int64, __int64); 338static __inline__ 339unsigned __int64 __popcnt64(unsigned __int64); 340static __inline__ 341unsigned char __readgsbyte(unsigned long); 342static __inline__ 343unsigned long __readgsdword(unsigned long); 344static __inline__ 345unsigned __int64 __readgsqword(unsigned long); 346unsigned short __readgsword(unsigned long); 347unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, 348 unsigned __int64 _HighPart, 349 unsigned char _Shift); 350unsigned __int64 __shiftright128(unsigned __int64 _LowPart, 351 unsigned __int64 _HighPart, 352 unsigned char _Shift); 353static __inline__ 354void __stosq(unsigned __int64 *, unsigned __int64, size_t); 355unsigned __int64 __umulh(unsigned __int64, unsigned __int64); 356unsigned char __vmx_on(unsigned __int64 *); 357unsigned char __vmx_vmclear(unsigned __int64 *); 358unsigned char __vmx_vmlaunch(void); 359unsigned char __vmx_vmptrld(unsigned __int64 *); 360unsigned char __vmx_vmread(size_t, size_t *); 361unsigned char __vmx_vmresume(void); 362unsigned char __vmx_vmwrite(size_t, size_t); 363void __writegsbyte(unsigned long, unsigned char); 364void __writegsdword(unsigned long, unsigned long); 365void __writegsqword(unsigned long, unsigned __int64); 366void __writegsword(unsigned long, unsigned short); 367unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64); 368unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int); 369unsigned __int64 _bextri_u64(unsigned __int64, unsigned int); 370static __inline__ 371unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); 372static __inline__ 373unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); 374static __inline__ 375unsigned char _bittest64(__int64 const *, __int64); 376static __inline__ 377unsigned char _bittestandcomplement64(__int64 *, __int64); 378static __inline__ 379unsigned char _bittestandreset64(__int64 *, __int64); 380static __inline__ 381unsigned char _bittestandset64(__int64 *, __int64); 382unsigned __int64 _blcfill_u64(unsigned __int64); 383unsigned __int64 _blci_u64(unsigned __int64); 384unsigned __int64 _blcic_u64(unsigned __int64); 385unsigned __int64 _blcmsk_u64(unsigned __int64); 386unsigned __int64 _blcs_u64(unsigned __int64); 387unsigned __int64 _blsfill_u64(unsigned __int64); 388unsigned __int64 _blsi_u64(unsigned __int64); 389unsigned __int64 _blsic_u64(unsigned __int64); 390unsigned __int64 _blsmsk_u64(unsigned __int64); 391unsigned __int64 _blsr_u64(unsigned __int64); 392unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); 393unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int); 394void __cdecl _fxrstor64(void const *); 395void __cdecl _fxsave64(void *); 396long _InterlockedAnd_np(long volatile *_Value, long _Mask); 397short _InterlockedAnd16_np(short volatile *_Value, short _Mask); 398__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); 399char _InterlockedAnd8_np(char volatile *_Value, char _Mask); 400unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); 401static __inline__ 402unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); 403long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, 404 long _Comparand); 405unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, 406 __int64 _ExchangeHigh, 407 __int64 _ExchangeLow, 408 __int64 *_CompareandResult); 409unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, 410 __int64 _ExchangeHigh, 411 __int64 _ExchangeLow, 412 __int64 *_ComparandResult); 413short _InterlockedCompareExchange16_np(short volatile *_Destination, 414 short _Exchange, short _Comparand); 415__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64, 416 __int64); 417__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, 418 __int64); 419__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, 420 __int64 _Exchange, __int64 _Comparand); 421void *_InterlockedCompareExchangePointer(void *volatile *_Destination, 422 void *_Exchange, void *_Comparand); 423void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, 424 void *_Exchange, void *_Comparand); 425static __inline__ 426__int64 _InterlockedDecrement64(__int64 volatile *_Addend); 427static __inline__ 428__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value); 429static __inline__ 430__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value); 431void *_InterlockedExchangePointer(void *volatile *_Target, void *_Value); 432static __inline__ 433__int64 _InterlockedIncrement64(__int64 volatile *_Addend); 434long _InterlockedOr_np(long volatile *_Value, long _Mask); 435short _InterlockedOr16_np(short volatile *_Value, short _Mask); 436static __inline__ 437__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask); 438__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); 439char _InterlockedOr8_np(char volatile *_Value, char _Mask); 440long _InterlockedXor_np(long volatile *_Value, long _Mask); 441short _InterlockedXor16_np(short volatile *_Value, short _Mask); 442static __inline__ 443__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); 444__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); 445char _InterlockedXor8_np(char volatile *_Value, char _Mask); 446static __inline__ 447unsigned __int64 _lzcnt_u64(unsigned __int64); 448__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, 449 __int64 *_HighProduct); 450unsigned int __cdecl _readfsbase_u32(void); 451unsigned __int64 __cdecl _readfsbase_u64(void); 452unsigned int __cdecl _readgsbase_u32(void); 453unsigned __int64 __cdecl _readgsbase_u64(void); 454unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); 455__int64 _sarx_i64(__int64, unsigned int); 456#if __STDC_HOSTED__ 457int __cdecl _setjmpex(jmp_buf); 458#endif 459unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); 460unsigned __int64 shrx_u64(unsigned __int64, unsigned int); 461unsigned __int64 _tzcnt_u64(unsigned __int64); 462unsigned __int64 _tzmsk_u64(unsigned __int64); 463unsigned __int64 _umul128(unsigned __int64 _Multiplier, 464 unsigned __int64 _Multiplicand, 465 unsigned __int64 *_HighProduct); 466void __cdecl _writefsbase_u32(unsigned int); 467void _cdecl _writefsbase_u64(unsigned __int64); 468void __cdecl _writegsbase_u32(unsigned int); 469void __cdecl _writegsbase_u64(unsigned __int64); 470void __cdecl _xrstor64(void const *, unsigned __int64); 471void __cdecl _xsave64(void *, unsigned __int64); 472void __cdecl _xsaveopt64(void *, unsigned __int64); 473 474#endif /* __x86_64__ */ 475 476/*----------------------------------------------------------------------------*\ 477|* Bit Twiddling 478\*----------------------------------------------------------------------------*/ 479static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 480_rotl8(unsigned char _Value, unsigned char _Shift) { 481 _Shift &= 0x7; 482 return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; 483} 484static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 485_rotr8(unsigned char _Value, unsigned char _Shift) { 486 _Shift &= 0x7; 487 return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; 488} 489static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 490_rotl16(unsigned short _Value, unsigned char _Shift) { 491 _Shift &= 0xf; 492 return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; 493} 494static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 495_rotr16(unsigned short _Value, unsigned char _Shift) { 496 _Shift &= 0xf; 497 return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; 498} 499static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 500_rotl(unsigned int _Value, int _Shift) { 501 _Shift &= 0x1f; 502 return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; 503} 504static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 505_rotr(unsigned int _Value, int _Shift) { 506 _Shift &= 0x1f; 507 return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; 508} 509static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 510_lrotl(unsigned long _Value, int _Shift) { 511 _Shift &= 0x1f; 512 return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; 513} 514static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 515_lrotr(unsigned long _Value, int _Shift) { 516 _Shift &= 0x1f; 517 return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; 518} 519static 520__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 521_rotl64(unsigned __int64 _Value, int _Shift) { 522 _Shift &= 0x3f; 523 return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value; 524} 525static 526__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 527_rotr64(unsigned __int64 _Value, int _Shift) { 528 _Shift &= 0x3f; 529 return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value; 530} 531/*----------------------------------------------------------------------------*\ 532|* Bit Counting and Testing 533\*----------------------------------------------------------------------------*/ 534static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 535_BitScanForward(unsigned long *_Index, unsigned long _Mask) { 536 if (!_Mask) 537 return 0; 538 *_Index = __builtin_ctzl(_Mask); 539 return 1; 540} 541static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 542_BitScanReverse(unsigned long *_Index, unsigned long _Mask) { 543 if (!_Mask) 544 return 0; 545 *_Index = 31 - __builtin_clzl(_Mask); 546 return 1; 547} 548static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 549_lzcnt_u32(unsigned int a) { 550 if (!a) 551 return 32; 552 return __builtin_clzl(a); 553} 554static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 555__popcnt16(unsigned short value) { 556 return __builtin_popcount((int)value); 557} 558static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 559__popcnt(unsigned int value) { 560 return __builtin_popcount(value); 561} 562static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 563_bittest(long const *a, long b) { 564 return (*a >> b) & 1; 565} 566static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 567_bittestandcomplement(long *a, long b) { 568 unsigned char x = (*a >> b) & 1; 569 *a = *a ^ (1 << b); 570 return x; 571} 572static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 573_bittestandreset(long *a, long b) { 574 unsigned char x = (*a >> b) & 1; 575 *a = *a & ~(1 << b); 576 return x; 577} 578static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 579_bittestandset(long *a, long b) { 580 unsigned char x = (*a >> b) & 1; 581 *a = *a | (1 << b); 582 return x; 583} 584#if defined(__i386__) || defined(__x86_64__) 585static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 586_interlockedbittestandset(long volatile *__BitBase, long __BitPos) { 587 unsigned char __Res; 588 __asm__ ("xor %0, %0\n" 589 "lock bts %2, %1\n" 590 "setc %0\n" 591 : "=r" (__Res), "+m"(*__BitBase) 592 : "Ir"(__BitPos)); 593 return __Res; 594} 595#endif 596#ifdef __x86_64__ 597static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 598_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { 599 if (!_Mask) 600 return 0; 601 *_Index = __builtin_ctzll(_Mask); 602 return 1; 603} 604static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 605_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { 606 if (!_Mask) 607 return 0; 608 *_Index = 63 - __builtin_clzll(_Mask); 609 return 1; 610} 611static 612__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 613_lzcnt_u64(unsigned __int64 a) { 614 if (!a) 615 return 64; 616 return __builtin_clzll(a); 617} 618static __inline__ 619unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 620 __popcnt64(unsigned __int64 value) { 621 return __builtin_popcountll(value); 622} 623static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 624_bittest64(__int64 const *a, __int64 b) { 625 return (*a >> b) & 1; 626} 627static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 628_bittestandcomplement64(__int64 *a, __int64 b) { 629 unsigned char x = (*a >> b) & 1; 630 *a = *a ^ (1ll << b); 631 return x; 632} 633static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 634_bittestandreset64(__int64 *a, __int64 b) { 635 unsigned char x = (*a >> b) & 1; 636 *a = *a & ~(1ll << b); 637 return x; 638} 639static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 640_bittestandset64(__int64 *a, __int64 b) { 641 unsigned char x = (*a >> b) & 1; 642 *a = *a | (1ll << b); 643 return x; 644} 645static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 646_interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) { 647 unsigned char __Res; 648 __asm__ ("xor %0, %0\n" 649 "lock bts %2, %1\n" 650 "setc %0\n" 651 : "=r" (__Res), "+m"(*__BitBase) 652 : "Ir"(__BitPos)); 653 return __Res; 654} 655#endif 656/*----------------------------------------------------------------------------*\ 657|* Interlocked Exchange Add 658\*----------------------------------------------------------------------------*/ 659static __inline__ char __attribute__((__always_inline__, __nodebug__)) 660_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) { 661 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 662} 663static __inline__ short __attribute__((__always_inline__, __nodebug__)) 664_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { 665 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 666} 667#ifdef __x86_64__ 668static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 669_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { 670 return __atomic_add_fetch(_Addend, _Value, 0) - _Value; 671} 672#endif 673/*----------------------------------------------------------------------------*\ 674|* Interlocked Exchange Sub 675\*----------------------------------------------------------------------------*/ 676static __inline__ char __attribute__((__always_inline__, __nodebug__)) 677_InterlockedExchangeSub8(char volatile *_Subend, char _Value) { 678 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 679} 680static __inline__ short __attribute__((__always_inline__, __nodebug__)) 681_InterlockedExchangeSub16(short volatile *_Subend, short _Value) { 682 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 683} 684static __inline__ long __attribute__((__always_inline__, __nodebug__)) 685_InterlockedExchangeSub(long volatile *_Subend, long _Value) { 686 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 687} 688#ifdef __x86_64__ 689static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 690_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { 691 return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; 692} 693#endif 694/*----------------------------------------------------------------------------*\ 695|* Interlocked Increment 696\*----------------------------------------------------------------------------*/ 697static __inline__ short __attribute__((__always_inline__, __nodebug__)) 698_InterlockedIncrement16(short volatile *_Value) { 699 return __atomic_add_fetch(_Value, 1, 0); 700} 701#ifdef __x86_64__ 702static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 703_InterlockedIncrement64(__int64 volatile *_Value) { 704 return __atomic_add_fetch(_Value, 1, 0); 705} 706#endif 707/*----------------------------------------------------------------------------*\ 708|* Interlocked Decrement 709\*----------------------------------------------------------------------------*/ 710static __inline__ short __attribute__((__always_inline__, __nodebug__)) 711_InterlockedDecrement16(short volatile *_Value) { 712 return __atomic_sub_fetch(_Value, 1, 0); 713} 714#ifdef __x86_64__ 715static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 716_InterlockedDecrement64(__int64 volatile *_Value) { 717 return __atomic_sub_fetch(_Value, 1, 0); 718} 719#endif 720/*----------------------------------------------------------------------------*\ 721|* Interlocked And 722\*----------------------------------------------------------------------------*/ 723static __inline__ char __attribute__((__always_inline__, __nodebug__)) 724_InterlockedAnd8(char volatile *_Value, char _Mask) { 725 return __atomic_and_fetch(_Value, _Mask, 0); 726} 727static __inline__ short __attribute__((__always_inline__, __nodebug__)) 728_InterlockedAnd16(short volatile *_Value, short _Mask) { 729 return __atomic_and_fetch(_Value, _Mask, 0); 730} 731static __inline__ long __attribute__((__always_inline__, __nodebug__)) 732_InterlockedAnd(long volatile *_Value, long _Mask) { 733 return __atomic_and_fetch(_Value, _Mask, 0); 734} 735#ifdef __x86_64__ 736static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 737_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { 738 return __atomic_and_fetch(_Value, _Mask, 0); 739} 740#endif 741/*----------------------------------------------------------------------------*\ 742|* Interlocked Or 743\*----------------------------------------------------------------------------*/ 744static __inline__ char __attribute__((__always_inline__, __nodebug__)) 745_InterlockedOr8(char volatile *_Value, char _Mask) { 746 return __atomic_or_fetch(_Value, _Mask, 0); 747} 748static __inline__ short __attribute__((__always_inline__, __nodebug__)) 749_InterlockedOr16(short volatile *_Value, short _Mask) { 750 return __atomic_or_fetch(_Value, _Mask, 0); 751} 752static __inline__ long __attribute__((__always_inline__, __nodebug__)) 753_InterlockedOr(long volatile *_Value, long _Mask) { 754 return __atomic_or_fetch(_Value, _Mask, 0); 755} 756#ifdef __x86_64__ 757static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 758_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { 759 return __atomic_or_fetch(_Value, _Mask, 0); 760} 761#endif 762/*----------------------------------------------------------------------------*\ 763|* Interlocked Xor 764\*----------------------------------------------------------------------------*/ 765static __inline__ char __attribute__((__always_inline__, __nodebug__)) 766_InterlockedXor8(char volatile *_Value, char _Mask) { 767 return __atomic_xor_fetch(_Value, _Mask, 0); 768} 769static __inline__ short __attribute__((__always_inline__, __nodebug__)) 770_InterlockedXor16(short volatile *_Value, short _Mask) { 771 return __atomic_xor_fetch(_Value, _Mask, 0); 772} 773static __inline__ long __attribute__((__always_inline__, __nodebug__)) 774_InterlockedXor(long volatile *_Value, long _Mask) { 775 return __atomic_xor_fetch(_Value, _Mask, 0); 776} 777#ifdef __x86_64__ 778static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 779_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { 780 return __atomic_xor_fetch(_Value, _Mask, 0); 781} 782#endif 783/*----------------------------------------------------------------------------*\ 784|* Interlocked Exchange 785\*----------------------------------------------------------------------------*/ 786static __inline__ char __attribute__((__always_inline__, __nodebug__)) 787_InterlockedExchange8(char volatile *_Target, char _Value) { 788 __atomic_exchange(_Target, &_Value, &_Value, 0); 789 return _Value; 790} 791static __inline__ short __attribute__((__always_inline__, __nodebug__)) 792_InterlockedExchange16(short volatile *_Target, short _Value) { 793 __atomic_exchange(_Target, &_Value, &_Value, 0); 794 return _Value; 795} 796#ifdef __x86_64__ 797static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 798_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { 799 __atomic_exchange(_Target, &_Value, &_Value, 0); 800 return _Value; 801} 802#endif 803/*----------------------------------------------------------------------------*\ 804|* Interlocked Compare Exchange 805\*----------------------------------------------------------------------------*/ 806static __inline__ char __attribute__((__always_inline__, __nodebug__)) 807_InterlockedCompareExchange8(char volatile *_Destination, 808 char _Exchange, char _Comparand) { 809 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 810 return _Comparand; 811} 812static __inline__ short __attribute__((__always_inline__, __nodebug__)) 813_InterlockedCompareExchange16(short volatile *_Destination, 814 short _Exchange, short _Comparand) { 815 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 816 return _Comparand; 817} 818static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) 819_InterlockedCompareExchange64(__int64 volatile *_Destination, 820 __int64 _Exchange, __int64 _Comparand) { 821 __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); 822 return _Comparand; 823} 824/*----------------------------------------------------------------------------*\ 825|* Barriers 826\*----------------------------------------------------------------------------*/ 827#if defined(__i386__) || defined(__x86_64__) 828static __inline__ void __attribute__((__always_inline__, __nodebug__)) 829__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 830_ReadWriteBarrier(void) { 831 __asm__ volatile ("" : : : "memory"); 832} 833static __inline__ void __attribute__((__always_inline__, __nodebug__)) 834__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 835_ReadBarrier(void) { 836 __asm__ volatile ("" : : : "memory"); 837} 838static __inline__ void __attribute__((__always_inline__, __nodebug__)) 839__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) 840_WriteBarrier(void) { 841 __asm__ volatile ("" : : : "memory"); 842} 843#endif 844#ifdef __x86_64__ 845static __inline__ void __attribute__((__always_inline__, __nodebug__)) 846__faststorefence(void) { 847 __asm__ volatile("lock orq $0, (%%rsp)" : : : "memory"); 848} 849#endif 850/*----------------------------------------------------------------------------*\ 851|* readfs, readgs 852|* (Pointers in address space #256 and #257 are relative to the GS and FS 853|* segment registers, respectively.) 854\*----------------------------------------------------------------------------*/ 855#define __ptr_to_addr_space(__addr_space_nbr, __type, __offset) \ 856 ((volatile __type __attribute__((__address_space__(__addr_space_nbr)))*) \ 857 (__offset)) 858 859#ifdef __i386__ 860static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 861__readfsbyte(unsigned long __offset) { 862 return *__ptr_to_addr_space(257, unsigned char, __offset); 863} 864static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 865__readfsdword(unsigned long __offset) { 866 return *__ptr_to_addr_space(257, unsigned long, __offset); 867} 868static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 869__readfsqword(unsigned long __offset) { 870 return *__ptr_to_addr_space(257, unsigned __int64, __offset); 871} 872static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 873__readfsword(unsigned long __offset) { 874 return *__ptr_to_addr_space(257, unsigned short, __offset); 875} 876#endif 877#ifdef __x86_64__ 878static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) 879__readgsbyte(unsigned long __offset) { 880 return *__ptr_to_addr_space(256, unsigned char, __offset); 881} 882static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) 883__readgsdword(unsigned long __offset) { 884 return *__ptr_to_addr_space(256, unsigned long, __offset); 885} 886static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 887__readgsqword(unsigned long __offset) { 888 return *__ptr_to_addr_space(256, unsigned __int64, __offset); 889} 890static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) 891__readgsword(unsigned long __offset) { 892 return *__ptr_to_addr_space(256, unsigned short, __offset); 893} 894#endif 895#undef __ptr_to_addr_space 896/*----------------------------------------------------------------------------*\ 897|* movs, stos 898\*----------------------------------------------------------------------------*/ 899#if defined(__i386__) || defined(__x86_64__) 900static __inline__ void __attribute__((__always_inline__, __nodebug__)) 901__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { 902 __asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n) 903 : "%edi", "%esi", "%ecx"); 904} 905static __inline__ void __attribute__((__always_inline__, __nodebug__)) 906__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { 907 __asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n) 908 : "%edi", "%esi", "%ecx"); 909} 910static __inline__ void __attribute__((__always_inline__, __nodebug__)) 911__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { 912 __asm__("rep movsh" : : "D"(__dst), "S"(__src), "c"(__n) 913 : "%edi", "%esi", "%ecx"); 914} 915static __inline__ void __attribute__((__always_inline__, __nodebug__)) 916__stosb(unsigned char *__dst, unsigned char __x, size_t __n) { 917 __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n) 918 : "%edi", "%ecx"); 919} 920static __inline__ void __attribute__((__always_inline__, __nodebug__)) 921__stosd(unsigned long *__dst, unsigned long __x, size_t __n) { 922 __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n) 923 : "%edi", "%ecx"); 924} 925static __inline__ void __attribute__((__always_inline__, __nodebug__)) 926__stosw(unsigned short *__dst, unsigned short __x, size_t __n) { 927 __asm__("rep stosh" : : "D"(__dst), "a"(__x), "c"(__n) 928 : "%edi", "%ecx"); 929} 930#endif 931#ifdef __x86_64__ 932static __inline__ void __attribute__((__always_inline__, __nodebug__)) 933__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) { 934 __asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n) 935 : "%edi", "%esi", "%ecx"); 936} 937static __inline__ void __attribute__((__always_inline__, __nodebug__)) 938__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { 939 __asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n) 940 : "%edi", "%ecx"); 941} 942#endif 943 944/*----------------------------------------------------------------------------*\ 945|* Misc 946\*----------------------------------------------------------------------------*/ 947static __inline__ void * __attribute__((__always_inline__, __nodebug__)) 948_AddressOfReturnAddress(void) { 949 return (void*)((char*)__builtin_frame_address(0) + sizeof(void*)); 950} 951static __inline__ void * __attribute__((__always_inline__, __nodebug__)) 952_ReturnAddress(void) { 953 return __builtin_return_address(0); 954} 955#if defined(__i386__) || defined(__x86_64__) 956static __inline__ void __attribute__((__always_inline__, __nodebug__)) 957__cpuid(int __info[4], int __level) { 958 __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) 959 : "a"(__level)); 960} 961static __inline__ void __attribute__((__always_inline__, __nodebug__)) 962__cpuidex(int __info[4], int __level, int __ecx) { 963 __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) 964 : "a"(__level), "c"(__ecx)); 965} 966static __inline__ unsigned __int64 __cdecl __attribute__((__always_inline__, __nodebug__)) 967_xgetbv(unsigned int __xcr_no) { 968 unsigned int __eax, __edx; 969 __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no)); 970 return ((unsigned __int64)__edx << 32) | __eax; 971} 972static __inline__ void __attribute__((__always_inline__, __nodebug__)) 973__halt(void) { 974 __asm__ volatile ("hlt"); 975} 976#endif 977 978/*----------------------------------------------------------------------------*\ 979|* Privileged intrinsics 980\*----------------------------------------------------------------------------*/ 981#if defined(__i386__) || defined(__x86_64__) 982static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) 983__readmsr(unsigned long __register) { 984 // Loads the contents of a 64-bit model specific register (MSR) specified in 985 // the ECX register into registers EDX:EAX. The EDX register is loaded with 986 // the high-order 32 bits of the MSR and the EAX register is loaded with the 987 // low-order 32 bits. If less than 64 bits are implemented in the MSR being 988 // read, the values returned to EDX:EAX in unimplemented bit locations are 989 // undefined. 990 unsigned long __edx; 991 unsigned long __eax; 992 __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register)); 993 return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax; 994} 995 996static __inline__ unsigned long __attribute__((always_inline, __nodebug__)) 997__readcr3(void) { 998 unsigned long __cr3_val; 999 __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory"); 1000 return __cr3_val; 1001} 1002 1003static __inline__ void __attribute__((always_inline, __nodebug__)) 1004__writecr3(unsigned int __cr3_val) { 1005 __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory"); 1006} 1007#endif 1008 1009#ifdef __cplusplus 1010} 1011#endif 1012 1013#endif /* __INTRIN_H */ 1014#endif /* _MSC_VER */ 1015