/* x86.h revision 7ce0a1d1337c01056ba24006efab21f00e179e04 */
1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#ifndef VPX_PORTS_X86_H_ 13#define VPX_PORTS_X86_H_ 14#include <stdlib.h> 15#include "vpx_config.h" 16#include "vpx/vpx_integer.h" 17 18#ifdef __cplusplus 19extern "C" { 20#endif 21 22typedef enum { 23 VPX_CPU_UNKNOWN = -1, 24 VPX_CPU_AMD, 25 VPX_CPU_AMD_OLD, 26 VPX_CPU_CENTAUR, 27 VPX_CPU_CYRIX, 28 VPX_CPU_INTEL, 29 VPX_CPU_NEXGEN, 30 VPX_CPU_NSC, 31 VPX_CPU_RISE, 32 VPX_CPU_SIS, 33 VPX_CPU_TRANSMETA, 34 VPX_CPU_TRANSMETA_OLD, 35 VPX_CPU_UMC, 36 VPX_CPU_VIA, 37 38 VPX_CPU_LAST 39} vpx_cpu_t; 40 41#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) 42#if ARCH_X86_64 43#define cpuid(func, func2, ax, bx, cx, dx)\ 44 __asm__ __volatile__ (\ 45 "cpuid \n\t" \ 46 : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ 47 : "a" (func), "c" (func2)); 48#else 49#define cpuid(func, func2, ax, bx, cx, dx)\ 50 __asm__ __volatile__ (\ 51 "mov %%ebx, %%edi \n\t" \ 52 "cpuid \n\t" \ 53 "xchg %%edi, %%ebx \n\t" \ 54 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 55 : "a" (func), "c" (func2)); 56#endif 57#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ 58#if ARCH_X86_64 59#define cpuid(func, func2, ax, bx, cx, dx)\ 60 asm volatile (\ 61 "xchg %rsi, %rbx \n\t" \ 62 "cpuid \n\t" \ 63 "movl %ebx, %edi \n\t" \ 64 "xchg %rsi, %rbx \n\t" \ 65 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 66 : "a" (func), "c" (func2)); 67#else 68#define cpuid(func, func2, ax, bx, cx, dx)\ 69 asm volatile (\ 70 "pushl %ebx \n\t" \ 71 "cpuid \n\t" \ 72 "movl %ebx, %edi \n\t" \ 73 "popl %ebx \n\t" \ 74 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 75 : "a" 
(func), "c" (func2)); 76#endif 77#else /* end __SUNPRO__ */ 78#if ARCH_X86_64 79#if defined(_MSC_VER) && _MSC_VER > 1500 80void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue); 81#pragma intrinsic(__cpuidex) 82#define cpuid(func, func2, a, b, c, d) do {\ 83 int regs[4];\ 84 __cpuidex(regs, func, func2); \ 85 a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ 86 } while(0) 87#else 88void __cpuid(int CPUInfo[4], int info_type); 89#pragma intrinsic(__cpuid) 90#define cpuid(func, func2, a, b, c, d) do {\ 91 int regs[4];\ 92 __cpuid(regs, func); \ 93 a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ 94 } while (0) 95#endif 96#else 97#define cpuid(func, func2, a, b, c, d)\ 98 __asm mov eax, func\ 99 __asm mov ecx, func2\ 100 __asm cpuid\ 101 __asm mov a, eax\ 102 __asm mov b, ebx\ 103 __asm mov c, ecx\ 104 __asm mov d, edx 105#endif 106#endif /* end others */ 107 108// NaCl has no support for xgetbv or the raw opcode. 109#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) 110static INLINE uint64_t xgetbv(void) { 111 const uint32_t ecx = 0; 112 uint32_t eax, edx; 113 // Use the raw opcode for xgetbv for compatibility with older toolchains. 114 __asm__ volatile ( 115 ".byte 0x0f, 0x01, 0xd0\n" 116 : "=a"(eax), "=d"(edx) : "c" (ecx)); 117 return ((uint64_t)edx << 32) | eax; 118} 119#elif (defined(_M_X64) || defined(_M_IX86)) && \ 120 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 121#include <immintrin.h> 122#define xgetbv() _xgetbv(0) 123#elif defined(_MSC_VER) && defined(_M_IX86) 124static INLINE uint64_t xgetbv(void) { 125 uint32_t eax_, edx_; 126 __asm { 127 xor ecx, ecx // ecx = 0 128 // Use the raw opcode for xgetbv for compatibility with older toolchains. 129 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 130 mov eax_, eax 131 mov edx_, edx 132 } 133 return ((uint64_t)edx_ << 32) | eax_; 134} 135#else 136#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. 
137#endif 138 139#if defined(_MSC_VER) && _MSC_VER >= 1700 140#include <windows.h> 141#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) 142#define getenv(x) NULL 143#endif 144#endif 145 146#define HAS_MMX 0x01 147#define HAS_SSE 0x02 148#define HAS_SSE2 0x04 149#define HAS_SSE3 0x08 150#define HAS_SSSE3 0x10 151#define HAS_SSE4_1 0x20 152#define HAS_AVX 0x40 153#define HAS_AVX2 0x80 154#ifndef BIT 155#define BIT(n) (1<<n) 156#endif 157 158static INLINE int 159x86_simd_caps(void) { 160 unsigned int flags = 0; 161 unsigned int mask = ~0; 162 unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; 163 char *env; 164 (void)reg_ebx; 165 166 /* See if the CPU capabilities are being overridden by the environment */ 167 env = getenv("VPX_SIMD_CAPS"); 168 169 if (env && *env) 170 return (int)strtol(env, NULL, 0); 171 172 env = getenv("VPX_SIMD_CAPS_MASK"); 173 174 if (env && *env) 175 mask = strtol(env, NULL, 0); 176 177 /* Ensure that the CPUID instruction supports extended features */ 178 cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); 179 180 if (max_cpuid_val < 1) 181 return 0; 182 183 /* Get the standard feature flags */ 184 cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 185 186 if (reg_edx & BIT(23)) flags |= HAS_MMX; 187 188 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ 189 190 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ 191 192 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; 193 194 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; 195 196 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; 197 198 // bits 27 (OSXSAVE) & 28 (256-bit AVX) 199 if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { 200 if ((xgetbv() & 0x6) == 0x6) { 201 flags |= HAS_AVX; 202 203 if (max_cpuid_val >= 7) { 204 /* Get the leaf 7 feature flags. 
Needed to check for AVX2 support */ 205 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 206 207 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; 208 } 209 } 210 } 211 212 return flags & mask; 213} 214 215#if ARCH_X86_64 && defined(_MSC_VER) 216unsigned __int64 __rdtsc(void); 217#pragma intrinsic(__rdtsc) 218#endif 219static INLINE unsigned int 220x86_readtsc(void) { 221#if defined(__GNUC__) && __GNUC__ 222 unsigned int tsc; 223 __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); 224 return tsc; 225#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 226 unsigned int tsc; 227 asm volatile("rdtsc\n\t":"=a"(tsc):); 228 return tsc; 229#else 230#if ARCH_X86_64 231 return (unsigned int)__rdtsc(); 232#else 233 __asm rdtsc; 234#endif 235#endif 236} 237 238 239#if defined(__GNUC__) && __GNUC__ 240#define x86_pause_hint()\ 241 __asm__ __volatile__ ("pause \n\t") 242#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 243#define x86_pause_hint()\ 244 asm volatile ("pause \n\t") 245#else 246#if ARCH_X86_64 247#define x86_pause_hint()\ 248 _mm_pause(); 249#else 250#define x86_pause_hint()\ 251 __asm pause 252#endif 253#endif 254 255#if defined(__GNUC__) && __GNUC__ 256static void 257x87_set_control_word(unsigned short mode) { 258 __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); 259} 260static unsigned short 261x87_get_control_word(void) { 262 unsigned short mode; 263 __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); 264 return mode; 265} 266#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 267static void 268x87_set_control_word(unsigned short mode) { 269 asm volatile("fldcw %0" : : "m"(*&mode)); 270} 271static unsigned short 272x87_get_control_word(void) { 273 unsigned short mode; 274 asm volatile("fstcw %0\n\t":"=m"(*&mode):); 275 return mode; 276} 277#elif ARCH_X86_64 278/* No fldcw intrinsics on Windows x64, punt to external asm */ 279extern void vpx_winx64_fldcw(unsigned short mode); 280extern unsigned short vpx_winx64_fstcw(void); 281#define x87_set_control_word vpx_winx64_fldcw 
282#define x87_get_control_word vpx_winx64_fstcw 283#else 284static void 285x87_set_control_word(unsigned short mode) { 286 __asm { fldcw mode } 287} 288static unsigned short 289x87_get_control_word(void) { 290 unsigned short mode; 291 __asm { fstcw mode } 292 return mode; 293} 294#endif 295 296static INLINE unsigned int 297x87_set_double_precision(void) { 298 unsigned int mode = x87_get_control_word(); 299 x87_set_control_word((mode&~0x300) | 0x200); 300 return mode; 301} 302 303 304extern void vpx_reset_mmx_state(void); 305 306#ifdef __cplusplus 307} // extern "C" 308#endif 309 310#endif // VPX_PORTS_X86_H_ 311