1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12#ifndef VPX_PORTS_X86_H_ 13#define VPX_PORTS_X86_H_ 14#include <stdlib.h> 15#include "vpx_config.h" 16 17#ifdef __cplusplus 18extern "C" { 19#endif 20 21typedef enum { 22 VPX_CPU_UNKNOWN = -1, 23 VPX_CPU_AMD, 24 VPX_CPU_AMD_OLD, 25 VPX_CPU_CENTAUR, 26 VPX_CPU_CYRIX, 27 VPX_CPU_INTEL, 28 VPX_CPU_NEXGEN, 29 VPX_CPU_NSC, 30 VPX_CPU_RISE, 31 VPX_CPU_SIS, 32 VPX_CPU_TRANSMETA, 33 VPX_CPU_TRANSMETA_OLD, 34 VPX_CPU_UMC, 35 VPX_CPU_VIA, 36 37 VPX_CPU_LAST 38} vpx_cpu_t; 39 40#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) 41#if ARCH_X86_64 42#define cpuid(func, func2, ax, bx, cx, dx)\ 43 __asm__ __volatile__ (\ 44 "cpuid \n\t" \ 45 : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ 46 : "a" (func), "c" (func2)); 47#else 48#define cpuid(func, func2, ax, bx, cx, dx)\ 49 __asm__ __volatile__ (\ 50 "mov %%ebx, %%edi \n\t" \ 51 "cpuid \n\t" \ 52 "xchg %%edi, %%ebx \n\t" \ 53 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 54 : "a" (func), "c" (func2)); 55#endif 56#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ 57#if ARCH_X86_64 58#define cpuid(func, func2, ax, bx, cx, dx)\ 59 asm volatile (\ 60 "xchg %rsi, %rbx \n\t" \ 61 "cpuid \n\t" \ 62 "movl %ebx, %edi \n\t" \ 63 "xchg %rsi, %rbx \n\t" \ 64 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 65 : "a" (func), "c" (func2)); 66#else 67#define cpuid(func, func2, ax, bx, cx, dx)\ 68 asm volatile (\ 69 "pushl %ebx \n\t" \ 70 "cpuid \n\t" \ 71 "movl %ebx, %edi \n\t" \ 72 "popl %ebx \n\t" \ 73 : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 74 : "a" (func), "c" (func2)); 75#endif 76#else /* end __SUNPRO__ */ 77#if ARCH_X86_64 78#if defined(_MSC_VER) && _MSC_VER > 1500 79void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue); 80#pragma intrinsic(__cpuidex) 81#define cpuid(func, func2, a, b, c, d) do {\ 82 int regs[4];\ 83 __cpuidex(regs, func, func2); \ 84 a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ 85 } while(0) 86#else 87void __cpuid(int CPUInfo[4], int info_type); 88#pragma intrinsic(__cpuid) 89#define cpuid(func, func2, a, b, c, d) do {\ 90 int regs[4];\ 91 __cpuid(regs, func); \ 92 a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ 93 } while (0) 94#endif 95#else 96#define cpuid(func, func2, a, b, c, d)\ 97 __asm mov eax, func\ 98 __asm mov ecx, func2\ 99 __asm cpuid\ 100 __asm mov a, eax\ 101 __asm mov b, ebx\ 102 __asm mov c, ecx\ 103 __asm mov d, edx 104#endif 105#endif /* end others */ 106 107#define HAS_MMX 0x01 108#define HAS_SSE 0x02 109#define HAS_SSE2 0x04 110#define HAS_SSE3 0x08 111#define HAS_SSSE3 0x10 112#define HAS_SSE4_1 0x20 113#define HAS_AVX 0x40 114#define HAS_AVX2 0x80 115#ifndef BIT 116#define BIT(n) (1<<n) 117#endif 118 119static int 120x86_simd_caps(void) { 121 unsigned int flags = 0; 122 unsigned int mask = ~0; 123 unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; 124 char *env; 125 (void)reg_ebx; 126 127 /* See if the CPU capabilities are being overridden by the environment */ 128 env = getenv("VPX_SIMD_CAPS"); 129 130 if (env && *env) 131 return (int)strtol(env, NULL, 0); 132 133 env = getenv("VPX_SIMD_CAPS_MASK"); 134 135 if (env && *env) 136 mask = strtol(env, NULL, 0); 137 138 /* Ensure that the CPUID instruction supports extended features */ 139 cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 140 141 if (reg_eax < 1) 142 return 0; 143 144 /* Get the standard feature flags */ 145 cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 146 147 if (reg_edx & BIT(23)) flags |= HAS_MMX; 148 149 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ 150 151 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ 152 153 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; 154 155 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; 156 157 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; 158 159 if (reg_ecx & BIT(28)) flags |= HAS_AVX; 160 161 /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ 162 reg_eax = 7; 163 reg_ecx = 0; 164 cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 165 166 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; 167 168 return flags & mask; 169} 170 171#if ARCH_X86_64 && defined(_MSC_VER) 172unsigned __int64 __rdtsc(void); 173#pragma intrinsic(__rdtsc) 174#endif 175static unsigned int 176x86_readtsc(void) { 177#if defined(__GNUC__) && __GNUC__ 178 unsigned int tsc; 179 __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); 180 return tsc; 181#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 182 unsigned int tsc; 183 asm volatile("rdtsc\n\t":"=a"(tsc):); 184 return tsc; 185#else 186#if ARCH_X86_64 187 return (unsigned int)__rdtsc(); 188#else 189 __asm rdtsc; 190#endif 191#endif 192} 193 194 195#if defined(__GNUC__) && __GNUC__ 196#define x86_pause_hint()\ 197 __asm__ __volatile__ ("pause \n\t") 198#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 199#define x86_pause_hint()\ 200 asm volatile ("pause \n\t") 201#else 202#if ARCH_X86_64 203#define x86_pause_hint()\ 204 _mm_pause(); 205#else 206#define x86_pause_hint()\ 207 __asm pause 208#endif 209#endif 210 211#if defined(__GNUC__) && __GNUC__ 212static void 213x87_set_control_word(unsigned short mode) { 214 __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); 215} 216static unsigned short 217x87_get_control_word(void) { 218 unsigned short mode; 219 __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); 220 return mode; 221} 222#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 223static void 224x87_set_control_word(unsigned short mode) { 225 asm volatile("fldcw %0" : : "m"(*&mode)); 226} 227static unsigned short 228x87_get_control_word(void) { 229 unsigned short mode; 230 asm volatile("fstcw %0\n\t":"=m"(*&mode):); 231 return mode; 232} 233#elif ARCH_X86_64 234/* No fldcw intrinsics on Windows x64, punt to external asm */ 235extern void vpx_winx64_fldcw(unsigned short mode); 236extern unsigned short vpx_winx64_fstcw(void); 237#define x87_set_control_word vpx_winx64_fldcw 238#define x87_get_control_word vpx_winx64_fstcw 239#else 240static void 241x87_set_control_word(unsigned short mode) { 242 __asm { fldcw mode } 243} 244static unsigned short 245x87_get_control_word(void) { 246 unsigned short mode; 247 __asm { fstcw mode } 248 return mode; 249} 250#endif 251 252static unsigned short 253x87_set_double_precision(void) { 254 unsigned short mode = x87_get_control_word(); 255 x87_set_control_word((mode&~0x300) | 0x200); 256 return mode; 257} 258 259 260extern void vpx_reset_mmx_state(void); 261 262#ifdef __cplusplus 263} // extern "C" 264#endif 265 266#endif // VPX_PORTS_X86_H_ 267