/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/cpu_id.h"

#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>  // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
    !defined(__native_client__) && \
    defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) && \
    (defined(_M_IX86) || defined(_M_X64))
#include <immintrin.h>  // For _xgetbv()
#endif

#if !defined(__native_client__)
#include <stdlib.h>  // For getenv()
#endif

// For ArmCpuCaps(), which is built (and unit tested) on all platforms.
#include <stdio.h>
#include <string.h>

#include "libyuv/basic_types.h"  // For CPU_X86

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid the additional check. Expands to
// __declspec(safebuffers) on sufficiently new MSVC and to nothing elsewhere.
#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif

// Low level cpuid for X86. Returns zeros on other CPUs.
47#if !defined(__pnacl__) && !defined(__CLR_VER) && \ 48 (defined(_M_IX86) || defined(_M_X64) || \ 49 defined(__i386__) || defined(__x86_64__)) 50LIBYUV_API 51void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { 52#if defined(_MSC_VER) && !defined(__clang__) 53#if (_MSC_FULL_VER >= 160040219) 54 __cpuidex((int*)(cpu_info), info_eax, info_ecx); 55#elif defined(_M_IX86) 56 __asm { 57 mov eax, info_eax 58 mov ecx, info_ecx 59 mov edi, cpu_info 60 cpuid 61 mov [edi], eax 62 mov [edi + 4], ebx 63 mov [edi + 8], ecx 64 mov [edi + 12], edx 65 } 66#else 67 if (info_ecx == 0) { 68 __cpuid((int*)(cpu_info), info_eax); 69 } else { 70 cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; 71 } 72#endif 73#else // defined(_MSC_VER) 74 uint32 info_ebx, info_edx; 75 asm volatile ( // NOLINT 76#if defined( __i386__) && defined(__PIC__) 77 // Preserve ebx for fpic 32 bit. 78 "mov %%ebx, %%edi \n" 79 "cpuid \n" 80 "xchg %%edi, %%ebx \n" 81 : "=D" (info_ebx), 82#else 83 "cpuid \n" 84 : "=b" (info_ebx), 85#endif // defined( __i386__) && defined(__PIC__) 86 "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx)); 87 cpu_info[0] = info_eax; 88 cpu_info[1] = info_ebx; 89 cpu_info[2] = info_ecx; 90 cpu_info[3] = info_edx; 91#endif // defined(_MSC_VER) 92} 93 94#if !defined(__native_client__) 95#define HAS_XGETBV 96// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. 97int TestOsSaveYmm() { 98 uint32 xcr0 = 0u; 99#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) 100 xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. 101#elif defined(_M_IX86) && defined(_MSC_VER) 102 __asm { 103 xor ecx, ecx // xcr 0 104 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. 105 mov xcr0, eax 106 } 107#elif defined(__i386__) || defined(__x86_64__) 108 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); 109#endif // defined(_MSC_VER) 110 return((xcr0 & 6) == 6); // Is ymm saved? 
111} 112#endif // !defined(__native_client__) 113#else 114LIBYUV_API 115void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { 116 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; 117} 118#endif 119 120// based on libvpx arm_cpudetect.c 121// For Arm, but public to allow testing on any CPU 122LIBYUV_API SAFEBUFFERS 123int ArmCpuCaps(const char* cpuinfo_name) { 124 char cpuinfo_line[512]; 125 FILE* f = fopen(cpuinfo_name, "r"); 126 if (!f) { 127 // Assume Neon if /proc/cpuinfo is unavailable. 128 // This will occur for Chrome sandbox for Pepper or Render process. 129 return kCpuHasNEON; 130 } 131 while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { 132 if (memcmp(cpuinfo_line, "Features", 8) == 0) { 133 char* p = strstr(cpuinfo_line, " neon"); 134 if (p && (p[5] == ' ' || p[5] == '\n')) { 135 fclose(f); 136 return kCpuHasNEON; 137 } 138 } 139 } 140 fclose(f); 141 return 0; 142} 143 144#if defined(__mips__) && defined(__linux__) 145static int MipsCpuCaps(const char* search_string) { 146 char cpuinfo_line[512]; 147 const char* file_name = "/proc/cpuinfo"; 148 FILE* f = fopen(file_name, "r"); 149 if (!f) { 150 // Assume DSP if /proc/cpuinfo is unavailable. 151 // This will occur for Chrome sandbox for Pepper or Render process. 152 return kCpuHasMIPS_DSP; 153 } 154 while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) { 155 if (strstr(cpuinfo_line, search_string) != NULL) { 156 fclose(f); 157 return kCpuHasMIPS_DSP; 158 } 159 } 160 fclose(f); 161 return 0; 162} 163#endif 164 165// CPU detect function for SIMD instruction sets. 166LIBYUV_API 167int cpu_info_ = kCpuInit; // cpu_info is not initialized yet. 168 169// Test environment variable for disabling CPU features. Any non-zero value 170// to disable. Zero ignored to make it easy to set the variable on/off. 
171#if !defined(__native_client__) && !defined(_M_ARM) 172 173static LIBYUV_BOOL TestEnv(const char* name) { 174 const char* var = getenv(name); 175 if (var) { 176 if (var[0] != '0') { 177 return LIBYUV_TRUE; 178 } 179 } 180 return LIBYUV_FALSE; 181} 182#else // nacl does not support getenv(). 183static LIBYUV_BOOL TestEnv(const char*) { 184 return LIBYUV_FALSE; 185} 186#endif 187 188LIBYUV_API SAFEBUFFERS 189int InitCpuFlags(void) { 190#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) 191 192 uint32 cpu_info0[4] = { 0, 0, 0, 0 }; 193 uint32 cpu_info1[4] = { 0, 0, 0, 0 }; 194 uint32 cpu_info7[4] = { 0, 0, 0, 0 }; 195 CpuId(0, 0, cpu_info0); 196 CpuId(1, 0, cpu_info1); 197 if (cpu_info0[0] >= 7) { 198 CpuId(7, 0, cpu_info7); 199 } 200 cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | 201 ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | 202 ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | 203 ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | 204 ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | 205 ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | 206 kCpuHasX86; 207 208#ifdef HAS_XGETBV 209 if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave 210 TestOsSaveYmm()) { // Saves YMM. 211 cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | 212 kCpuHasAVX; 213 } 214#endif 215 // Environment variable overrides for testing. 
216 if (TestEnv("LIBYUV_DISABLE_X86")) { 217 cpu_info_ &= ~kCpuHasX86; 218 } 219 if (TestEnv("LIBYUV_DISABLE_SSE2")) { 220 cpu_info_ &= ~kCpuHasSSE2; 221 } 222 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { 223 cpu_info_ &= ~kCpuHasSSSE3; 224 } 225 if (TestEnv("LIBYUV_DISABLE_SSE41")) { 226 cpu_info_ &= ~kCpuHasSSE41; 227 } 228 if (TestEnv("LIBYUV_DISABLE_SSE42")) { 229 cpu_info_ &= ~kCpuHasSSE42; 230 } 231 if (TestEnv("LIBYUV_DISABLE_AVX")) { 232 cpu_info_ &= ~kCpuHasAVX; 233 } 234 if (TestEnv("LIBYUV_DISABLE_AVX2")) { 235 cpu_info_ &= ~kCpuHasAVX2; 236 } 237 if (TestEnv("LIBYUV_DISABLE_ERMS")) { 238 cpu_info_ &= ~kCpuHasERMS; 239 } 240 if (TestEnv("LIBYUV_DISABLE_FMA3")) { 241 cpu_info_ &= ~kCpuHasFMA3; 242 } 243#elif defined(__mips__) && defined(__linux__) 244 // Linux mips parse text file for dsp detect. 245 cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP. 246#if defined(__mips_dspr2) 247 cpu_info_ |= kCpuHasMIPS_DSPR2; 248#endif 249 cpu_info_ |= kCpuHasMIPS; 250 251 if (getenv("LIBYUV_DISABLE_MIPS")) { 252 cpu_info_ &= ~kCpuHasMIPS; 253 } 254 if (getenv("LIBYUV_DISABLE_MIPS_DSP")) { 255 cpu_info_ &= ~kCpuHasMIPS_DSP; 256 } 257 if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) { 258 cpu_info_ &= ~kCpuHasMIPS_DSPR2; 259 } 260#elif defined(__arm__) || defined(__aarch64__) 261// gcc -mfpu=neon defines __ARM_NEON__ 262// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. 263// For Linux, /proc/cpuinfo can be tested but without that assume Neon. 264#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__) 265 cpu_info_ = kCpuHasNEON; 266// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon 267// flag in it. 268// So for aarch64, neon enabling is hard coded here. 269#elif defined(__aarch64__) 270 cpu_info_ = kCpuHasNEON; 271#else 272 // Linux arm parse text file for neon detect. 
273 cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); 274#endif 275 cpu_info_ |= kCpuHasARM; 276 if (TestEnv("LIBYUV_DISABLE_NEON")) { 277 cpu_info_ &= ~kCpuHasNEON; 278 } 279#endif // __arm__ 280 if (TestEnv("LIBYUV_DISABLE_ASM")) { 281 cpu_info_ = 0; 282 } 283 return cpu_info_; 284} 285 286LIBYUV_API 287void MaskCpuFlags(int enable_flags) { 288 cpu_info_ = InitCpuFlags() & enable_flags; 289} 290 291#ifdef __cplusplus 292} // extern "C" 293} // namespace libyuv 294#endif 295