1a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Copyright 2011 Google Inc. All Rights Reserved. 2a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 30406ce1417f76f2034833414dcecc9f56253640cVikas Arora// Use of this source code is governed by a BSD-style license 40406ce1417f76f2034833414dcecc9f56253640cVikas Arora// that can be found in the COPYING file in the root of the source 50406ce1417f76f2034833414dcecc9f56253640cVikas Arora// tree. An additional intellectual property rights grant can be found 60406ce1417f76f2034833414dcecc9f56253640cVikas Arora// in the file PATENTS. All contributing project authors may 70406ce1417f76f2034833414dcecc9f56253640cVikas Arora// be found in the AUTHORS file in the root of the source tree. 8a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// ----------------------------------------------------------------------------- 9a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 10a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// CPU detection 11a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 12a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Author: Christian Duvivier (cduvivier@google.com) 13a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 14a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./dsp.h" 15a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 161e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#if defined(__ANDROID__) 17af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#include "cpu-features.h" 181e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#endif 19a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 20a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------ 21a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// SSE2 detection. 22a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 23a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 24a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. 25a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) 26a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 27a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora __asm__ volatile ( 28a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora "mov %%ebx, %%edi\n" 29a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora "cpuid\n" 30a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora "xchg %%edi, %%ebx\n" 31a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 32a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora : "a"(info_type)); 33a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 34a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#elif defined(__i386__) || defined(__x86_64__) 35a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 36a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora __asm__ volatile ( 37a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora "cpuid\n" 38a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 39a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora : "a"(info_type)); 40a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 41af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 42af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 43a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#elif defined(WEBP_MSC_SSE2) 44a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#define GetCPUInfo __cpuid 45a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#endif 46a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 47af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora// NaCl has no support for xgetbv or the raw opcode. 48af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) 49af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic WEBP_INLINE uint64_t xgetbv(void) { 50af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora const uint32_t ecx = 0; 51af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora uint32_t eax, edx; 52af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // Use the raw opcode for xgetbv for compatibility with older toolchains. 53af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora __asm__ volatile ( 54af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora ".byte 0x0f, 0x01, 0xd0\n" 55af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora : "=a"(eax), "=d"(edx) : "c" (ecx)); 56af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora return ((uint64_t)edx << 32) | eax; 57af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} 58af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 59af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#define xgetbv() _xgetbv(0) 60af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(_M_IX86) 61af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic WEBP_INLINE uint64_t xgetbv(void) { 62af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora uint32_t eax_, edx_; 63af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora __asm { 64af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora xor ecx, ecx // ecx = 0 65af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // Use the raw opcode for xgetbv for compatibility with older toolchains. 66af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 67af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora mov eax_, eax 68af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora mov edx_, edx 69af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 70af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora return ((uint64_t)edx_ << 32) | eax_; 71af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} 72af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#else 73af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. 74af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#endif 75af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora 76a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) 77a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic int x86CPUInfo(CPUFeature feature) { 78a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora int cpu_info[4]; 79a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora GetCPUInfo(cpu_info, 1); 80a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora if (feature == kSSE2) { 81a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora return 0 != (cpu_info[3] & 0x04000000); 82a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 83a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora if (feature == kSSE3) { 84a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora return 0 != (cpu_info[2] & 0x00000001); 85a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora } 86af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (feature == kAVX) { 87af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // bits 27 (OSXSAVE) & 28 (256-bit AVX) 88af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if ((cpu_info[2] & 0x18000000) == 0x18000000) { 89af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora // XMM state and YMM state enabled by the OS. 90af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora return (xgetbv() & 0x6) == 0x6; 91af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 92af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 93af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (feature == kAVX2) { 94af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora if (x86CPUInfo(kAVX)) { 95af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora GetCPUInfo(cpu_info, 7); 96af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora return ((cpu_info[1] & 0x00000020) == 0x00000020); 97af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 98af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora } 99a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora return 0; 100a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 101a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraVP8CPUInfo VP8GetCPUInfo = x86CPUInfo; 102af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. 1031e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic int AndroidCPUInfo(CPUFeature feature) { 1041e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora const AndroidCpuFamily cpu_family = android_getCpuFamily(); 1051e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora const uint64_t cpu_features = android_getCpuFeatures(); 1061e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora if (feature == kNEON) { 1071e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora return (cpu_family == ANDROID_CPU_FAMILY_ARM && 1081e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)); 1091e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora } 1101e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora return 0; 1111e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora} 1121e7bf8805bd030c19924a5306837ecd72c295751Vikas AroraVP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; 113af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(WEBP_USE_NEON) 114a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// define a dummy function to enable turning off NEON at runtime by setting 115a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// VP8DecGetCPUInfo = NULL 116a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic int armCPUInfo(CPUFeature feature) { 117a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora (void)feature; 118a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora return 1; 119a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora} 120a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraVP8CPUInfo VP8GetCPUInfo = armCPUInfo; 121af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(WEBP_USE_MIPS32) 122af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic int mipsCPUInfo(CPUFeature feature) { 123af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora (void)feature; 124af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora return 1; 125af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora} 126af51b94a435132e9014c324e25fb686b3d07a8c8Vikas AroraVP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; 127a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#else 128a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraVP8CPUInfo VP8GetCPUInfo = NULL; 129a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#endif 130a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 131