15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2011 Google Inc. All Rights Reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 3eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// Use of this source code is governed by a BSD-style license 4eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// that can be found in the COPYING file in the root of the source 5eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// tree. An additional intellectual property rights grant can be found 6eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// in the file PATENTS. All contributing project authors may 7eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// be found in the AUTHORS file in the root of the source tree. 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ----------------------------------------------------------------------------- 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// CPU detection 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Author: Christian Duvivier (cduvivier@google.com) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "./dsp.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(__ANDROID__) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <cpu-features.h> 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------ 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SSE2 detection. 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) __asm__ volatile ( 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "mov %%ebx, %%edi\n" 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "cpuid\n" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "xchg %%edi, %%ebx\n" 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : "a"(info_type)); 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(__i386__) || defined(__x86_64__) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) __asm__ volatile ( 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "cpuid\n" 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : "a"(info_type)); 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 415f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 425f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(WEBP_MSC_SSE2) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define GetCPUInfo __cpuid 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 475f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// NaCl has no support for xgetbv or the raw opcode. 485f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) 495f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static WEBP_INLINE uint64_t xgetbv(void) { 505f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) const uint32_t ecx = 0; 515f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) uint32_t eax, edx; 525f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) // Use the raw opcode for xgetbv for compatibility with older toolchains. 535f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) __asm__ volatile ( 545f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) ".byte 0x0f, 0x01, 0xd0\n" 555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) : "=a"(eax), "=d"(edx) : "c" (ecx)); 565f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) return ((uint64_t)edx << 32) | eax; 575f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)} 585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 595f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define xgetbv() _xgetbv(0) 605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(_M_IX86) 615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static WEBP_INLINE uint64_t xgetbv(void) { 625f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) uint32_t eax_, edx_; 635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) __asm { 645f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) xor ecx, ecx // ecx = 0 655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) // Use the raw opcode for xgetbv for compatibility with older toolchains. 665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) mov eax_, eax 685f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) mov edx_, edx 695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) return ((uint64_t)edx_ << 32) | eax_; 715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)} 725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#else 735f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. 745f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#endif 755f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int x86CPUInfo(CPUFeature feature) { 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int cpu_info[4]; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GetCPUInfo(cpu_info, 1); 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (feature == kSSE2) { 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0 != (cpu_info[3] & 0x04000000); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (feature == kSSE3) { 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0 != (cpu_info[2] & 0x00000001); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 865f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) if (feature == kAVX) { 875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) // bits 27 (OSXSAVE) & 28 (256-bit AVX) 885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) if ((cpu_info[2] & 0x18000000) == 0x18000000) { 895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) // XMM state and YMM state enabled by the OS. 905f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) return (xgetbv() & 0x6) == 0x6; 915f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 925f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 935f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) if (feature == kAVX2) { 945f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) if (x86CPUInfo(kAVX)) { 955f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) GetCPUInfo(cpu_info, 7); 965f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) return ((cpu_info[1] & 0x00000020) == 0x00000020); 975f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 985f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; 1025f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int AndroidCPUInfo(CPUFeature feature) { 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const AndroidCpuFamily cpu_family = android_getCpuFamily(); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const uint64_t cpu_features = android_getCpuFeatures(); 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (feature == kNEON) { 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (cpu_family == ANDROID_CPU_FAMILY_ARM && 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)); 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; 1135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(WEBP_USE_NEON) 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// define a dummy function to enable turning off NEON at runtime by setting 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// VP8DecGetCPUInfo = NULL 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int armCPUInfo(CPUFeature feature) { 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (void)feature; 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 1; 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = armCPUInfo; 1215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(WEBP_USE_MIPS32) 1225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static int mipsCPUInfo(CPUFeature feature) { 1235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) (void)feature; 1245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) return 1; 1255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)} 1265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = NULL; 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 131