15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2011 Google Inc. All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
3eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// Use of this source code is governed by a BSD-style license
4eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// that can be found in the COPYING file in the root of the source
5eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// tree. An additional intellectual property rights grant can be found
6eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// in the file PATENTS. All contributing project authors may
7eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// be found in the AUTHORS file in the root of the source tree.
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// -----------------------------------------------------------------------------
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// CPU detection
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Author: Christian Duvivier (cduvivier@google.com)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "./dsp.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(__ANDROID__)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <cpu-features.h>
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//------------------------------------------------------------------------------
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SSE2 detection.
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  __asm__ volatile (
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "mov %%ebx, %%edi\n"
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "cpuid\n"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "xchg %%edi, %%ebx\n"
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : "a"(info_type));
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(__i386__) || defined(__x86_64__)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  __asm__ volatile (
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    "cpuid\n"
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : "a"(info_type));
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
415f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
425f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(WEBP_MSC_SSE2)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define GetCPUInfo __cpuid
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// NaCl has no support for xgetbv or the raw opcode.
485f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
495f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static WEBP_INLINE uint64_t xgetbv(void) {
505f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  const uint32_t ecx = 0;
515f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint32_t eax, edx;
525f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // Use the raw opcode for xgetbv for compatibility with older toolchains.
535f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  __asm__ volatile (
545f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    ".byte 0x0f, 0x01, 0xd0\n"
555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    : "=a"(eax), "=d"(edx) : "c" (ecx));
565f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return ((uint64_t)edx << 32) | eax;
575f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
595f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define xgetbv() _xgetbv(0)
605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(_M_IX86)
615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static WEBP_INLINE uint64_t xgetbv(void) {
625f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  uint32_t eax_, edx_;
635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  __asm {
645f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor ecx, ecx  // ecx = 0
655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    // Use the raw opcode for xgetbv for compatibility with older toolchains.
665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov eax_, eax
685f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov edx_, edx
695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return ((uint64_t)edx_ << 32) | eax_;
715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#else
735f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
745f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#endif
755f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int x86CPUInfo(CPUFeature feature) {
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int cpu_info[4];
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GetCPUInfo(cpu_info, 1);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (feature == kSSE2) {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0 != (cpu_info[3] & 0x04000000);
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (feature == kSSE3) {
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0 != (cpu_info[2] & 0x00000001);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
865f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (feature == kAVX) {
875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    // bits 27 (OSXSAVE) & 28 (256-bit AVX)
885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if ((cpu_info[2] & 0x18000000) == 0x18000000) {
895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      // XMM state and YMM state enabled by the OS.
905f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      return (xgetbv() & 0x6) == 0x6;
915f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    }
925f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
935f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (feature == kAVX2) {
945f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (x86CPUInfo(kAVX)) {
955f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      GetCPUInfo(cpu_info, 7);
965f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      return ((cpu_info[1] & 0x00000020) == 0x00000020);
975f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    }
985f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 0;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
1025f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int AndroidCPUInfo(CPUFeature feature) {
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const AndroidCpuFamily cpu_family = android_getCpuFamily();
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint64_t cpu_features = android_getCpuFeatures();
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (feature == kNEON) {
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 0;
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
1135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(WEBP_USE_NEON)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// define a dummy function to enable turning off NEON at runtime by setting
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// VP8DecGetCPUInfo = NULL
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int armCPUInfo(CPUFeature feature) {
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  (void)feature;
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 1;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
1215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#elif defined(WEBP_USE_MIPS32)
1225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static int mipsCPUInfo(CPUFeature feature) {
1235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  (void)feature;
1245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return 1;
1255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
1265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
// None of the detection paths above applies to this build: leave the hook
// unset (NULL).
VP8CPUInfo VP8GetCPUInfo = NULL;
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
131