// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// CPU detection
//
// Author: Christian Duvivier (cduvivier@google.com)
13a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
14a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "./dsp.h"
15a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
161e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#if defined(__ANDROID__)
17af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#include "cpu-features.h"
181e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora#endif
19a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
20a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//------------------------------------------------------------------------------
21a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// SSE2 detection.
22a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//
23a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
24a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
25a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
26a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
27a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  __asm__ volatile (
28a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    "mov %%ebx, %%edi\n"
29a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    "cpuid\n"
30a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    "xchg %%edi, %%ebx\n"
31a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
32a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    : "a"(info_type));
33a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora}
34a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#elif defined(__i386__) || defined(__x86_64__)
35a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
36a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  __asm__ volatile (
37a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    "cpuid\n"
38a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
39a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    : "a"(info_type));
40a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora}
41af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
42af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
43a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#elif defined(WEBP_MSC_SSE2)
44a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#define GetCPUInfo __cpuid
45a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#endif
46a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
47af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora// NaCl has no support for xgetbv or the raw opcode.
48af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
49af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic WEBP_INLINE uint64_t xgetbv(void) {
50af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  const uint32_t ecx = 0;
51af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  uint32_t eax, edx;
52af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  // Use the raw opcode for xgetbv for compatibility with older toolchains.
53af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  __asm__ volatile (
54af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    ".byte 0x0f, 0x01, 0xd0\n"
55af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    : "=a"(eax), "=d"(edx) : "c" (ecx));
56af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  return ((uint64_t)edx << 32) | eax;
57af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora}
58af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
59af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#define xgetbv() _xgetbv(0)
60af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(_M_IX86)
61af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic WEBP_INLINE uint64_t xgetbv(void) {
62af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  uint32_t eax_, edx_;
63af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  __asm {
64af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    xor ecx, ecx  // ecx = 0
65af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    // Use the raw opcode for xgetbv for compatibility with older toolchains.
66af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
67af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    mov eax_, eax
68af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    mov edx_, edx
69af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  }
70af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  return ((uint64_t)edx_ << 32) | eax_;
71af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora}
72af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#else
73af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
74af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#endif
75af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora
76a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
77a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic int x86CPUInfo(CPUFeature feature) {
78a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  int cpu_info[4];
79a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  GetCPUInfo(cpu_info, 1);
80a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  if (feature == kSSE2) {
81a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    return 0 != (cpu_info[3] & 0x04000000);
82a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  }
83a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  if (feature == kSSE3) {
84a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora    return 0 != (cpu_info[2] & 0x00000001);
85a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  }
86af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  if (feature == kAVX) {
87af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    // bits 27 (OSXSAVE) & 28 (256-bit AVX)
88af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    if ((cpu_info[2] & 0x18000000) == 0x18000000) {
89af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      // XMM state and YMM state enabled by the OS.
90af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      return (xgetbv() & 0x6) == 0x6;
91af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    }
92af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  }
93af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  if (feature == kAVX2) {
94af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    if (x86CPUInfo(kAVX)) {
95af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      GetCPUInfo(cpu_info, 7);
96af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora      return ((cpu_info[1] & 0x00000020) == 0x00000020);
97af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora    }
98af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  }
99a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  return 0;
100a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora}
101a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraVP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
102af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
1031e7bf8805bd030c19924a5306837ecd72c295751Vikas Arorastatic int AndroidCPUInfo(CPUFeature feature) {
1041e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const AndroidCpuFamily cpu_family = android_getCpuFamily();
1051e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  const uint64_t cpu_features = android_getCpuFeatures();
1061e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  if (feature == kNEON) {
1071e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora    return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
1081e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora            0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
1091e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  }
1101e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora  return 0;
1111e7bf8805bd030c19924a5306837ecd72c295751Vikas Arora}
1121e7bf8805bd030c19924a5306837ecd72c295751Vikas AroraVP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
113af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(WEBP_USE_NEON)
114a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// define a dummy function to enable turning off NEON at runtime by setting
115a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// VP8DecGetCPUInfo = NULL
116a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorastatic int armCPUInfo(CPUFeature feature) {
117a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  (void)feature;
118a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora  return 1;
119a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora}
120a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraVP8CPUInfo VP8GetCPUInfo = armCPUInfo;
121af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora#elif defined(WEBP_USE_MIPS32)
122af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arorastatic int mipsCPUInfo(CPUFeature feature) {
123af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  (void)feature;
124af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora  return 1;
125af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Arora}
126af51b94a435132e9014c324e25fb686b3d07a8c8Vikas AroraVP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
127a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#else
128a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraVP8CPUInfo VP8GetCPUInfo = NULL;
129a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#endif
130a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora
131