14311f016612a814282029daa4bd102053a853d82mtklein/* 24311f016612a814282029daa4bd102053a853d82mtklein * Copyright 2016 Google Inc. 34311f016612a814282029daa4bd102053a853d82mtklein * 44311f016612a814282029daa4bd102053a853d82mtklein * Use of this source code is governed by a BSD-style license that can be 54311f016612a814282029daa4bd102053a853d82mtklein * found in the LICENSE file. 64311f016612a814282029daa4bd102053a853d82mtklein */ 74311f016612a814282029daa4bd102053a853d82mtklein 84311f016612a814282029daa4bd102053a853d82mtklein#include "SkCpu.h" 9eb85fd746d6390f53e250583a0544bf59ed34b35mtklein#include "SkOnce.h" 104311f016612a814282029daa4bd102053a853d82mtklein 114ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein#if !defined(__has_include) 124ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein #define __has_include(x) 0 134ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein#endif 144ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein 154311f016612a814282029daa4bd102053a853d82mtklein#if defined(SK_CPU_X86) 164311f016612a814282029daa4bd102053a853d82mtklein #if defined(SK_BUILD_FOR_WIN32) 174311f016612a814282029daa4bd102053a853d82mtklein #include <intrin.h> 184311f016612a814282029daa4bd102053a853d82mtklein static void cpuid (uint32_t abcd[4]) { __cpuid ((int*)abcd, 1); } 194311f016612a814282029daa4bd102053a853d82mtklein static void cpuid7(uint32_t abcd[4]) { __cpuidex((int*)abcd, 7, 0); } 204311f016612a814282029daa4bd102053a853d82mtklein static uint64_t xgetbv(uint32_t xcr) { return _xgetbv(xcr); } 214311f016612a814282029daa4bd102053a853d82mtklein #else 224311f016612a814282029daa4bd102053a853d82mtklein #include <cpuid.h> 234311f016612a814282029daa4bd102053a853d82mtklein #if !defined(__cpuid_count) // Old Mac Clang doesn't have this defined. 244311f016612a814282029daa4bd102053a853d82mtklein #define __cpuid_count(eax, ecx, a, b, c, d) \ 254311f016612a814282029daa4bd102053a853d82mtklein __asm__("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eax), "2"(ecx)) 264311f016612a814282029daa4bd102053a853d82mtklein #endif 274311f016612a814282029daa4bd102053a853d82mtklein static void cpuid (uint32_t abcd[4]) { __get_cpuid(1, abcd+0, abcd+1, abcd+2, abcd+3); } 284311f016612a814282029daa4bd102053a853d82mtklein static void cpuid7(uint32_t abcd[4]) { 294311f016612a814282029daa4bd102053a853d82mtklein __cpuid_count(7, 0, abcd[0], abcd[1], abcd[2], abcd[3]); 304311f016612a814282029daa4bd102053a853d82mtklein } 314311f016612a814282029daa4bd102053a853d82mtklein static uint64_t xgetbv(uint32_t xcr) { 324311f016612a814282029daa4bd102053a853d82mtklein uint32_t eax, edx; 334311f016612a814282029daa4bd102053a853d82mtklein __asm__ __volatile__ ( "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr)); 344311f016612a814282029daa4bd102053a853d82mtklein return (uint64_t)(edx) << 32 | eax; 354311f016612a814282029daa4bd102053a853d82mtklein } 364311f016612a814282029daa4bd102053a853d82mtklein #endif 374311f016612a814282029daa4bd102053a853d82mtklein 384311f016612a814282029daa4bd102053a853d82mtklein static uint32_t read_cpu_features() { 394311f016612a814282029daa4bd102053a853d82mtklein uint32_t features = 0; 404311f016612a814282029daa4bd102053a853d82mtklein uint32_t abcd[4] = {0,0,0,0}; 414311f016612a814282029daa4bd102053a853d82mtklein 424311f016612a814282029daa4bd102053a853d82mtklein // You might want to refer to http://www.sandpile.org/x86/cpuid.htm 434311f016612a814282029daa4bd102053a853d82mtklein 444311f016612a814282029daa4bd102053a853d82mtklein cpuid(abcd); 454311f016612a814282029daa4bd102053a853d82mtklein if (abcd[3] & (1<<25)) { features |= SkCpu:: SSE1; } 464311f016612a814282029daa4bd102053a853d82mtklein if (abcd[3] & (1<<26)) { features |= SkCpu:: SSE2; } 474311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<< 0)) { features |= SkCpu:: SSE3; } 484311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<< 9)) { features |= SkCpu::SSSE3; } 494311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<<19)) { features |= SkCpu::SSE41; } 504311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<<20)) { features |= SkCpu::SSE42; } 514311f016612a814282029daa4bd102053a853d82mtklein 52c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if ((abcd[2] & (3<<26)) == (3<<26) // XSAVE + OSXSAVE 53c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein && (xgetbv(0) & (3<<1)) == (3<<1)) { // XMM and YMM state enabled. 544311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<<28)) { features |= SkCpu:: AVX; } 554311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<<29)) { features |= SkCpu::F16C; } 564311f016612a814282029daa4bd102053a853d82mtklein if (abcd[2] & (1<<12)) { features |= SkCpu:: FMA; } 574311f016612a814282029daa4bd102053a853d82mtklein 584311f016612a814282029daa4bd102053a853d82mtklein cpuid7(abcd); 594311f016612a814282029daa4bd102053a853d82mtklein if (abcd[1] & (1<<5)) { features |= SkCpu::AVX2; } 6078d5a3bac5cbde50cd12d8b9ab6dd269324b5272Mike Klein if (abcd[1] & (1<<3)) { features |= SkCpu::BMI1; } 6178d5a3bac5cbde50cd12d8b9ab6dd269324b5272Mike Klein if (abcd[1] & (1<<8)) { features |= SkCpu::BMI2; } 62c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein 63c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if ((xgetbv(0) & (7<<5)) == (7<<5)) { // All ZMM state bits enabled too. 64c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<16)) { features |= SkCpu::AVX512F; } 65c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<17)) { features |= SkCpu::AVX512DQ; } 66c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<21)) { features |= SkCpu::AVX512IFMA; } 67c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<26)) { features |= SkCpu::AVX512PF; } 68c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<27)) { features |= SkCpu::AVX512ER; } 69c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<28)) { features |= SkCpu::AVX512CD; } 70c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<30)) { features |= SkCpu::AVX512BW; } 71c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein if (abcd[1] & (1<<31)) { features |= SkCpu::AVX512VL; } 72c6a449d6bf80c5bb9e02aeaed049a99870e8b1e8Mike Klein } 734311f016612a814282029daa4bd102053a853d82mtklein } 744311f016612a814282029daa4bd102053a853d82mtklein return features; 754311f016612a814282029daa4bd102053a853d82mtklein } 764311f016612a814282029daa4bd102053a853d82mtklein 7730ec0b3735d5f728c2aea4184736a3e286a5ccdaMike Klein#elif defined(SK_CPU_ARM64) && __has_include(<asm/hwcap.h>) && __has_include(<sys/auxv.h>) 78f44703a87f532b3f593d91605d66d52c6bbc45c9Mike Klein #include <asm/hwcap.h> 79f44703a87f532b3f593d91605d66d52c6bbc45c9Mike Klein #include <sys/auxv.h> 804311f016612a814282029daa4bd102053a853d82mtklein 814311f016612a814282029daa4bd102053a853d82mtklein static uint32_t read_cpu_features() { 824311f016612a814282029daa4bd102053a853d82mtklein uint32_t features = 0; 83f44703a87f532b3f593d91605d66d52c6bbc45c9Mike Klein uint32_t hwcaps = getauxval(AT_HWCAP); 84f44703a87f532b3f593d91605d66d52c6bbc45c9Mike Klein if (hwcaps & HWCAP_CRC32) { features |= SkCpu::CRC32; } 85f1b6030b44a4a9523183c3809a165b6b5353fff5mtklein return features; 86f1b6030b44a4a9523183c3809a165b6b5353fff5mtklein } 87f1b6030b44a4a9523183c3809a165b6b5353fff5mtklein 8830ec0b3735d5f728c2aea4184736a3e286a5ccdaMike Klein#elif defined(SK_CPU_ARM32) && __has_include(<asm/hwcap.h>) && __has_include(<sys/auxv.h>) 8930ec0b3735d5f728c2aea4184736a3e286a5ccdaMike Klein // asm/hwcap.h and sys/auxv.h won't be present on NDK builds before API v21. 904ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein #include <asm/hwcap.h> 914ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein #include <sys/auxv.h> 924ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein 934ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein static uint32_t read_cpu_features() { 944ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein uint32_t features = 0; 954ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein uint32_t hwcaps = getauxval(AT_HWCAP); 964ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein if (hwcaps & HWCAP_VFPv4) { features |= SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16; } 974ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein return features; 984ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein } 994ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein 10030ec0b3735d5f728c2aea4184736a3e286a5ccdaMike Klein#elif defined(SK_CPU_ARM32) && __has_include(<cpu-features.h>) 1014ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein #include <cpu-features.h> 1024ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein 1034ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein static uint32_t read_cpu_features() { 1044ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein uint32_t features = 0; 1054ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein uint64_t cpu_features = android_getCpuFeatures(); 1064ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) { features |= SkCpu::NEON; } 1074ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON_FMA) { features |= SkCpu::NEON_FMA; } 1084ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFP_FP16) { features |= SkCpu::VFP_FP16; } 1094ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein return features; 1104ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein } 1114ef8cb3527b7e3f453dccd39eea76e31eb2c33c7Mike Klein 1124311f016612a814282029daa4bd102053a853d82mtklein#else 1134311f016612a814282029daa4bd102053a853d82mtklein static uint32_t read_cpu_features() { 1144311f016612a814282029daa4bd102053a853d82mtklein return 0; 1154311f016612a814282029daa4bd102053a853d82mtklein } 1164311f016612a814282029daa4bd102053a853d82mtklein 1174311f016612a814282029daa4bd102053a853d82mtklein#endif 1184311f016612a814282029daa4bd102053a853d82mtklein 1195608e2ed2299496eee3c57e0fe426ae9bd0d07a4mtkleinuint32_t SkCpu::gCachedFeatures = 0; 120eb85fd746d6390f53e250583a0544bf59ed34b35mtklein 1215608e2ed2299496eee3c57e0fe426ae9bd0d07a4mtkleinvoid SkCpu::CacheRuntimeFeatures() { 1225608e2ed2299496eee3c57e0fe426ae9bd0d07a4mtklein static SkOnce once; 1235608e2ed2299496eee3c57e0fe426ae9bd0d07a4mtklein once([] { gCachedFeatures = read_cpu_features(); }); 1245608e2ed2299496eee3c57e0fe426ae9bd0d07a4mtklein} 125