190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/* 2f71323e297a928af368937089d3ed71239786f86Andreas Huber * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 4f71323e297a928af368937089d3ed71239786f86Andreas Huber * Use of this source code is governed by a BSD-style license 5f71323e297a928af368937089d3ed71239786f86Andreas Huber * that can be found in the LICENSE file in the root of the source 6f71323e297a928af368937089d3ed71239786f86Andreas Huber * tree. An additional intellectual property rights grant can be found 7f71323e297a928af368937089d3ed71239786f86Andreas Huber * in the file PATENTS. All contributing project authors may 8f71323e297a928af368937089d3ed71239786f86Andreas Huber * be found in the AUTHORS file in the root of the source tree. 990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */ 1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#ifndef VPX_PORTS_X86_H_ 132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define VPX_PORTS_X86_H_ 1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include <stdlib.h> 151b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h" 167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx/vpx_integer.h" 1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#ifdef __cplusplus 192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianextern "C" { 202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif 212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 22ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangtypedef enum { 23ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_UNKNOWN = -1, 24ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_AMD, 25ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_AMD_OLD, 26ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_CENTAUR, 27ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_CYRIX, 28ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_INTEL, 29ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_NEXGEN, 30ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_NSC, 31ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_RISE, 32ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_SIS, 33ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_TRANSMETA, 34ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_TRANSMETA_OLD, 35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_UMC, 36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_VIA, 37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 38ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang VPX_CPU_LAST 39538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber} vpx_cpu_t; 40538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) 4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64 432ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, ax, bx, cx, dx)\ 44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm__ __volatile__ (\ 45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "cpuid \n\t" \ 46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \ 472ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian : "a" (func), "c" (func2)); 4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 492ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, ax, bx, cx, dx)\ 50ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm__ __volatile__ (\ 51ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "mov %%ebx, %%edi \n\t" \ 52ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "cpuid \n\t" \ 53ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "xchg %%edi, %%ebx \n\t" \ 54ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 552ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian : "a" (func), "c" (func2)); 5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 57ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ 581b362b15af34006e6a11974088a46d42b903418eJohann#if ARCH_X86_64 592ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, ax, bx, cx, dx)\ 60ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang asm volatile (\ 61ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "xchg %rsi, %rbx \n\t" \ 62ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "cpuid \n\t" \ 63ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "movl %ebx, %edi \n\t" \ 64ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "xchg %rsi, %rbx \n\t" \ 65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 662ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian : "a" (func), "c" (func2)); 671b362b15af34006e6a11974088a46d42b903418eJohann#else 682ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, ax, bx, cx, dx)\ 69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang asm volatile (\ 70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "pushl %ebx \n\t" \ 71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "cpuid \n\t" \ 72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "movl %ebx, %edi \n\t" \ 73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang "popl %ebx \n\t" \ 74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ 752ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian : "a" (func), "c" (func2)); 761b362b15af34006e6a11974088a46d42b903418eJohann#endif 77ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#else /* end __SUNPRO__ */ 7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64 792ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#if defined(_MSC_VER) && _MSC_VER > 1500 802ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianvoid __cpuidex(int CPUInfo[4], int info_type, int ecxvalue); 812ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#pragma intrinsic(__cpuidex) 822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, a, b, c, d) do {\ 832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int regs[4];\ 842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian __cpuidex(regs, func, func2); \ 852ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ 862ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } while(0) 872ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#else 8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervoid __cpuid(int CPUInfo[4], int info_type); 8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#pragma intrinsic(__cpuid) 902ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, a, b, c, d) do {\ 91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int regs[4];\ 922ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian __cpuid(regs, func); \ 932ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\ 942ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } while (0) 952ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif 9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 972ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define cpuid(func, func2, a, b, c, d)\ 98ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm mov eax, func\ 992ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian __asm mov ecx, func2\ 100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm cpuid\ 101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm mov a, eax\ 102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm mov b, ebx\ 103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm mov c, ecx\ 104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm mov d, edx 10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#endif /* end others */ 10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian// NaCl has no support for xgetbv or the raw opcode. 1097ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) 1107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianstatic INLINE uint64_t xgetbv(void) { 1117ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian const uint32_t ecx = 0; 1127ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian uint32_t eax, edx; 1137ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian // Use the raw opcode for xgetbv for compatibility with older toolchains. 1147ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian __asm__ volatile ( 1157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ".byte 0x0f, 0x01, 0xd0\n" 1167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian : "=a"(eax), "=d"(edx) : "c" (ecx)); 1177ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian return ((uint64_t)edx << 32) | eax; 1187ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian} 1197ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#elif (defined(_M_X64) || defined(_M_IX86)) && \ 1207ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 1217ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include <immintrin.h> 1227ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#define xgetbv() _xgetbv(0) 1237ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#elif defined(_MSC_VER) && defined(_M_IX86) 1247ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianstatic INLINE uint64_t xgetbv(void) { 1257ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian uint32_t eax_, edx_; 1267ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian __asm { 1277ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian xor ecx, ecx // ecx = 0 1287ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian // Use the raw opcode for xgetbv for compatibility with older toolchains. 1297ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 1307ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian mov eax_, eax 1317ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian mov edx_, edx 1327ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian } 1337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian return ((uint64_t)edx_ << 32) | eax_; 1347ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian} 1357ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#else 1367ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. 1377ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#endif 1387ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 1397ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#if defined(_MSC_VER) && _MSC_VER >= 1700 1407ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include <windows.h> 1417ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) 1427ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#define getenv(x) NULL 1437ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#endif 1447ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#endif 1457ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_MMX 0x01 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_SSE 0x02 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_SSE2 0x04 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_SSE3 0x08 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_SSSE3 0x10 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_SSE4_1 0x20 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_AVX 0x40 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#define HAS_AVX2 0x80 15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#ifndef BIT 15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#define BIT(n) (1<<n) 15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 158ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE int 159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx86_simd_caps(void) { 160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int flags = 0; 161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int mask = ~0; 1627ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; 163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang char *env; 164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang (void)reg_ebx; 16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /* See if the CPU capabilities are being overridden by the environment */ 167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang env = getenv("VPX_SIMD_CAPS"); 16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (env && *env) 170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (int)strtol(env, NULL, 0); 17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang env = getenv("VPX_SIMD_CAPS_MASK"); 17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (env && *env) 175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang mask = strtol(env, NULL, 0); 17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /* Ensure that the CPUID instruction supports extended features */ 1787ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); 17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1807ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian if (max_cpuid_val < 1) 181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return 0; 18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /* Get the standard feature flags */ 1842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (reg_edx & BIT(23)) flags |= HAS_MMX; 18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ 18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ 19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (reg_ecx & BIT(0)) flags |= HAS_SSE3; 19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; 19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; 197538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber 1987ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian // bits 27 (OSXSAVE) & 28 (256-bit AVX) 1997ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { 2007ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian if ((xgetbv() & 0x6) == 0x6) { 2017ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian flags |= HAS_AVX; 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2037ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian if (max_cpuid_val >= 7) { 2047ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ 2057ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); 2062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 2077ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian if (reg_ebx & BIT(5)) flags |= HAS_AVX2; 2087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian } 2097ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian } 2107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian } 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return flags & mask; 21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64 && defined(_MSC_VER) 21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned __int64 __rdtsc(void); 21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#pragma intrinsic(__rdtsc) 21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 219ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE unsigned int 220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx86_readtsc(void) { 22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if defined(__GNUC__) && __GNUC__ 222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int tsc; 223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):); 224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return tsc; 2251b362b15af34006e6a11974088a46d42b903418eJohann#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned int tsc; 227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang asm volatile("rdtsc\n\t":"=a"(tsc):); 228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return tsc; 22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64 231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (unsigned int)__rdtsc(); 23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm rdtsc; 23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if defined(__GNUC__) && __GNUC__ 24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#define x86_pause_hint()\ 241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm__ __volatile__ ("pause \n\t") 2421b362b15af34006e6a11974088a46d42b903418eJohann#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 2431b362b15af34006e6a11974088a46d42b903418eJohann#define x86_pause_hint()\ 244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang asm volatile ("pause \n\t") 24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64 2471b362b15af34006e6a11974088a46d42b903418eJohann#define x86_pause_hint()\ 248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang _mm_pause(); 24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#define x86_pause_hint()\ 251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm pause 25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if defined(__GNUC__) && __GNUC__ 25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic void 257ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_set_control_word(unsigned short mode) { 258ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); 25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic unsigned short 261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_get_control_word(void) { 262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned short mode; 263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):); 26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber return mode; 26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 2661b362b15af34006e6a11974088a46d42b903418eJohann#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 2671b362b15af34006e6a11974088a46d42b903418eJohannstatic void 268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_set_control_word(unsigned short mode) { 269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang asm volatile("fldcw %0" : : "m"(*&mode)); 2701b362b15af34006e6a11974088a46d42b903418eJohann} 2711b362b15af34006e6a11974088a46d42b903418eJohannstatic unsigned short 272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_get_control_word(void) { 273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned short mode; 274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang asm volatile("fstcw %0\n\t":"=m"(*&mode):); 275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return mode; 2761b362b15af34006e6a11974088a46d42b903418eJohann} 27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#elif ARCH_X86_64 27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/* No fldcw intrinsics on Windows x64, punt to external asm */ 27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void vpx_winx64_fldcw(unsigned short mode); 28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern unsigned short vpx_winx64_fstcw(void); 28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#define x87_set_control_word vpx_winx64_fldcw 28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#define x87_get_control_word vpx_winx64_fstcw 28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else 28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic void 285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_set_control_word(unsigned short mode) { 286ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm { fldcw mode } 28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic unsigned short 289ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_get_control_word(void) { 290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang unsigned short mode; 291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang __asm { fstcw mode } 292ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return mode; 29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 296ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE unsigned int 297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_set_double_precision(void) { 298ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian unsigned int mode = x87_get_control_word(); 299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x87_set_control_word((mode&~0x300) | 0x200); 300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return mode; 30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberextern void vpx_reset_mmx_state(void); 3052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 3062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#ifdef __cplusplus 3072ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} // extern "C" 30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif 30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif // VPX_PORTS_X86_H_ 311