/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#ifndef VPX_PORTS_X86_H_
132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#define VPX_PORTS_X86_H_
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include <stdlib.h>
151b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h"
167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx/vpx_integer.h"
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#ifdef __cplusplus
192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianextern "C" {
202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif
212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian
/*
 * CPU identifiers, named after x86 CPU vendors.
 *
 * VPX_CPU_UNKNOWN is -1, the named entries count up from 0 in declaration
 * order, and VPX_CPU_LAST is therefore the number of named entries.
 */
typedef enum {
  VPX_CPU_UNKNOWN = -1,
  VPX_CPU_AMD,
  VPX_CPU_AMD_OLD,
  VPX_CPU_CENTAUR,
  VPX_CPU_CYRIX,
  VPX_CPU_INTEL,
  VPX_CPU_NEXGEN,
  VPX_CPU_NSC,
  VPX_CPU_RISE,
  VPX_CPU_SIS,
  VPX_CPU_TRANSMETA,
  VPX_CPU_TRANSMETA_OLD,
  VPX_CPU_UMC,
  VPX_CPU_VIA,

  VPX_CPU_LAST  /* keep last: count of the entries above */
} vpx_cpu_t;
40538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
/*
 * cpuid(func, func2, ax, bx, cx, dx)
 *
 * Executes the CPUID instruction with EAX = func and ECX = func2 and stores
 * the resulting EAX/EBX/ECX/EDX into the lvalues ax/bx/cx/dx.  One variant
 * is selected per toolchain and per ARCH_X86_64.
 *
 * Every variant expands to a statement WITHOUT a trailing semicolon, so it
 * must be invoked as `cpuid(...);`.
 */
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
#if ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx) \
  __asm__ __volatile__ ( \
                        "cpuid           \n\t" \
                        : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func), "c" (func2))
#else
/* 32-bit: EBX is copied to EDI before cpuid and swapped back afterwards, so
 * EBX keeps its original value and the EBX result is delivered through EDI
 * (the "=D" output). */
#define cpuid(func, func2, ax, bx, cx, dx) \
  __asm__ __volatile__ ( \
                        "mov %%ebx, %%edi   \n\t" \
                        "cpuid              \n\t" \
                        "xchg %%edi, %%ebx  \n\t" \
                        : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func), "c" (func2))
#endif
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
/* RBX is swapped into RSI around cpuid and the EBX result is copied to EDI.
 * NOTE(review): after the second xchg RSI holds cpuid's RBX value, yet RSI
 * is not listed as an output or clobber -- confirm this is safe with the
 * Sun toolchain. */
#define cpuid(func, func2, ax, bx, cx, dx) \
  asm volatile ( \
                "xchg %rsi, %rbx \n\t" \
                "cpuid           \n\t" \
                "movl %ebx, %edi \n\t" \
                "xchg %rsi, %rbx \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (func2))
#else
/* EBX is saved on the stack around cpuid; its result is copied to EDI. */
#define cpuid(func, func2, ax, bx, cx, dx) \
  asm volatile ( \
                "pushl %ebx       \n\t" \
                "cpuid            \n\t" \
                "movl %ebx, %edi  \n\t" \
                "popl %ebx        \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func), "c" (func2))
#endif
#else /* end __SUNPRO__ */
#if ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
#pragma intrinsic(__cpuidex)
#define cpuid(func, func2, a, b, c, d) do { \
    int regs[4]; \
    __cpuidex(regs, func, func2); \
    a = regs[0];  b = regs[1];  c = regs[2];  d = regs[3]; \
  } while (0)
#else
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
/* NOTE(review): func2 is not forwarded in this variant; __cpuid only takes
 * the leaf number. */
#define cpuid(func, func2, a, b, c, d) do { \
    int regs[4]; \
    __cpuid(regs, func); \
    a = regs[0];  b = regs[1];  c = regs[2];  d = regs[3]; \
  } while (0)
#endif
#else
#define cpuid(func, func2, a, b, c, d) \
  __asm mov eax, func \
  __asm mov ecx, func2 \
  __asm cpuid \
  __asm mov a, eax \
  __asm mov b, ebx \
  __asm mov c, ecx \
  __asm mov d, edx
#endif
#endif /* end others */
10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian// NaCl has no support for xgetbv or the raw opcode.
1097ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
1107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianstatic INLINE uint64_t xgetbv(void) {
1117ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  const uint32_t ecx = 0;
1127ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  uint32_t eax, edx;
1137ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  // Use the raw opcode for xgetbv for compatibility with older toolchains.
1147ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  __asm__ volatile (
1157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    ".byte 0x0f, 0x01, 0xd0\n"
1167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    : "=a"(eax), "=d"(edx) : "c" (ecx));
1177ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  return ((uint64_t)edx << 32) | eax;
1187ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian}
1197ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#elif (defined(_M_X64) || defined(_M_IX86)) && \
1207ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
1217ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include <immintrin.h>
1227ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#define xgetbv() _xgetbv(0)
1237ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#elif defined(_MSC_VER) && defined(_M_IX86)
1247ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianstatic INLINE uint64_t xgetbv(void) {
1257ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  uint32_t eax_, edx_;
1267ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  __asm {
1277ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    xor ecx, ecx  // ecx = 0
1287ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    // Use the raw opcode for xgetbv for compatibility with older toolchains.
1297ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
1307ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    mov eax_, eax
1317ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    mov edx_, edx
1327ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  }
1337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  return ((uint64_t)edx_ << 32) | eax_;
1347ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian}
1357ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#else
1367ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
1377ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#endif
1387ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian
/*
 * With MSVC >= 1700, when building for the WINAPI_FAMILY_APP partition,
 * getenv() is replaced by a macro that always yields NULL, so code in this
 * header that consults environment variables sees them as unset.
 */
#if defined(_MSC_VER) && _MSC_VER >= 1700
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
#define getenv(x) NULL
#endif
#endif
1457ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian
/* Capability bits combined into the value returned by x86_simd_caps().
 * Each flag occupies its own bit so they can be OR-ed and masked freely. */
#define HAS_MMX     0x01
#define HAS_SSE     0x02
#define HAS_SSE2    0x04
#define HAS_SSE3    0x08
#define HAS_SSSE3   0x10
#define HAS_SSE4_1  0x20
#define HAS_AVX     0x40
#define HAS_AVX2    0x80
/* BIT(n): unsigned value with only bit n set (n in 0..31).
 * The argument is parenthesized so expressions such as BIT(a | b) shift by
 * the whole expression, and the constant is unsigned so BIT(31) does not
 * overflow a signed int. */
#ifndef BIT
#define BIT(n) (1u << (n))
#endif
15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
158ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE int
159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx86_simd_caps(void) {
160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int flags = 0;
161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int mask = ~0;
1627ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  char *env;
164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  (void)reg_ebx;
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* See if the CPU capabilities are being overridden by the environment */
167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  env = getenv("VPX_SIMD_CAPS");
16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (env && *env)
170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return (int)strtol(env, NULL, 0);
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  env = getenv("VPX_SIMD_CAPS_MASK");
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (env && *env)
175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    mask = strtol(env, NULL, 0);
17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* Ensure that the CPUID instruction supports extended features */
1787ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1807ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  if (max_cpuid_val < 1)
181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return 0;
18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* Get the standard feature flags */
1842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian  cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (reg_edx & BIT(23)) flags |= HAS_MMX;
18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */
18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */
19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  if (reg_ecx & BIT(0)) flags |= HAS_SSE3;
19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  if (reg_ecx & BIT(9)) flags |= HAS_SSSE3;
19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
197538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber
1987ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  // bits 27 (OSXSAVE) & 28 (256-bit AVX)
1997ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
2007ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    if ((xgetbv() & 0x6) == 0x6) {
2017ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian      flags |= HAS_AVX;
2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2037ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian      if (max_cpuid_val >= 7) {
2047ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian        /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
2057ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian        cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
2062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian
2077ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian        if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
2087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian      }
2097ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian    }
2107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian  }
2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return flags & mask;
21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64 && defined(_MSC_VER)
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberunsigned __int64 __rdtsc(void);
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#pragma intrinsic(__rdtsc)
21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif
219ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE unsigned int
220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx86_readtsc(void) {
22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if defined(__GNUC__) && __GNUC__
222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int tsc;
223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return tsc;
2251b362b15af34006e6a11974088a46d42b903418eJohann#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int tsc;
227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  asm volatile("rdtsc\n\t":"=a"(tsc):);
228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return tsc;
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else
23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#if ARCH_X86_64
231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return (unsigned int)__rdtsc();
23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#else
233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  __asm  rdtsc;
23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * x86_pause_hint()
 *
 * Issues the PAUSE instruction.  Every variant expands to an expression or
 * statement without a trailing semicolon; invoke it as `x86_pause_hint();`.
 */
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint() \
  __asm__ __volatile__ ("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() \
  asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
#define x86_pause_hint() \
  _mm_pause()
#else
#define x86_pause_hint() \
  __asm pause
#endif
#endif
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/*
 * x87_set_control_word(mode) loads `mode` into the x87 FPU control word
 * (fldcw); x87_get_control_word() returns the current control word (fstcw).
 * One implementation pair is selected per toolchain.
 */
#if defined(__GNUC__) && __GNUC__
static void
x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :);
  return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void
x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
  return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void           vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
#define x87_set_control_word vpx_winx64_fldcw
#define x87_get_control_word vpx_winx64_fstcw
#else
static void
x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  __asm { fstcw mode }
  return mode;
}
#endif
29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
296ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic INLINE unsigned int
297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangx87_set_double_precision(void) {
298ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian  unsigned int mode = x87_get_control_word();
299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x87_set_control_word((mode&~0x300) | 0x200);
300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return mode;
30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber}
30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
/* Defined outside this header.
 * NOTE(review): presumably resets MMX/x87 register state (e.g. via emms) --
 * confirm against the implementation. */
extern void vpx_reset_mmx_state(void);
3052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian
3062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#ifdef __cplusplus
3072ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian}  // extern "C"
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#endif
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian#endif  // VPX_PORTS_X86_H_
311