1/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7    http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/core/platform/cpu_info.h"
17#include "tensorflow/core/platform/logging.h"
18#include "tensorflow/core/platform/platform.h"
19#include "tensorflow/core/platform/types.h"
20#if defined(PLATFORM_IS_X86)
21#include <mutex>  // NOLINT
22#endif
23
24// SIMD extension querying is only available on x86.
#ifdef PLATFORM_IS_X86
#ifdef PLATFORM_WINDOWS
// MSVC build: run CPUID through the __cpuidex intrinsic.  The intrinsic fills
// a four-element int array, whose entries are then copied into a, b, c and d
// in that order.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  {                                        \
    int regs_out[4] = {-1};                \
    __cpuidex(regs_out, a_inp, c_inp);     \
    a = regs_out[0];                       \
    b = regs_out[1];                       \
    c = regs_out[2];                       \
    d = regs_out[3];                       \
  }
#else
// Every other compiler: gcc-style inline assembly.  a_inp is placed in eax and
// c_inp in ecx (it is tied to output operand 2) before cpuid executes.  rbx is
// copied into rdi first and the two are exchanged afterwards, so rbx holds its
// original value again when the asm ends and the ebx result is read out of
// rdi (the "=D" output).
#define GETCPUID(a, b, c, d, a_inp, c_inp)           \
  asm("mov %%rbx, %%rdi\ncpuid\nxchg %%rdi, %%rbx\n" \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d)           \
      : "a"(a_inp), "2"(c_inp))
#endif  // PLATFORM_WINDOWS
#endif  // PLATFORM_IS_X86
47
48namespace tensorflow {
49namespace port {
50namespace {
51
52#ifdef PLATFORM_IS_X86
// Forward declarations: both names are referenced before their definitions
// further down in this file.
void InitCPUIDInfo();
class CPUIDInfo;

// Null until CPUIDInfo::Initialize() allocates the one instance and stores it
// here.
CPUIDInfo* cpuid = nullptr;
57
// Executes XGETBV with ecx = 0 (extended control register 0) and returns the
// eax half of the result; the edx half is discarded.
int GetXCR0EAX() {
#ifdef PLATFORM_WINDOWS
  // Visual Studio provides an intrinsic for XGETBV, so use it.
  return _xgetbv(0);
#else
  int low_word, high_word;
  asm("XGETBV" : "=a"(low_word), "=d"(high_word) : "c"(0));
  return low_word;
#endif
}
68
69// Structure for basic CPUID info
70class CPUIDInfo {
71 public:
72  CPUIDInfo()
73      : have_adx_(0),
74        have_aes_(0),
75        have_avx_(0),
76        have_avx2_(0),
77        have_avx512f_(0),
78        have_avx512cd_(0),
79        have_avx512er_(0),
80        have_avx512pf_(0),
81        have_avx512vl_(0),
82        have_avx512bw_(0),
83        have_avx512dq_(0),
84        have_avx512vbmi_(0),
85        have_avx512ifma_(0),
86        have_avx512_4vnniw_(0),
87        have_avx512_4fmaps_(0),
88        have_bmi1_(0),
89        have_bmi2_(0),
90        have_cmov_(0),
91        have_cmpxchg16b_(0),
92        have_cmpxchg8b_(0),
93        have_f16c_(0),
94        have_fma_(0),
95        have_mmx_(0),
96        have_pclmulqdq_(0),
97        have_popcnt_(0),
98        have_prefetchw_(0),
99        have_prefetchwt1_(0),
100        have_rdrand_(0),
101        have_rdseed_(0),
102        have_smap_(0),
103        have_sse_(0),
104        have_sse2_(0),
105        have_sse3_(0),
106        have_sse4_1_(0),
107        have_sse4_2_(0),
108        have_ssse3_(0),
109        have_hypervisor_(0) {}
110
111  static void Initialize() {
112    // Initialize cpuid struct
113    CHECK(cpuid == nullptr) << __func__ << " ran more than once";
114    cpuid = new CPUIDInfo;
115
116    uint32 eax, ebx, ecx, edx;
117
118    // Get vendor string (issue CPUID with eax = 0)
119    GETCPUID(eax, ebx, ecx, edx, 0, 0);
120    cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
121    cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
122    cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
123
124    // To get general information and extended features we send eax = 1 and
125    // ecx = 0 to cpuid.  The response is returned in eax, ebx, ecx and edx.
126    // (See Intel 64 and IA-32 Architectures Software Developer's Manual
127    // Volume 2A: Instruction Set Reference, A-M CPUID).
128    GETCPUID(eax, ebx, ecx, edx, 1, 0);
129
130    cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
131    cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
132
133    cpuid->have_aes_ = (ecx >> 25) & 0x1;
134    cpuid->have_cmov_ = (edx >> 15) & 0x1;
135    cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
136    cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
137    cpuid->have_mmx_ = (edx >> 23) & 0x1;
138    cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
139    cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
140    cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
141    cpuid->have_sse2_ = (edx >> 26) & 0x1;
142    cpuid->have_sse3_ = ecx & 0x1;
143    cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
144    cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
145    cpuid->have_sse_ = (edx >> 25) & 0x1;
146    cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
147    cpuid->have_hypervisor_ = (ecx >> 31) & 1;
148
149    const uint64 xcr0_xmm_mask = 0x2;
150    const uint64 xcr0_ymm_mask = 0x4;
151    const uint64 xcr0_maskreg_mask = 0x20;
152    const uint64 xcr0_zmm0_15_mask = 0x40;
153    const uint64 xcr0_zmm16_31_mask = 0x80;
154
155    const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
156    const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
157                                    xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
158
159    const bool have_avx =
160        // Does the OS support XGETBV instruction use by applications?
161        ((ecx >> 27) & 0x1) &&
162        // Does the OS save/restore XMM and YMM state?
163        ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
164        // Is AVX supported in hardware?
165        ((ecx >> 28) & 0x1);
166
167    const bool have_avx512 =
168        // Does the OS support XGETBV instruction use by applications?
169        ((ecx >> 27) & 0x1) &&
170        // Does the OS save/restore ZMM state?
171        ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
172
173    cpuid->have_avx_ = have_avx;
174    cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
175    cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
176
177    // Get standard level 7 structured extension features (issue CPUID with
178    // eax = 7 and ecx= 0), which is required to check for AVX2 support as
179    // well as other Haswell (and beyond) features.  (See Intel 64 and IA-32
180    // Architectures Software Developer's Manual Volume 2A: Instruction Set
181    // Reference, A-M CPUID).
182    GETCPUID(eax, ebx, ecx, edx, 7, 0);
183
184    cpuid->have_adx_ = (ebx >> 19) & 0x1;
185    cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
186    cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
187    cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
188    cpuid->have_prefetchwt1_ = ecx & 0x1;
189    cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
190    cpuid->have_smap_ = (ebx >> 20) & 0x1;
191
192    cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
193    cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
194    cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
195    cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
196    cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
197    cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
198    cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
199    cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
200    cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
201    cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
202    cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
203  }
204
205  static bool TestFeature(CPUFeature feature) {
206    InitCPUIDInfo();
207    // clang-format off
208    switch (feature) {
209      case ADX:           return cpuid->have_adx_;
210      case AES:           return cpuid->have_aes_;
211      case AVX2:          return cpuid->have_avx2_;
212      case AVX:           return cpuid->have_avx_;
213      case AVX512F:       return cpuid->have_avx512f_;
214      case AVX512CD:      return cpuid->have_avx512cd_;
215      case AVX512PF:      return cpuid->have_avx512pf_;
216      case AVX512ER:      return cpuid->have_avx512er_;
217      case AVX512VL:      return cpuid->have_avx512vl_;
218      case AVX512BW:      return cpuid->have_avx512bw_;
219      case AVX512DQ:      return cpuid->have_avx512dq_;
220      case AVX512VBMI:    return cpuid->have_avx512vbmi_;
221      case AVX512IFMA:    return cpuid->have_avx512ifma_;
222      case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
223      case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
224      case BMI1:          return cpuid->have_bmi1_;
225      case BMI2:          return cpuid->have_bmi2_;
226      case CMOV:          return cpuid->have_cmov_;
227      case CMPXCHG16B:    return cpuid->have_cmpxchg16b_;
228      case CMPXCHG8B:     return cpuid->have_cmpxchg8b_;
229      case F16C:          return cpuid->have_f16c_;
230      case FMA:           return cpuid->have_fma_;
231      case MMX:           return cpuid->have_mmx_;
232      case PCLMULQDQ:     return cpuid->have_pclmulqdq_;
233      case POPCNT:        return cpuid->have_popcnt_;
234      case PREFETCHW:     return cpuid->have_prefetchw_;
235      case PREFETCHWT1:   return cpuid->have_prefetchwt1_;
236      case RDRAND:        return cpuid->have_rdrand_;
237      case RDSEED:        return cpuid->have_rdseed_;
238      case SMAP:          return cpuid->have_smap_;
239      case SSE2:          return cpuid->have_sse2_;
240      case SSE3:          return cpuid->have_sse3_;
241      case SSE4_1:        return cpuid->have_sse4_1_;
242      case SSE4_2:        return cpuid->have_sse4_2_;
243      case SSE:           return cpuid->have_sse_;
244      case SSSE3:         return cpuid->have_ssse3_;
245      case HYPERVISOR:    return cpuid->have_hypervisor_;
246      default:
247        break;
248    }
249    // clang-format on
250    return false;
251  }
252
253  string vendor_str() const { return vendor_str_; }
254  int family() const { return family_; }
255  int model_num() { return model_num_; }
256
257 private:
258  int have_adx_ : 1;
259  int have_aes_ : 1;
260  int have_avx_ : 1;
261  int have_avx2_ : 1;
262  int have_avx512f_ : 1;
263  int have_avx512cd_ : 1;
264  int have_avx512er_ : 1;
265  int have_avx512pf_ : 1;
266  int have_avx512vl_ : 1;
267  int have_avx512bw_ : 1;
268  int have_avx512dq_ : 1;
269  int have_avx512vbmi_ : 1;
270  int have_avx512ifma_ : 1;
271  int have_avx512_4vnniw_ : 1;
272  int have_avx512_4fmaps_ : 1;
273  int have_bmi1_ : 1;
274  int have_bmi2_ : 1;
275  int have_cmov_ : 1;
276  int have_cmpxchg16b_ : 1;
277  int have_cmpxchg8b_ : 1;
278  int have_f16c_ : 1;
279  int have_fma_ : 1;
280  int have_mmx_ : 1;
281  int have_pclmulqdq_ : 1;
282  int have_popcnt_ : 1;
283  int have_prefetchw_ : 1;
284  int have_prefetchwt1_ : 1;
285  int have_rdrand_ : 1;
286  int have_rdseed_ : 1;
287  int have_smap_ : 1;
288  int have_sse_ : 1;
289  int have_sse2_ : 1;
290  int have_sse3_ : 1;
291  int have_sse4_1_ : 1;
292  int have_sse4_2_ : 1;
293  int have_ssse3_ : 1;
294  int have_hypervisor_ : 1;
295  string vendor_str_;
296  int family_;
297  int model_num_;
298};
299
300std::once_flag cpuid_once_flag;
301
302void InitCPUIDInfo() {
303  // This ensures that CPUIDInfo::Initialize() is called exactly
304  // once regardless of how many threads concurrently call us
305  std::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
306}
307
308#endif  // PLATFORM_IS_X86
309
310}  // namespace
311
312bool TestCPUFeature(CPUFeature feature) {
313#ifdef PLATFORM_IS_X86
314  return CPUIDInfo::TestFeature(feature);
315#else
316  return false;
317#endif
318}
319
// Returns the vendor string gathered from CPUID, or an empty string in builds
// without PLATFORM_IS_X86.
std::string CPUVendorIDString() {
#ifndef PLATFORM_IS_X86
  return "";
#else
  // Make sure the CPUID data has been collected before reading it.
  InitCPUIDInfo();
  return cpuid->vendor_str();
#endif
}
328
// Returns the CPU family value gathered from CPUID, or 0 in builds without
// PLATFORM_IS_X86.
int CPUFamily() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  // Make sure the CPUID data has been collected before reading it.
  InitCPUIDInfo();
  return cpuid->family();
#endif
}
337
// Returns the CPU model number gathered from CPUID, or 0 in builds without
// PLATFORM_IS_X86.
int CPUModelNum() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  // Make sure the CPUID data has been collected before reading it.
  InitCPUIDInfo();
  return cpuid->model_num();
#endif
}
346
347}  // namespace port
348}  // namespace tensorflow
349