1// ---------------------------------------------------------------------- 2// CycleClock 3// A CycleClock tells you the current time in Cycles. The "time" 4// is actually time since power-on. This is like time() but doesn't 5// involve a system call and is much more precise. 6// 7// NOTE: Not all cpu/platform/kernel combinations guarantee that this 8// clock increments at a constant rate or is synchronized across all logical 9// cpus in a system. 10// 11// If you need the above guarantees, please consider using a different 12// API. There are efforts to provide an interface which provides a millisecond 13// granularity and implemented as a memory read. A memory read is generally 14// cheaper than the CycleClock for many architectures. 15// 16// Also, in some out of order CPU implementations, the CycleClock is not 17// serializing. So if you're trying to count at cycles granularity, your 18// data might be inaccurate due to out of order instruction execution. 19// ---------------------------------------------------------------------- 20 21#ifndef BENCHMARK_CYCLECLOCK_H_ 22#define BENCHMARK_CYCLECLOCK_H_ 23 24#include <cstdint> 25 26#include "benchmark/macros.h" 27#include "internal_macros.h" 28 29#if defined(BENCHMARK_OS_MACOSX) 30#include <mach/mach_time.h> 31#endif 32// For MSVC, we want to use '_asm rdtsc' when possible (since it works 33// with even ancient MSVC compilers), and when not possible the 34// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some 35// environments, <windows.h> and <intrin.h> have conflicting 36// declarations of some other intrinsics, breaking compilation. 37// Therefore, we simply declare __rdtsc ourselves. See also 38// http://connect.microsoft.com/VisualStudio/feedback/details/262047 39#if defined(COMPILER_MSVC) && !defined(_M_IX86) 40extern "C" uint64_t __rdtsc(); 41#pragma intrinsic(__rdtsc) 42#endif 43 44#ifndef BENCHMARK_OS_WINDOWS 45#include <sys/time.h> 46#endif 47 48namespace benchmark { 49// NOTE: only i386 and x86_64 have been well tested. 50// PPC, sparc, alpha, and ia64 are based on 51// http://peter.kuscsik.com/wordpress/?p=14 52// with modifications by m3b. See also 53// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h 54namespace cycleclock { 55// This should return the number of cycles since power-on. Thread-safe. 56inline BENCHMARK_ALWAYS_INLINE int64_t Now() { 57#if defined(BENCHMARK_OS_MACOSX) 58 // this goes at the top because we need ALL Macs, regardless of 59 // architecture, to return the number of "mach time units" that 60 // have passed since startup. See sysinfo.cc where 61 // InitializeSystemInfo() sets the supposed cpu clock frequency of 62 // macs to the number of mach time units per second, not actual 63 // CPU clock frequency (which can change in the face of CPU 64 // frequency scaling). Also note that when the Mac sleeps, this 65 // counter pauses; it does not continue counting, nor does it 66 // reset to zero. 67 return mach_absolute_time(); 68#elif defined(__i386__) 69 int64_t ret; 70 __asm__ volatile("rdtsc" : "=A"(ret)); 71 return ret; 72#elif defined(__x86_64__) || defined(__amd64__) 73 uint64_t low, high; 74 __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); 75 return (high << 32) | low; 76#elif defined(__powerpc__) || defined(__ppc__) 77 // This returns a time-base, which is not always precisely a cycle-count. 78 int64_t tbl, tbu0, tbu1; 79 asm("mftbu %0" : "=r"(tbu0)); 80 asm("mftb %0" : "=r"(tbl)); 81 asm("mftbu %0" : "=r"(tbu1)); 82 tbl &= -static_cast<int64>(tbu0 == tbu1); 83 // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) 84 return (tbu1 << 32) | tbl; 85#elif defined(__sparc__) 86 int64_t tick; 87 asm(".byte 0x83, 0x41, 0x00, 0x00"); 88 asm("mov %%g1, %0" : "=r"(tick)); 89 return tick; 90#elif defined(__ia64__) 91 int64_t itc; 92 asm("mov %0 = ar.itc" : "=r"(itc)); 93 return itc; 94#elif defined(COMPILER_MSVC) && defined(_M_IX86) 95 // Older MSVC compilers (like 7.x) don't seem to support the 96 // __rdtsc intrinsic properly, so I prefer to use _asm instead 97 // when I know it will work. Otherwise, I'll use __rdtsc and hope 98 // the code is being compiled with a non-ancient compiler. 99 _asm rdtsc 100#elif defined(COMPILER_MSVC) 101 return __rdtsc(); 102#elif defined(__aarch64__) 103 // System timer of ARMv8 runs at a different frequency than the CPU's. 104 // The frequency is fixed, typically in the range 1-50MHz. It can be 105 // read at CNTFRQ special register. We assume the OS has set up 106 // the virtual timer properly. 107 int64_t virtual_timer_value; 108 asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); 109 return virtual_timer_value; 110#elif defined(__ARM_ARCH) 111#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount 112 uint32_t pmccntr; 113 uint32_t pmuseren; 114 uint32_t pmcntenset; 115 // Read the user mode perf monitor counter access permissions. 116 asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); 117 if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. 118 asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); 119 if (pmcntenset & 0x80000000ul) { // Is it counting? 120 asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); 121 // The counter is set up to count every 64th cycle 122 return static_cast<int64_t>(pmccntr) * 64; // Should optimize to << 6 123 } 124 } 125#endif 126 struct timeval tv; 127 gettimeofday(&tv, nullptr); 128 return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; 129#elif defined(__mips__) 130 // mips apparently only allows rdtsc for superusers, so we fall 131 // back to gettimeofday. It's possible clock_gettime would be better. 132 struct timeval tv; 133 gettimeofday(&tv, nullptr); 134 return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; 135#else 136// The soft failover to a generic implementation is automatic only for ARM. 137// For other platforms the developer is expected to make an attempt to create 138// a fast implementation and use generic version if nothing better is available. 139#error You need to define CycleTimer for your OS and CPU 140#endif 141} 142} // end namespace cycleclock 143} // end namespace benchmark 144 145#endif // BENCHMARK_CYCLECLOCK_H_ 146