1// Copyright 2006 Google Inc. All Rights Reserved.
2// Author: nsanders, menderico
3
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7
8//      http://www.apache.org/licenses/LICENSE-2.0
9
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef STRESSAPPTEST_OS_H_  // NOLINT
17#define STRESSAPPTEST_OS_H_
18
19#include <dirent.h>
20#include <unistd.h>
21#include <sys/syscall.h>
22
23#include <string>
24#include <list>
25#include <map>
26#include <vector>
27
28// This file must work with autoconf on its public version,
29// so these includes are correct.
30#include "adler32memcpy.h"  // NOLINT
31#include "sattypes.h"       // NOLINT
32#include "clock.h"          // NOLINT
33
// Path of the calling process's pagemap file in procfs.
// NOTE(review): presumably consumed by OsLayer::VirtualToPhysical() -- the
// user is not visible in this header, confirm in the implementation file.
static const char kPagemapPath[] = "/proc/self/pagemap";
35
36struct PCIDevice {
37  int32 domain;
38  uint16 bus;
39  uint8 dev;
40  uint8 func;
41  uint16 vendor_id;
42  uint16 device_id;
43  uint64 base_addr[6];
44  uint64 size[6];
45};
46
47typedef vector<PCIDevice*> PCIDevices;
48
49class ErrorDiag;
50
51class Clock;
52
53// This class implements OS/Platform specific funtions.
54class OsLayer {
55 public:
56  OsLayer();
57  virtual ~OsLayer();
58
59  // Set the minimum amount of hugepages that should be available for testing.
60  // Must be set before Initialize().
61  void SetMinimumHugepagesSize(int64 min_bytes) {
62    min_hugepages_bytes_ = min_bytes;
63  }
64
65  // Set the minium amount of memory that should not be allocated. This only
66  // has any affect if hugepages are not used.
67  // Must be set before Initialize().
68  void SetReserveSize(int64 reserve_mb) {
69    reserve_mb_ = reserve_mb;
70  }
71
72  // Set parameters needed to translate physical address to memory module.
73  void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
74                            vector< vector<string> > *channels) {
75    channel_hash_ = channel_hash;
76    channel_width_ = channel_width;
77    channels_ = channels;
78  }
79
80  // Initializes data strctures and open files.
81  // Returns false on error.
82  virtual bool Initialize();
83
84  // Virtual to physical. This implementation is optional for
85  // subclasses to implement.
86  // Takes a pointer, and returns the corresponding bus address.
87  virtual uint64 VirtualToPhysical(void *vaddr);
88
89  // Prints failed dimm. This implementation is optional for
90  // subclasses to implement.
91  // Takes a bus address and string, and prints the DIMM name
92  // into the string. Returns the DIMM number that corresponds to the
93  // address given, or -1 if unable to identify the DIMM number.
94  // Note that subclass implementations of FindDimm() MUST fill
95  // buf with at LEAST one non-whitespace character (provided len > 0).
96  virtual int FindDimm(uint64 addr, char *buf, int len);
97
98  // Classifies addresses according to "regions"
99  // This may mean different things on different platforms.
100  virtual int32 FindRegion(uint64 paddr);
101  // Find cpu cores associated with a region. Either NUMA or arbitrary.
102  virtual cpu_set_t *FindCoreMask(int32 region);
103  // Return cpu cores associated with a region in a hex string.
104  virtual string FindCoreMaskFormat(int32 region);
105
106  // Returns the HD device that contains this file.
107  virtual string FindFileDevice(string filename);
108
109  // Returns a list of paths coresponding to HD devices found on this machine.
110  virtual list<string> FindFileDevices();
111
112  // Polls for errors. This implementation is optional.
113  // This will poll once for errors and return zero iff no errors were found.
114  virtual int ErrorPoll();
115
116  // Delay an appropriate amount of time between polling.
117  virtual void ErrorWait();
118
119  // Report errors. This implementation is mandatory.
120  // This will output a machine readable line regarding the error.
121  virtual bool ErrorReport(const char *part, const char *symptom, int count);
122
123  // Flushes page cache. Used to circumvent the page cache when doing disk
124  // I/O.  This will be a NOP until ActivateFlushPageCache() is called, which
125  // is typically done when opening a file with O_DIRECT fails.
126  // Returns false on error, true on success or NOP.
127  // Subclasses may implement this in machine specific ways..
128  virtual bool FlushPageCache(void);
129  // Enable FlushPageCache() to actually do the flush instead of being a NOP.
130  virtual void ActivateFlushPageCache(void);
131
132  // Flushes cacheline. Used to distinguish read or write errors.
133  // Subclasses may implement this in machine specific ways..
134  // Takes a pointer, and flushed the cacheline containing that pointer.
135  virtual void Flush(void *vaddr);
136
137  // Fast flush, for use in performance critical code.
138  // This is bound at compile time, and will not pick up
139  // any runtime machine configuration info.
140  inline static void FastFlush(void *vaddr) {
141#ifdef STRESSAPPTEST_CPU_PPC
142    asm volatile("dcbf 0,%0; sync" : : "r" (vaddr));
143#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
144    // Put mfence before and after clflush to make sure:
145    // 1. The write before the clflush is committed to memory bus;
146    // 2. The read after the clflush is hitting the memory bus.
147    //
148    // From Intel manual:
149    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
150    // to be ordered by any other fencing, serializing or other CLFLUSH
151    // instruction. For example, software can use an MFENCE instruction to
152    // insure that previous stores are included in the write-back.
153    asm volatile("mfence");
154    asm volatile("clflush (%0)" : : "r" (vaddr));
155    asm volatile("mfence");
156#elif defined(STRESSAPPTEST_CPU_ARMV7A) && !defined(__aarch64__)
157    // ARMv7a cachelines are 8 words (32 bytes).
158    syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
159#else
160  #warning "Unsupported CPU type: Unable to force cache flushes."
161#endif
162  }
163
164  // Fast flush, for use in performance critical code.
165  // This is bound at compile time, and will not pick up
166  // any runtime machine configuration info.  Takes a NULL-terminated
167  // array of addresses to flush.
168  inline static void FastFlushList(void **vaddrs) {
169#ifdef STRESSAPPTEST_CPU_PPC
170    while (*vaddrs) {
171      asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
172    }
173    asm volatile("sync");
174#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
175    // Put mfence before and after clflush to make sure:
176    // 1. The write before the clflush is committed to memory bus;
177    // 2. The read after the clflush is hitting the memory bus.
178    //
179    // From Intel manual:
180    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
181    // to be ordered by any other fencing, serializing or other CLFLUSH
182    // instruction. For example, software can use an MFENCE instruction to
183    // insure that previous stores are included in the write-back.
184    asm volatile("mfence");
185    while (*vaddrs) {
186      asm volatile("clflush (%0)" : : "r" (*vaddrs++));
187    }
188    asm volatile("mfence");
189#elif defined(STRESSAPPTEST_CPU_ARMV7A)
190    while (*vaddrs) {
191      FastFlush(*vaddrs++);
192    }
193#else
194    #warning "Unsupported CPU type: Unable to force cache flushes."
195#endif
196  }
197
198  // Fast flush hint, for use in performance critical code.
199  // This is bound at compile time, and will not pick up
200  // any runtime machine configuration info.  Note that this
201  // will not guarantee that a flush happens, but will at least
202  // hint that it should.  This is useful for speeding up
203  // parallel march algorithms.
204  inline static void FastFlushHint(void *vaddr) {
205#ifdef STRESSAPPTEST_CPU_PPC
206    asm volatile("dcbf 0,%0" : : "r" (vaddr));
207#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
208    // From Intel manual:
209    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
210    // to be ordered by any other fencing, serializing or other CLFLUSH
211    // instruction. For example, software can use an MFENCE instruction to
212    // insure that previous stores are included in the write-back.
213    asm volatile("clflush (%0)" : : "r" (vaddr));
214#elif defined(STRESSAPPTEST_CPU_ARMV7A)
215    FastFlush(vaddr);
216#else
217    #warning "Unsupported CPU type: Unable to force cache flushes."
218#endif
219  }
220
221  // Fast flush, for use in performance critical code.
222  // This is bound at compile time, and will not pick up
223  // any runtime machine configuration info.  Sync's any
224  // transactions for ordering FastFlushHints.
225  inline static void FastFlushSync() {
226#ifdef STRESSAPPTEST_CPU_PPC
227    asm volatile("sync");
228#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
229    // Put mfence before and after clflush to make sure:
230    // 1. The write before the clflush is committed to memory bus;
231    // 2. The read after the clflush is hitting the memory bus.
232    //
233    // From Intel manual:
234    // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
235    // to be ordered by any other fencing, serializing or other CLFLUSH
236    // instruction. For example, software can use an MFENCE instruction to
237    // insure that previous stores are included in the write-back.
238    asm volatile("mfence");
239#elif defined(STRESSAPPTEST_CPU_ARMV7A)
240    // This is a NOP, FastFlushHint() always does a full flush, so there's
241    // nothing to do for FastFlushSync().
242#else
243  #warning "Unsupported CPU type: Unable to force cache flushes."
244#endif
245  }
246
247  // Get time in cpu timer ticks. Useful for matching MCEs with software
248  // actions.
249  inline static uint64 GetTimestamp(void) {
250    uint64 tsc;
251#ifdef STRESSAPPTEST_CPU_PPC
252    uint32 tbl, tbu, temp;
253    __asm __volatile(
254      "1:\n"
255      "mftbu  %2\n"
256      "mftb   %0\n"
257      "mftbu  %1\n"
258      "cmpw   %2,%1\n"
259      "bne    1b\n"
260      : "=r"(tbl), "=r"(tbu), "=r"(temp)
261      :
262      : "cc");
263
264    tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl);
265#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
266    datacast_t data;
267    __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h));
268    tsc = data.l64;
269#elif defined(STRESSAPPTEST_CPU_ARMV7A)
270    #warning "Unsupported CPU type ARMV7A: your timer may not function correctly"
271    tsc = 0;
272#else
273    #warning "Unsupported CPU type: your timer may not function correctly"
274    tsc = 0;
275#endif
276    return (tsc);
277  }
278
279  // Find the free memory on the machine.
280  virtual int64 FindFreeMemSize();
281
282  // Allocates test memory of length bytes.
283  // Subclasses must implement this.
284  // Call PepareTestMem to get a pointer.
285  virtual int64 AllocateAllMem();  // Returns length.
286  // Returns success.
287  virtual bool AllocateTestMem(int64 length, uint64 paddr_base);
288  virtual void FreeTestMem();
289
290  // Prepares the memory for use. You must call this
291  // before using test memory, and after you are done.
292  virtual void *PrepareTestMem(uint64 offset, uint64 length);
293  virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length);
294
295  // Machine type detected. Can we implement all these functions correctly?
296  // Returns true if machine type is detected and implemented.
297  virtual bool IsSupported();
298
299  // Returns 32 for 32-bit, 64 for 64-bit.
300  virtual int AddressMode();
301  // Update OsLayer state regarding cpu support for various features.
302  virtual void GetFeatures();
303
304  // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file.
305  virtual int PciOpen(int bus, int device, int function);
306  virtual void PciWrite(int fd, uint32 offset, uint32 value, int width);
307  virtual uint32 PciRead(int fd, uint32 offset, int width);
308
309  // Read MSRs
310  virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data);
311  virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data);
312
313  // Extract bits [n+len-1, n] from a 32 bit word.
314  // so GetBitField(0x0f00, 8, 4) == 0xf.
315  virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len);
316
317  // Platform and CPU specific CPU-stressing function.
318  // Returns true on success, false otherwise.
319  virtual bool CpuStressWorkload();
320
321  // Causes false errors for unittesting.
322  // Setting to "true" causes errors to be injected.
323  void set_error_injection(bool errors) { error_injection_ = errors; }
324  bool error_injection() const { return error_injection_; }
325
326  // Is SAT using normal malloc'd memory, or exotic mmap'd memory.
327  bool normal_mem() const { return normal_mem_; }
328
329  // Get numa config, if available..
330  int num_nodes() const { return num_nodes_; }
331  int num_cpus() const { return num_cpus_; }
332
333  // Handle to platform-specific error diagnoser.
334  ErrorDiag *error_diagnoser_;
335
336  // Disambiguate between different "warm" memcopies.
337  virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
338                               unsigned int size_in_bytes,
339                               AdlerChecksum *checksum);
340
341  // Store a callback to use to print
342  // app-specific info about the last error location.
343  // This call back is called with a physical address, and the app can fill in
344  // the most recent transaction that occurred at that address.
345  typedef bool (*ErrCallback)(uint64 paddr, string *buf);
346  void set_err_log_callback(
347    ErrCallback err_log_callback) {
348    err_log_callback_ = err_log_callback;
349  }
350  ErrCallback get_err_log_callback() { return err_log_callback_; }
351
352  // Set a clock object that can be overridden for use with unit tests.
353  void SetClock(Clock *clock) {
354    if (clock_) {
355      delete clock_;
356    }
357    clock_ = clock;
358    time_initialized_ = clock_->Now();
359  }
360
361 protected:
362  void *testmem_;                // Location of test memory.
363  uint64 testmemsize_;           // Size of test memory.
364  int64 totalmemsize_;           // Size of available memory.
365  int64 min_hugepages_bytes_;    // Minimum hugepages size.
366  int64 reserve_mb_;             // Minimum amount of memory to reserve in MB.
367  bool  error_injection_;        // Do error injection?
368  bool  normal_mem_;             // Memory DMA capable?
369  bool  use_hugepages_;          // Use hugepage shmem?
370  bool  use_posix_shm_;          // Use 4k page shmem?
371  bool  dynamic_mapped_shmem_;   // Conserve virtual address space.
372  bool  mmapped_allocation_;     // Was memory allocated using mmap()?
373  int   shmid_;                  // Handle to shmem
374  vector< vector<string> > *channels_;  // Memory module names per channel.
375  uint64 channel_hash_;          // Mask of address bits XORed for channel.
376  int channel_width_;            // Channel width in bits.
377
378  int64 regionsize_;             // Size of memory "regions"
379  int   regioncount_;            // Number of memory "regions"
380  int   num_cpus_;               // Number of cpus in the system.
381  int   num_nodes_;              // Number of nodes in the system.
382  int   num_cpus_per_node_;      // Number of cpus per node in the system.
383  int   address_mode_;           // Are we running 32 or 64 bit?
384  bool  has_vector_;             // Do we have sse2/neon instructions?
385  bool  has_clflush_;            // Do we have clflush instructions?
386  bool  use_flush_page_cache_;   // Do we need to flush the page cache?
387
388
389  time_t time_initialized_;      // Start time of test.
390
391  vector<cpu_set_t> cpu_sets_;   // Cache for cpu masks.
392  vector<bool> cpu_sets_valid_;  // If the cpu mask cache is valid.
393
394  // Get file descriptor for dev msr.
395  virtual int OpenMSR(uint32 core, uint32 address);
396
397  // Look up how many hugepages there are.
398  virtual int64 FindHugePages();
399
400  // Link to find last transaction at an error location.
401  ErrCallback err_log_callback_;
402
403  // Object to wrap the time function.
404  Clock *clock_;
405
406 private:
407  DISALLOW_COPY_AND_ASSIGN(OsLayer);
408};
409
410// Selects and returns the proper OS and hardware interface.  Does not call
411// OsLayer::Initialize() on the new object.
412OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options);
413
414#endif  // STRESSAPPTEST_OS_H_ NOLINT
415