1// Copyright 2006 Google Inc. All Rights Reserved.
2// Author: nsanders, menderico
3
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7
8//      http://www.apache.org/licenses/LICENSE-2.0
9
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// os.cc : os and machine specific implementation
17// This file includes an abstracted interface
18// for linux-distro specific and HW specific
19// interfaces.
20
21#include "os.h"
22
23#include <errno.h>
24#include <fcntl.h>
25#include <linux/types.h>
26#include <malloc.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mman.h>
31#include <sys/ioctl.h>
32#include <sys/time.h>
33#include <sys/types.h>
34#include <sys/ipc.h>
35#ifdef HAVE_SYS_SHM_H
36#include <sys/shm.h>
37#endif
38#include <unistd.h>
39
40#ifndef SHM_HUGETLB
41#define SHM_HUGETLB      04000  // remove when glibc defines it
42#endif
43
44#include <string>
45#include <list>
46
47// This file must work with autoconf on its public version,
48// so these includes are correct.
49#include "sattypes.h"
50#include "error_diag.h"
51#include "clock.h"
52
53// OsLayer initialization.
54OsLayer::OsLayer() {
55  testmem_ = 0;
56  testmemsize_ = 0;
57  totalmemsize_ = 0;
58  min_hugepages_bytes_ = 0;
59  reserve_mb_ = 0;
60  normal_mem_ = true;
61  use_hugepages_ = false;
62  use_posix_shm_ = false;
63  dynamic_mapped_shmem_ = false;
64  mmapped_allocation_ = false;
65  shmid_ = 0;
66
67  time_initialized_ = 0;
68
69  regionsize_ = 0;
70  regioncount_ = 1;
71  num_cpus_ = 0;
72  num_nodes_ = 0;
73  num_cpus_per_node_ = 0;
74  error_diagnoser_ = 0;
75  err_log_callback_ = 0;
76  error_injection_ = false;
77
78  void *pvoid = 0;
79  address_mode_ = sizeof(pvoid) * 8;
80
81  has_clflush_ = false;
82  has_vector_ = false;
83
84  use_flush_page_cache_ = false;
85
86  clock_ = NULL;
87}
88
89// OsLayer cleanup.
90OsLayer::~OsLayer() {
91  if (error_diagnoser_)
92    delete error_diagnoser_;
93  if (clock_)
94    delete clock_;
95}
96
97// OsLayer initialization.
98bool OsLayer::Initialize() {
99  if (!clock_) {
100    clock_ = new Clock();
101  }
102
103  time_initialized_ = clock_->Now();
104  // Detect asm support.
105  GetFeatures();
106
107  if (num_cpus_ == 0) {
108    num_nodes_ = 1;
109    num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
110    num_cpus_per_node_ = num_cpus_ / num_nodes_;
111  }
112  logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
113  sat_assert(CPU_SETSIZE >= num_cpus_);
114  cpu_sets_.resize(num_nodes_);
115  cpu_sets_valid_.resize(num_nodes_);
116  // Create error diagnoser.
117  error_diagnoser_ = new ErrorDiag();
118  if (!error_diagnoser_->set_os(this))
119    return false;
120  return true;
121}
122
123// Machine type detected. Can we implement all these functions correctly?
124bool OsLayer::IsSupported() {
125  if (kOpenSource) {
126    // There are no explicitly supported systems in open source version.
127    return true;
128  }
129
130  // This is the default empty implementation.
131  // SAT won't report full error information.
132  return false;
133}
134
135int OsLayer::AddressMode() {
136  // Detect 32/64 bit binary.
137  void *pvoid = 0;
138  return sizeof(pvoid) * 8;
139}
140
141// Translates user virtual to physical address.
142uint64 OsLayer::VirtualToPhysical(void *vaddr) {
143  uint64 frame, shift;
144  off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8;
145  int fd = open(kPagemapPath, O_RDONLY);
146  // /proc/self/pagemap is available in kernel >= 2.6.25
147  if (fd < 0)
148    return 0;
149
150  if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
151    int err = errno;
152    string errtxt = ErrorString(err);
153    logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
154              kPagemapPath, err, errtxt.c_str());
155    if (fd >= 0)
156      close(fd);
157    return 0;
158  }
159  close(fd);
160  if (!(frame & (1LL << 63)) || (frame & (1LL << 62)))
161    return 0;
162  shift = (frame >> 55) & 0x3f;
163  frame = (frame & 0x007fffffffffffffLL) << shift;
164  return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1));
165}
166
167// Returns the HD device that contains this file.
168string OsLayer::FindFileDevice(string filename) {
169  return "hdUnknown";
170}
171
172// Returns a list of locations corresponding to HD devices.
173list<string> OsLayer::FindFileDevices() {
174  // No autodetection on unknown systems.
175  list<string> locations;
176  return locations;
177}
178
179
180// Get HW core features from cpuid instruction.
181void OsLayer::GetFeatures() {
182#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
183  unsigned int eax = 1, ebx, ecx, edx;
184  cpuid(&eax, &ebx, &ecx, &edx);
185  has_clflush_ = (edx >> 19) & 1;
186  has_vector_ = (edx >> 26) & 1;  // SSE2 caps bit.
187
188  logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
189            has_clflush_ ? "true" : "false",
190            has_vector_ ? "true" : "false");
191#elif defined(STRESSAPPTEST_CPU_PPC)
192  // All PPC implementations have cache flush instructions.
193  has_clflush_ = true;
194#elif defined(STRESSAPPTEST_CPU_ARMV7A)
195  // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
196  // For now assume neon and don't run -W if you don't have it.
197  has_vector_ = true; // NEON.
198#warning "Unsupported CPU type ARMV7A: unable to determine feature set."
199#else
200#warning "Unsupported CPU type: unable to determine feature set."
201#endif
202}
203
204
205// Enable FlushPageCache to be functional instead of a NOP.
206void OsLayer::ActivateFlushPageCache(void) {
207  logprintf(9, "Log: page cache will be flushed as needed\n");
208  use_flush_page_cache_ = true;
209}
210
211// Flush the page cache to ensure reads come from the disk.
212bool OsLayer::FlushPageCache(void) {
213  if (!use_flush_page_cache_)
214    return true;
215
216  // First, ask the kernel to write the cache to the disk.
217  sync();
218
219  // Second, ask the kernel to empty the cache by writing "1" to
220  // "/proc/sys/vm/drop_caches".
221  static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
222  int dcfile = open(drop_caches_file, O_WRONLY);
223  if (dcfile < 0) {
224    int err = errno;
225    string errtxt = ErrorString(err);
226    logprintf(3, "Log: failed to open %s - err %d (%s)\n",
227              drop_caches_file, err, errtxt.c_str());
228    return false;
229  }
230
231  ssize_t bytes_written = write(dcfile, "1", 1);
232  close(dcfile);
233
234  if (bytes_written != 1) {
235    int err = errno;
236    string errtxt = ErrorString(err);
237    logprintf(3, "Log: failed to write %s - err %d (%s)\n",
238              drop_caches_file, err, errtxt.c_str());
239    return false;
240  }
241  return true;
242}
243
244
245// We need to flush the cacheline here.
246void OsLayer::Flush(void *vaddr) {
247  // Use the generic flush. This function is just so we can override
248  // this if we are so inclined.
249  if (has_clflush_) {
250    OsLayer::FastFlush(vaddr);
251  }
252}
253
254
255// Run C or ASM copy as appropriate..
256bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
257                              unsigned int size_in_bytes,
258                              AdlerChecksum *checksum) {
259  if (has_vector_) {
260    return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
261  } else {
262    return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
263  }
264}
265
266
267// Translate physical address to memory module/chip name.
268// Assumes interleaving between two memory channels based on the XOR of
269// all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
270// blocks with bits distributed from each chip in that channel.
271int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
272  if (!channels_) {
273    snprintf(buf, len, "DIMM Unknown");
274    return -1;
275  }
276
277  // Find channel by XORing address bits in channel_hash mask.
278  uint32 low = static_cast<uint32>(addr & channel_hash_);
279  uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
280  vector<string>& channel = (*channels_)[
281      __builtin_parity(high) ^ __builtin_parity(low)];
282
283  // Find dram chip by finding which byte within the channel
284  // by address mod channel width, then divide the channel
285  // evenly among the listed dram chips. Note, this will not work
286  // with x4 dram.
287  int chip = (addr % (channel_width_ / 8)) /
288             ((channel_width_ / 8) / channel.size());
289  string name = channel[chip];
290  snprintf(buf, len, "%s", name.c_str());
291  return 1;
292}
293
294
295// Classifies addresses according to "regions"
296// This isn't really implemented meaningfully here..
297int32 OsLayer::FindRegion(uint64 addr) {
298  static bool warned = false;
299
300  if (regionsize_ == 0) {
301    regionsize_ = totalmemsize_ / 8;
302    if (regionsize_ < 512 * kMegabyte)
303      regionsize_ = 512 * kMegabyte;
304    regioncount_ = totalmemsize_ / regionsize_;
305    if (regioncount_ < 1) regioncount_ = 1;
306  }
307
308  int32 region_num = addr / regionsize_;
309  if (region_num >= regioncount_) {
310    if (!warned) {
311        logprintf(0, "Log: region number %d exceeds region count %d\n",
312                  region_num, regioncount_);
313        warned = true;
314    }
315    region_num = region_num % regioncount_;
316  }
317  return region_num;
318}
319
320// Report which cores are associated with a given region.
321cpu_set_t *OsLayer::FindCoreMask(int32 region) {
322  sat_assert(region >= 0);
323  region %= num_nodes_;
324  if (!cpu_sets_valid_[region]) {
325    CPU_ZERO(&cpu_sets_[region]);
326    for (int i = 0; i < num_cpus_per_node_; ++i) {
327      CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
328    }
329    cpu_sets_valid_[region] = true;
330    logprintf(5, "Log: Region %d mask 0x%s\n",
331                 region, FindCoreMaskFormat(region).c_str());
332  }
333  return &cpu_sets_[region];
334}
335
336// Return cores associated with a given region in hex string.
337string OsLayer::FindCoreMaskFormat(int32 region) {
338  cpu_set_t* mask = FindCoreMask(region);
339  string format = cpuset_format(mask);
340  if (format.size() < 8)
341    format = string(8 - format.size(), '0') + format;
342  return format;
343}
344
345// Report an error in an easily parseable way.
346bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
347  time_t now = clock_->Now();
348  int ttf = now - time_initialized_;
349  if (strlen(symptom) && strlen(part)) {
350    logprintf(0, "Report Error: %s : %s : %d : %ds\n",
351              symptom, part, count, ttf);
352  } else {
353    // Log something so the error still shows up, but this won't break the
354    // parser.
355    logprintf(0, "Warning: Invalid Report Error: "
356              "%s : %s : %d : %ds\n", symptom, part, count, ttf);
357  }
358  return true;
359}
360
361// Read the number of hugepages out of the kernel interface in proc.
362int64 OsLayer::FindHugePages() {
363  char buf[65] = "0";
364
365  // This is a kernel interface to query the numebr of hugepages
366  // available in the system.
367  static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
368  int hpfile = open(hugepages_info_file, O_RDONLY);
369
370  ssize_t bytes_read = read(hpfile, buf, 64);
371  close(hpfile);
372
373  if (bytes_read <= 0) {
374    logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
375                  "read did not provide data\n");
376    return 0;
377  }
378
379  if (bytes_read == 64) {
380    logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
381                 "is surprisingly large\n");
382    return 0;
383  }
384
385  // Add a null termintation to be string safe.
386  buf[bytes_read] = '\0';
387  // Read the page count.
388  int64 pages = strtoull(buf, NULL, 10);  // NOLINT
389
390  return pages;
391}
392
393int64 OsLayer::FindFreeMemSize() {
394  int64 size = 0;
395  int64 minsize = 0;
396  if (totalmemsize_ > 0)
397    return totalmemsize_;
398
399  int64 pages = sysconf(_SC_PHYS_PAGES);
400  int64 avpages = sysconf(_SC_AVPHYS_PAGES);
401  int64 pagesize = sysconf(_SC_PAGESIZE);
402  int64 physsize = pages * pagesize;
403  int64 avphyssize = avpages * pagesize;
404
405  // Assume 2MB hugepages.
406  int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
407
408  if ((pages == -1) || (pagesize == -1)) {
409    logprintf(0, "Process Error: sysconf could not determine memory size.\n");
410    return 0;
411  }
412
413  // We want to leave enough stuff for things to run.
414  // If the user specified a minimum amount of memory to expect, require that.
415  // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
416  // If less than 2GB is present use 85% of what's available.
417  // These are fairly arbitrary numbers that seem to work OK.
418  //
419  // TODO(nsanders): is there a more correct way to determine target
420  // memory size?
421  if (hugepagesize > 0) {
422    if (min_hugepages_bytes_ > 0) {
423      minsize = min_hugepages_bytes_;
424    } else {
425      minsize = hugepagesize;
426    }
427  } else {
428    if (physsize < 2048LL * kMegabyte) {
429      minsize = ((pages * 85) / 100) * pagesize;
430    } else {
431      minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
432    }
433    // Make sure that at least reserve_mb_ is left for the system.
434    if (reserve_mb_ > 0) {
435      int64 totalsize = pages * pagesize;
436      int64 reserve_kb = reserve_mb_ * kMegabyte;
437      if (reserve_kb > totalsize) {
438        logprintf(0, "Procedural Error: %lld is bigger than the total memory "
439                  "available %lld\n", reserve_kb, totalsize);
440      } else if (reserve_kb > totalsize - minsize) {
441        logprintf(5, "Warning: Overriding memory to use: original %lld, "
442                  "current %lld\n", minsize, totalsize - reserve_kb);
443        minsize = totalsize - reserve_kb;
444      }
445    }
446  }
447
448  // Use hugepage sizing if available.
449  if (hugepagesize > 0) {
450    if (hugepagesize < minsize) {
451      logprintf(0, "Procedural Error: Not enough hugepages. "
452                   "%lldMB available < %lldMB required.\n",
453                hugepagesize / kMegabyte,
454                minsize / kMegabyte);
455      // Require the calculated minimum amount of memory.
456      size = minsize;
457    } else {
458      // Require that we get all hugepages.
459      size = hugepagesize;
460    }
461  } else {
462    // Require the calculated minimum amount of memory.
463    size = minsize;
464  }
465
466  logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
467               "Targeting %lld MB (%lld%%)\n",
468            physsize / kMegabyte,
469            avphyssize / kMegabyte,
470            hugepagesize / kMegabyte,
471            size / kMegabyte,
472            size * 100 / physsize);
473
474  totalmemsize_ = size;
475  return size;
476}
477
478// Allocates all memory available.
479int64 OsLayer::AllocateAllMem() {
480  int64 length = FindFreeMemSize();
481  bool retval = AllocateTestMem(length, 0);
482  if (retval)
483    return length;
484  else
485    return 0;
486}
487
488// Allocate the target memory. This may be from malloc, hugepage pool
489// or other platform specific sources.
490bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
491  // Try hugepages first.
492  void *buf = 0;
493
494  sat_assert(length >= 0);
495
496  if (paddr_base)
497    logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
498              " ignore.\n", paddr_base);
499
500  // Determine optimal memory allocation path.
501  bool prefer_hugepages = false;
502  bool prefer_posix_shm = false;
503  bool prefer_dynamic_mapping = false;
504
505  // Are there enough hugepages?
506  int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
507  // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
508  if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
509    prefer_dynamic_mapping = true;
510    prefer_posix_shm = true;
511    logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
512    logprintf(3, "Log: You may need to run "
513                 "'sudo mount -o remount,size=100\% /dev/shm.'\n");
514  } else if (hugepagesize >= length) {
515    prefer_hugepages = true;
516    logprintf(3, "Log: Prefer using hugepage allocation.\n");
517  } else {
518    logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
519  }
520
521#ifdef HAVE_SYS_SHM_H
522  // Allocate hugepage mapped memory.
523  if (prefer_hugepages) {
524    do { // Allow break statement.
525      int shmid;
526      void *shmaddr;
527
528      if ((shmid = shmget(2, length,
529              SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
530        int err = errno;
531        string errtxt = ErrorString(err);
532        logprintf(3, "Log: failed to allocate shared hugepage "
533                      "object - err %d (%s)\n",
534                  err, errtxt.c_str());
535        logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
536        break;
537      }
538
539      shmaddr = shmat(shmid, NULL, 0);
540      if (shmaddr == reinterpret_cast<void*>(-1)) {
541        int err = errno;
542        string errtxt = ErrorString(err);
543        logprintf(0, "Log: failed to attach shared "
544                     "hugepage object - err %d (%s).\n",
545                  err, errtxt.c_str());
546        if (shmctl(shmid, IPC_RMID, NULL) < 0) {
547          int err = errno;
548          string errtxt = ErrorString(err);
549          logprintf(0, "Log: failed to remove shared "
550                       "hugepage object - err %d (%s).\n",
551                    err, errtxt.c_str());
552        }
553        break;
554      }
555      use_hugepages_ = true;
556      shmid_ = shmid;
557      buf = shmaddr;
558      logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
559                shmid, shmaddr);
560    } while (0);
561  }
562
563  if ((!use_hugepages_) && prefer_posix_shm) {
564    do {
565      int shm_object;
566      void *shmaddr = NULL;
567
568      shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
569      if (shm_object < 0) {
570        int err = errno;
571        string errtxt = ErrorString(err);
572        logprintf(3, "Log: failed to allocate shared "
573                      "smallpage object - err %d (%s)\n",
574                  err, errtxt.c_str());
575        break;
576      }
577
578      if (0 > ftruncate(shm_object, length)) {
579        int err = errno;
580        string errtxt = ErrorString(err);
581        logprintf(3, "Log: failed to ftruncate shared "
582                      "smallpage object - err %d (%s)\n",
583                  err, errtxt.c_str());
584        break;
585      }
586
587      // 32 bit linux apps can only use ~1.4G of address space.
588      // Use dynamic mapping for allocations larger than that.
589      // Currently perf hit is ~10% for this.
590      if (prefer_dynamic_mapping) {
591        dynamic_mapped_shmem_ = true;
592      } else {
593        // Do a full mapping here otherwise.
594        shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
595                         MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
596                         shm_object, 0);
597        if (shmaddr == reinterpret_cast<void*>(-1)) {
598          int err = errno;
599          string errtxt = ErrorString(err);
600          logprintf(0, "Log: failed to map shared "
601                       "smallpage object - err %d (%s).\n",
602                    err, errtxt.c_str());
603          break;
604        }
605      }
606
607      use_posix_shm_ = true;
608      shmid_ = shm_object;
609      buf = shmaddr;
610      char location_message[256] = "";
611      if (dynamic_mapped_shmem_) {
612        sprintf(location_message, "mapped as needed");
613      } else {
614        sprintf(location_message, "at %p", shmaddr);
615      }
616      logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
617                shm_object, location_message);
618    } while (0);
619    shm_unlink("/stressapptest");
620  }
621#endif  // HAVE_SYS_SHM_H
622
623  if (!use_hugepages_ && !use_posix_shm_) {
624    // If the page size is what SAT is expecting explicitly perform mmap()
625    // allocation.
626    if (sysconf(_SC_PAGESIZE) >= 4096) {
627      void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
628                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
629      if (map_buf != MAP_FAILED) {
630        buf = map_buf;
631        mmapped_allocation_ = true;
632        logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
633      }
634    }
635    if (!mmapped_allocation_) {
636      // Use memalign to ensure that blocks are aligned enough for disk direct
637      // IO.
638      buf = static_cast<char*>(memalign(4096, length));
639      if (buf) {
640        logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
641      } else {
642        logprintf(0, "Process Error: memalign returned 0\n");
643        if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
644          logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
645                       "bit process. Please setup shared memory.\n");
646        }
647      }
648    }
649  }
650
651  testmem_ = buf;
652  if (buf || dynamic_mapped_shmem_) {
653    testmemsize_ = length;
654  } else {
655    testmemsize_ = 0;
656  }
657
658  return (buf != 0) || dynamic_mapped_shmem_;
659}
660
661// Free the test memory.
662void OsLayer::FreeTestMem() {
663  if (testmem_) {
664    if (use_hugepages_) {
665#ifdef HAVE_SYS_SHM_H
666      shmdt(testmem_);
667      shmctl(shmid_, IPC_RMID, NULL);
668#endif
669    } else if (use_posix_shm_) {
670      if (!dynamic_mapped_shmem_) {
671        munmap(testmem_, testmemsize_);
672      }
673      close(shmid_);
674    } else if (mmapped_allocation_) {
675      munmap(testmem_, testmemsize_);
676    } else {
677      free(testmem_);
678    }
679    testmem_ = 0;
680    testmemsize_ = 0;
681  }
682}
683
684
685// Prepare the target memory. It may requre mapping in, or this may be a noop.
686void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
687  sat_assert((offset + length) <= testmemsize_);
688  if (dynamic_mapped_shmem_) {
689    // TODO(nsanders): Check if we can support MAP_NONBLOCK,
690    // and evaluate performance hit from not using it.
691#ifdef HAVE_MMAP64
692    void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
693                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
694                     shmid_, offset);
695#else
696    void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
697                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
698                     shmid_, offset);
699#endif
700    if (mapping == MAP_FAILED) {
701      string errtxt = ErrorString(errno);
702      logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
703                   "error: %s.\n",
704                offset, length, errtxt.c_str());
705      sat_assert(0);
706    }
707    return mapping;
708  }
709
710  return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
711}
712
713// Release the test memory resources, if any.
714void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
715  if (dynamic_mapped_shmem_) {
716    int retval = munmap(addr, length);
717    if (retval == -1) {
718      string errtxt = ErrorString(errno);
719      logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
720                   "error: %s.\n",
721                addr, length, errtxt.c_str());
722      sat_assert(0);
723    }
724  }
725}
726
727// No error polling on unknown systems.
728int OsLayer::ErrorPoll() {
729  return 0;
730}
731
732// Generally, poll for errors once per second.
733void OsLayer::ErrorWait() {
734  sat_sleep(1);
735  return;
736}
737
738// Open a PCI bus-dev-func as a file and return its file descriptor.
739// Error is indicated by return value less than zero.
740int OsLayer::PciOpen(int bus, int device, int function) {
741  char dev_file[256];
742
743  snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
744           bus, device, function);
745
746  int fd = open(dev_file, O_RDWR);
747  if (fd == -1) {
748    logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
749                 "function %d (errno %d).\n",
750              bus, device, function, errno);
751    return -1;
752  }
753
754  return fd;
755}
756
757
758// Read and write functions to access PCI config.
759uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
760  // Strict aliasing rules lawyers will cause data corruption
761  // on cast pointers in some gccs.
762  union {
763    uint32 l32;
764    uint16 l16;
765    uint8 l8;
766  } datacast;
767  datacast.l32 = 0;
768  uint32 size = width / 8;
769
770  sat_assert((width == 32) || (width == 16) || (width == 8));
771  sat_assert(offset <= (256 - size));
772
773  if (lseek(fd, offset, SEEK_SET) < 0) {
774    logprintf(0, "Process Error: Can't seek %x\n", offset);
775    return 0;
776  }
777  if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
778    logprintf(0, "Process Error: Can't read %x\n", offset);
779    return 0;
780  }
781
782  // Extract the data.
783  switch (width) {
784    case 8:
785      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
786      return datacast.l8;
787    case 16:
788      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
789      return datacast.l16;
790    case 32:
791      return datacast.l32;
792  }
793  return 0;
794}
795
796void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
797  // Strict aliasing rules lawyers will cause data corruption
798  // on cast pointers in some gccs.
799  union {
800    uint32 l32;
801    uint16 l16;
802    uint8 l8;
803  } datacast;
804  datacast.l32 = 0;
805  uint32 size = width / 8;
806
807  sat_assert((width == 32) || (width == 16) || (width == 8));
808  sat_assert(offset <= (256 - size));
809
810  // Cram the data into the right alignment.
811  switch (width) {
812    case 8:
813      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
814      datacast.l8 = value;
815    case 16:
816      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
817      datacast.l16 = value;
818    case 32:
819      datacast.l32 = value;
820  }
821
822  if (lseek(fd, offset, SEEK_SET) < 0) {
823    logprintf(0, "Process Error: Can't seek %x\n", offset);
824    return;
825  }
826  if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
827    logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
828    return;
829  }
830
831  return;
832}
833
834
835
836// Open dev msr.
837int OsLayer::OpenMSR(uint32 core, uint32 address) {
838  char buf[256];
839  snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
840  int fd = open(buf, O_RDWR);
841  if (fd < 0)
842    return fd;
843
844  uint32 pos = lseek(fd, address, SEEK_SET);
845  if (pos != address) {
846    close(fd);
847    logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
848    return -1;
849  }
850
851  return fd;
852}
853
854bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
855  int fd = OpenMSR(core, address);
856  if (fd < 0)
857    return false;
858
859  // Read from the msr.
860  bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
861
862  if (!res)
863    logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
864
865  close(fd);
866
867  return res;
868}
869
870bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
871  int fd = OpenMSR(core, address);
872  if (fd < 0)
873    return false;
874
875  // Write to the msr
876  bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
877
878  if (!res)
879    logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
880
881  close(fd);
882
883  return res;
884}
885
886// Extract bits [n+len-1, n] from a 32 bit word.
887// so GetBitField(0x0f00, 8, 4) == 0xf.
888uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
889  return (val >> n) & ((1<<len) - 1);
890}
891
892// Generic CPU stress workload that would work on any CPU/Platform.
893// Float-point array moving average calculation.
894bool OsLayer::CpuStressWorkload() {
895  double float_arr[100];
896  double sum = 0;
897#ifdef HAVE_RAND_R
898  unsigned int seed = 12345;
899#endif
900
901  // Initialize array with random numbers.
902  for (int i = 0; i < 100; i++) {
903#ifdef HAVE_RAND_R
904    float_arr[i] = rand_r(&seed);
905    if (rand_r(&seed) % 2)
906      float_arr[i] *= -1.0;
907#else
908    srand(time(NULL));
909    float_arr[i] = rand();  // NOLINT
910    if (rand() % 2)         // NOLINT
911      float_arr[i] *= -1.0;
912#endif
913  }
914
915  // Calculate moving average.
916  for (int i = 0; i < 100000000; i++) {
917    float_arr[i % 100] =
918      (float_arr[i % 100] + float_arr[(i + 1) % 100] +
919       float_arr[(i + 99) % 100]) / 3;
920    sum += float_arr[i % 100];
921  }
922
923  // Artificial printf so the loops do not get optimized away.
924  if (sum == 0.0)
925    logprintf(12, "Log: I'm Feeling Lucky!\n");
926  return true;
927}
928