1// Copyright 2006 Google Inc. All Rights Reserved.
2// Author: nsanders, menderico
3
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7
8//      http://www.apache.org/licenses/LICENSE-2.0
9
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// os.cc : os and machine specific implementation
17// This file includes an abstracted interface
18// for linux-distro specific and HW specific
19// interfaces.
20
21#include "os.h"
22
23#include <errno.h>
24#include <fcntl.h>
25#include <linux/types.h>
26#include <malloc.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mman.h>
31#include <sys/ioctl.h>
32#include <sys/time.h>
33#include <sys/types.h>
34#include <sys/ipc.h>
35#ifdef HAVE_SYS_SHM_H
36#include <sys/shm.h>
37#endif
38#include <unistd.h>
39
40#ifndef SHM_HUGETLB
41#define SHM_HUGETLB      04000  // remove when glibc defines it
42#endif
43
44#include <string>
45#include <list>
46
47// This file must work with autoconf on its public version,
48// so these includes are correct.
49#include "sattypes.h"
50#include "error_diag.h"
51
52// OsLayer initialization.
53OsLayer::OsLayer() {
54  testmem_ = 0;
55  testmemsize_ = 0;
56  totalmemsize_ = 0;
57  min_hugepages_bytes_ = 0;
58  normal_mem_ = true;
59  use_hugepages_ = false;
60  use_posix_shm_ = false;
61  dynamic_mapped_shmem_ = false;
62  shmid_ = 0;
63
64  time_initialized_ = 0;
65
66  regionsize_ = 0;
67  regioncount_ = 1;
68  num_cpus_ = 0;
69  num_nodes_ = 0;
70  num_cpus_per_node_ = 0;
71  error_diagnoser_ = 0;
72  err_log_callback_ = 0;
73  error_injection_ = false;
74
75  void *pvoid = 0;
76  address_mode_ = sizeof(pvoid) * 8;
77
78  has_clflush_ = false;
79  has_sse2_ = false;
80
81  use_flush_page_cache_ = false;
82}
83
84// OsLayer cleanup.
85OsLayer::~OsLayer() {
86  if (error_diagnoser_)
87    delete error_diagnoser_;
88}
89
90// OsLayer initialization.
91bool OsLayer::Initialize() {
92  time_initialized_ = time(NULL);
93  // Detect asm support.
94  GetFeatures();
95
96  if (num_cpus_ == 0) {
97    num_nodes_ = 1;
98    num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
99    num_cpus_per_node_ = num_cpus_ / num_nodes_;
100  }
101  logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
102  sat_assert(CPU_SETSIZE >= num_cpus_);
103  cpu_sets_.resize(num_nodes_);
104  cpu_sets_valid_.resize(num_nodes_);
105  // Create error diagnoser.
106  error_diagnoser_ = new ErrorDiag();
107  if (!error_diagnoser_->set_os(this))
108    return false;
109  return true;
110}
111
112// Machine type detected. Can we implement all these functions correctly?
113bool OsLayer::IsSupported() {
114  if (kOpenSource) {
115    // There are no explicitly supported systems in open source version.
116    return true;
117  }
118
119  // This is the default empty implementation.
120  // SAT won't report full error information.
121  return false;
122}
123
124int OsLayer::AddressMode() {
125  // Detect 32/64 bit binary.
126  void *pvoid = 0;
127  return sizeof(pvoid) * 8;
128}
129
130// Translates user virtual to physical address.
131uint64 OsLayer::VirtualToPhysical(void *vaddr) {
132  // Needs platform specific implementation.
133  return 0;
134}
135
136// Returns the HD device that contains this file.
137string OsLayer::FindFileDevice(string filename) {
138  return "hdUnknown";
139}
140
141// Returns a list of locations corresponding to HD devices.
142list<string> OsLayer::FindFileDevices() {
143  // No autodetection on unknown systems.
144  list<string> locations;
145  return locations;
146}
147
148
149// Get HW core features from cpuid instruction.
150void OsLayer::GetFeatures() {
151#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
152  // CPUID features documented at:
153  // http://www.sandpile.org/ia32/cpuid.htm
154  int ax, bx, cx, dx;
155  __asm__ __volatile__ (
156      "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1));
157  has_clflush_ = (dx >> 19) & 1;
158  has_sse2_ = (dx >> 26) & 1;
159
160  logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
161            has_clflush_ ? "true" : "false",
162            has_sse2_ ? "true" : "false");
163#elif defined(STRESSAPPTEST_CPU_PPC)
164  // All PPC implementations have cache flush instructions.
165  has_clflush_ = true;
166#elif defined(STRESSAPPTEST_CPU_ARMV7A)
167#warning "Unsupported CPU type ARMV7A: unable to determine feature set."
168#else
169#warning "Unsupported CPU type: unable to determine feature set."
170#endif
171}
172
173
174// Enable FlushPageCache to be functional instead of a NOP.
175void OsLayer::ActivateFlushPageCache(void) {
176  logprintf(9, "Log: page cache will be flushed as needed\n");
177  use_flush_page_cache_ = true;
178}
179
180// Flush the page cache to ensure reads come from the disk.
181bool OsLayer::FlushPageCache(void) {
182  if (!use_flush_page_cache_)
183    return true;
184
185  // First, ask the kernel to write the cache to the disk.
186  sync();
187
188  // Second, ask the kernel to empty the cache by writing "1" to
189  // "/proc/sys/vm/drop_caches".
190  static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
191  int dcfile = open(drop_caches_file, O_WRONLY);
192  if (dcfile < 0) {
193    int err = errno;
194    string errtxt = ErrorString(err);
195    logprintf(3, "Log: failed to open %s - err %d (%s)\n",
196              drop_caches_file, err, errtxt.c_str());
197    return false;
198  }
199
200  ssize_t bytes_written = write(dcfile, "1", 1);
201  close(dcfile);
202
203  if (bytes_written != 1) {
204    int err = errno;
205    string errtxt = ErrorString(err);
206    logprintf(3, "Log: failed to write %s - err %d (%s)\n",
207              drop_caches_file, err, errtxt.c_str());
208    return false;
209  }
210  return true;
211}
212
213
214// We need to flush the cacheline here.
215void OsLayer::Flush(void *vaddr) {
216  // Use the generic flush. This function is just so we can override
217  // this if we are so inclined.
218  if (has_clflush_)
219    FastFlush(vaddr);
220}
221
222
223// Run C or ASM copy as appropriate..
224bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
225                              unsigned int size_in_bytes,
226                              AdlerChecksum *checksum) {
227  if (has_sse2_) {
228    return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
229  } else {
230    return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
231  }
232}
233
234
235// Translate user virtual to physical address.
236int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
237  char tmpbuf[256];
238  snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown");
239  snprintf(buf, len, "%s", tmpbuf);
240  return 0;
241}
242
243
244// Classifies addresses according to "regions"
245// This isn't really implemented meaningfully here..
246int32 OsLayer::FindRegion(uint64 addr) {
247  static bool warned = false;
248
249  if (regionsize_ == 0) {
250    regionsize_ = totalmemsize_ / 8;
251    if (regionsize_ < 512 * kMegabyte)
252      regionsize_ = 512 * kMegabyte;
253    regioncount_ = totalmemsize_ / regionsize_;
254    if (regioncount_ < 1) regioncount_ = 1;
255  }
256
257  int32 region_num = addr / regionsize_;
258  if (region_num >= regioncount_) {
259    if (!warned) {
260        logprintf(0, "Log: region number %d exceeds region count %d\n",
261                  region_num, regioncount_);
262        warned = true;
263    }
264    region_num = region_num % regioncount_;
265  }
266  return region_num;
267}
268
269// Report which cores are associated with a given region.
270cpu_set_t *OsLayer::FindCoreMask(int32 region) {
271  sat_assert(region >= 0);
272  region %= num_nodes_;
273  if (!cpu_sets_valid_[region]) {
274    CPU_ZERO(&cpu_sets_[region]);
275    for (int i = 0; i < num_cpus_per_node_; ++i) {
276      CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
277    }
278    cpu_sets_valid_[region] = true;
279    logprintf(5, "Log: Region %d mask 0x%s\n",
280                 region, FindCoreMaskFormat(region).c_str());
281  }
282  return &cpu_sets_[region];
283}
284
285// Return cores associated with a given region in hex string.
286string OsLayer::FindCoreMaskFormat(int32 region) {
287  cpu_set_t* mask = FindCoreMask(region);
288  string format = cpuset_format(mask);
289  if (format.size() < 8)
290    format = string(8 - format.size(), '0') + format;
291  return format;
292}
293
294// Report an error in an easily parseable way.
295bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
296  time_t now = time(NULL);
297  int ttf = now - time_initialized_;
298  logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
299  return true;
300}
301
302// Read the number of hugepages out of the kernel interface in proc.
303int64 OsLayer::FindHugePages() {
304  char buf[65] = "0";
305
306  // This is a kernel interface to query the numebr of hugepages
307  // available in the system.
308  static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
309  int hpfile = open(hugepages_info_file, O_RDONLY);
310
311  ssize_t bytes_read = read(hpfile, buf, 64);
312  close(hpfile);
313
314  if (bytes_read <= 0) {
315    logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
316                  "read did not provide data\n");
317    return 0;
318  }
319
320  if (bytes_read == 64) {
321    logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
322                 "is surprisingly large\n");
323    return 0;
324  }
325
326  // Add a null termintation to be string safe.
327  buf[bytes_read] = '\0';
328  // Read the page count.
329  int64 pages = strtoull(buf, NULL, 10);  // NOLINT
330
331  return pages;
332}
333
334int64 OsLayer::FindFreeMemSize() {
335  int64 size = 0;
336  int64 minsize = 0;
337  if (totalmemsize_ > 0)
338    return totalmemsize_;
339
340  int64 pages = sysconf(_SC_PHYS_PAGES);
341  int64 avpages = sysconf(_SC_AVPHYS_PAGES);
342  int64 pagesize = sysconf(_SC_PAGESIZE);
343  int64 physsize = pages * pagesize;
344  int64 avphyssize = avpages * pagesize;
345
346  // Assume 2MB hugepages.
347  int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
348
349  if ((pages == -1) || (pagesize == -1)) {
350    logprintf(0, "Process Error: sysconf could not determine memory size.\n");
351    return 0;
352  }
353
354  // We want to leave enough stuff for things to run.
355  // If the user specified a minimum amount of memory to expect, require that.
356  // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
357  // If less than 2GB is present use 85% of what's available.
358  // These are fairly arbitrary numbers that seem to work OK.
359  //
360  // TODO(nsanders): is there a more correct way to determine target
361  // memory size?
362  if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
363    minsize = min_hugepages_bytes_;
364  } else if (physsize < 2048LL * kMegabyte) {
365    minsize = ((pages * 85) / 100) * pagesize;
366  } else {
367    minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
368  }
369
370  // Use hugepage sizing if available.
371  if (hugepagesize > 0) {
372    if (hugepagesize < minsize) {
373      logprintf(0, "Procedural Error: Not enough hugepages. "
374                   "%lldMB available < %lldMB required.\n",
375                hugepagesize / kMegabyte,
376                minsize / kMegabyte);
377      // Require the calculated minimum amount of memory.
378      size = minsize;
379    } else {
380      // Require that we get all hugepages.
381      size = hugepagesize;
382    }
383  } else {
384    // Require the calculated minimum amount of memory.
385    size = minsize;
386  }
387
388  logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
389               "Targeting %lld MB (%lld%%)\n",
390            physsize / kMegabyte,
391            avphyssize / kMegabyte,
392            hugepagesize / kMegabyte,
393            size / kMegabyte,
394            size * 100 / physsize);
395
396  totalmemsize_ = size;
397  return size;
398}
399
400// Allocates all memory available.
401int64 OsLayer::AllocateAllMem() {
402  int64 length = FindFreeMemSize();
403  bool retval = AllocateTestMem(length, 0);
404  if (retval)
405    return length;
406  else
407    return 0;
408}
409
410// Allocate the target memory. This may be from malloc, hugepage pool
411// or other platform specific sources.
412bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
413  // Try hugepages first.
414  void *buf = 0;
415
416  sat_assert(length >= 0);
417
418  if (paddr_base)
419    logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
420              " ignore.\n", paddr_base);
421
422  // Determine optimal memory allocation path.
423  bool prefer_hugepages = false;
424  bool prefer_posix_shm = false;
425  bool prefer_dynamic_mapping = false;
426
427  // Are there enough hugepages?
428  int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
429  // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
430  if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
431    prefer_dynamic_mapping = true;
432    prefer_posix_shm = true;
433    logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
434    logprintf(3, "Log: You may need to run "
435                 "'sudo mount -o remount,size=100\% /dev/shm.'\n");
436  } else if (hugepagesize >= length) {
437    prefer_hugepages = true;
438    logprintf(3, "Log: Prefer using hugepace allocation.\n");
439  } else {
440    logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
441  }
442
443#ifdef HAVE_SYS_SHM_H
444  // Allocate hugepage mapped memory.
445  if (prefer_hugepages) {
446    do { // Allow break statement.
447      int shmid;
448      void *shmaddr;
449
450      if ((shmid = shmget(2, length,
451              SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
452        int err = errno;
453        string errtxt = ErrorString(err);
454        logprintf(3, "Log: failed to allocate shared hugepage "
455                      "object - err %d (%s)\n",
456                  err, errtxt.c_str());
457        logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
458        break;
459      }
460
461      shmaddr = shmat(shmid, NULL, NULL);
462      if (shmaddr == reinterpret_cast<void*>(-1)) {
463        int err = errno;
464        string errtxt = ErrorString(err);
465        logprintf(0, "Log: failed to attach shared "
466                     "hugepage object - err %d (%s).\n",
467                  err, errtxt.c_str());
468        if (shmctl(shmid, IPC_RMID, NULL) < 0) {
469          int err = errno;
470          string errtxt = ErrorString(err);
471          logprintf(0, "Log: failed to remove shared "
472                       "hugepage object - err %d (%s).\n",
473                    err, errtxt.c_str());
474        }
475        break;
476      }
477      use_hugepages_ = true;
478      shmid_ = shmid;
479      buf = shmaddr;
480      logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
481                shmid, shmaddr);
482    } while (0);
483  }
484
485  if ((!use_hugepages_) && prefer_posix_shm) {
486    do {
487      int shm_object;
488      void *shmaddr = NULL;
489
490      shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
491      if (shm_object < 0) {
492        int err = errno;
493        string errtxt = ErrorString(err);
494        logprintf(3, "Log: failed to allocate shared "
495                      "smallpage object - err %d (%s)\n",
496                  err, errtxt.c_str());
497        break;
498      }
499
500      if (0 > ftruncate(shm_object, length)) {
501        int err = errno;
502        string errtxt = ErrorString(err);
503        logprintf(3, "Log: failed to ftruncate shared "
504                      "smallpage object - err %d (%s)\n",
505                  err, errtxt.c_str());
506        break;
507      }
508
509      // 32 bit linux apps can only use ~1.4G of address space.
510      // Use dynamic mapping for allocations larger than that.
511      // Currently perf hit is ~10% for this.
512      if (prefer_dynamic_mapping) {
513        dynamic_mapped_shmem_ = true;
514      } else {
515        // Do a full mapping here otherwise.
516        shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
517                         MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
518                         shm_object, NULL);
519        if (shmaddr == reinterpret_cast<void*>(-1)) {
520          int err = errno;
521          string errtxt = ErrorString(err);
522          logprintf(0, "Log: failed to map shared "
523                       "smallpage object - err %d (%s).\n",
524                    err, errtxt.c_str());
525          break;
526        }
527      }
528
529      use_posix_shm_ = true;
530      shmid_ = shm_object;
531      buf = shmaddr;
532      char location_message[256] = "";
533      if (dynamic_mapped_shmem_) {
534        sprintf(location_message, "mapped as needed");
535      } else {
536        sprintf(location_message, "at %p", shmaddr);
537      }
538      logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
539                shm_object, location_message);
540    } while (0);
541    shm_unlink("/stressapptest");
542  }
543#endif // HAVE_SYS_SHM_H
544
545  if (!use_hugepages_ && !use_posix_shm_) {
546    // Use memalign to ensure that blocks are aligned enough for disk direct IO.
547    buf = static_cast<char*>(memalign(4096, length));
548    if (buf) {
549      logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
550    } else {
551      logprintf(0, "Process Error: memalign returned 0\n");
552      if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
553        logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
554                     "bit process. Please setup shared memory.\n");
555      }
556    }
557  }
558
559  testmem_ = buf;
560  if (buf || dynamic_mapped_shmem_) {
561    testmemsize_ = length;
562  } else {
563    testmemsize_ = 0;
564  }
565
566  return (buf != 0) || dynamic_mapped_shmem_;
567}
568
569// Free the test memory.
570void OsLayer::FreeTestMem() {
571  if (testmem_) {
572    if (use_hugepages_) {
573#ifdef HAVE_SYS_SHM_H
574      shmdt(testmem_);
575      shmctl(shmid_, IPC_RMID, NULL);
576#endif
577    } else if (use_posix_shm_) {
578      if (!dynamic_mapped_shmem_) {
579        munmap(testmem_, testmemsize_);
580      }
581      close(shmid_);
582    } else {
583      free(testmem_);
584    }
585    testmem_ = 0;
586    testmemsize_ = 0;
587  }
588}
589
590
591// Prepare the target memory. It may requre mapping in, or this may be a noop.
592void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
593  sat_assert((offset + length) <= testmemsize_);
594  if (dynamic_mapped_shmem_) {
595    // TODO(nsanders): Check if we can support MAP_NONBLOCK,
596    // and evaluate performance hit from not using it.
597#ifdef HAVE_MMAP64
598    void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
599                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
600                     shmid_, offset);
601#else
602    void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
603                     MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
604                     shmid_, offset);
605#endif
606    if (mapping == MAP_FAILED) {
607      string errtxt = ErrorString(errno);
608      logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
609                   "error: %s.\n",
610                offset, length, errtxt.c_str());
611      sat_assert(0);
612    }
613    return mapping;
614  }
615
616  return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
617}
618
619// Release the test memory resources, if any.
620void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
621  if (dynamic_mapped_shmem_) {
622    int retval = munmap(addr, length);
623    if (retval == -1) {
624      string errtxt = ErrorString(errno);
625      logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
626                   "error: %s.\n",
627                addr, length, errtxt.c_str());
628      sat_assert(0);
629    }
630  }
631}
632
633// No error polling on unknown systems.
634int OsLayer::ErrorPoll() {
635  return 0;
636}
637
638// Generally, poll for errors once per second.
639void OsLayer::ErrorWait() {
640  sat_sleep(1);
641  return;
642}
643
644// Open a PCI bus-dev-func as a file and return its file descriptor.
645// Error is indicated by return value less than zero.
646int OsLayer::PciOpen(int bus, int device, int function) {
647  char dev_file[256];
648
649  snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
650           bus, device, function);
651
652  int fd = open(dev_file, O_RDWR);
653  if (fd == -1) {
654    logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
655                 "function %d (errno %d).\n",
656              bus, device, function, errno);
657    return -1;
658  }
659
660  return fd;
661}
662
663
664// Read and write functions to access PCI config.
665uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
666  // Strict aliasing rules lawyers will cause data corruption
667  // on cast pointers in some gccs.
668  union {
669    uint32 l32;
670    uint16 l16;
671    uint8 l8;
672  } datacast;
673  datacast.l32 = 0;
674  uint32 size = width / 8;
675
676  sat_assert((width == 32) || (width == 16) || (width == 8));
677  sat_assert(offset <= (256 - size));
678
679  if (lseek(fd, offset, SEEK_SET) < 0) {
680    logprintf(0, "Process Error: Can't seek %x\n", offset);
681    return 0;
682  }
683  if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
684    logprintf(0, "Process Error: Can't read %x\n", offset);
685    return 0;
686  }
687
688  // Extract the data.
689  switch (width) {
690    case 8:
691      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
692      return datacast.l8;
693    case 16:
694      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
695      return datacast.l16;
696    case 32:
697      return datacast.l32;
698  }
699  return 0;
700}
701
702void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
703  // Strict aliasing rules lawyers will cause data corruption
704  // on cast pointers in some gccs.
705  union {
706    uint32 l32;
707    uint16 l16;
708    uint8 l8;
709  } datacast;
710  datacast.l32 = 0;
711  uint32 size = width / 8;
712
713  sat_assert((width == 32) || (width == 16) || (width == 8));
714  sat_assert(offset <= (256 - size));
715
716  // Cram the data into the right alignment.
717  switch (width) {
718    case 8:
719      sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
720      datacast.l8 = value;
721    case 16:
722      sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
723      datacast.l16 = value;
724    case 32:
725      datacast.l32 = value;
726  }
727
728  if (lseek(fd, offset, SEEK_SET) < 0) {
729    logprintf(0, "Process Error: Can't seek %x\n", offset);
730    return;
731  }
732  if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
733    logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
734    return;
735  }
736
737  return;
738}
739
740
741
742// Open dev msr.
743int OsLayer::OpenMSR(uint32 core, uint32 address) {
744  char buf[256];
745  snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
746  int fd = open(buf, O_RDWR);
747  if (fd < 0)
748    return fd;
749
750  uint32 pos = lseek(fd, address, SEEK_SET);
751  if (pos != address) {
752    close(fd);
753    logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
754    return -1;
755  }
756
757  return fd;
758}
759
760bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
761  int fd = OpenMSR(core, address);
762  if (fd < 0)
763    return false;
764
765  // Read from the msr.
766  bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
767
768  if (!res)
769    logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
770
771  close(fd);
772
773  return res;
774}
775
776bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
777  int fd = OpenMSR(core, address);
778  if (fd < 0)
779    return false;
780
781  // Write to the msr
782  bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
783
784  if (!res)
785    logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
786
787  close(fd);
788
789  return res;
790}
791
792// Extract bits [n+len-1, n] from a 32 bit word.
793// so GetBitField(0x0f00, 8, 4) == 0xf.
794uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
795  return (val >> n) & ((1<<len) - 1);
796}
797
798// Generic CPU stress workload that would work on any CPU/Platform.
799// Float-point array moving average calculation.
800bool OsLayer::CpuStressWorkload() {
801  double float_arr[100];
802  double sum = 0;
803  unsigned int seed = 12345;
804
805  // Initialize array with random numbers.
806  for (int i = 0; i < 100; i++) {
807#ifdef HAVE_RAND_R
808    float_arr[i] = rand_r(&seed);
809    if (rand_r(&seed) % 2)
810      float_arr[i] *= -1.0;
811#else
812    float_arr[i] = rand();
813    if (rand() % 2)
814      float_arr[i] *= -1.0;
815#endif
816  }
817
818  // Calculate moving average.
819  for (int i = 0; i < 100000000; i++) {
820    float_arr[i % 100] =
821      (float_arr[i % 100] + float_arr[(i + 1) % 100] +
822       float_arr[(i + 99) % 100]) / 3;
823    sum += float_arr[i % 100];
824  }
825
826  // Artificial printf so the loops do not get optimized away.
827  if (sum == 0.0)
828    logprintf(12, "Log: I'm Feeling Lucky!\n");
829  return true;
830}
831
832PCIDevices OsLayer::GetPCIDevices() {
833  PCIDevices device_list;
834  DIR *dir;
835  struct dirent *buf = new struct dirent();
836  struct dirent *entry;
837  dir = opendir(kSysfsPath);
838  if (!dir)
839    logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
840  while (readdir_r(dir, buf, &entry) == 0 && entry) {
841    PCIDevice *device;
842    unsigned int dev, func;
843    // ".", ".." or a special non-device perhaps.
844    if (entry->d_name[0] == '.')
845      continue;
846
847    device = new PCIDevice();
848    if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
849               &device->domain, &device->bus, &dev, &func) < 4) {
850      logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
851      free(device);
852      continue;
853    }
854    device->dev = dev;
855    device->func = func;
856    device->vendor_id = PCIGetValue(entry->d_name, "vendor");
857    device->device_id = PCIGetValue(entry->d_name, "device");
858    PCIGetResources(entry->d_name, device);
859    device_list.insert(device_list.end(), device);
860  }
861  closedir(dir);
862  delete buf;
863  return device_list;
864}
865
866int OsLayer::PCIGetValue(string name, string object) {
867  int fd, len;
868  char filename[256];
869  char buf[256];
870  snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
871           name.c_str(), object.c_str());
872  fd = open(filename, O_RDONLY);
873  if (fd < 0)
874    return 0;
875  len = read(fd, buf, 256);
876  close(fd);
877  buf[len] = '\0';
878  return strtol(buf, NULL, 0);  // NOLINT
879}
880
881int OsLayer::PCIGetResources(string name, PCIDevice *device) {
882  char filename[256];
883  char buf[256];
884  FILE *file;
885  int64 start;
886  int64 end;
887  int64 size;
888  int i;
889  snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
890           name.c_str(), "resource");
891  file = fopen(filename, "r");
892  if (!file) {
893    logprintf(0, "Process Error: impossible to find resource file for %s",
894              filename);
895    return errno;
896  }
897  for (i = 0; i < 6; i++) {
898    if (!fgets(buf, 256, file))
899      break;
900    sscanf(buf, "%llx %llx", &start, &end);  // NOLINT
901    size = 0;
902    if (start)
903      size = end - start + 1;
904    device->base_addr[i] = start;
905    device->size[i] = size;
906  }
907  fclose(file);
908  return 0;
909}
910