1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30#include <config.h>
31#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32)
32# define PLATFORM_WINDOWS 1
33#endif
34
35#include <stdlib.h>   // for getenv()
36#include <stdio.h>    // for snprintf(), sscanf()
37#include <string.h>   // for memmove(), memchr(), etc.
38#include <fcntl.h>    // for open()
39#include <errno.h>    // for errno
40#ifdef HAVE_UNISTD_H
41#include <unistd.h>   // for read()
42#endif
43#if defined __MACH__          // Mac OS X, almost certainly
44#include <mach-o/dyld.h>      // for iterating over dll's in ProcMapsIter
45#include <mach-o/loader.h>    // for iterating over dll's in ProcMapsIter
46#include <sys/types.h>
47#include <sys/sysctl.h>       // how we figure out numcpu's on OS X
48#elif defined __FreeBSD__
49#include <sys/sysctl.h>
50#elif defined __sun__         // Solaris
51#include <procfs.h>           // for, e.g., prmap_t
52#elif defined(PLATFORM_WINDOWS)
53#include <process.h>          // for getpid() (actually, _getpid())
54#include <shlwapi.h>          // for SHGetValueA()
55#include <tlhelp32.h>         // for Module32First()
56#endif
57#include "base/sysinfo.h"
58#include "base/commandlineflags.h"
59#include "base/dynamic_annotations.h"   // for RunningOnValgrind
60#include "base/logging.h"
61#include "base/cycleclock.h"
62
63#ifdef PLATFORM_WINDOWS
64#ifdef MODULEENTRY32
65// In a change from the usual W-A pattern, there is no A variant of
66// MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
67// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be
68// MODULEENTRY32W.  These #undefs are the only way I see to get back
69// access to the original, ascii struct (and related functions).
70#undef MODULEENTRY32
71#undef Module32First
72#undef Module32Next
73#undef PMODULEENTRY32
74#undef LPMODULEENTRY32
75#endif  /* MODULEENTRY32 */
76// MinGW doesn't seem to define this, perhaps some windowsen don't either.
77#ifndef TH32CS_SNAPMODULE32
78#define TH32CS_SNAPMODULE32  0
79#endif  /* TH32CS_SNAPMODULE32 */
80#endif  /* PLATFORM_WINDOWS */
81
// Re-run fn until it doesn't cause EINTR.  fn is re-evaluated in full on
// every retry, and "failure" means the expression's value is negative, so
// callers pass something like "fd = open(...)" to also capture the result.
#define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)

// open/read/close can set errno, which may be illegal at this
// time, so prefer making the syscalls directly if we can.
#ifdef HAVE_SYS_SYSCALL_H
# include <sys/syscall.h>
#endif
// safeopen: raw SYS_open syscall when that number is defined, otherwise
// plain open().
#ifdef SYS_open   // solaris 11, at least sometimes, only defines SYS_openat
# define safeopen(filename, mode)  syscall(SYS_open, filename, mode)
#else
# define safeopen(filename, mode)  open(filename, mode)
#endif
// saferead: raw SYS_read syscall when available, otherwise plain read().
#ifdef SYS_read
# define saferead(fd, buffer, size)  syscall(SYS_read, fd, buffer, size)
#else
# define saferead(fd, buffer, size)  read(fd, buffer, size)
#endif
// safeclose: raw SYS_close syscall when available, otherwise plain close().
#ifdef SYS_close
# define safeclose(fd)  syscall(SYS_close, fd)
#else
# define safeclose(fd)  close(fd)
#endif
105
106// ----------------------------------------------------------------------
107// GetenvBeforeMain()
108// GetUniquePathFromEnv()
109//    Some non-trivial getenv-related functions.
110// ----------------------------------------------------------------------
111
112// It's not safe to call getenv() in the malloc hooks, because they
113// might be called extremely early, before libc is done setting up
114// correctly.  In particular, the thread library may not be done
115// setting up errno.  So instead, we use the built-in __environ array
116// if it exists, and otherwise read /proc/self/environ directly, using
117// system calls to read the file, and thus avoid setting errno.
118// /proc/self/environ has a limit of how much data it exports (around
119// 8K), so it's not an ideal solution.
120const char* GetenvBeforeMain(const char* name) {
121#if defined(HAVE___ENVIRON)   // if we have it, it's declared in unistd.h
122  if (__environ) {            // can exist but be NULL, if statically linked
123    const int namelen = strlen(name);
124    for (char** p = __environ; *p; p++) {
125      if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=')  // it's a match
126        return *p + namelen+1;                                 // point after =
127    }
128    return NULL;
129  }
130#endif
131#if defined(PLATFORM_WINDOWS)
132  // TODO(mbelshe) - repeated calls to this function will overwrite the
133  // contents of the static buffer.
134  static char envvar_buf[1024];  // enough to hold any envvar we care about
135  if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1))
136    return NULL;
137  return envvar_buf;
138#endif
139  // static is ok because this function should only be called before
140  // main(), when we're single-threaded.
141  static char envbuf[16<<10];
142  if (*envbuf == '\0') {    // haven't read the environ yet
143    int fd = safeopen("/proc/self/environ", O_RDONLY);
144    // The -2 below guarantees the last two bytes of the buffer will be \0\0
145    if (fd == -1 ||           // unable to open the file, fall back onto libc
146        saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file
147      RAW_VLOG(1, "Unable to open /proc/self/environ, falling back "
148               "on getenv(\"%s\"), which may not work", name);
149      if (fd != -1) safeclose(fd);
150      return getenv(name);
151    }
152    safeclose(fd);
153  }
154  const int namelen = strlen(name);
155  const char* p = envbuf;
156  while (*p != '\0') {    // will happen at the \0\0 that terminates the buffer
157    // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
158    const char* endp = (char*)memchr(p, '\0', sizeof(envbuf) - (p - envbuf));
159    if (endp == NULL)            // this entry isn't NUL terminated
160      return NULL;
161    else if (!memcmp(p, name, namelen) && p[namelen] == '=')    // it's a match
162      return p + namelen+1;      // point after =
163    p = endp + 1;
164  }
165  return NULL;                   // env var never found
166}
167
168// This takes as an argument an environment-variable name (like
169// CPUPROFILE) whose value is supposed to be a file-path, and sets
170// path to that path, and returns true.  If the env var doesn't exist,
// or is the empty string, leaves path unchanged and returns false.
172// The reason this is non-trivial is that this function handles munged
173// pathnames.  Here's why:
174//
175// If we're a child process of the 'main' process, we can't just use
176// getenv("CPUPROFILE") -- the parent process will be using that path.
177// Instead we append our pid to the pathname.  How do we tell if we're a
178// child process?  Ideally we'd set an environment variable that all
179// our children would inherit.  But -- and this is seemingly a bug in
// gcc -- if you do a setenv() in a shared library in a global
181// constructor, the environment setting is lost by the time main() is
182// called.  The only safe thing we can do in such a situation is to
183// modify the existing envvar.  So we do a hack: in the parent, we set
184// the high bit of the 1st char of CPUPROFILE.  In the child, we
185// notice the high bit is set and append the pid().  This works
186// assuming cpuprofile filenames don't normally have the high bit set
187// in their first character!  If that assumption is violated, we'll
188// still get a profile, but one with an unexpected name.
189// TODO(csilvers): set an envvar instead when we can do it reliably.
bool GetUniquePathFromEnv(const char* env_name, char* path) {
  char* const value = getenv(env_name);
  const bool has_value = (value != NULL) && (value[0] != '\0');
  if (!has_value)
    return false;

  const bool marked_by_parent = (value[0] & 128) != 0;
  if (!marked_by_parent) {
    // Nobody has claimed this path yet: use it verbatim, then flag the
    // variable in place (high bit of the first char) for later readers.
    snprintf(path, PATH_MAX, "%s", value);
    value[0] |= 128;
    return true;
  }

  // The variable was already flagged: strip the flag from the first char
  // and make the name unique by appending "_<pid>".
  snprintf(path, PATH_MAX, "%c%s_%u",
           value[0] & 127, value+1, (unsigned int)(getpid()));
  return true;
}
203
204// ----------------------------------------------------------------------
205// CyclesPerSecond()
206// NumCPUs()
207//    It's important this not call malloc! -- they may be called at
208//    global-construct time, before we've set up all our proper malloc
209//    hooks and such.
210// ----------------------------------------------------------------------
211
// Cached values written by InitializeSystemInfo() and handed out by
// CyclesPerSecond() / NumCPUs().  The initializers are what callers see
// when detection does not overwrite them.
static double cpuinfo_cycles_per_second = 1.0;  // 0.0 might be dangerous
static int cpuinfo_num_cpus = 1;  // Conservative guess
214
// Blocks the calling thread for the given number of milliseconds.
void SleepForMilliseconds(int milliseconds) {
#ifdef PLATFORM_WINDOWS
  _sleep(milliseconds);   // Windows's _sleep takes milliseconds argument
#else
  // Split the interval into whole seconds plus leftover nanoseconds.
  struct timespec remaining;
  remaining.tv_sec = milliseconds / 1000;
  remaining.tv_nsec = (milliseconds % 1000) * 1000000;
  for (;;) {
    if (nanosleep(&remaining, &remaining) == 0)
      break;              // slept the full interval
    if (errno != EINTR)
      break;              // a real error: give up
    // Interrupted by a signal: nanosleep stored the unslept time back into
    // `remaining`, so go around again to finish the interval.
  }
#endif
}
227
228// Helper function estimates cycles/sec by observing cycles elapsed during
229// sleep(). Using small sleep time decreases accuracy significantly.
230static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
231  assert(estimate_time_ms > 0);
232  if (estimate_time_ms <= 0)
233    return 1;
234  double multiplier = 1000.0 / (double)estimate_time_ms;  // scale by this much
235
236  const int64 start_ticks = CycleClock::Now();
237  SleepForMilliseconds(estimate_time_ms);
238  const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks));
239  return guess;
240}
241
242// ReadIntFromFile is only called on linux and cygwin platforms.
243#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
244// Helper function for reading an int from a file. Returns true if successful
245// and the memory location pointed to by value is set to the value read.
// Reads a single decimal integer from the start of `file`.
//
// Returns true and stores the number in *value only when the file could be
// read, begins with a valid integer that fits in an int, and that integer
// is followed by a newline or the end of the data.  On any failure *value
// is left untouched and false is returned.
static bool ReadIntFromFile(const char *file, int *value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1)
    return false;

  char line[1024];
  ssize_t nread;
  do {   // retry if a signal interrupts the read
    nread = read(fd, line, sizeof(line) - 1);
  } while (nread < 0 && errno == EINTR);
  close(fd);
  if (nread <= 0)          // read error or empty file: nothing to parse
    return false;
  line[nread] = '\0';

  char* end;
  errno = 0;               // so ERANGE below can only come from strtol
  const long parsed = strtol(line, &end, 10);
  if (end == line)         // no digits were consumed (e.g. a bare "\n")
    return false;
  if (errno == ERANGE || parsed != static_cast<int>(parsed))
    return false;          // value does not fit in an int
  if (*end != '\n' && *end != '\0')
    return false;          // trailing garbage after the number
  *value = static_cast<int>(parsed);
  return true;
}
263#endif
264
265// WARNING: logging calls back to InitializeSystemInfo() so it must
266// not invoke any logging code.  Also, InitializeSystemInfo() can be
267// called before main() -- in fact it *must* be since already_called
268// isn't protected -- before malloc hooks are properly set up, so
269// we make an effort not to call any routines which might allocate
270// memory.
271
272static void InitializeSystemInfo() {
273  static bool already_called = false;   // safe if we run before threads
274  if (already_called)  return;
275  already_called = true;
276
277  bool saw_mhz = false;
278
279  if (RunningOnValgrind()) {
280    // Valgrind may slow the progress of time artificially (--scale-time=N
281    // option). We thus can't rely on CPU Mhz info stored in /sys or /proc
282    // files. Thus, actually measure the cps.
283    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100);
284    saw_mhz = true;
285  }
286
287#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
288  char line[1024];
289  char* err;
290  int freq;
291
292  // If the kernel is exporting the tsc frequency use that. There are issues
293  // where cpuinfo_max_freq cannot be relied on because the BIOS may be
294  // exporintg an invalid p-state (on x86) or p-states may be used to put the
295  // processor in a new mode (turbo mode). Essentially, those frequencies
296  // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
297  // well.
298  if (!saw_mhz &&
299      ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
300      // The value is in kHz (as the file name suggests).  For example, on a
301      // 2GHz warpstation, the file contains the value "2000000".
302      cpuinfo_cycles_per_second = freq * 1000.0;
303      saw_mhz = true;
304  }
305
306  // If CPU scaling is in effect, we want to use the *maximum* frequency,
307  // not whatever CPU speed some random processor happens to be using now.
308  if (!saw_mhz &&
309      ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
310                      &freq)) {
311    // The value is in kHz.  For example, on a 2GHz machine, the file
312    // contains the value "2000000".
313    cpuinfo_cycles_per_second = freq * 1000.0;
314    saw_mhz = true;
315  }
316
317  // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
318  const char* pname = "/proc/cpuinfo";
319  int fd = open(pname, O_RDONLY);
320  if (fd == -1) {
321    perror(pname);
322    if (!saw_mhz) {
323      cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
324    }
325    return;          // TODO: use generic tester instead?
326  }
327
328  double bogo_clock = 1.0;
329  bool saw_bogo = false;
330  int num_cpus = 0;
331  line[0] = line[1] = '\0';
332  int chars_read = 0;
333  do {   // we'll exit when the last read didn't read anything
334    // Move the next line to the beginning of the buffer
335    const int oldlinelen = strlen(line);
336    if (sizeof(line) == oldlinelen + 1)    // oldlinelen took up entire line
337      line[0] = '\0';
338    else                                   // still other lines left to save
339      memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
340    // Terminate the new line, reading more if we can't find the newline
341    char* newline = strchr(line, '\n');
342    if (newline == NULL) {
343      const int linelen = strlen(line);
344      const int bytes_to_read = sizeof(line)-1 - linelen;
345      assert(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
346      chars_read = read(fd, line + linelen, bytes_to_read);
347      line[linelen + chars_read] = '\0';
348      newline = strchr(line, '\n');
349    }
350    if (newline != NULL)
351      *newline = '\0';
352
353    // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
354    // accept postive values. Some environments (virtual machines) report zero,
355    // which would cause infinite looping in WallTime_Init.
356    if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
357      const char* freqstr = strchr(line, ':');
358      if (freqstr) {
359        cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
360        if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
361          saw_mhz = true;
362      }
363    } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
364      const char* freqstr = strchr(line, ':');
365      if (freqstr) {
366        bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
367        if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
368          saw_bogo = true;
369      }
370    } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
371      num_cpus++;  // count up every time we see an "processor :" entry
372    }
373  } while (chars_read > 0);
374  close(fd);
375
376  if (!saw_mhz) {
377    if (saw_bogo) {
378      // If we didn't find anything better, we'll use bogomips, but
379      // we're not happy about it.
380      cpuinfo_cycles_per_second = bogo_clock;
381    } else {
382      // If we don't even have bogomips, we'll use the slow estimation.
383      cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
384    }
385  }
386  if (cpuinfo_cycles_per_second == 0.0) {
387    cpuinfo_cycles_per_second = 1.0;   // maybe unnecessary, but safe
388  }
389  if (num_cpus > 0) {
390    cpuinfo_num_cpus = num_cpus;
391  }
392
393#elif defined __FreeBSD__
394  // For this sysctl to work, the machine must be configured without
395  // SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
396  // and later.  Before that, it's a 32-bit quantity (and gives the
397  // wrong answer on machines faster than 2^32 Hz).  See
398  //  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
399  // But also compare FreeBSD 7.0:
400  //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
401  //  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
402  // To FreeBSD 6.3 (it's the same in 6-STABLE):
403  //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
404  //  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
405#if __FreeBSD__ >= 7
406  uint64_t hz = 0;
407#else
408  unsigned int hz = 0;
409#endif
410  size_t sz = sizeof(hz);
411  const char *sysctl_path = "machdep.tsc_freq";
412  if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
413    fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
414            sysctl_path, strerror(errno));
415    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
416  } else {
417    cpuinfo_cycles_per_second = hz;
418  }
419  // TODO(csilvers): also figure out cpuinfo_num_cpus
420
421#elif defined(PLATFORM_WINDOWS)
422# pragma comment(lib, "shlwapi.lib")  // for SHGetValue()
423  // In NT, read MHz from the registry. If we fail to do so or we're in win9x
424  // then make a crude estimate.
425  OSVERSIONINFO os;
426  os.dwOSVersionInfoSize = sizeof(os);
427  DWORD data, data_size = sizeof(data);
428  if (GetVersionEx(&os) &&
429      os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
430      SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
431                         "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
432                           "~MHz", NULL, &data, &data_size)))
433    cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
434  else
435    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
436
437  // Get the number of processors.
438  SYSTEM_INFO info;
439  GetSystemInfo(&info);
440  cpuinfo_num_cpus = info.dwNumberOfProcessors;
441
442#elif defined(__MACH__) && defined(__APPLE__)
443  // returning "mach time units" per second. the current number of elapsed
444  // mach time units can be found by calling uint64 mach_absolute_time();
445  // while not as precise as actual CPU cycles, it is accurate in the face
446  // of CPU frequency scaling and multi-cpu/core machines.
447  // Our mac users have these types of machines, and accuracy
448  // (i.e. correctness) trumps precision.
449  // See cycleclock.h: CycleClock::Now(), which returns number of mach time
450  // units on Mac OS X.
451  mach_timebase_info_data_t timebase_info;
452  mach_timebase_info(&timebase_info);
453  double mach_time_units_per_nanosecond =
454      static_cast<double>(timebase_info.denom) /
455      static_cast<double>(timebase_info.numer);
456  cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
457
458  int num_cpus = 0;
459  size_t size = sizeof(num_cpus);
460  int numcpus_name[] = { CTL_HW, HW_NCPU };
461  if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
462      == 0
463      && (size == sizeof(num_cpus)))
464    cpuinfo_num_cpus = num_cpus;
465
466#else
467  // Generic cycles per second counter
468  cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
469#endif
470}
471
472double CyclesPerSecond(void) {
473  InitializeSystemInfo();
474  return cpuinfo_cycles_per_second;
475}
476
477int NumCPUs(void) {
478  InitializeSystemInfo();
479  return cpuinfo_num_cpus;
480}
481
482// ----------------------------------------------------------------------
483// HasPosixThreads()
484//      Return true if we're running POSIX (e.g., NPTL on Linux)
//      threads, as opposed to a non-POSIX thread library.  The thing
486//      that we care about is whether a thread's pid is the same as
487//      the thread that spawned it.  If so, this function returns
488//      true.
489// ----------------------------------------------------------------------
bool HasPosixThreads() {
#if defined(__linux__)
#ifndef _CS_GNU_LIBPTHREAD_VERSION
#define _CS_GNU_LIBPTHREAD_VERSION 3
#endif
  static const char kNptlPrefix[] = "NPTL";
  char version[32];
  // A zero result means confstr() doesn't recognize the name; we take that
  // to mean this glibc is providing LinuxThreads.
  const size_t needed =
      confstr(_CS_GNU_LIBPTHREAD_VERSION, version, sizeof(version));
  // Otherwise the answer is whether the reported version starts with "NPTL".
  return needed != 0 &&
         strncmp(version, kNptlPrefix, sizeof(kNptlPrefix) - 1) == 0;
#elif defined(PLATFORM_WINDOWS) || defined(__CYGWIN__) || defined(__CYGWIN32__)
  return false;
#else  // other OS
  return true;      //  Assume that everything else has Posix
#endif  // __linux__ / windows+cygwin / other
}
507
508// ----------------------------------------------------------------------
509
510#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__
511static void ConstructFilename(const char* spec, pid_t pid,
512                              char* buf, int buf_size) {
513  CHECK_LT(snprintf(buf, buf_size,
514                    spec,
515                    static_cast<int>(pid ? pid : getpid())), buf_size);
516}
517#endif
518
519// A templatized helper function instantiated for Mach (OS X) only.
520// It can handle finding info for both 32 bits and 64 bits.
521// Returns true if it successfully handled the hdr, false else.
522#ifdef __MACH__          // Mac OS X, almost certainly
523template<uint32_t kMagic, uint32_t kLCSegment,
524         typename MachHeader, typename SegmentCommand>
525static bool NextExtMachHelper(const mach_header* hdr,
526                              int current_image, int current_load_cmd,
527                              uint64 *start, uint64 *end, char **flags,
528                              uint64 *offset, int64 *inode, char **filename,
529                              uint64 *file_mapping, uint64 *file_pages,
530                              uint64 *anon_mapping, uint64 *anon_pages,
531                              dev_t *dev) {
532  static char kDefaultPerms[5] = "r-xp";
533  if (hdr->magic != kMagic)
534    return false;
535  const char* lc = (const char *)hdr + sizeof(MachHeader);
536  // TODO(csilvers): make this not-quadradic (increment and hold state)
537  for (int j = 0; j < current_load_cmd; j++)  // advance to *our* load_cmd
538    lc += ((const load_command *)lc)->cmdsize;
539  if (((const load_command *)lc)->cmd == kLCSegment) {
540    const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image);
541    const SegmentCommand* sc = (const SegmentCommand *)lc;
542    if (start) *start = sc->vmaddr + dlloff;
543    if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
544    if (flags) *flags = kDefaultPerms;  // can we do better?
545    if (offset) *offset = sc->fileoff;
546    if (inode) *inode = 0;
547    if (filename)
548      *filename = const_cast<char*>(_dyld_get_image_name(current_image));
549    if (file_mapping) *file_mapping = 0;
550    if (file_pages) *file_pages = 0;   // could we use sc->filesize?
551    if (anon_mapping) *anon_mapping = 0;
552    if (anon_pages) *anon_pages = 0;
553    if (dev) *dev = 0;
554    return true;
555  }
556
557  return false;
558}
559#endif
560
561ProcMapsIterator::ProcMapsIterator(pid_t pid) {
562  Init(pid, NULL, false);
563}
564
565ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) {
566  Init(pid, buffer, false);
567}
568
569ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer,
570                                   bool use_maps_backing) {
571  Init(pid, buffer, use_maps_backing);
572}
573
574void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
575                            bool use_maps_backing) {
576  pid_ = pid;
577  using_maps_backing_ = use_maps_backing;
578  dynamic_buffer_ = NULL;
579  if (!buffer) {
580    // If the user didn't pass in any buffer storage, allocate it
581    // now. This is the normal case; the signal handler passes in a
582    // static buffer.
583    buffer = dynamic_buffer_ = new Buffer;
584  } else {
585    dynamic_buffer_ = NULL;
586  }
587
588  ibuf_ = buffer->buf_;
589
590  stext_ = etext_ = nextline_ = ibuf_;
591  ebuf_ = ibuf_ + Buffer::kBufSize - 1;
592  nextline_ = ibuf_;
593
594#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
595  if (use_maps_backing) {  // don't bother with clever "self" stuff in this case
596    ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize);
597  } else if (pid == 0) {
598    // We have to kludge a bit to deal with the args ConstructFilename
599    // expects.  The 1 is never used -- it's only impt. that it's not 0.
600    ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize);
601  } else {
602    ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize);
603  }
604  // No error logging since this can be called from the crash dump
605  // handler at awkward moments. Users should call Valid() before
606  // using.
607  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
608#elif defined(__FreeBSD__)
609  // We don't support maps_backing on freebsd
610  if (pid == 0) {
611    ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize);
612  } else {
613    ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
614  }
615  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
616#elif defined(__sun__)
617  if (pid == 0) {
618    ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize);
619  } else {
620    ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize);
621  }
622  NO_INTR(fd_ = open(ibuf_, O_RDONLY));
623#elif defined(__MACH__)
624  current_image_ = _dyld_image_count();   // count down from the top
625  current_load_cmd_ = -1;
626#elif defined(PLATFORM_WINDOWS)
627  snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE |
628                                       TH32CS_SNAPMODULE32,
629                                       GetCurrentProcessId());
630  memset(&module_, 0, sizeof(module_));
631#else
632  fd_ = -1;   // so Valid() is always false
633#endif
634
635}
636
637ProcMapsIterator::~ProcMapsIterator() {
638#if defined(PLATFORM_WINDOWS)
639  if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_);
640#elif defined(__MACH__)
641  // no cleanup necessary!
642#else
643  if (fd_ >= 0) NO_INTR(close(fd_));
644#endif
645  delete dynamic_buffer_;
646}
647
648bool ProcMapsIterator::Valid() const {
649#if defined(PLATFORM_WINDOWS)
650  return snapshot_ != INVALID_HANDLE_VALUE;
651#elif defined(__MACH__)
652  return 1;
653#else
654  return fd_ != -1;
655#endif
656}
657
658bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags,
659                            uint64 *offset, int64 *inode, char **filename) {
660  return NextExt(start, end, flags, offset, inode, filename, NULL, NULL,
661                 NULL, NULL, NULL);
662}
663
664// This has too many arguments.  It should really be building
665// a map object and returning it.  The problem is that this is called
666// when the memory allocator state is undefined, hence the arguments.
667bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
668                               uint64 *offset, int64 *inode, char **filename,
669                               uint64 *file_mapping, uint64 *file_pages,
670                               uint64 *anon_mapping, uint64 *anon_pages,
671                               dev_t *dev) {
672
673#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
674  do {
675    // Advance to the start of the next line
676    stext_ = nextline_;
677
678    // See if we have a complete line in the buffer already
679    nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_));
680    if (!nextline_) {
681      // Shift/fill the buffer so we do have a line
682      int count = etext_ - stext_;
683
684      // Move the current text to the start of the buffer
685      memmove(ibuf_, stext_, count);
686      stext_ = ibuf_;
687      etext_ = ibuf_ + count;
688
689      int nread = 0;            // fill up buffer with text
690      while (etext_ < ebuf_) {
691        NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_));
692        if (nread > 0)
693          etext_ += nread;
694        else
695          break;
696      }
697
698      // Zero out remaining characters in buffer at EOF to avoid returning
699      // garbage from subsequent calls.
700      if (etext_ != ebuf_ && nread == 0) {
701        memset(etext_, 0, ebuf_ - etext_);
702      }
703      *etext_ = '\n';   // sentinel; safe because ibuf extends 1 char beyond ebuf
704      nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_));
705    }
706    *nextline_ = 0;                // turn newline into nul
707    nextline_ += ((nextline_ < etext_)? 1 : 0);  // skip nul if not end of text
708    // stext_ now points at a nul-terminated line
709    uint64 tmpstart, tmpend, tmpoffset;
710    int64 tmpinode;
711    int major, minor;
712    unsigned filename_offset = 0;
713#if defined(__linux__)
714    // for now, assume all linuxes have the same format
715    if (sscanf(stext_, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
716               start ? start : &tmpstart,
717               end ? end : &tmpend,
718               flags_,
719               offset ? offset : &tmpoffset,
720               &major, &minor,
721               inode ? inode : &tmpinode, &filename_offset) != 7) continue;
722#elif defined(__CYGWIN__) || defined(__CYGWIN32__)
723    // cygwin is like linux, except the third field is the "entry point"
724    // rather than the offset (see format_process_maps at
725    // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src
726    // Offset is always be 0 on cygwin: cygwin implements an mmap
727    // by loading the whole file and then calling NtMapViewOfSection.
728    // Cygwin also seems to set its flags kinda randomly; use windows default.
729    char tmpflags[5];
730    if (offset)
731      *offset = 0;
732    strcpy(flags_, "r-xp");
733    if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
734               start ? start : &tmpstart,
735               end ? end : &tmpend,
736               tmpflags,
737               &tmpoffset,
738               &major, &minor,
739               inode ? inode : &tmpinode, &filename_offset) != 7) continue;
740#elif defined(__FreeBSD__)
741    // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup
742    tmpstart = tmpend = tmpoffset = 0;
743    tmpinode = 0;
744    major = minor = 0;   // can't get this info in freebsd
745    if (inode)
746      *inode = 0;        // nor this
747    if (offset)
748      *offset = 0;       // seems like this should be in there, but maybe not
749    // start end resident privateresident obj(?) prot refcnt shadowcnt
750    // flags copy_on_write needs_copy type filename:
751    // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat
752    if (sscanf(stext_, "0x%"SCNx64" 0x%"SCNx64" %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
753               start ? start : &tmpstart,
754               end ? end : &tmpend,
755               flags_,
756               &filename_offset) != 3) continue;
757#endif
758
759    // Depending on the Linux kernel being used, there may or may not be a space
760    // after the inode if there is no filename.  sscanf will in such situations
761    // nondeterministically either fill in filename_offset or not (the results
762    // differ on multiple calls in the same run even with identical arguments).
763    // We don't want to wander off somewhere beyond the end of the string.
764    size_t stext_length = strlen(stext_);
765    if (filename_offset == 0 || filename_offset > stext_length)
766      filename_offset = stext_length;
767
768    // We found an entry
769    if (flags) *flags = flags_;
770    if (filename) *filename = stext_ + filename_offset;
771    if (dev) *dev = minor | (major << 8);
772
773    if (using_maps_backing_) {
774      // Extract and parse physical page backing info.
775      char *backing_ptr = stext_ + filename_offset +
776          strlen(stext_+filename_offset);
777
778      // find the second '('
779      int paren_count = 0;
780      while (--backing_ptr > stext_) {
781        if (*backing_ptr == '(') {
782          ++paren_count;
783          if (paren_count >= 2) {
784            uint64 tmp_file_mapping;
785            uint64 tmp_file_pages;
786            uint64 tmp_anon_mapping;
787            uint64 tmp_anon_pages;
788
789            sscanf(backing_ptr+1, "F %"SCNx64" %"SCNd64") (A %"SCNx64" %"SCNd64")",
790                   file_mapping ? file_mapping : &tmp_file_mapping,
791                   file_pages ? file_pages : &tmp_file_pages,
792                   anon_mapping ? anon_mapping : &tmp_anon_mapping,
793                   anon_pages ? anon_pages : &tmp_anon_pages);
794            // null terminate the file name (there is a space
795            // before the first (.
796            backing_ptr[-1] = 0;
797            break;
798          }
799        }
800      }
801    }
802
803    return true;
804  } while (etext_ > ibuf_);
805#elif defined(__sun__)
806  // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1
807  static char kPerms[8][4] = { "---", "--x", "-w-", "-wx",
808                               "r--", "r-x", "rw-", "rwx" };
809  COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4);
810  COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2);
811  COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1);
812  Buffer object_path;
813  int nread = 0;            // fill up buffer with text
814  NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t)));
815  if (nread == sizeof(prmap_t)) {
816    long inode_from_mapname = 0;
817    prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_);
818    // Best-effort attempt to get the inode from the filename.  I think the
819    // two middle ints are major and minor device numbers, but I'm not sure.
820    sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname);
821
822    if (pid_ == 0) {
823      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
824                        "/proc/self/path/%s", mapinfo->pr_mapname),
825               Buffer::kBufSize);
826    } else {
827      CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize,
828                        "/proc/%d/path/%s",
829                        static_cast<int>(pid_), mapinfo->pr_mapname),
830               Buffer::kBufSize);
831    }
832    ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX);
833    CHECK_LT(len, PATH_MAX);
834    if (len < 0)
835      len = 0;
836    current_filename_[len] = '\0';
837
838    if (start) *start = mapinfo->pr_vaddr;
839    if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size;
840    if (flags) *flags = kPerms[mapinfo->pr_mflags & 7];
841    if (offset) *offset = mapinfo->pr_offset;
842    if (inode) *inode = inode_from_mapname;
843    if (filename) *filename = current_filename_;
844    if (file_mapping) *file_mapping = 0;
845    if (file_pages) *file_pages = 0;
846    if (anon_mapping) *anon_mapping = 0;
847    if (anon_pages) *anon_pages = 0;
848    if (dev) *dev = 0;
849    return true;
850  }
851#elif defined(__MACH__)
852  // We return a separate entry for each segment in the DLL. (TODO(csilvers):
853  // can we do better?)  A DLL ("image") has load-commands, some of which
854  // talk about segment boundaries.
855  // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912
856  for (; current_image_ >= 0; current_image_--) {
857    const mach_header* hdr = _dyld_get_image_header(current_image_);
858    if (!hdr) continue;
859    if (current_load_cmd_ < 0)   // set up for this image
860      current_load_cmd_ = hdr->ncmds;  // again, go from the top down
861
862    // We start with the next load command (we've already looked at this one).
863    for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) {
864#ifdef MH_MAGIC_64
865      if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64,
866                            struct mach_header_64, struct segment_command_64>(
867                                hdr, current_image_, current_load_cmd_,
868                                start, end, flags, offset, inode, filename,
869                                file_mapping, file_pages, anon_mapping,
870                                anon_pages, dev)) {
871        return true;
872      }
873#endif
874      if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT,
875                            struct mach_header, struct segment_command>(
876                                hdr, current_image_, current_load_cmd_,
877                                start, end, flags, offset, inode, filename,
878                                file_mapping, file_pages, anon_mapping,
879                                anon_pages, dev)) {
880        return true;
881      }
882    }
883    // If we get here, no more load_cmd's in this image talk about
884    // segments.  Go on to the next image.
885  }
886#elif defined(PLATFORM_WINDOWS)
887  static char kDefaultPerms[5] = "r-xp";
888  BOOL ok;
889  if (module_.dwSize == 0) {  // only possible before first call
890    module_.dwSize = sizeof(module_);
891    ok = Module32First(snapshot_, &module_);
892  } else {
893    ok = Module32Next(snapshot_, &module_);
894  }
895  if (ok) {
896    uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr);
897    if (start) *start = base_addr;
898    if (end) *end = base_addr + module_.modBaseSize;
899    if (flags) *flags = kDefaultPerms;
900    if (offset) *offset = 0;
901    if (inode) *inode = 0;
902    if (filename) *filename = module_.szExePath;
903    if (file_mapping) *file_mapping = 0;
904    if (file_pages) *file_pages = 0;
905    if (anon_mapping) *anon_mapping = 0;
906    if (anon_pages) *anon_pages = 0;
907    if (dev) *dev = 0;
908    return true;
909  }
910#endif
911
912  // We didn't find anything
913  return false;
914}
915
916int ProcMapsIterator::FormatLine(char* buffer, int bufsize,
917                                 uint64 start, uint64 end, const char *flags,
918                                 uint64 offset, int64 inode,
919                                 const char *filename, dev_t dev) {
920  // We assume 'flags' looks like 'rwxp' or 'rwx'.
921  char r = (flags && flags[0] == 'r') ? 'r' : '-';
922  char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-';
923  char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-';
924  // p always seems set on linux, so we set the default to 'p', not '-'
925  char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p')
926      ? '-' : 'p';
927
928  const int rc = snprintf(buffer, bufsize,
929                          "%08"PRIx64"-%08"PRIx64" %c%c%c%c %08"PRIx64" %02x:%02x %-11"PRId64" %s\n",
930                          start, end, r,w,x,p, offset,
931                          static_cast<int>(dev/256), static_cast<int>(dev%256),
932                          inode, filename);
933  return (rc < 0 || rc >= bufsize) ? 0 : rc;
934}
935
936namespace tcmalloc {
937
938// Helper to add the list of mapped shared libraries to a profile.
939// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size'
940// and return the actual size occupied in 'buf'.  We fill wrote_all to true
941// if we successfully wrote all proc lines to buf, false else.
942// We do not provision for 0-terminating 'buf'.
943int FillProcSelfMaps(char buf[], int size, bool* wrote_all) {
944  ProcMapsIterator::Buffer iterbuf;
945  ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
946
947  uint64 start, end, offset;
948  int64 inode;
949  char *flags, *filename;
950  int bytes_written = 0;
951  *wrote_all = true;
952  while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
953    const int line_length = it.FormatLine(buf + bytes_written,
954                                          size - bytes_written,
955                                          start, end, flags, offset,
956                                          inode, filename, 0);
957    if (line_length == 0)
958      *wrote_all = false;     // failed to write this line out
959    else
960      bytes_written += line_length;
961
962  }
963  return bytes_written;
964}
965
966// Dump the same data as FillProcSelfMaps reads to fd.
967// It seems easier to repeat parts of FillProcSelfMaps here than to
968// reuse it via a call.
969void DumpProcSelfMaps(RawFD fd) {
970  ProcMapsIterator::Buffer iterbuf;
971  ProcMapsIterator it(0, &iterbuf);   // 0 means "current pid"
972
973  uint64 start, end, offset;
974  int64 inode;
975  char *flags, *filename;
976  ProcMapsIterator::Buffer linebuf;
977  while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
978    int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_),
979                                start, end, flags, offset, inode, filename,
980                                0);
981    RawWrite(fd, linebuf.buf_, written);
982  }
983}
984
985}  // namespace tcmalloc
986