1// Copyright (c) 2009, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
33// This forks out to pprof to do the actual symbolizing.  We might
34// be better off writing our own in C++.
35
36#include "config.h"
37#include "symbolize.h"
38#include <stdlib.h>
39#ifdef HAVE_UNISTD_H
40#include <unistd.h>   // for write()
41#endif
42#ifdef HAVE_SYS_SOCKET_H
43#include <sys/socket.h>   // for socketpair() -- needed by Symbolize
44#endif
45#ifdef HAVE_SYS_WAIT_H
46#include <sys/wait.h>   // for wait() -- needed by Symbolize
47#endif
48#ifdef HAVE_POLL_H
49#include <poll.h>
50#endif
51#ifdef __MACH__
52#include <mach-o/dyld.h>   // for GetProgramInvocationName()
53#include <limits.h>        // for PATH_MAX
54#endif
55#if defined(__CYGWIN__) || defined(__CYGWIN32__)
56#include <io.h>            // for get_osfhandle()
57#endif
58#include <string>
59#include "base/commandlineflags.h"
60#include "base/logging.h"
61#include "base/sysinfo.h"
62
63using std::string;
64using tcmalloc::DumpProcSelfMaps;   // from sysinfo.h
65
66
// Path of the pprof program that SymbolTable::Symbolize() probes with
// access() and then exec's to turn addresses into function names.
// The default value is EnvToString("PPROF_PATH", "pprof") -- presumably the
// PPROF_PATH environment variable, falling back to "pprof" when it is unset
// (EnvToString is defined outside this file; confirm there).
DEFINE_string(symbolize_pprof,
              EnvToString("PPROF_PATH", "pprof"),
              "Path to pprof to call for reporting function names.");
70
// Looks up the name/path this program was started with.
// Yields NULL on any OS where we have no way of finding that out, or when
// the OS lookup itself fails.
// Caching the answer in a function-local static is ok because we're not
// called from a thread.
static char* GetProgramInvocationName() {
#if defined(HAVE_PROGRAM_INVOCATION_NAME)
  // gcc provides this variable for us.
  extern char* program_invocation_name;
  return program_invocation_name;
#elif defined(__MACH__)
  // A fixed-size static buffer rather than a heap allocation: we may be
  // computing this at a time when memory is already corrupted.
  static char cached_path[PATH_MAX];
  if (cached_path[0] != '\0')
    return cached_path;             // computed by an earlier call
  uint32_t capacity = sizeof(cached_path);
  // _NSGetExecutablePath() reports failure with a non-zero result.
  return (_NSGetExecutablePath(cached_path, &capacity) == 0) ? cached_path
                                                             : NULL;
#else
  return NULL;   // figure out a way to get argv[0]
#endif
}
91
92// Prints an error message when you can't run Symbolize().
93static void PrintError(const char* reason) {
94  RAW_LOG(ERROR,
95          "*** WARNING: Cannot convert addresses to symbols in output below.\n"
96          "*** Reason: %s\n"
97          "*** If you cannot fix this, try running pprof directly.\n",
98          reason);
99}
100
101void SymbolTable::Add(const void* addr) {
102  symbolization_table_[addr] = "";
103}
104
105const char* SymbolTable::GetSymbol(const void* addr) {
106  return symbolization_table_[addr];
107}
108
109// Updates symbolization_table with the pointers to symbol names corresponding
110// to its keys. The symbol names are stored in out, which is allocated and
111// freed by the caller of this routine.
112// Note that the forking/etc is not thread-safe or re-entrant.  That's
113// ok for the purpose we need -- reporting leaks detected by heap-checker
114// -- but be careful if you decide to use this routine for other purposes.
115// Returns number of symbols read on error.  If can't symbolize, returns 0
116// and emits an error message about why.
117int SymbolTable::Symbolize() {
118#if !defined(HAVE_UNISTD_H)  || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
119  PrintError("Perftools does not know how to call a sub-process on this O/S");
120  return 0;
121#else
122  const char* argv0 = GetProgramInvocationName();
123  if (argv0 == NULL) {  // can't call symbolize if we can't figure out our name
124    PrintError("Cannot figure out the name of this executable (argv0)");
125    return 0;
126  }
127  if (access(FLAGS_symbolize_pprof, R_OK) != 0) {
128    PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
129    return 0;
130  }
131
132  // All this work is to do two-way communication.  ugh.
133  int *child_in = NULL;   // file descriptors
134  int *child_out = NULL;  // for now, we don't worry about child_err
135  int child_fds[5][2];    // socketpair may be called up to five times below
136
137  // The client program may close its stdin and/or stdout and/or stderr
138  // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
139  // In this case the communication between the forked processes may be broken
140  // if either the parent or the child tries to close or duplicate these
141  // descriptors. The loop below produces two pairs of file descriptors, each
142  // greater than 2 (stderr).
143  for (int i = 0; i < 5; i++) {
144    if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
145      for (int j = 0; j < i; j++) {
146        close(child_fds[j][0]);
147        close(child_fds[j][1]);
148        PrintError("Cannot create a socket pair");
149        return 0;
150      }
151    } else {
152      if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
153        if (child_in == NULL) {
154          child_in = child_fds[i];
155        } else {
156          child_out = child_fds[i];
157          for (int j = 0; j < i; j++) {
158            if (child_fds[j] == child_in) continue;
159            close(child_fds[j][0]);
160            close(child_fds[j][1]);
161          }
162          break;
163        }
164      }
165    }
166  }
167
168  switch (fork()) {
169    case -1: {  // error
170      close(child_in[0]);
171      close(child_in[1]);
172      close(child_out[0]);
173      close(child_out[1]);
174      PrintError("Unknown error calling fork()");
175      return 0;
176    }
177    case 0: {  // child
178      close(child_in[1]);   // child uses the 0's, parent uses the 1's
179      close(child_out[1]);  // child uses the 0's, parent uses the 1's
180      close(0);
181      close(1);
182      if (dup2(child_in[0], 0) == -1) _exit(1);
183      if (dup2(child_out[0], 1) == -1) _exit(2);
184      // Unset vars that might cause trouble when we fork
185      unsetenv("CPUPROFILE");
186      unsetenv("HEAPPROFILE");
187      unsetenv("HEAPCHECK");
188      unsetenv("PERFTOOLS_VERBOSE");
189      execlp(FLAGS_symbolize_pprof, FLAGS_symbolize_pprof,
190             "--symbols", argv0, NULL);
191      _exit(3);  // if execvp fails, it's bad news for us
192    }
193    default: {  // parent
194      close(child_in[0]);   // child uses the 0's, parent uses the 1's
195      close(child_out[0]);  // child uses the 0's, parent uses the 1's
196#ifdef HAVE_POLL_H
197      // Waiting for 1ms seems to give the OS time to notice any errors.
198      poll(0, 0, 1);
199      // For maximum safety, we check to make sure the execlp
200      // succeeded before trying to write.  (Otherwise we'll get a
201      // SIGPIPE.)  For systems without poll.h, we'll just skip this
202      // check, and trust that the user set PPROF_PATH correctly!
203      struct pollfd pfd = { child_in[1], POLLOUT, 0 };
204      if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
205          (pfd.revents & (POLLHUP|POLLERR))) {
206        PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
207        return 0;
208      }
209#endif
210#if defined(__CYGWIN__) || defined(__CYGWIN32__)
211      // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd.  Convert.
212      const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
213      DumpProcSelfMaps(symbols_handle);
214#else
215      DumpProcSelfMaps(child_in[1]);  // what pprof expects on stdin
216#endif
217
218      // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
219      // address to feed to pprof.
220      const int kOutBufSize = 24 * symbolization_table_.size();
221      char *pprof_buffer = new char[kOutBufSize];
222      int written = 0;
223      for (SymbolMap::const_iterator iter = symbolization_table_.begin();
224           iter != symbolization_table_.end(); ++iter) {
225        written += snprintf(pprof_buffer + written, kOutBufSize - written,
226                 // pprof expects format to be 0xXXXXXX
227                 "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
228      }
229      write(child_in[1], pprof_buffer, strlen(pprof_buffer));
230      close(child_in[1]);             // that's all we need to write
231
232      const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
233      int total_bytes_read = 0;
234      delete[] symbol_buffer_;
235      symbol_buffer_ = new char[kSymbolBufferSize];
236      memset(symbol_buffer_, '\0', kSymbolBufferSize);
237      while (1) {
238        int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
239                              kSymbolBufferSize - total_bytes_read);
240        if (bytes_read < 0) {
241          close(child_out[1]);
242          PrintError("Cannot read data from pprof");
243          return 0;
244        } else if (bytes_read == 0) {
245          close(child_out[1]);
246          wait(NULL);
247          break;
248        } else {
249          total_bytes_read += bytes_read;
250        }
251      }
252      // We have successfully read the output of pprof into out.  Make sure
253      // the last symbol is full (we can tell because it ends with a \n).
254      if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
255        return 0;
256      // make the symbolization_table_ values point to the output vector
257      SymbolMap::iterator fill = symbolization_table_.begin();
258      int num_symbols = 0;
259      const char *current_name = symbol_buffer_;
260      for (int i = 0; i < total_bytes_read; i++) {
261        if (symbol_buffer_[i] == '\n') {
262          fill->second = current_name;
263          symbol_buffer_[i] = '\0';
264          current_name = symbol_buffer_ + i + 1;
265          fill++;
266          num_symbols++;
267        }
268      }
269      return num_symbols;
270    }
271  }
272  PrintError("Unkown error (should never occur!)");
273  return 0;  // shouldn't be reachable
274#endif
275}
276