pprof revision 73b37a9697acd53496bbef06ed25696e0c897341
1#! /usr/bin/env perl
2
3# Copyright (c) 1998-2007, Google Inc.
4# All rights reserved.
5# 
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
9# 
10#     * Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12#     * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following disclaimer
14# in the documentation and/or other materials provided with the
15# distribution.
16#     * Neither the name of Google Inc. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19# 
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32# ---
33# Program for printing the profile generated by common/profiler.cc,
34# or by the heap profiler (common/debugallocation.cc)
35#
36# The profile contains a sequence of entries of the form:
37#       <count> <stack trace>
38# This program parses the profile, and generates user-readable
39# output.
40#
41# Examples:
42#
43# % tools/pprof "program" "profile"
44#   Enters "interactive" mode
45#
46# % tools/pprof --text "program" "profile"
47#   Generates one line per procedure
48#
49# % tools/pprof --gv "program" "profile"
50#   Generates annotated call-graph and displays via "gv"
51#
52# % tools/pprof --gv --focus=Mutex "program" "profile"
53#   Restrict to code paths that involve an entry that matches "Mutex"
54#
55# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile"
56#   Restrict to code paths that involve an entry that matches "Mutex"
57#   and does not match "string"
58#
59# % tools/pprof --list=IBF_CheckDocid "program" "profile"
60#   Generates disassembly listing of all routines with at least one
61#   sample that match the --list=<regexp> pattern.  The listing is
62#   annotated with the flat and cumulative sample counts at each line.
63#
64# % tools/pprof --disasm=IBF_CheckDocid "program" "profile"
65#   Generates disassembly listing of all routines with at least one
66#   sample that match the --disasm=<regexp> pattern.  The listing is
67#   annotated with the flat and cumulative sample counts at each PC value.
68#
69# TODO: Use color to indicate files?
70
71use strict;
72use warnings;
73use Getopt::Long;
74
75my $PPROF_VERSION = "2.0";
76
77# These are the object tools we use which can come from a
78# user-specified location using --tools, from the PPROF_TOOLS
79# environment variable, or from the environment.
80my %obj_tool_map = (
81  "objdump" => "objdump",
82  "nm" => "nm",
83  "addr2line" => "addr2line",
84  "c++filt" => "c++filt",
85  ## ConfigureObjTools may add architecture-specific entries:
86  #"nm_pdb" => "nm-pdb",       # for reading windows (PDB-format) executables
87  #"addr2line_pdb" => "addr2line-pdb",                                # ditto
88  #"otool" => "otool",         # equivalent of objdump on OS X
89);
90# NOTE: these are lists, so you can put in commandline flags if you want.
91my @DOT = ("dot");          # leave non-absolute, since it may be in /usr/local
92my @GV = ("gv");
93my @EVINCE = ("evince");    # could also be xpdf or perhaps acroread
94my @KCACHEGRIND = ("kcachegrind");
95my @PS2PDF = ("ps2pdf");
96# These are used for dynamic profiles
97my @URL_FETCHER = ("curl", "-s");
98
99# These are the web pages that servers need to support for dynamic profiles
100my $HEAP_PAGE = "/pprof/heap";
101my $PROFILE_PAGE = "/pprof/profile";   # must support cgi-param "?seconds=#"
102my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param
103                                                # ?seconds=#&event=x&period=n
104my $GROWTH_PAGE = "/pprof/growth";
105my $CONTENTION_PAGE = "/pprof/contention";
106my $WALL_PAGE = "/pprof/wall(?:\\?.*)?";  # accepts options like namefilter
107my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
108my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param
109                                                       # "?seconds=#",
110                                                       # "?tags_regexp=#" and
111                                                       # "?type=#".
112my $SYMBOL_PAGE = "/pprof/symbol";     # must support symbol lookup via POST
113my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
114
115# These are the web pages that can be named on the command line.
116# All the alternatives must begin with /.
117my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
118               "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
119               "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)";
120
121# default binary name
122my $UNKNOWN_BINARY = "(unknown)";
123
124# There is a pervasive dependency on the length (in hex characters,
125# i.e., nibbles) of an address, distinguishing between 32-bit and
126# 64-bit profiles.  To err on the safe size, default to 64-bit here:
127my $address_length = 16;
128
129my $dev_null = "/dev/null";
130if (! -e $dev_null && $^O =~ /MSWin/) {    # $^O is the OS perl was built for
131  $dev_null = "nul";
132}
133
134# A list of paths to search for shared object files
135my @prefix_list = ();
136
137# Special routine name that should not have any symbols.
138# Used as separator to parse "addr2line -i" output.
139my $sep_symbol = '_fini';
140my $sep_address = undef;
141
142##### Argument parsing #####
143
144sub usage_string {
145  return <<EOF;
146Usage:
147pprof [options] <program> <profiles>
148   <profiles> is a space separated list of profile names.
149pprof [options] <symbolized-profiles>
150   <symbolized-profiles> is a list of profile files where each file contains
151   the necessary symbol mappings  as well as profile data (likely generated
152   with --raw).
153pprof [options] <profile>
154   <profile> is a remote form.  Symbols are obtained from host:port$SYMBOL_PAGE
155
156   Each name can be:
157   /path/to/profile        - a path to a profile file
158   host:port[/<service>]   - a location of a service to get profile from
159
160   The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
161                         $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
162                         $CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
163   For instance:
164     pprof http://myserver.com:80$HEAP_PAGE
165   If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
166pprof --symbols <program>
167   Maps addresses to symbol names.  In this mode, stdin should be a
168   list of library mappings, in the same format as is found in the heap-
169   and cpu-profile files (this loosely matches that of /proc/self/maps
170   on linux), followed by a list of hex addresses to map, one per line.
171
172   For more help with querying remote servers, including how to add the
173   necessary server-side support code, see this filename (or one like it):
174
175   /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html
176
177Options:
178   --cum               Sort by cumulative data
179   --base=<base>       Subtract <base> from <profile> before display
180   --interactive       Run in interactive mode (interactive "help" gives help) [default]
181   --seconds=<n>       Length of time for dynamic profiles [default=30 secs]
182   --add_lib=<file>    Read additional symbols and line info from the given library
183   --lib_prefix=<dir>  Comma separated list of library path prefixes
184
185Reporting Granularity:
186   --addresses         Report at address level
187   --lines             Report at source line level
188   --functions         Report at function level [default]
189   --files             Report at source file level
190
191Output type:
192   --text              Generate text report
193   --callgrind         Generate callgrind format to stdout
194   --gv                Generate Postscript and display
195   --evince            Generate PDF and display
196   --web               Generate SVG and display
197   --list=<regexp>     Generate source listing of matching routines
198   --disasm=<regexp>   Generate disassembly of matching routines
199   --symbols           Print demangled symbol names found at given addresses
200   --dot               Generate DOT file to stdout
201   --ps                Generate Postcript to stdout
202   --pdf               Generate PDF to stdout
203   --svg               Generate SVG to stdout
204   --gif               Generate GIF to stdout
205   --raw               Generate symbolized pprof data (useful with remote fetch)
206
207Heap-Profile Options:
208   --inuse_space       Display in-use (mega)bytes [default]
209   --inuse_objects     Display in-use objects
210   --alloc_space       Display allocated (mega)bytes
211   --alloc_objects     Display allocated objects
212   --show_bytes        Display space in bytes
213   --drop_negative     Ignore negative differences
214
215Contention-profile options:
216   --total_delay       Display total delay at each region [default]
217   --contentions       Display number of delays at each region
218   --mean_delay        Display mean delay at each region
219
220Call-graph Options:
221   --nodecount=<n>     Show at most so many nodes [default=80]
222   --nodefraction=<f>  Hide nodes below <f>*total [default=.005]
223   --edgefraction=<f>  Hide edges below <f>*total [default=.001]
224   --maxdegree=<n>     Max incoming/outgoing edges per node [default=8]
225   --focus=<regexp>    Focus on nodes matching <regexp>
226   --ignore=<regexp>   Ignore nodes matching <regexp>
227   --scale=<n>         Set GV scaling [default=0]
228   --heapcheck         Make nodes with non-0 object counts
229                       (i.e. direct leak generators) more visible
230
231Miscellaneous:
232   --tools=<prefix or binary:fullpath>[,...]   \$PATH for object tool pathnames
233   --test              Run unit tests
234   --help              This message
235   --version           Version information
236
237Environment Variables:
238   PPROF_TMPDIR        Profiles directory. Defaults to \$HOME/pprof
239   PPROF_TOOLS         Prefix for object tools pathnames
240
241Examples:
242
243pprof /bin/ls ls.prof
244                       Enters "interactive" mode
245pprof --text /bin/ls ls.prof
246                       Outputs one line per procedure
247pprof --web /bin/ls ls.prof
248                       Displays annotated call-graph in web browser
249pprof --gv /bin/ls ls.prof
250                       Displays annotated call-graph via 'gv'
251pprof --gv --focus=Mutex /bin/ls ls.prof
252                       Restricts to code paths including a .*Mutex.* entry
253pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
254                       Code paths including Mutex but not string
255pprof --list=getdir /bin/ls ls.prof
256                       (Per-line) annotated source listing for getdir()
257pprof --disasm=getdir /bin/ls ls.prof
258                       (Per-PC) annotated disassembly for getdir()
259
260pprof http://localhost:1234/
261                       Enters "interactive" mode
262pprof --text localhost:1234
263                       Outputs one line per procedure for localhost:1234
264pprof --raw localhost:1234 > ./local.raw
265pprof --text ./local.raw
266                       Fetches a remote profile for later analysis and then
267                       analyzes it in text mode.
268EOF
269}
270
271sub version_string {
272  return <<EOF
273pprof (part of gperftools $PPROF_VERSION)
274
275Copyright 1998-2007 Google Inc.
276
277This is BSD licensed software; see the source for copying conditions
278and license information.
279There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
280PARTICULAR PURPOSE.
281EOF
282}
283
284sub usage {
285  my $msg = shift;
286  print STDERR "$msg\n\n";
287  print STDERR usage_string();
288  print STDERR "\nFATAL ERROR: $msg\n";    # just as a reminder
289  exit(1);
290}
291
292sub Init() {
293  # Setup tmp-file name and handler to clean it up.
294  # We do this in the very beginning so that we can use
295  # error() and cleanup() function anytime here after.
296  $main::tmpfile_sym = "/tmp/pprof$$.sym";
297  $main::tmpfile_ps = "/tmp/pprof$$";
298  $main::next_tmpfile = 0;
299  $SIG{'INT'} = \&sighandler;
300
301  # Cache from filename/linenumber to source code
302  $main::source_cache = ();
303
304  $main::opt_help = 0;
305  $main::opt_version = 0;
306
307  $main::opt_cum = 0;
308  $main::opt_base = '';
309  $main::opt_addresses = 0;
310  $main::opt_lines = 0;
311  $main::opt_functions = 0;
312  $main::opt_files = 0;
313  $main::opt_lib_prefix = "";
314
315  $main::opt_text = 0;
316  $main::opt_callgrind = 0;
317  $main::opt_list = "";
318  $main::opt_disasm = "";
319  $main::opt_symbols = 0;
320  $main::opt_gv = 0;
321  $main::opt_evince = 0;
322  $main::opt_web = 0;
323  $main::opt_dot = 0;
324  $main::opt_ps = 0;
325  $main::opt_pdf = 0;
326  $main::opt_gif = 0;
327  $main::opt_svg = 0;
328  $main::opt_raw = 0;
329
330  $main::opt_nodecount = 80;
331  $main::opt_nodefraction = 0.005;
332  $main::opt_edgefraction = 0.001;
333  $main::opt_maxdegree = 8;
334  $main::opt_focus = '';
335  $main::opt_ignore = '';
336  $main::opt_scale = 0;
337  $main::opt_heapcheck = 0;
338  $main::opt_seconds = 30;
339  $main::opt_lib = "";
340
341  $main::opt_inuse_space   = 0;
342  $main::opt_inuse_objects = 0;
343  $main::opt_alloc_space   = 0;
344  $main::opt_alloc_objects = 0;
345  $main::opt_show_bytes    = 0;
346  $main::opt_drop_negative = 0;
347  $main::opt_interactive   = 0;
348
349  $main::opt_total_delay = 0;
350  $main::opt_contentions = 0;
351  $main::opt_mean_delay = 0;
352
353  $main::opt_tools   = "";
354  $main::opt_debug   = 0;
355  $main::opt_test    = 0;
356
357  # These are undocumented flags used only by unittests.
358  $main::opt_test_stride = 0;
359
360  # Are we using $SYMBOL_PAGE?
361  $main::use_symbol_page = 0;
362
363  # Files returned by TempName.
364  %main::tempnames = ();
365
366  # Type of profile we are dealing with
367  # Supported types:
368  #     cpu
369  #     heap
370  #     growth
371  #     contention
372  $main::profile_type = '';     # Empty type means "unknown"
373
374  GetOptions("help!"          => \$main::opt_help,
375             "version!"       => \$main::opt_version,
376             "cum!"           => \$main::opt_cum,
377             "base=s"         => \$main::opt_base,
378             "seconds=i"      => \$main::opt_seconds,
379             "add_lib=s"      => \$main::opt_lib,
380             "lib_prefix=s"   => \$main::opt_lib_prefix,
381             "functions!"     => \$main::opt_functions,
382             "lines!"         => \$main::opt_lines,
383             "addresses!"     => \$main::opt_addresses,
384             "files!"         => \$main::opt_files,
385             "text!"          => \$main::opt_text,
386             "callgrind!"     => \$main::opt_callgrind,
387             "list=s"         => \$main::opt_list,
388             "disasm=s"       => \$main::opt_disasm,
389             "symbols!"       => \$main::opt_symbols,
390             "gv!"            => \$main::opt_gv,
391             "evince!"        => \$main::opt_evince,
392             "web!"           => \$main::opt_web,
393             "dot!"           => \$main::opt_dot,
394             "ps!"            => \$main::opt_ps,
395             "pdf!"           => \$main::opt_pdf,
396             "svg!"           => \$main::opt_svg,
397             "gif!"           => \$main::opt_gif,
398             "raw!"           => \$main::opt_raw,
399             "interactive!"   => \$main::opt_interactive,
400             "nodecount=i"    => \$main::opt_nodecount,
401             "nodefraction=f" => \$main::opt_nodefraction,
402             "edgefraction=f" => \$main::opt_edgefraction,
403             "maxdegree=i"    => \$main::opt_maxdegree,
404             "focus=s"        => \$main::opt_focus,
405             "ignore=s"       => \$main::opt_ignore,
406             "scale=i"        => \$main::opt_scale,
407             "heapcheck"      => \$main::opt_heapcheck,
408             "inuse_space!"   => \$main::opt_inuse_space,
409             "inuse_objects!" => \$main::opt_inuse_objects,
410             "alloc_space!"   => \$main::opt_alloc_space,
411             "alloc_objects!" => \$main::opt_alloc_objects,
412             "show_bytes!"    => \$main::opt_show_bytes,
413             "drop_negative!" => \$main::opt_drop_negative,
414             "total_delay!"   => \$main::opt_total_delay,
415             "contentions!"   => \$main::opt_contentions,
416             "mean_delay!"    => \$main::opt_mean_delay,
417             "tools=s"        => \$main::opt_tools,
418             "test!"          => \$main::opt_test,
419             "debug!"         => \$main::opt_debug,
420             # Undocumented flags used only by unittests:
421             "test_stride=i"  => \$main::opt_test_stride,
422      ) || usage("Invalid option(s)");
423
424  # Deal with the standard --help and --version
425  if ($main::opt_help) {
426    print usage_string();
427    exit(0);
428  }
429
430  if ($main::opt_version) {
431    print version_string();
432    exit(0);
433  }
434
435  # Disassembly/listing/symbols mode requires address-level info
436  if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) {
437    $main::opt_functions = 0;
438    $main::opt_lines = 0;
439    $main::opt_addresses = 1;
440    $main::opt_files = 0;
441  }
442
443  # Check heap-profiling flags
444  if ($main::opt_inuse_space +
445      $main::opt_inuse_objects +
446      $main::opt_alloc_space +
447      $main::opt_alloc_objects > 1) {
448    usage("Specify at most on of --inuse/--alloc options");
449  }
450
451  # Check output granularities
452  my $grains =
453      $main::opt_functions +
454      $main::opt_lines +
455      $main::opt_addresses +
456      $main::opt_files +
457      0;
458  if ($grains > 1) {
459    usage("Only specify one output granularity option");
460  }
461  if ($grains == 0) {
462    $main::opt_functions = 1;
463  }
464
465  # Check output modes
466  my $modes =
467      $main::opt_text +
468      $main::opt_callgrind +
469      ($main::opt_list eq '' ? 0 : 1) +
470      ($main::opt_disasm eq '' ? 0 : 1) +
471      ($main::opt_symbols == 0 ? 0 : 1) +
472      $main::opt_gv +
473      $main::opt_evince +
474      $main::opt_web +
475      $main::opt_dot +
476      $main::opt_ps +
477      $main::opt_pdf +
478      $main::opt_svg +
479      $main::opt_gif +
480      $main::opt_raw +
481      $main::opt_interactive +
482      0;
483  if ($modes > 1) {
484    usage("Only specify one output mode");
485  }
486  if ($modes == 0) {
487    if (-t STDOUT) {  # If STDOUT is a tty, activate interactive mode
488      $main::opt_interactive = 1;
489    } else {
490      $main::opt_text = 1;
491    }
492  }
493
494  if ($main::opt_test) {
495    RunUnitTests();
496    # Should not return
497    exit(1);
498  }
499
500  # Binary name and profile arguments list
501  $main::prog = "";
502  @main::pfile_args = ();
503
504  # Remote profiling without a binary (using $SYMBOL_PAGE instead)
505  if (@ARGV > 0) {
506    if (IsProfileURL($ARGV[0])) {
507      $main::use_symbol_page = 1;
508    } elsif (IsSymbolizedProfileFile($ARGV[0])) {
509      $main::use_symbolized_profile = 1;
510      $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
511    }
512  }
513
514  if ($main::use_symbol_page || $main::use_symbolized_profile) {
515    # We don't need a binary!
516    my %disabled = ('--lines' => $main::opt_lines,
517                    '--disasm' => $main::opt_disasm);
518    for my $option (keys %disabled) {
519      usage("$option cannot be used without a binary") if $disabled{$option};
520    }
521    # Set $main::prog later...
522    scalar(@ARGV) || usage("Did not specify profile file");
523  } elsif ($main::opt_symbols) {
524    # --symbols needs a binary-name (to run nm on, etc) but not profiles
525    $main::prog = shift(@ARGV) || usage("Did not specify program");
526  } else {
527    $main::prog = shift(@ARGV) || usage("Did not specify program");
528    scalar(@ARGV) || usage("Did not specify profile file");
529  }
530
531  # Parse profile file/location arguments
532  foreach my $farg (@ARGV) {
533    if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) {
534      my $machine = $1;
535      my $num_machines = $2;
536      my $path = $3;
537      for (my $i = 0; $i < $num_machines; $i++) {
538        unshift(@main::pfile_args, "$i.$machine$path");
539      }
540    } else {
541      unshift(@main::pfile_args, $farg);
542    }
543  }
544
545  if ($main::use_symbol_page) {
546    unless (IsProfileURL($main::pfile_args[0])) {
547      error("The first profile should be a remote form to use $SYMBOL_PAGE\n");
548    }
549    CheckSymbolPage();
550    $main::prog = FetchProgramName();
551  } elsif (!$main::use_symbolized_profile) {  # may not need objtools!
552    ConfigureObjTools($main::prog)
553  }
554
555  # Break the opt_lib_prefix into the prefix_list array
556  @prefix_list = split (',', $main::opt_lib_prefix);
557
558  # Remove trailing / from the prefixes, in the list to prevent
559  # searching things like /my/path//lib/mylib.so
560  foreach (@prefix_list) {
561    s|/+$||;
562  }
563}
564
565sub Main() {
566  Init();
567  $main::collected_profile = undef;
568  @main::profile_files = ();
569  $main::op_time = time();
570
571  # Printing symbols is special and requires a lot less info that most.
572  if ($main::opt_symbols) {
573    PrintSymbols(*STDIN);   # Get /proc/maps and symbols output from stdin
574    return;
575  }
576
577  # Fetch all profile data
578  FetchDynamicProfiles();
579
580  # this will hold symbols that we read from the profile files
581  my $symbol_map = {};
582
583  # Read one profile, pick the last item on the list
584  my $data = ReadProfile($main::prog, pop(@main::profile_files));
585  my $profile = $data->{profile};
586  my $pcs = $data->{pcs};
587  my $libs = $data->{libs};   # Info about main program and shared libraries
588  $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
589
590  # Add additional profiles, if available.
591  if (scalar(@main::profile_files) > 0) {
592    foreach my $pname (@main::profile_files) {
593      my $data2 = ReadProfile($main::prog, $pname);
594      $profile = AddProfile($profile, $data2->{profile});
595      $pcs = AddPcs($pcs, $data2->{pcs});
596      $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
597    }
598  }
599
600  # Subtract base from profile, if specified
601  if ($main::opt_base ne '') {
602    my $base = ReadProfile($main::prog, $main::opt_base);
603    $profile = SubtractProfile($profile, $base->{profile});
604    $pcs = AddPcs($pcs, $base->{pcs});
605    $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
606  }
607
608  # Get total data in profile
609  my $total = TotalProfile($profile);
610
611  # Collect symbols
612  my $symbols;
613  if ($main::use_symbolized_profile) {
614    $symbols = FetchSymbols($pcs, $symbol_map);
615  } elsif ($main::use_symbol_page) {
616    $symbols = FetchSymbols($pcs);
617  } else {
618    # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
619    # which may differ from the data from subsequent profiles, especially
620    # if they were run on different machines.  Use appropriate libs for
621    # each pc somehow.
622    $symbols = ExtractSymbols($libs, $pcs);
623  }
624
625  # Remove uniniteresting stack items
626  $profile = RemoveUninterestingFrames($symbols, $profile);
627
628  # Focus?
629  if ($main::opt_focus ne '') {
630    $profile = FocusProfile($symbols, $profile, $main::opt_focus);
631  }
632
633  # Ignore?
634  if ($main::opt_ignore ne '') {
635    $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
636  }
637
638  my $calls = ExtractCalls($symbols, $profile);
639
640  # Reduce profiles to required output granularity, and also clean
641  # each stack trace so a given entry exists at most once.
642  my $reduced = ReduceProfile($symbols, $profile);
643
644  # Get derived profiles
645  my $flat = FlatProfile($reduced);
646  my $cumulative = CumulativeProfile($reduced);
647
648  # Print
649  if (!$main::opt_interactive) {
650    if ($main::opt_disasm) {
651      PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
652    } elsif ($main::opt_list) {
653      PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0);
654    } elsif ($main::opt_text) {
655      # Make sure the output is empty when have nothing to report
656      # (only matters when --heapcheck is given but we must be
657      # compatible with old branches that did not pass --heapcheck always):
658      if ($total != 0) {
659        printf("Total: %s %s\n", Unparse($total), Units());
660      }
661      PrintText($symbols, $flat, $cumulative, -1);
662    } elsif ($main::opt_raw) {
663      PrintSymbolizedProfile($symbols, $profile, $main::prog);
664    } elsif ($main::opt_callgrind) {
665      PrintCallgrind($calls);
666    } else {
667      if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
668        if ($main::opt_gv) {
669          RunGV(TempName($main::next_tmpfile, "ps"), "");
670        } elsif ($main::opt_evince) {
671          RunEvince(TempName($main::next_tmpfile, "pdf"), "");
672        } elsif ($main::opt_web) {
673          my $tmp = TempName($main::next_tmpfile, "svg");
674          RunWeb($tmp);
675          # The command we run might hand the file name off
676          # to an already running browser instance and then exit.
677          # Normally, we'd remove $tmp on exit (right now),
678          # but fork a child to remove $tmp a little later, so that the
679          # browser has time to load it first.
680          delete $main::tempnames{$tmp};
681          if (fork() == 0) {
682            sleep 5;
683            unlink($tmp);
684            exit(0);
685          }
686        }
687      } else {
688        cleanup();
689        exit(1);
690      }
691    }
692  } else {
693    InteractiveMode($profile, $symbols, $libs, $total);
694  }
695
696  cleanup();
697  exit(0);
698}
699
700##### Entry Point #####
701
702Main();
703
704# Temporary code to detect if we're running on a Goobuntu system.
705# These systems don't have the right stuff installed for the special
706# Readline libraries to work, so as a temporary workaround, we default
707# to using the normal stdio code, rather than the fancier readline-based
708# code
709sub ReadlineMightFail {
710  if (-e '/lib/libtermcap.so.2') {
711    return 0;  # libtermcap exists, so readline should be okay
712  } else {
713    return 1;
714  }
715}
716
717sub RunGV {
718  my $fname = shift;
719  my $bg = shift;       # "" or " &" if we should run in background
720  if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) {
721    # Options using double dash are supported by this gv version.
722    # Also, turn on noantialias to better handle bug in gv for
723    # postscript files with large dimensions.
724    # TODO: Maybe we should not pass the --noantialias flag
725    # if the gv version is known to work properly without the flag.
726    system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname)
727           . $bg);
728  } else {
729    # Old gv version - only supports options that use single dash.
730    print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n";
731    system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg);
732  }
733}
734
735sub RunEvince {
736  my $fname = shift;
737  my $bg = shift;       # "" or " &" if we should run in background
738  system(ShellEscape(@EVINCE, $fname) . $bg);
739}
740
741sub RunWeb {
742  my $fname = shift;
743  print STDERR "Loading web page file:///$fname\n";
744
745  if (`uname` =~ /Darwin/) {
746    # OS X: open will use standard preference for SVG files.
747    system("/usr/bin/open", $fname);
748    return;
749  }
750
751  # Some kind of Unix; try generic symlinks, then specific browsers.
752  # (Stop once we find one.)
753  # Works best if the browser is already running.
754  my @alt = (
755    "/etc/alternatives/gnome-www-browser",
756    "/etc/alternatives/x-www-browser",
757    "google-chrome",
758    "firefox",
759  );
760  foreach my $b (@alt) {
761    if (system($b, $fname) == 0) {
762      return;
763    }
764  }
765
766  print STDERR "Could not load web browser.\n";
767}
768
769sub RunKcachegrind {
770  my $fname = shift;
771  my $bg = shift;       # "" or " &" if we should run in background
772  print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n";
773  system(ShellEscape(@KCACHEGRIND, $fname) . $bg);
774}
775
776
777##### Interactive helper routines #####
778
779sub InteractiveMode {
780  $| = 1;  # Make output unbuffered for interactive mode
781  my ($orig_profile, $symbols, $libs, $total) = @_;
782
783  print STDERR "Welcome to pprof!  For help, type 'help'.\n";
784
785  # Use ReadLine if it's installed and input comes from a console.
786  if ( -t STDIN &&
787       !ReadlineMightFail() &&
788       defined(eval {require Term::ReadLine}) ) {
789    my $term = new Term::ReadLine 'pprof';
790    while ( defined ($_ = $term->readline('(pprof) '))) {
791      $term->addhistory($_) if /\S/;
792      if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
793        last;    # exit when we get an interactive command to quit
794      }
795    }
796  } else {       # don't have readline
797    while (1) {
798      print STDERR "(pprof) ";
799      $_ = <STDIN>;
800      last if ! defined $_ ;
801      s/\r//g;         # turn windows-looking lines into unix-looking lines
802
803      # Save some flags that might be reset by InteractiveCommand()
804      my $save_opt_lines = $main::opt_lines;
805
806      if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
807        last;    # exit when we get an interactive command to quit
808      }
809
810      # Restore flags
811      $main::opt_lines = $save_opt_lines;
812    }
813  }
814}
815
816# Takes two args: orig profile, and command to run.
817# Returns 1 if we should keep going, or 0 if we were asked to quit
818sub InteractiveCommand {
819  my($orig_profile, $symbols, $libs, $total, $command) = @_;
820  $_ = $command;                # just to make future m//'s easier
821  if (!defined($_)) {
822    print STDERR "\n";
823    return 0;
824  }
825  if (m/^\s*quit/) {
826    return 0;
827  }
828  if (m/^\s*help/) {
829    InteractiveHelpMessage();
830    return 1;
831  }
832  # Clear all the mode options -- mode is controlled by "$command"
833  $main::opt_text = 0;
834  $main::opt_callgrind = 0;
835  $main::opt_disasm = 0;
836  $main::opt_list = 0;
837  $main::opt_gv = 0;
838  $main::opt_evince = 0;
839  $main::opt_cum = 0;
840
841  if (m/^\s*(text|top)(\d*)\s*(.*)/) {
842    $main::opt_text = 1;
843
844    my $line_limit = ($2 ne "") ? int($2) : 10;
845
846    my $routine;
847    my $ignore;
848    ($routine, $ignore) = ParseInteractiveArgs($3);
849
850    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
851    my $reduced = ReduceProfile($symbols, $profile);
852
853    # Get derived profiles
854    my $flat = FlatProfile($reduced);
855    my $cumulative = CumulativeProfile($reduced);
856
857    PrintText($symbols, $flat, $cumulative, $line_limit);
858    return 1;
859  }
860  if (m/^\s*callgrind\s*([^ \n]*)/) {
861    $main::opt_callgrind = 1;
862
863    # Get derived profiles
864    my $calls = ExtractCalls($symbols, $orig_profile);
865    my $filename = $1;
866    if ( $1 eq '' ) {
867      $filename = TempName($main::next_tmpfile, "callgrind");
868    }
869    PrintCallgrind($calls, $filename);
870    if ( $1 eq '' ) {
871      RunKcachegrind($filename, " & ");
872      $main::next_tmpfile++;
873    }
874
875    return 1;
876  }
877  if (m/^\s*(web)?list\s*(.+)/) {
878    my $html = (defined($1) && ($1 eq "web"));
879    $main::opt_list = 1;
880
881    my $routine;
882    my $ignore;
883    ($routine, $ignore) = ParseInteractiveArgs($2);
884
885    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
886    my $reduced = ReduceProfile($symbols, $profile);
887
888    # Get derived profiles
889    my $flat = FlatProfile($reduced);
890    my $cumulative = CumulativeProfile($reduced);
891
892    PrintListing($total, $libs, $flat, $cumulative, $routine, $html);
893    return 1;
894  }
895  if (m/^\s*disasm\s*(.+)/) {
896    $main::opt_disasm = 1;
897
898    my $routine;
899    my $ignore;
900    ($routine, $ignore) = ParseInteractiveArgs($1);
901
902    # Process current profile to account for various settings
903    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
904    my $reduced = ReduceProfile($symbols, $profile);
905
906    # Get derived profiles
907    my $flat = FlatProfile($reduced);
908    my $cumulative = CumulativeProfile($reduced);
909
910    PrintDisassembly($libs, $flat, $cumulative, $routine);
911    return 1;
912  }
913  if (m/^\s*(gv|web|evince)\s*(.*)/) {
914    $main::opt_gv = 0;
915    $main::opt_evince = 0;
916    $main::opt_web = 0;
917    if ($1 eq "gv") {
918      $main::opt_gv = 1;
919    } elsif ($1 eq "evince") {
920      $main::opt_evince = 1;
921    } elsif ($1 eq "web") {
922      $main::opt_web = 1;
923    }
924
925    my $focus;
926    my $ignore;
927    ($focus, $ignore) = ParseInteractiveArgs($2);
928
929    # Process current profile to account for various settings
930    my $profile = ProcessProfile($total, $orig_profile, $symbols,
931                                 $focus, $ignore);
932    my $reduced = ReduceProfile($symbols, $profile);
933
934    # Get derived profiles
935    my $flat = FlatProfile($reduced);
936    my $cumulative = CumulativeProfile($reduced);
937
938    if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
939      if ($main::opt_gv) {
940        RunGV(TempName($main::next_tmpfile, "ps"), " &");
941      } elsif ($main::opt_evince) {
942        RunEvince(TempName($main::next_tmpfile, "pdf"), " &");
943      } elsif ($main::opt_web) {
944        RunWeb(TempName($main::next_tmpfile, "svg"));
945      }
946      $main::next_tmpfile++;
947    }
948    return 1;
949  }
950  if (m/^\s*$/) {
951    return 1;
952  }
953  print STDERR "Unknown command: try 'help'.\n";
954  return 1;
955}
956
957
958sub ProcessProfile {
959  my $total_count = shift;
960  my $orig_profile = shift;
961  my $symbols = shift;
962  my $focus = shift;
963  my $ignore = shift;
964
965  # Process current profile to account for various settings
966  my $profile = $orig_profile;
967  printf("Total: %s %s\n", Unparse($total_count), Units());
968  if ($focus ne '') {
969    $profile = FocusProfile($symbols, $profile, $focus);
970    my $focus_count = TotalProfile($profile);
971    printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n",
972           $focus,
973           Unparse($focus_count), Units(),
974           Unparse($total_count), ($focus_count*100.0) / $total_count);
975  }
976  if ($ignore ne '') {
977    $profile = IgnoreProfile($symbols, $profile, $ignore);
978    my $ignore_count = TotalProfile($profile);
979    printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n",
980           $ignore,
981           Unparse($ignore_count), Units(),
982           Unparse($total_count),
983           ($ignore_count*100.0) / $total_count);
984  }
985
986  return $profile;
987}
988
989sub InteractiveHelpMessage {
990  print STDERR <<ENDOFHELP;
991Interactive pprof mode
992
993Commands:
994  gv
995  gv [focus] [-ignore1] [-ignore2]
996      Show graphical hierarchical display of current profile.  Without
997      any arguments, shows all samples in the profile.  With the optional
998      "focus" argument, restricts the samples shown to just those where
999      the "focus" regular expression matches a routine name on the stack
1000      trace.
1001
1002  web
1003  web [focus] [-ignore1] [-ignore2]
1004      Like GV, but displays profile in your web browser instead of using
1005      Ghostview. Works best if your web browser is already running.
1006      To change the browser that gets used:
1007      On Linux, set the /etc/alternatives/gnome-www-browser symlink.
1008      On OS X, change the Finder association for SVG files.
1009
1010  list [routine_regexp] [-ignore1] [-ignore2]
1011      Show source listing of routines whose names match "routine_regexp"
1012
1013  weblist [routine_regexp] [-ignore1] [-ignore2]
1014     Displays a source listing of routines whose names match "routine_regexp"
1015     in a web browser.  You can click on source lines to view the
1016     corresponding disassembly.
1017
1018  top [--cum] [-ignore1] [-ignore2]
1019  top20 [--cum] [-ignore1] [-ignore2]
1020  top37 [--cum] [-ignore1] [-ignore2]
1021      Show top lines ordered by flat profile count, or cumulative count
1022      if --cum is specified.  If a number is present after 'top', the
1023      top K routines will be shown (defaults to showing the top 10)
1024
1025  disasm [routine_regexp] [-ignore1] [-ignore2]
1026      Show disassembly of routines whose names match "routine_regexp",
1027      annotated with sample counts.
1028
1029  callgrind
1030  callgrind [filename]
1031      Generates callgrind file. If no filename is given, kcachegrind is called.
1032
1033  help - This listing
1034  quit or ^D - End pprof
1035
1036For commands that accept optional -ignore tags, samples where any routine in
1037the stack trace matches the regular expression in any of the -ignore
1038parameters will be ignored.
1039
1040Further pprof details are available at this location (or one similar):
1041
1042 /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html
1043 /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html
1044
1045ENDOFHELP
1046}
1047sub ParseInteractiveArgs {
1048  my $args = shift;
1049  my $focus = "";
1050  my $ignore = "";
1051  my @x = split(/ +/, $args);
1052  foreach $a (@x) {
1053    if ($a =~ m/^(--|-)lines$/) {
1054      $main::opt_lines = 1;
1055    } elsif ($a =~ m/^(--|-)cum$/) {
1056      $main::opt_cum = 1;
1057    } elsif ($a =~ m/^-(.*)/) {
1058      $ignore .= (($ignore ne "") ? "|" : "" ) . $1;
1059    } else {
1060      $focus .= (($focus ne "") ? "|" : "" ) . $a;
1061    }
1062  }
1063  if ($ignore ne "") {
1064    print STDERR "Ignoring samples in call stacks that match '$ignore'\n";
1065  }
1066  return ($focus, $ignore);
1067}
1068
1069##### Output code #####
1070
1071sub TempName {
1072  my $fnum = shift;
1073  my $ext = shift;
1074  my $file = "$main::tmpfile_ps.$fnum.$ext";
1075  $main::tempnames{$file} = 1;
1076  return $file;
1077}
1078
1079# Print profile data in packed binary format (64-bit) to standard out
1080sub PrintProfileData {
1081  my $profile = shift;
1082
1083  # print header (64-bit style)
1084  # (zero) (header-size) (version) (sample-period) (zero)
1085  print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0);
1086
1087  foreach my $k (keys(%{$profile})) {
1088    my $count = $profile->{$k};
1089    my @addrs = split(/\n/, $k);
1090    if ($#addrs >= 0) {
1091      my $depth = $#addrs + 1;
1092      # int(foo / 2**32) is the only reliable way to get rid of bottom
1093      # 32 bits on both 32- and 64-bit systems.
1094      print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32));
1095      print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32));
1096
1097      foreach my $full_addr (@addrs) {
1098        my $addr = $full_addr;
1099        $addr =~ s/0x0*//;  # strip off leading 0x, zeroes
1100        if (length($addr) > 16) {
1101          print STDERR "Invalid address in profile: $full_addr\n";
1102          next;
1103        }
1104        my $low_addr = substr($addr, -8);       # get last 8 hex chars
1105        my $high_addr = substr($addr, -16, 8);  # get up to 8 more hex chars
1106        print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr));
1107      }
1108    }
1109  }
1110}
1111
1112# Print symbols and profile data
1113sub PrintSymbolizedProfile {
1114  my $symbols = shift;
1115  my $profile = shift;
1116  my $prog = shift;
1117
1118  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1119  my $symbol_marker = $&;
1120
1121  print '--- ', $symbol_marker, "\n";
1122  if (defined($prog)) {
1123    print 'binary=', $prog, "\n";
1124  }
1125  while (my ($pc, $name) = each(%{$symbols})) {
1126    my $sep = ' ';
1127    print '0x', $pc;
1128    # We have a list of function names, which include the inlined
1129    # calls.  They are separated (and terminated) by --, which is
1130    # illegal in function names.
1131    for (my $j = 2; $j <= $#{$name}; $j += 3) {
1132      print $sep, $name->[$j];
1133      $sep = '--';
1134    }
1135    print "\n";
1136  }
1137  print '---', "\n";
1138
1139  $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1140  my $profile_marker = $&;
1141  print '--- ', $profile_marker, "\n";
1142  if (defined($main::collected_profile)) {
1143    # if used with remote fetch, simply dump the collected profile to output.
1144    open(SRC, "<$main::collected_profile");
1145    while (<SRC>) {
1146      print $_;
1147    }
1148    close(SRC);
1149  } else {
1150    # dump a cpu-format profile to standard out
1151    PrintProfileData($profile);
1152  }
1153}
1154
1155# Print text output
1156sub PrintText {
1157  my $symbols = shift;
1158  my $flat = shift;
1159  my $cumulative = shift;
1160  my $line_limit = shift;
1161
1162  my $total = TotalProfile($flat);
1163
1164  # Which profile to sort by?
1165  my $s = $main::opt_cum ? $cumulative : $flat;
1166
1167  my $running_sum = 0;
1168  my $lines = 0;
1169  foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b }
1170                 keys(%{$cumulative})) {
1171    my $f = GetEntry($flat, $k);
1172    my $c = GetEntry($cumulative, $k);
1173    $running_sum += $f;
1174
1175    my $sym = $k;
1176    if (exists($symbols->{$k})) {
1177      $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1];
1178      if ($main::opt_addresses) {
1179        $sym = $k . " " . $sym;
1180      }
1181    }
1182
1183    if ($f != 0 || $c != 0) {
1184      printf("%8s %6s %6s %8s %6s %s\n",
1185             Unparse($f),
1186             Percent($f, $total),
1187             Percent($running_sum, $total),
1188             Unparse($c),
1189             Percent($c, $total),
1190             $sym);
1191    }
1192    $lines++;
1193    last if ($line_limit >= 0 && $lines >= $line_limit);
1194  }
1195}
1196
1197# Callgrind format has a compression for repeated function and file
1198# names.  You show the name the first time, and just use its number
1199# subsequently.  This can cut down the file to about a third or a
1200# quarter of its uncompressed size.  $key and $val are the key/value
1201# pair that would normally be printed by callgrind; $map is a map from
1202# value to number.
1203sub CompressedCGName {
1204  my($key, $val, $map) = @_;
1205  my $idx = $map->{$val};
1206  # For very short keys, providing an index hurts rather than helps.
1207  if (length($val) <= 3) {
1208    return "$key=$val\n";
1209  } elsif (defined($idx)) {
1210    return "$key=($idx)\n";
1211  } else {
1212    # scalar(keys $map) gives the number of items in the map.
1213    $idx = scalar(keys(%{$map})) + 1;
1214    $map->{$val} = $idx;
1215    return "$key=($idx) $val\n";
1216  }
1217}
1218
1219# Print the call graph in a way that's suiteable for callgrind.
1220sub PrintCallgrind {
1221  my $calls = shift;
1222  my $filename;
1223  my %filename_to_index_map;
1224  my %fnname_to_index_map;
1225
1226  if ($main::opt_interactive) {
1227    $filename = shift;
1228    print STDERR "Writing callgrind file to '$filename'.\n"
1229  } else {
1230    $filename = "&STDOUT";
1231  }
1232  open(CG, ">$filename");
1233  printf CG ("events: Hits\n\n");
1234  foreach my $call ( map { $_->[0] }
1235                     sort { $a->[1] cmp $b ->[1] ||
1236                            $a->[2] <=> $b->[2] }
1237                     map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
1238                           [$_, $1, $2] }
1239                     keys %$calls ) {
1240    my $count = int($calls->{$call});
1241    $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
1242    my ( $caller_file, $caller_line, $caller_function,
1243         $callee_file, $callee_line, $callee_function ) =
1244       ( $1, $2, $3, $5, $6, $7 );
1245
1246    # TODO(csilvers): for better compression, collect all the
1247    # caller/callee_files and functions first, before printing
1248    # anything, and only compress those referenced more than once.
1249    printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map);
1250    printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map);
1251    if (defined $6) {
1252      printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map);
1253      printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map);
1254      printf CG ("calls=$count $callee_line\n");
1255    }
1256    printf CG ("$caller_line $count\n\n");
1257  }
1258}
1259
1260# Print disassembly for all all routines that match $main::opt_disasm
1261sub PrintDisassembly {
1262  my $libs = shift;
1263  my $flat = shift;
1264  my $cumulative = shift;
1265  my $disasm_opts = shift;
1266
1267  my $total = TotalProfile($flat);
1268
1269  foreach my $lib (@{$libs}) {
1270    my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
1271    my $offset = AddressSub($lib->[1], $lib->[3]);
1272    foreach my $routine (sort ByName keys(%{$symbol_table})) {
1273      my $start_addr = $symbol_table->{$routine}->[0];
1274      my $end_addr = $symbol_table->{$routine}->[1];
1275      # See if there are any samples in this routine
1276      my $length = hex(AddressSub($end_addr, $start_addr));
1277      my $addr = AddressAdd($start_addr, $offset);
1278      for (my $i = 0; $i < $length; $i++) {
1279        if (defined($cumulative->{$addr})) {
1280          PrintDisassembledFunction($lib->[0], $offset,
1281                                    $routine, $flat, $cumulative,
1282                                    $start_addr, $end_addr, $total);
1283          last;
1284        }
1285        $addr = AddressInc($addr);
1286      }
1287    }
1288  }
1289}
1290
1291# Return reference to array of tuples of the form:
1292#       [start_address, filename, linenumber, instruction, limit_address]
1293# E.g.,
1294#       ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"]
1295sub Disassemble {
1296  my $prog = shift;
1297  my $offset = shift;
1298  my $start_addr = shift;
1299  my $end_addr = shift;
1300
1301  my $objdump = $obj_tool_map{"objdump"};
1302  my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn",
1303                        "--start-address=0x$start_addr",
1304                        "--stop-address=0x$end_addr", $prog);
1305  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
1306  my @result = ();
1307  my $filename = "";
1308  my $linenumber = -1;
1309  my $last = ["", "", "", ""];
1310  while (<OBJDUMP>) {
1311    s/\r//g;         # turn windows-looking lines into unix-looking lines
1312    chop;
1313    if (m|\s*([^:\s]+):(\d+)\s*$|) {
1314      # Location line of the form:
1315      #   <filename>:<linenumber>
1316      $filename = $1;
1317      $linenumber = $2;
1318    } elsif (m/^ +([0-9a-f]+):\s*(.*)/) {
1319      # Disassembly line -- zero-extend address to full length
1320      my $addr = HexExtend($1);
1321      my $k = AddressAdd($addr, $offset);
1322      $last->[4] = $k;   # Store ending address for previous instruction
1323      $last = [$k, $filename, $linenumber, $2, $end_addr];
1324      push(@result, $last);
1325    }
1326  }
1327  close(OBJDUMP);
1328  return @result;
1329}
1330
1331# The input file should contain lines of the form /proc/maps-like
1332# output (same format as expected from the profiles) or that looks
1333# like hex addresses (like "0xDEADBEEF").  We will parse all
1334# /proc/maps output, and for all the hex addresses, we will output
1335# "short" symbol names, one per line, in the same order as the input.
1336sub PrintSymbols {
1337  my $maps_and_symbols_file = shift;
1338
1339  # ParseLibraries expects pcs to be in a set.  Fine by us...
1340  my @pclist = ();   # pcs in sorted order
1341  my $pcs = {};
1342  my $map = "";
1343  foreach my $line (<$maps_and_symbols_file>) {
1344    $line =~ s/\r//g;    # turn windows-looking lines into unix-looking lines
1345    if ($line =~ /\b(0x[0-9a-f]+)\b/i) {
1346      push(@pclist, HexExtend($1));
1347      $pcs->{$pclist[-1]} = 1;
1348    } else {
1349      $map .= $line;
1350    }
1351  }
1352
1353  my $libs = ParseLibraries($main::prog, $map, $pcs);
1354  my $symbols = ExtractSymbols($libs, $pcs);
1355
1356  foreach my $pc (@pclist) {
1357    # ->[0] is the shortname, ->[2] is the full name
1358    print(($symbols->{$pc}->[0] || "??") . "\n");
1359  }
1360}
1361
1362
1363# For sorting functions by name
1364sub ByName {
1365  return ShortFunctionName($a) cmp ShortFunctionName($b);
1366}
1367
1368# Print source-listing for all all routines that match $list_opts
1369sub PrintListing {
1370  my $total = shift;
1371  my $libs = shift;
1372  my $flat = shift;
1373  my $cumulative = shift;
1374  my $list_opts = shift;
1375  my $html = shift;
1376
1377  my $output = \*STDOUT;
1378  my $fname = "";
1379
1380  if ($html) {
1381    # Arrange to write the output to a temporary file
1382    $fname = TempName($main::next_tmpfile, "html");
1383    $main::next_tmpfile++;
1384    if (!open(TEMP, ">$fname")) {
1385      print STDERR "$fname: $!\n";
1386      return;
1387    }
1388    $output = \*TEMP;
1389    print $output HtmlListingHeader();
1390    printf $output ("<div class=\"legend\">%s<br>Total: %s %s</div>\n",
1391                    $main::prog, Unparse($total), Units());
1392  }
1393
1394  my $listed = 0;
1395  foreach my $lib (@{$libs}) {
1396    my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
1397    my $offset = AddressSub($lib->[1], $lib->[3]);
1398    foreach my $routine (sort ByName keys(%{$symbol_table})) {
1399      # Print if there are any samples in this routine
1400      my $start_addr = $symbol_table->{$routine}->[0];
1401      my $end_addr = $symbol_table->{$routine}->[1];
1402      my $length = hex(AddressSub($end_addr, $start_addr));
1403      my $addr = AddressAdd($start_addr, $offset);
1404      for (my $i = 0; $i < $length; $i++) {
1405        if (defined($cumulative->{$addr})) {
1406          $listed += PrintSource(
1407            $lib->[0], $offset,
1408            $routine, $flat, $cumulative,
1409            $start_addr, $end_addr,
1410            $html,
1411            $output);
1412          last;
1413        }
1414        $addr = AddressInc($addr);
1415      }
1416    }
1417  }
1418
1419  if ($html) {
1420    if ($listed > 0) {
1421      print $output HtmlListingFooter();
1422      close($output);
1423      RunWeb($fname);
1424    } else {
1425      close($output);
1426      unlink($fname);
1427    }
1428  }
1429}
1430
1431sub HtmlListingHeader {
1432  return <<'EOF';
1433<DOCTYPE html>
1434<html>
1435<head>
1436<title>Pprof listing</title>
1437<style type="text/css">
1438body {
1439  font-family: sans-serif;
1440}
1441h1 {
1442  font-size: 1.5em;
1443  margin-bottom: 4px;
1444}
1445.legend {
1446  font-size: 1.25em;
1447}
1448.line {
1449  color: #aaaaaa;
1450}
1451.nop {
1452  color: #aaaaaa;
1453}
1454.unimportant {
1455  color: #cccccc;
1456}
1457.disasmloc {
1458  color: #000000;
1459}
1460.deadsrc {
1461  cursor: pointer;
1462}
1463.deadsrc:hover {
1464  background-color: #eeeeee;
1465}
1466.livesrc {
1467  color: #0000ff;
1468  cursor: pointer;
1469}
1470.livesrc:hover {
1471  background-color: #eeeeee;
1472}
1473.asm {
1474  color: #008800;
1475  display: none;
1476}
1477</style>
1478<script type="text/javascript">
1479function pprof_toggle_asm(e) {
1480  var target;
1481  if (!e) e = window.event;
1482  if (e.target) target = e.target;
1483  else if (e.srcElement) target = e.srcElement;
1484
1485  if (target) {
1486    var asm = target.nextSibling;
1487    if (asm && asm.className == "asm") {
1488      asm.style.display = (asm.style.display == "block" ? "" : "block");
1489      e.preventDefault();
1490      return false;
1491    }
1492  }
1493}
1494</script>
1495</head>
1496<body>
1497EOF
1498}
1499
1500sub HtmlListingFooter {
1501  return <<'EOF';
1502</body>
1503</html>
1504EOF
1505}
1506
1507sub HtmlEscape {
1508  my $text = shift;
1509  $text =~ s/&/&amp;/g;
1510  $text =~ s/</&lt;/g;
1511  $text =~ s/>/&gt;/g;
1512  return $text;
1513}
1514
1515# Returns the indentation of the line, if it has any non-whitespace
1516# characters.  Otherwise, returns -1.
1517sub Indentation {
1518  my $line = shift;
1519  if (m/^(\s*)\S/) {
1520    return length($1);
1521  } else {
1522    return -1;
1523  }
1524}
1525
1526# If the symbol table contains inlining info, Disassemble() may tag an
1527# instruction with a location inside an inlined function.  But for
1528# source listings, we prefer to use the location in the function we
1529# are listing.  So use MapToSymbols() to fetch full location
1530# information for each instruction and then pick out the first
1531# location from a location list (location list contains callers before
1532# callees in case of inlining).
1533#
1534# After this routine has run, each entry in $instructions contains:
1535#   [0] start address
1536#   [1] filename for function we are listing
1537#   [2] line number for function we are listing
1538#   [3] disassembly
1539#   [4] limit address
1540#   [5] most specific filename (may be different from [1] due to inlining)
1541#   [6] most specific line number (may be different from [2] due to inlining)
1542sub GetTopLevelLineNumbers {
1543  my ($lib, $offset, $instructions) = @_;
1544  my $pcs = [];
1545  for (my $i = 0; $i <= $#{$instructions}; $i++) {
1546    push(@{$pcs}, $instructions->[$i]->[0]);
1547  }
1548  my $symbols = {};
1549  MapToSymbols($lib, $offset, $pcs, $symbols);
1550  for (my $i = 0; $i <= $#{$instructions}; $i++) {
1551    my $e = $instructions->[$i];
1552    push(@{$e}, $e->[1]);
1553    push(@{$e}, $e->[2]);
1554    my $addr = $e->[0];
1555    my $sym = $symbols->{$addr};
1556    if (defined($sym)) {
1557      if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) {
1558        $e->[1] = $1;  # File name
1559        $e->[2] = $2;  # Line number
1560      }
1561    }
1562  }
1563}
1564
1565# Print source-listing for one routine
1566sub PrintSource {
1567  my $prog = shift;
1568  my $offset = shift;
1569  my $routine = shift;
1570  my $flat = shift;
1571  my $cumulative = shift;
1572  my $start_addr = shift;
1573  my $end_addr = shift;
1574  my $html = shift;
1575  my $output = shift;
1576
1577  # Disassemble all instructions (just to get line numbers)
1578  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
1579  GetTopLevelLineNumbers($prog, $offset, \@instructions);
1580
1581  # Hack 1: assume that the first source file encountered in the
1582  # disassembly contains the routine
1583  my $filename = undef;
1584  for (my $i = 0; $i <= $#instructions; $i++) {
1585    if ($instructions[$i]->[2] >= 0) {
1586      $filename = $instructions[$i]->[1];
1587      last;
1588    }
1589  }
1590  if (!defined($filename)) {
1591    print STDERR "no filename found in $routine\n";
1592    return 0;
1593  }
1594
1595  # Hack 2: assume that the largest line number from $filename is the
1596  # end of the procedure.  This is typically safe since if P1 contains
1597  # an inlined call to P2, then P2 usually occurs earlier in the
1598  # source file.  If this does not work, we might have to compute a
1599  # density profile or just print all regions we find.
1600  my $lastline = 0;
1601  for (my $i = 0; $i <= $#instructions; $i++) {
1602    my $f = $instructions[$i]->[1];
1603    my $l = $instructions[$i]->[2];
1604    if (($f eq $filename) && ($l > $lastline)) {
1605      $lastline = $l;
1606    }
1607  }
1608
1609  # Hack 3: assume the first source location from "filename" is the start of
1610  # the source code.
1611  my $firstline = 1;
1612  for (my $i = 0; $i <= $#instructions; $i++) {
1613    if ($instructions[$i]->[1] eq $filename) {
1614      $firstline = $instructions[$i]->[2];
1615      last;
1616    }
1617  }
1618
1619  # Hack 4: Extend last line forward until its indentation is less than
1620  # the indentation we saw on $firstline
1621  my $oldlastline = $lastline;
1622  {
1623    if (!open(FILE, "<$filename")) {
1624      print STDERR "$filename: $!\n";
1625      return 0;
1626    }
1627    my $l = 0;
1628    my $first_indentation = -1;
1629    while (<FILE>) {
1630      s/\r//g;         # turn windows-looking lines into unix-looking lines
1631      $l++;
1632      my $indent = Indentation($_);
1633      if ($l >= $firstline) {
1634        if ($first_indentation < 0 && $indent >= 0) {
1635          $first_indentation = $indent;
1636          last if ($first_indentation == 0);
1637        }
1638      }
1639      if ($l >= $lastline && $indent >= 0) {
1640        if ($indent >= $first_indentation) {
1641          $lastline = $l+1;
1642        } else {
1643          last;
1644        }
1645      }
1646    }
1647    close(FILE);
1648  }
1649
1650  # Assign all samples to the range $firstline,$lastline,
1651  # Hack 4: If an instruction does not occur in the range, its samples
1652  # are moved to the next instruction that occurs in the range.
1653  my $samples1 = {};        # Map from line number to flat count
1654  my $samples2 = {};        # Map from line number to cumulative count
1655  my $running1 = 0;         # Unassigned flat counts
1656  my $running2 = 0;         # Unassigned cumulative counts
1657  my $total1 = 0;           # Total flat counts
1658  my $total2 = 0;           # Total cumulative counts
1659  my %disasm = ();          # Map from line number to disassembly
1660  my $running_disasm = "";  # Unassigned disassembly
1661  my $skip_marker = "---\n";
1662  if ($html) {
1663    $skip_marker = "";
1664    for (my $l = $firstline; $l <= $lastline; $l++) {
1665      $disasm{$l} = "";
1666    }
1667  }
1668  my $last_dis_filename = '';
1669  my $last_dis_linenum = -1;
1670  my $last_touched_line = -1;  # To detect gaps in disassembly for a line
1671  foreach my $e (@instructions) {
1672    # Add up counts for all address that fall inside this instruction
1673    my $c1 = 0;
1674    my $c2 = 0;
1675    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
1676      $c1 += GetEntry($flat, $a);
1677      $c2 += GetEntry($cumulative, $a);
1678    }
1679
1680    if ($html) {
1681      my $dis = sprintf("      %6s %6s \t\t%8s: %s ",
1682                        HtmlPrintNumber($c1),
1683                        HtmlPrintNumber($c2),
1684                        UnparseAddress($offset, $e->[0]),
1685                        CleanDisassembly($e->[3]));
1686      
1687      # Append the most specific source line associated with this instruction
1688      if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) };
1689      $dis = HtmlEscape($dis);
1690      my $f = $e->[5];
1691      my $l = $e->[6];
1692      if ($f ne $last_dis_filename) {
1693        $dis .= sprintf("<span class=disasmloc>%s:%d</span>", 
1694                        HtmlEscape(CleanFileName($f)), $l);
1695      } elsif ($l ne $last_dis_linenum) {
1696        # De-emphasize the unchanged file name portion
1697        $dis .= sprintf("<span class=unimportant>%s</span>" .
1698                        "<span class=disasmloc>:%d</span>", 
1699                        HtmlEscape(CleanFileName($f)), $l);
1700      } else {
1701        # De-emphasize the entire location
1702        $dis .= sprintf("<span class=unimportant>%s:%d</span>", 
1703                        HtmlEscape(CleanFileName($f)), $l);
1704      }
1705      $last_dis_filename = $f;
1706      $last_dis_linenum = $l;
1707      $running_disasm .= $dis;
1708      $running_disasm .= "\n";
1709    }
1710
1711    $running1 += $c1;
1712    $running2 += $c2;
1713    $total1 += $c1;
1714    $total2 += $c2;
1715    my $file = $e->[1];
1716    my $line = $e->[2];
1717    if (($file eq $filename) &&
1718        ($line >= $firstline) &&
1719        ($line <= $lastline)) {
1720      # Assign all accumulated samples to this line
1721      AddEntry($samples1, $line, $running1);
1722      AddEntry($samples2, $line, $running2);
1723      $running1 = 0;
1724      $running2 = 0;
1725      if ($html) {
1726        if ($line != $last_touched_line && $disasm{$line} ne '') {
1727          $disasm{$line} .= "\n";
1728        }
1729        $disasm{$line} .= $running_disasm;
1730        $running_disasm = '';
1731        $last_touched_line = $line;
1732      }
1733    }
1734  }
1735
1736  # Assign any leftover samples to $lastline
1737  AddEntry($samples1, $lastline, $running1);
1738  AddEntry($samples2, $lastline, $running2);
1739  if ($html) {
1740    if ($lastline != $last_touched_line && $disasm{$lastline} ne '') {
1741      $disasm{$lastline} .= "\n";
1742    }
1743    $disasm{$lastline} .= $running_disasm;
1744  }
1745
1746  if ($html) {
1747    printf $output (
1748      "<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" .
1749      "Total:%6s %6s (flat / cumulative %s)\n",
1750      HtmlEscape(ShortFunctionName($routine)),
1751      HtmlEscape(CleanFileName($filename)),
1752      Unparse($total1),
1753      Unparse($total2),
1754      Units());
1755  } else {
1756    printf $output (
1757      "ROUTINE ====================== %s in %s\n" .
1758      "%6s %6s Total %s (flat / cumulative)\n",
1759      ShortFunctionName($routine),
1760      CleanFileName($filename),
1761      Unparse($total1),
1762      Unparse($total2),
1763      Units());
1764  }
1765  if (!open(FILE, "<$filename")) {
1766    print STDERR "$filename: $!\n";
1767    return 0;
1768  }
1769  my $l = 0;
1770  while (<FILE>) {
1771    s/\r//g;         # turn windows-looking lines into unix-looking lines
1772    $l++;
1773    if ($l >= $firstline - 5 &&
1774        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
1775      chop;
1776      my $text = $_;
1777      if ($l == $firstline) { print $output $skip_marker; }
1778      my $n1 = GetEntry($samples1, $l);
1779      my $n2 = GetEntry($samples2, $l);
1780      if ($html) {
1781        # Emit a span that has one of the following classes:
1782        #    livesrc -- has samples
1783        #    deadsrc -- has disassembly, but with no samples
1784        #    nop     -- has no matching disasembly
1785        # Also emit an optional span containing disassembly.
1786        my $dis = $disasm{$l};
1787        my $asm = "";
1788        if (defined($dis) && $dis ne '') {
1789          $asm = "<span class=\"asm\">" . $dis . "</span>";
1790        }
1791        my $source_class = (($n1 + $n2 > 0) 
1792                            ? "livesrc" 
1793                            : (($asm ne "") ? "deadsrc" : "nop"));
1794        printf $output (
1795          "<span class=\"line\">%5d</span> " .
1796          "<span class=\"%s\">%6s %6s %s</span>%s\n",
1797          $l, $source_class,
1798          HtmlPrintNumber($n1),
1799          HtmlPrintNumber($n2),
1800          HtmlEscape($text),
1801          $asm);
1802      } else {
1803        printf $output(
1804          "%6s %6s %4d: %s\n",
1805          UnparseAlt($n1),
1806          UnparseAlt($n2),
1807          $l,
1808          $text);
1809      }
1810      if ($l == $lastline)  { print $output $skip_marker; }
1811    };
1812  }
1813  close(FILE);
1814  if ($html) {
1815    print $output "</pre>\n";
1816  }
1817  return 1;
1818}
1819
1820# Return the source line for the specified file/linenumber.
1821# Returns undef if not found.
1822sub SourceLine {
1823  my $file = shift;
1824  my $line = shift;
1825
1826  # Look in cache
1827  if (!defined($main::source_cache{$file})) {
1828    if (100 < scalar keys(%main::source_cache)) {
1829      # Clear the cache when it gets too big
1830      $main::source_cache = ();
1831    }
1832
1833    # Read all lines from the file
1834    if (!open(FILE, "<$file")) {
1835      print STDERR "$file: $!\n";
1836      $main::source_cache{$file} = [];  # Cache the negative result
1837      return undef;
1838    }
1839    my $lines = [];
1840    push(@{$lines}, "");        # So we can use 1-based line numbers as indices
1841    while (<FILE>) {
1842      push(@{$lines}, $_);
1843    }
1844    close(FILE);
1845
1846    # Save the lines in the cache
1847    $main::source_cache{$file} = $lines;
1848  }
1849
1850  my $lines = $main::source_cache{$file};
1851  if (($line < 0) || ($line > $#{$lines})) {
1852    return undef;
1853  } else {
1854    return $lines->[$line];
1855  }
1856}
1857
1858# Print disassembly for one routine with interspersed source if available
1859sub PrintDisassembledFunction {
1860  my $prog = shift;
1861  my $offset = shift;
1862  my $routine = shift;
1863  my $flat = shift;
1864  my $cumulative = shift;
1865  my $start_addr = shift;
1866  my $end_addr = shift;
1867  my $total = shift;
1868
1869  # Disassemble all instructions
1870  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
1871
1872  # Make array of counts per instruction
1873  my @flat_count = ();
1874  my @cum_count = ();
1875  my $flat_total = 0;
1876  my $cum_total = 0;
1877  foreach my $e (@instructions) {
1878    # Add up counts for all address that fall inside this instruction
1879    my $c1 = 0;
1880    my $c2 = 0;
1881    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
1882      $c1 += GetEntry($flat, $a);
1883      $c2 += GetEntry($cumulative, $a);
1884    }
1885    push(@flat_count, $c1);
1886    push(@cum_count, $c2);
1887    $flat_total += $c1;
1888    $cum_total += $c2;
1889  }
1890
1891  # Print header with total counts
1892  printf("ROUTINE ====================== %s\n" .
1893         "%6s %6s %s (flat, cumulative) %.1f%% of total\n",
1894         ShortFunctionName($routine),
1895         Unparse($flat_total),
1896         Unparse($cum_total),
1897         Units(),
1898         ($cum_total * 100.0) / $total);
1899
1900  # Process instructions in order
1901  my $current_file = "";
1902  for (my $i = 0; $i <= $#instructions; ) {
1903    my $e = $instructions[$i];
1904
1905    # Print the new file name whenever we switch files
1906    if ($e->[1] ne $current_file) {
1907      $current_file = $e->[1];
1908      my $fname = $current_file;
1909      $fname =~ s|^\./||;   # Trim leading "./"
1910
1911      # Shorten long file names
1912      if (length($fname) >= 58) {
1913        $fname = "..." . substr($fname, -55);
1914      }
1915      printf("-------------------- %s\n", $fname);
1916    }
1917
1918    # TODO: Compute range of lines to print together to deal with
1919    # small reorderings.
1920    my $first_line = $e->[2];
1921    my $last_line = $first_line;
1922    my %flat_sum = ();
1923    my %cum_sum = ();
1924    for (my $l = $first_line; $l <= $last_line; $l++) {
1925      $flat_sum{$l} = 0;
1926      $cum_sum{$l} = 0;
1927    }
1928
1929    # Find run of instructions for this range of source lines
1930    my $first_inst = $i;
1931    while (($i <= $#instructions) &&
1932           ($instructions[$i]->[2] >= $first_line) &&
1933           ($instructions[$i]->[2] <= $last_line)) {
1934      $e = $instructions[$i];
1935      $flat_sum{$e->[2]} += $flat_count[$i];
1936      $cum_sum{$e->[2]} += $cum_count[$i];
1937      $i++;
1938    }
1939    my $last_inst = $i - 1;
1940
1941    # Print source lines
1942    for (my $l = $first_line; $l <= $last_line; $l++) {
1943      my $line = SourceLine($current_file, $l);
1944      if (!defined($line)) {
1945        $line = "?\n";
1946        next;
1947      } else {
1948        $line =~ s/^\s+//;
1949      }
1950      printf("%6s %6s %5d: %s",
1951             UnparseAlt($flat_sum{$l}),
1952             UnparseAlt($cum_sum{$l}),
1953             $l,
1954             $line);
1955    }
1956
1957    # Print disassembly
1958    for (my $x = $first_inst; $x <= $last_inst; $x++) {
1959      my $e = $instructions[$x];
1960      printf("%6s %6s    %8s: %6s\n",
1961             UnparseAlt($flat_count[$x]),
1962             UnparseAlt($cum_count[$x]),
1963             UnparseAddress($offset, $e->[0]),
1964             CleanDisassembly($e->[3]));
1965    }
1966  }
1967}
1968
1969# Print DOT graph
1970sub PrintDot {
1971  my $prog = shift;
1972  my $symbols = shift;
1973  my $raw = shift;
1974  my $flat = shift;
1975  my $cumulative = shift;
1976  my $overall_total = shift;
1977
1978  # Get total
1979  my $local_total = TotalProfile($flat);
1980  my $nodelimit = int($main::opt_nodefraction * $local_total);
1981  my $edgelimit = int($main::opt_edgefraction * $local_total);
1982  my $nodecount = $main::opt_nodecount;
1983
1984  # Find nodes to include
1985  my @list = (sort { abs(GetEntry($cumulative, $b)) <=>
1986                     abs(GetEntry($cumulative, $a))
1987                     || $a cmp $b }
1988              keys(%{$cumulative}));
1989  my $last = $nodecount - 1;
1990  if ($last > $#list) {
1991    $last = $#list;
1992  }
1993  while (($last >= 0) &&
1994         (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) {
1995    $last--;
1996  }
1997  if ($last < 0) {
1998    print STDERR "No nodes to print\n";
1999    return 0;
2000  }
2001
2002  if ($nodelimit > 0 || $edgelimit > 0) {
2003    printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
2004                   Unparse($nodelimit), Units(),
2005                   Unparse($edgelimit), Units());
2006  }
2007
2008  # Open DOT output file
2009  my $output;
2010  my $escaped_dot = ShellEscape(@DOT);
2011  my $escaped_ps2pdf = ShellEscape(@PS2PDF);
2012  if ($main::opt_gv) {
2013    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps"));
2014    $output = "| $escaped_dot -Tps2 >$escaped_outfile";
2015  } elsif ($main::opt_evince) {
2016    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf"));
2017    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile";
2018  } elsif ($main::opt_ps) {
2019    $output = "| $escaped_dot -Tps2";
2020  } elsif ($main::opt_pdf) {
2021    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -";
2022  } elsif ($main::opt_web || $main::opt_svg) {
2023    # We need to post-process the SVG, so write to a temporary file always.
2024    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg"));
2025    $output = "| $escaped_dot -Tsvg >$escaped_outfile";
2026  } elsif ($main::opt_gif) {
2027    $output = "| $escaped_dot -Tgif";
2028  } else {
2029    $output = ">&STDOUT";
2030  }
2031  open(DOT, $output) || error("$output: $!\n");
2032
2033  # Title
2034  printf DOT ("digraph \"%s; %s %s\" {\n",
2035              $prog,
2036              Unparse($overall_total),
2037              Units());
2038  if ($main::opt_pdf) {
2039    # The output is more printable if we set the page size for dot.
2040    printf DOT ("size=\"8,11\"\n");
2041  }
2042  printf DOT ("node [width=0.375,height=0.25];\n");
2043
2044  # Print legend
2045  printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," .
2046              "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n",
2047              $prog,
2048              sprintf("Total %s: %s", Units(), Unparse($overall_total)),
2049              sprintf("Focusing on: %s", Unparse($local_total)),
2050              sprintf("Dropped nodes with <= %s abs(%s)",
2051                      Unparse($nodelimit), Units()),
2052              sprintf("Dropped edges with <= %s %s",
2053                      Unparse($edgelimit), Units())
2054              );
2055
2056  # Print nodes
2057  my %node = ();
2058  my $nextnode = 1;
2059  foreach my $a (@list[0..$last]) {
2060    # Pick font size
2061    my $f = GetEntry($flat, $a);
2062    my $c = GetEntry($cumulative, $a);
2063
2064    my $fs = 8;
2065    if ($local_total > 0) {
2066      $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total)));
2067    }
2068
2069    $node{$a} = $nextnode++;
2070    my $sym = $a;
2071    $sym =~ s/\s+/\\n/g;
2072    $sym =~ s/::/\\n/g;
2073
2074    # Extra cumulative info to print for non-leaves
2075    my $extra = "";
2076    if ($f != $c) {
2077      $extra = sprintf("\\rof %s (%s)",
2078                       Unparse($c),
2079                       Percent($c, $local_total));
2080    }
2081    my $style = "";
2082    if ($main::opt_heapcheck) {
2083      if ($f > 0) {
2084        # make leak-causing nodes more visible (add a background)
2085        $style = ",style=filled,fillcolor=gray"
2086      } elsif ($f < 0) {
2087        # make anti-leak-causing nodes (which almost never occur)
2088        # stand out as well (triple border)
2089        $style = ",peripheries=3"
2090      }
2091    }
2092
2093    printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" .
2094                "\",shape=box,fontsize=%.1f%s];\n",
2095                $node{$a},
2096                $sym,
2097                Unparse($f),
2098                Percent($f, $local_total),
2099                $extra,
2100                $fs,
2101                $style,
2102               );
2103  }
2104
2105  # Get edges and counts per edge
2106  my %edge = ();
2107  my $n;
2108  my $fullname_to_shortname_map = {};
2109  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
2110  foreach my $k (keys(%{$raw})) {
2111    # TODO: omit low %age edges
2112    $n = $raw->{$k};
2113    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
2114    for (my $i = 1; $i <= $#translated; $i++) {
2115      my $src = $translated[$i];
2116      my $dst = $translated[$i-1];
2117      #next if ($src eq $dst);  # Avoid self-edges?
2118      if (exists($node{$src}) && exists($node{$dst})) {
2119        my $edge_label = "$src\001$dst";
2120        if (!exists($edge{$edge_label})) {
2121          $edge{$edge_label} = 0;
2122        }
2123        $edge{$edge_label} += $n;
2124      }
2125    }
2126  }
2127
2128  # Print edges (process in order of decreasing counts)
2129  my %indegree = ();   # Number of incoming edges added per node so far
2130  my %outdegree = ();  # Number of outgoing edges added per node so far
2131  foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) {
2132    my @x = split(/\001/, $e);
2133    $n = $edge{$e};
2134
2135    # Initialize degree of kept incoming and outgoing edges if necessary
2136    my $src = $x[0];
2137    my $dst = $x[1];
2138    if (!exists($outdegree{$src})) { $outdegree{$src} = 0; }
2139    if (!exists($indegree{$dst})) { $indegree{$dst} = 0; }
2140
2141    my $keep;
2142    if ($indegree{$dst} == 0) {
2143      # Keep edge if needed for reachability
2144      $keep = 1;
2145    } elsif (abs($n) <= $edgelimit) {
2146      # Drop if we are below --edgefraction
2147      $keep = 0;
2148    } elsif ($outdegree{$src} >= $main::opt_maxdegree ||
2149             $indegree{$dst} >= $main::opt_maxdegree) {
2150      # Keep limited number of in/out edges per node
2151      $keep = 0;
2152    } else {
2153      $keep = 1;
2154    }
2155
2156    if ($keep) {
2157      $outdegree{$src}++;
2158      $indegree{$dst}++;
2159
2160      # Compute line width based on edge count
2161      my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
2162      if ($fraction > 1) { $fraction = 1; }
2163      my $w = $fraction * 2;
2164      if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
2165        # SVG output treats line widths < 1 poorly.
2166        $w = 1;
2167      }
2168
2169      # Dot sometimes segfaults if given edge weights that are too large, so
2170      # we cap the weights at a large value
2171      my $edgeweight = abs($n) ** 0.7;
2172      if ($edgeweight > 100000) { $edgeweight = 100000; }
2173      $edgeweight = int($edgeweight);
2174
2175      my $style = sprintf("setlinewidth(%f)", $w);
2176      if ($x[1] =~ m/\(inline\)/) {
2177        $style .= ",dashed";
2178      }
2179
2180      # Use a slightly squashed function of the edge count as the weight
2181      printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n",
2182                  $node{$x[0]},
2183                  $node{$x[1]},
2184                  Unparse($n),
2185                  $edgeweight,
2186                  $style);
2187    }
2188  }
2189
2190  print DOT ("}\n");
2191  close(DOT);
2192
2193  if ($main::opt_web || $main::opt_svg) {
2194    # Rewrite SVG to be more usable inside web browser.
2195    RewriteSvg(TempName($main::next_tmpfile, "svg"));
2196  }
2197
2198  return 1;
2199}
2200
2201sub RewriteSvg {
2202  my $svgfile = shift;
2203
2204  open(SVG, $svgfile) || die "open temp svg: $!";
2205  my @svg = <SVG>;
2206  close(SVG);
2207  unlink $svgfile;
2208  my $svg = join('', @svg);
2209
2210  # Dot's SVG output is
2211  #
2212  #    <svg width="___" height="___"
2213  #     viewBox="___" xmlns=...>
2214  #    <g id="graph0" transform="...">
2215  #    ...
2216  #    </g>
2217  #    </svg>
2218  #
2219  # Change it to
2220  #
2221  #    <svg width="100%" height="100%"
2222  #     xmlns=...>
2223  #    $svg_javascript
2224  #    <g id="viewport" transform="translate(0,0)">
2225  #    <g id="graph0" transform="...">
2226  #    ...
2227  #    </g>
2228  #    </g>
2229  #    </svg>
2230
2231  # Fix width, height; drop viewBox.
2232  $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;
2233
2234  # Insert script, viewport <g> above first <g>
2235  my $svg_javascript = SvgJavascript();
2236  my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
2237  $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;
2238
2239  # Insert final </g> above </svg>.
2240  $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
2241  $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;
2242
2243  if ($main::opt_svg) {
2244    # --svg: write to standard output.
2245    print $svg;
2246  } else {
2247    # Write back to temporary file.
2248    open(SVG, ">$svgfile") || die "open $svgfile: $!";
2249    print SVG $svg;
2250    close(SVG);
2251  }
2252}
2253
2254sub SvgJavascript {
2255  return <<'EOF';
2256<script type="text/ecmascript"><![CDATA[
2257// SVGPan
2258// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
2259// Local modification: if(true || ...) below to force panning, never moving.
2260
2261/**
2262 *  SVGPan library 1.2
2263 * ====================
2264 *
2265 * Given an unique existing element with id "viewport", including the
2266 * the library into any SVG adds the following capabilities:
2267 *
2268 *  - Mouse panning
2269 *  - Mouse zooming (using the wheel)
2270 *  - Object dargging
2271 *
2272 * Known issues:
2273 *
2274 *  - Zooming (while panning) on Safari has still some issues
2275 *
2276 * Releases:
2277 *
2278 * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
2279 *	Fixed a bug with browser mouse handler interaction
2280 *
2281 * 1.1, Wed Feb  3 17:39:33 GMT 2010, Zeng Xiaohui
2282 *	Updated the zoom code to support the mouse wheel on Safari/Chrome
2283 *
2284 * 1.0, Andrea Leofreddi
2285 *	First release
2286 *
2287 * This code is licensed under the following BSD license:
2288 *
2289 * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
2290 *
2291 * Redistribution and use in source and binary forms, with or without modification, are
2292 * permitted provided that the following conditions are met:
2293 *
2294 *    1. Redistributions of source code must retain the above copyright notice, this list of
2295 *       conditions and the following disclaimer.
2296 *
2297 *    2. Redistributions in binary form must reproduce the above copyright notice, this list
2298 *       of conditions and the following disclaimer in the documentation and/or other materials
2299 *       provided with the distribution.
2300 *
2301 * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
2302 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
2303 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
2304 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2305 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2306 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
2307 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
2308 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
2309 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2310 *
2311 * The views and conclusions contained in the software and documentation are those of the
2312 * authors and should not be interpreted as representing official policies, either expressed
2313 * or implied, of Andrea Leofreddi.
2314 */
2315
2316var root = document.documentElement;
2317
2318var state = 'none', stateTarget, stateOrigin, stateTf;
2319
2320setupHandlers(root);
2321
2322/**
2323 * Register handlers
2324 */
2325function setupHandlers(root){
2326	setAttributes(root, {
2327		"onmouseup" : "add(evt)",
2328		"onmousedown" : "handleMouseDown(evt)",
2329		"onmousemove" : "handleMouseMove(evt)",
2330		"onmouseup" : "handleMouseUp(evt)",
2331		//"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
2332	});
2333
2334	if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
2335		window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
2336	else
2337		window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
2338
2339	var g = svgDoc.getElementById("svg");
2340	g.width = "100%";
2341	g.height = "100%";
2342}
2343
2344/**
2345 * Instance an SVGPoint object with given event coordinates.
2346 */
2347function getEventPoint(evt) {
2348	var p = root.createSVGPoint();
2349
2350	p.x = evt.clientX;
2351	p.y = evt.clientY;
2352
2353	return p;
2354}
2355
2356/**
2357 * Sets the current transform matrix of an element.
2358 */
2359function setCTM(element, matrix) {
2360	var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
2361
2362	element.setAttribute("transform", s);
2363}
2364
2365/**
2366 * Dumps a matrix to a string (useful for debug).
2367 */
2368function dumpMatrix(matrix) {
2369	var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n  " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n  0, 0, 1 ]";
2370
2371	return s;
2372}
2373
2374/**
2375 * Sets attributes of an element.
2376 */
2377function setAttributes(element, attributes){
2378	for (i in attributes)
2379		element.setAttributeNS(null, i, attributes[i]);
2380}
2381
2382/**
2383 * Handle mouse move event.
2384 */
2385function handleMouseWheel(evt) {
2386	if(evt.preventDefault)
2387		evt.preventDefault();
2388
2389	evt.returnValue = false;
2390
2391	var svgDoc = evt.target.ownerDocument;
2392
2393	var delta;
2394
2395	if(evt.wheelDelta)
2396		delta = evt.wheelDelta / 3600; // Chrome/Safari
2397	else
2398		delta = evt.detail / -90; // Mozilla
2399
2400	var z = 1 + delta; // Zoom factor: 0.9/1.1
2401
2402	var g = svgDoc.getElementById("viewport");
2403
2404	var p = getEventPoint(evt);
2405
2406	p = p.matrixTransform(g.getCTM().inverse());
2407
2408	// Compute new scale matrix in current mouse position
2409	var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
2410
2411        setCTM(g, g.getCTM().multiply(k));
2412
2413	stateTf = stateTf.multiply(k.inverse());
2414}
2415
2416/**
2417 * Handle mouse move event.
2418 */
2419function handleMouseMove(evt) {
2420	if(evt.preventDefault)
2421		evt.preventDefault();
2422
2423	evt.returnValue = false;
2424
2425	var svgDoc = evt.target.ownerDocument;
2426
2427	var g = svgDoc.getElementById("viewport");
2428
2429	if(state == 'pan') {
2430		// Pan mode
2431		var p = getEventPoint(evt).matrixTransform(stateTf);
2432
2433		setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
2434	} else if(state == 'move') {
2435		// Move mode
2436		var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
2437
2438		setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
2439
2440		stateOrigin = p;
2441	}
2442}
2443
2444/**
2445 * Handle click event.
2446 */
2447function handleMouseDown(evt) {
2448	if(evt.preventDefault)
2449		evt.preventDefault();
2450
2451	evt.returnValue = false;
2452
2453	var svgDoc = evt.target.ownerDocument;
2454
2455	var g = svgDoc.getElementById("viewport");
2456
2457	if(true || evt.target.tagName == "svg") {
2458		// Pan mode
2459		state = 'pan';
2460
2461		stateTf = g.getCTM().inverse();
2462
2463		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
2464	} else {
2465		// Move mode
2466		state = 'move';
2467
2468		stateTarget = evt.target;
2469
2470		stateTf = g.getCTM().inverse();
2471
2472		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
2473	}
2474}
2475
2476/**
2477 * Handle mouse button release event.
2478 */
2479function handleMouseUp(evt) {
2480	if(evt.preventDefault)
2481		evt.preventDefault();
2482
2483	evt.returnValue = false;
2484
2485	var svgDoc = evt.target.ownerDocument;
2486
2487	if(state == 'pan' || state == 'move') {
2488		// Quit pan mode
2489		state = '';
2490	}
2491}
2492
2493]]></script>
2494EOF
2495}
2496
2497# Provides a map from fullname to shortname for cases where the
2498# shortname is ambiguous.  The symlist has both the fullname and
2499# shortname for all symbols, which is usually fine, but sometimes --
2500# such as overloaded functions -- two different fullnames can map to
2501# the same shortname.  In that case, we use the address of the
2502# function to disambiguate the two.  This function fills in a map that
2503# maps fullnames to modified shortnames in such cases.  If a fullname
2504# is not present in the map, the 'normal' shortname provided by the
2505# symlist is the appropriate one to use.
2506sub FillFullnameToShortnameMap {
2507  my $symbols = shift;
2508  my $fullname_to_shortname_map = shift;
2509  my $shortnames_seen_once = {};
2510  my $shortnames_seen_more_than_once = {};
2511
2512  foreach my $symlist (values(%{$symbols})) {
2513    # TODO(csilvers): deal with inlined symbols too.
2514    my $shortname = $symlist->[0];
2515    my $fullname = $symlist->[2];
2516    if ($fullname !~ /<[0-9a-fA-F]+>$/) {  # fullname doesn't end in an address
2517      next;       # the only collisions we care about are when addresses differ
2518    }
2519    if (defined($shortnames_seen_once->{$shortname}) &&
2520        $shortnames_seen_once->{$shortname} ne $fullname) {
2521      $shortnames_seen_more_than_once->{$shortname} = 1;
2522    } else {
2523      $shortnames_seen_once->{$shortname} = $fullname;
2524    }
2525  }
2526
2527  foreach my $symlist (values(%{$symbols})) {
2528    my $shortname = $symlist->[0];
2529    my $fullname = $symlist->[2];
2530    # TODO(csilvers): take in a list of addresses we care about, and only
2531    # store in the map if $symlist->[1] is in that list.  Saves space.
2532    next if defined($fullname_to_shortname_map->{$fullname});
2533    if (defined($shortnames_seen_more_than_once->{$shortname})) {
2534      if ($fullname =~ /<0*([^>]*)>$/) {   # fullname has address at end of it
2535        $fullname_to_shortname_map->{$fullname} = "$shortname\@$1";
2536      }
2537    }
2538  }
2539}
2540
2541# Return a small number that identifies the argument.
2542# Multiple calls with the same argument will return the same number.
2543# Calls with different arguments will return different numbers.
2544sub ShortIdFor {
2545  my $key = shift;
2546  my $id = $main::uniqueid{$key};
2547  if (!defined($id)) {
2548    $id = keys(%main::uniqueid) + 1;
2549    $main::uniqueid{$key} = $id;
2550  }
2551  return $id;
2552}
2553
2554# Translate a stack of addresses into a stack of symbols
2555sub TranslateStack {
2556  my $symbols = shift;
2557  my $fullname_to_shortname_map = shift;
2558  my $k = shift;
2559
2560  my @addrs = split(/\n/, $k);
2561  my @result = ();
2562  for (my $i = 0; $i <= $#addrs; $i++) {
2563    my $a = $addrs[$i];
2564
2565    # Skip large addresses since they sometimes show up as fake entries on RH9
2566    if (length($a) > 8 && $a gt "7fffffffffffffff") {
2567      next;
2568    }
2569
2570    if ($main::opt_disasm || $main::opt_list) {
2571      # We want just the address for the key
2572      push(@result, $a);
2573      next;
2574    }
2575
2576    my $symlist = $symbols->{$a};
2577    if (!defined($symlist)) {
2578      $symlist = [$a, "", $a];
2579    }
2580
2581    # We can have a sequence of symbols for a particular entry
2582    # (more than one symbol in the case of inlining).  Callers
2583    # come before callees in symlist, so walk backwards since
2584    # the translated stack should contain callees before callers.
2585    for (my $j = $#{$symlist}; $j >= 2; $j -= 3) {
2586      my $func = $symlist->[$j-2];
2587      my $fileline = $symlist->[$j-1];
2588      my $fullfunc = $symlist->[$j];
2589      if (defined($fullname_to_shortname_map->{$fullfunc})) {
2590        $func = $fullname_to_shortname_map->{$fullfunc};
2591      }
2592      if ($j > 2) {
2593        $func = "$func (inline)";
2594      }
2595
2596      # Do not merge nodes corresponding to Callback::Run since that
2597      # causes confusing cycles in dot display.  Instead, we synthesize
2598      # a unique name for this frame per caller.
2599      if ($func =~ m/Callback.*::Run$/) {
2600        my $caller = ($i > 0) ? $addrs[$i-1] : 0;
2601        $func = "Run#" . ShortIdFor($caller);
2602      }
2603
2604      if ($main::opt_addresses) {
2605        push(@result, "$a $func $fileline");
2606      } elsif ($main::opt_lines) {
2607        if ($func eq '??' && $fileline eq '??:0') {
2608          push(@result, "$a");
2609        } else {
2610          push(@result, "$func $fileline");
2611        }
2612      } elsif ($main::opt_functions) {
2613        if ($func eq '??') {
2614          push(@result, "$a");
2615        } else {
2616          push(@result, $func);
2617        }
2618      } elsif ($main::opt_files) {
2619        if ($fileline eq '??:0' || $fileline eq '') {
2620          push(@result, "$a");
2621        } else {
2622          my $f = $fileline;
2623          $f =~ s/:\d+$//;
2624          push(@result, $f);
2625        }
2626      } else {
2627        push(@result, $a);
2628        last;  # Do not print inlined info
2629      }
2630    }
2631  }
2632
2633  # print join(",", @addrs), " => ", join(",", @result), "\n";
2634  return @result;
2635}
2636
2637# Generate percent string for a number and a total
2638sub Percent {
2639  my $num = shift;
2640  my $tot = shift;
2641  if ($tot != 0) {
2642    return sprintf("%.1f%%", $num * 100.0 / $tot);
2643  } else {
2644    return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf");
2645  }
2646}
2647
2648# Generate pretty-printed form of number
2649sub Unparse {
2650  my $num = shift;
2651  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
2652    if ($main::opt_inuse_objects || $main::opt_alloc_objects) {
2653      return sprintf("%d", $num);
2654    } else {
2655      if ($main::opt_show_bytes) {
2656        return sprintf("%d", $num);
2657      } else {
2658        return sprintf("%.1f", $num / 1048576.0);
2659      }
2660    }
2661  } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
2662    return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
2663  } else {
2664    return sprintf("%d", $num);
2665  }
2666}
2667
2668# Alternate pretty-printed form: 0 maps to "."
2669sub UnparseAlt {
2670  my $num = shift;
2671  if ($num == 0) {
2672    return ".";
2673  } else {
2674    return Unparse($num);
2675  }
2676}
2677
2678# Alternate pretty-printed form: 0 maps to ""
2679sub HtmlPrintNumber {
2680  my $num = shift;
2681  if ($num == 0) {
2682    return "";
2683  } else {
2684    return Unparse($num);
2685  }
2686}
2687
2688# Return output units
2689sub Units {
2690  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
2691    if ($main::opt_inuse_objects || $main::opt_alloc_objects) {
2692      return "objects";
2693    } else {
2694      if ($main::opt_show_bytes) {
2695        return "B";
2696      } else {
2697        return "MB";
2698      }
2699    }
2700  } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
2701    return "seconds";
2702  } else {
2703    return "samples";
2704  }
2705}
2706
2707##### Profile manipulation code #####
2708
2709# Generate flattened profile:
2710# If count is charged to stack [a,b,c,d], in generated profile,
2711# it will be charged to [a]
2712sub FlatProfile {
2713  my $profile = shift;
2714  my $result = {};
2715  foreach my $k (keys(%{$profile})) {
2716    my $count = $profile->{$k};
2717    my @addrs = split(/\n/, $k);
2718    if ($#addrs >= 0) {
2719      AddEntry($result, $addrs[0], $count);
2720    }
2721  }
2722  return $result;
2723}
2724
2725# Generate cumulative profile:
2726# If count is charged to stack [a,b,c,d], in generated profile,
2727# it will be charged to [a], [b], [c], [d]
2728sub CumulativeProfile {
2729  my $profile = shift;
2730  my $result = {};
2731  foreach my $k (keys(%{$profile})) {
2732    my $count = $profile->{$k};
2733    my @addrs = split(/\n/, $k);
2734    foreach my $a (@addrs) {
2735      AddEntry($result, $a, $count);
2736    }
2737  }
2738  return $result;
2739}
2740
2741# If the second-youngest PC on the stack is always the same, returns
2742# that pc.  Otherwise, returns undef.
2743sub IsSecondPcAlwaysTheSame {
2744  my $profile = shift;
2745
2746  my $second_pc = undef;
2747  foreach my $k (keys(%{$profile})) {
2748    my @addrs = split(/\n/, $k);
2749    if ($#addrs < 1) {
2750      return undef;
2751    }
2752    if (not defined $second_pc) {
2753      $second_pc = $addrs[1];
2754    } else {
2755      if ($second_pc ne $addrs[1]) {
2756        return undef;
2757      }
2758    }
2759  }
2760  return $second_pc;
2761}
2762
2763sub ExtractSymbolLocation {
2764  my $symbols = shift;
2765  my $address = shift;
2766  # 'addr2line' outputs "??:0" for unknown locations; we do the
2767  # same to be consistent.
2768  my $location = "??:0:unknown";
2769  if (exists $symbols->{$address}) {
2770    my $file = $symbols->{$address}->[1];
2771    if ($file eq "?") {
2772      $file = "??:0"
2773    }
2774    $location = $file . ":" . $symbols->{$address}->[0];
2775  }
2776  return $location;
2777}
2778
2779# Extracts a graph of calls.
2780sub ExtractCalls {
2781  my $symbols = shift;
2782  my $profile = shift;
2783
2784  my $calls = {};
2785  while( my ($stack_trace, $count) = each %$profile ) {
2786    my @address = split(/\n/, $stack_trace);
2787    my $destination = ExtractSymbolLocation($symbols, $address[0]);
2788    AddEntry($calls, $destination, $count);
2789    for (my $i = 1; $i <= $#address; $i++) {
2790      my $source = ExtractSymbolLocation($symbols, $address[$i]);
2791      my $call = "$source -> $destination";
2792      AddEntry($calls, $call, $count);
2793      $destination = $source;
2794    }
2795  }
2796
2797  return $calls;
2798}
2799
2800sub RemoveUninterestingFrames {
2801  my $symbols = shift;
2802  my $profile = shift;
2803
2804  # List of function names to skip
2805  my %skip = ();
2806  my $skip_regexp = 'NOMATCH';
2807  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
2808    foreach my $name ('calloc',
2809                      'cfree',
2810                      'malloc',
2811                      'free',
2812                      'memalign',
2813                      'posix_memalign',
2814                      'aligned_alloc',
2815                      'pvalloc',
2816                      'valloc',
2817                      'realloc',
2818                      'mallocx', # jemalloc
2819                      'rallocx', # jemalloc
2820                      'xallocx', # jemalloc
2821                      'dallocx', # jemalloc
2822                      'tc_calloc',
2823                      'tc_cfree',
2824                      'tc_malloc',
2825                      'tc_free',
2826                      'tc_memalign',
2827                      'tc_posix_memalign',
2828                      'tc_pvalloc',
2829                      'tc_valloc',
2830                      'tc_realloc',
2831                      'tc_new',
2832                      'tc_delete',
2833                      'tc_newarray',
2834                      'tc_deletearray',
2835                      'tc_new_nothrow',
2836                      'tc_newarray_nothrow',
2837                      'do_malloc',
2838                      '::do_malloc',   # new name -- got moved to an unnamed ns
2839                      '::do_malloc_or_cpp_alloc',
2840                      'DoSampledAllocation',
2841                      'simple_alloc::allocate',
2842                      '__malloc_alloc_template::allocate',
2843                      '__builtin_delete',
2844                      '__builtin_new',
2845                      '__builtin_vec_delete',
2846                      '__builtin_vec_new',
2847                      'operator new',
2848                      'operator new[]',
2849                      # The entry to our memory-allocation routines on OS X
2850                      'malloc_zone_malloc',
2851                      'malloc_zone_calloc',
2852                      'malloc_zone_valloc',
2853                      'malloc_zone_realloc',
2854                      'malloc_zone_memalign',
2855                      'malloc_zone_free',
2856                      # These mark the beginning/end of our custom sections
2857                      '__start_google_malloc',
2858                      '__stop_google_malloc',
2859                      '__start_malloc_hook',
2860                      '__stop_malloc_hook') {
2861      $skip{$name} = 1;
2862      $skip{"_" . $name} = 1;   # Mach (OS X) adds a _ prefix to everything
2863    }
2864    # TODO: Remove TCMalloc once everything has been
2865    # moved into the tcmalloc:: namespace and we have flushed
2866    # old code out of the system.
2867    $skip_regexp = "TCMalloc|^tcmalloc::";
2868  } elsif ($main::profile_type eq 'contention') {
2869    foreach my $vname ('base::RecordLockProfileData',
2870                       'base::SubmitMutexProfileData',
2871                       'base::SubmitSpinLockProfileData',
2872                       'Mutex::Unlock',
2873                       'Mutex::UnlockSlow',
2874                       'Mutex::ReaderUnlock',
2875                       'MutexLock::~MutexLock',
2876                       'SpinLock::Unlock',
2877                       'SpinLock::SlowUnlock',
2878                       'SpinLockHolder::~SpinLockHolder') {
2879      $skip{$vname} = 1;
2880    }
2881  } elsif ($main::profile_type eq 'cpu') {
2882    # Drop signal handlers used for CPU profile collection
2883    # TODO(dpeng): this should not be necessary; it's taken
2884    # care of by the general 2nd-pc mechanism below.
2885    foreach my $name ('ProfileData::Add',           # historical
2886                      'ProfileData::prof_handler',  # historical
2887                      'CpuProfiler::prof_handler',
2888                      '__FRAME_END__',
2889                      '__pthread_sighandler',
2890                      '__restore') {
2891      $skip{$name} = 1;
2892    }
2893  } else {
2894    # Nothing skipped for unknown types
2895  }
2896
2897  if ($main::profile_type eq 'cpu') {
2898    # If all the second-youngest program counters are the same,
2899    # this STRONGLY suggests that it is an artifact of measurement,
2900    # i.e., stack frames pushed by the CPU profiler signal handler.
2901    # Hence, we delete them.
2902    # (The topmost PC is read from the signal structure, not from
2903    # the stack, so it does not get involved.)
2904    while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) {
2905      my $result = {};
2906      my $func = '';
2907      if (exists($symbols->{$second_pc})) {
2908        $second_pc = $symbols->{$second_pc}->[0];
2909      }
2910      print STDERR "Removing $second_pc from all stack traces.\n";
2911      foreach my $k (keys(%{$profile})) {
2912        my $count = $profile->{$k};
2913        my @addrs = split(/\n/, $k);
2914        splice @addrs, 1, 1;
2915        my $reduced_path = join("\n", @addrs);
2916        AddEntry($result, $reduced_path, $count);
2917      }
2918      $profile = $result;
2919    }
2920  }
2921
2922  my $result = {};
2923  foreach my $k (keys(%{$profile})) {
2924    my $count = $profile->{$k};
2925    my @addrs = split(/\n/, $k);
2926    my @path = ();
2927    foreach my $a (@addrs) {
2928      if (exists($symbols->{$a})) {
2929        my $func = $symbols->{$a}->[0];
2930        if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
2931          # Throw away the portion of the backtrace seen so far, under the
2932          # assumption that previous frames were for functions internal to the
2933          # allocator.
2934          @path = ();
2935          next;
2936        }
2937      }
2938      push(@path, $a);
2939    }
2940    my $reduced_path = join("\n", @path);
2941    AddEntry($result, $reduced_path, $count);
2942  }
2943  return $result;
2944}
2945
2946# Reduce profile to granularity given by user
2947sub ReduceProfile {
2948  my $symbols = shift;
2949  my $profile = shift;
2950  my $result = {};
2951  my $fullname_to_shortname_map = {};
2952  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
2953  foreach my $k (keys(%{$profile})) {
2954    my $count = $profile->{$k};
2955    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
2956    my @path = ();
2957    my %seen = ();
2958    $seen{''} = 1;      # So that empty keys are skipped
2959    foreach my $e (@translated) {
2960      # To avoid double-counting due to recursion, skip a stack-trace
2961      # entry if it has already been seen
2962      if (!$seen{$e}) {
2963        $seen{$e} = 1;
2964        push(@path, $e);
2965      }
2966    }
2967    my $reduced_path = join("\n", @path);
2968    AddEntry($result, $reduced_path, $count);
2969  }
2970  return $result;
2971}
2972
2973# Does the specified symbol array match the regexp?
2974sub SymbolMatches {
2975  my $sym = shift;
2976  my $re = shift;
2977  if (defined($sym)) {
2978    for (my $i = 0; $i < $#{$sym}; $i += 3) {
2979      if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) {
2980        return 1;
2981      }
2982    }
2983  }
2984  return 0;
2985}
2986
2987# Focus only on paths involving specified regexps
2988sub FocusProfile {
2989  my $symbols = shift;
2990  my $profile = shift;
2991  my $focus = shift;
2992  my $result = {};
2993  foreach my $k (keys(%{$profile})) {
2994    my $count = $profile->{$k};
2995    my @addrs = split(/\n/, $k);
2996    foreach my $a (@addrs) {
2997      # Reply if it matches either the address/shortname/fileline
2998      if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) {
2999        AddEntry($result, $k, $count);
3000        last;
3001      }
3002    }
3003  }
3004  return $result;
3005}
3006
3007# Focus only on paths not involving specified regexps
3008sub IgnoreProfile {
3009  my $symbols = shift;
3010  my $profile = shift;
3011  my $ignore = shift;
3012  my $result = {};
3013  foreach my $k (keys(%{$profile})) {
3014    my $count = $profile->{$k};
3015    my @addrs = split(/\n/, $k);
3016    my $matched = 0;
3017    foreach my $a (@addrs) {
3018      # Reply if it matches either the address/shortname/fileline
3019      if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) {
3020        $matched = 1;
3021        last;
3022      }
3023    }
3024    if (!$matched) {
3025      AddEntry($result, $k, $count);
3026    }
3027  }
3028  return $result;
3029}
3030
3031# Get total count in profile
3032sub TotalProfile {
3033  my $profile = shift;
3034  my $result = 0;
3035  foreach my $k (keys(%{$profile})) {
3036    $result += $profile->{$k};
3037  }
3038  return $result;
3039}
3040
3041# Add A to B
3042sub AddProfile {
3043  my $A = shift;
3044  my $B = shift;
3045
3046  my $R = {};
3047  # add all keys in A
3048  foreach my $k (keys(%{$A})) {
3049    my $v = $A->{$k};
3050    AddEntry($R, $k, $v);
3051  }
3052  # add all keys in B
3053  foreach my $k (keys(%{$B})) {
3054    my $v = $B->{$k};
3055    AddEntry($R, $k, $v);
3056  }
3057  return $R;
3058}
3059
3060# Merges symbol maps
3061sub MergeSymbols {
3062  my $A = shift;
3063  my $B = shift;
3064
3065  my $R = {};
3066  foreach my $k (keys(%{$A})) {
3067    $R->{$k} = $A->{$k};
3068  }
3069  if (defined($B)) {
3070    foreach my $k (keys(%{$B})) {
3071      $R->{$k} = $B->{$k};
3072    }
3073  }
3074  return $R;
3075}
3076
3077
3078# Add A to B
3079sub AddPcs {
3080  my $A = shift;
3081  my $B = shift;
3082
3083  my $R = {};
3084  # add all keys in A
3085  foreach my $k (keys(%{$A})) {
3086    $R->{$k} = 1
3087  }
3088  # add all keys in B
3089  foreach my $k (keys(%{$B})) {
3090    $R->{$k} = 1
3091  }
3092  return $R;
3093}
3094
3095# Subtract B from A
3096sub SubtractProfile {
3097  my $A = shift;
3098  my $B = shift;
3099
3100  my $R = {};
3101  foreach my $k (keys(%{$A})) {
3102    my $v = $A->{$k} - GetEntry($B, $k);
3103    if ($v < 0 && $main::opt_drop_negative) {
3104      $v = 0;
3105    }
3106    AddEntry($R, $k, $v);
3107  }
3108  if (!$main::opt_drop_negative) {
3109    # Take care of when subtracted profile has more entries
3110    foreach my $k (keys(%{$B})) {
3111      if (!exists($A->{$k})) {
3112        AddEntry($R, $k, 0 - $B->{$k});
3113      }
3114    }
3115  }
3116  return $R;
3117}
3118
3119# Get entry from profile; zero if not present
3120sub GetEntry {
3121  my $profile = shift;
3122  my $k = shift;
3123  if (exists($profile->{$k})) {
3124    return $profile->{$k};
3125  } else {
3126    return 0;
3127  }
3128}
3129
3130# Add entry to specified profile
3131sub AddEntry {
3132  my $profile = shift;
3133  my $k = shift;
3134  my $n = shift;
3135  if (!exists($profile->{$k})) {
3136    $profile->{$k} = 0;
3137  }
3138  $profile->{$k} += $n;
3139}
3140
3141# Add a stack of entries to specified profile, and add them to the $pcs
3142# list.
3143sub AddEntries {
3144  my $profile = shift;
3145  my $pcs = shift;
3146  my $stack = shift;
3147  my $count = shift;
3148  my @k = ();
3149
3150  foreach my $e (split(/\s+/, $stack)) {
3151    my $pc = HexExtend($e);
3152    $pcs->{$pc} = 1;
3153    push @k, $pc;
3154  }
3155  AddEntry($profile, (join "\n", @k), $count);
3156}
3157
3158##### Code to profile a server dynamically #####
3159
3160sub CheckSymbolPage {
3161  my $url = SymbolPageURL();
3162  my $command = ShellEscape(@URL_FETCHER, $url);
3163  open(SYMBOL, "$command |") or error($command);
3164  my $line = <SYMBOL>;
3165  $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
3166  close(SYMBOL);
3167  unless (defined($line)) {
3168    error("$url doesn't exist\n");
3169  }
3170
3171  if ($line =~ /^num_symbols:\s+(\d+)$/) {
3172    if ($1 == 0) {
3173      error("Stripped binary. No symbols available.\n");
3174    }
3175  } else {
3176    error("Failed to get the number of symbols from $url\n");
3177  }
3178}
3179
3180sub IsProfileURL {
3181  my $profile_name = shift;
3182  if (-f $profile_name) {
3183    printf STDERR "Using local file $profile_name.\n";
3184    return 0;
3185  }
3186  return 1;
3187}
3188
3189sub ParseProfileURL {
3190  my $profile_name = shift;
3191
3192  if (!defined($profile_name) || $profile_name eq "") {
3193    return ();
3194  }
3195
3196  # Split profile URL - matches all non-empty strings, so no test.
3197  $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;
3198
3199  my $proto = $1 || "http://";
3200  my $hostport = $2;
3201  my $prefix = $3;
3202  my $profile = $4 || "/";
3203
3204  my $host = $hostport;
3205  $host =~ s/:.*//;
3206
3207  my $baseurl = "$proto$hostport$prefix";
3208  return ($host, $baseurl, $profile);
3209}
3210
3211# We fetch symbols from the first profile argument.
3212sub SymbolPageURL {
3213  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
3214  return "$baseURL$SYMBOL_PAGE";
3215}
3216
3217sub FetchProgramName() {
3218  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
3219  my $url = "$baseURL$PROGRAM_NAME_PAGE";
3220  my $command_line = ShellEscape(@URL_FETCHER, $url);
3221  open(CMDLINE, "$command_line |") or error($command_line);
3222  my $cmdline = <CMDLINE>;
3223  $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
3224  close(CMDLINE);
3225  error("Failed to get program name from $url\n") unless defined($cmdline);
3226  $cmdline =~ s/\x00.+//;  # Remove argv[1] and latters.
3227  $cmdline =~ s!\n!!g;  # Remove LFs.
3228  return $cmdline;
3229}
3230
3231# Gee, curl's -L (--location) option isn't reliable at least
3232# with its 7.12.3 version.  Curl will forget to post data if
3233# there is a redirection.  This function is a workaround for
3234# curl.  Redirection happens on borg hosts.
3235sub ResolveRedirectionForCurl {
3236  my $url = shift;
3237  my $command_line = ShellEscape(@URL_FETCHER, "--head", $url);
3238  open(CMDLINE, "$command_line |") or error($command_line);
3239  while (<CMDLINE>) {
3240    s/\r//g;         # turn windows-looking lines into unix-looking lines
3241    if (/^Location: (.*)/) {
3242      $url = $1;
3243    }
3244  }
3245  close(CMDLINE);
3246  return $url;
3247}
3248
3249# Add a timeout flat to URL_FETCHER.  Returns a new list.
3250sub AddFetchTimeout {
3251  my $timeout = shift;
3252  my @fetcher = shift;
3253  if (defined($timeout)) {
3254    if (join(" ", @fetcher) =~ m/\bcurl -s/) {
3255      push(@fetcher, "--max-time", sprintf("%d", $timeout));
3256    } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
3257      push(@fetcher, sprintf("--deadline=%d", $timeout));
3258    }
3259  }
3260  return @fetcher;
3261}
3262
3263# Reads a symbol map from the file handle name given as $1, returning
3264# the resulting symbol map.  Also processes variables relating to symbols.
3265# Currently, the only variable processed is 'binary=<value>' which updates
3266# $main::prog to have the correct program name.
3267sub ReadSymbols {
3268  my $in = shift;
3269  my $map = {};
3270  while (<$in>) {
3271    s/\r//g;         # turn windows-looking lines into unix-looking lines
3272    # Removes all the leading zeroes from the symbols, see comment below.
3273    if (m/^0x0*([0-9a-f]+)\s+(.+)/) {
3274      $map->{$1} = $2;
3275    } elsif (m/^---/) {
3276      last;
3277    } elsif (m/^([a-z][^=]*)=(.*)$/ ) {
3278      my ($variable, $value) = ($1, $2);
3279      for ($variable, $value) {
3280        s/^\s+//;
3281        s/\s+$//;
3282      }
3283      if ($variable eq "binary") {
3284        if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) {
3285          printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n",
3286                         $main::prog, $value);
3287        }
3288        $main::prog = $value;
3289      } else {
3290        printf STDERR ("Ignoring unknown variable in symbols list: " .
3291            "'%s' = '%s'\n", $variable, $value);
3292      }
3293    }
3294  }
3295  return $map;
3296}
3297
3298# Fetches and processes symbols to prepare them for use in the profile output
3299# code.  If the optional 'symbol_map' arg is not given, fetches symbols from
3300# $SYMBOL_PAGE for all PC values found in profile.  Otherwise, the raw symbols
3301# are assumed to have already been fetched into 'symbol_map' and are simply
3302# extracted and processed.
3303sub FetchSymbols {
3304  my $pcset = shift;
3305  my $symbol_map = shift;
3306
3307  my %seen = ();
3308  my @pcs = grep { !$seen{$_}++ } keys(%$pcset);  # uniq
3309
3310  if (!defined($symbol_map)) {
3311    my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
3312
3313    open(POSTFILE, ">$main::tmpfile_sym");
3314    print POSTFILE $post_data;
3315    close(POSTFILE);
3316
3317    my $url = SymbolPageURL();
3318
3319    my $command_line;
3320    if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) {
3321      $url = ResolveRedirectionForCurl($url);
3322      $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym",
3323                                  $url);
3324    } else {
3325      $command_line = (ShellEscape(@URL_FETCHER, "--post", $url)
3326                       . " < " . ShellEscape($main::tmpfile_sym));
3327    }
3328    # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
3329    my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"});
3330    open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line);
3331    $symbol_map = ReadSymbols(*SYMBOL{IO});
3332    close(SYMBOL);
3333  }
3334
3335  my $symbols = {};
3336  foreach my $pc (@pcs) {
3337    my $fullname;
3338    # For 64 bits binaries, symbols are extracted with 8 leading zeroes.
3339    # Then /symbol reads the long symbols in as uint64, and outputs
3340    # the result with a "0x%08llx" format which get rid of the zeroes.
3341    # By removing all the leading zeroes in both $pc and the symbols from
3342    # /symbol, the symbols match and are retrievable from the map.
3343    my $shortpc = $pc;
3344    $shortpc =~ s/^0*//;
3345    # Each line may have a list of names, which includes the function
3346    # and also other functions it has inlined.  They are separated (in
3347    # PrintSymbolizedProfile), by --, which is illegal in function names.
3348    my $fullnames;
3349    if (defined($symbol_map->{$shortpc})) {
3350      $fullnames = $symbol_map->{$shortpc};
3351    } else {
3352      $fullnames = "0x" . $pc;  # Just use addresses
3353    }
3354    my $sym = [];
3355    $symbols->{$pc} = $sym;
3356    foreach my $fullname (split("--", $fullnames)) {
3357      my $name = ShortFunctionName($fullname);
3358      push(@{$sym}, $name, "?", $fullname);
3359    }
3360  }
3361  return $symbols;
3362}
3363
3364sub BaseName {
3365  my $file_name = shift;
3366  $file_name =~ s!^.*/!!;  # Remove directory name
3367  return $file_name;
3368}
3369
3370sub MakeProfileBaseName {
3371  my ($binary_name, $profile_name) = @_;
3372  my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
3373  my $binary_shortname = BaseName($binary_name);
3374  return sprintf("%s.%s.%s",
3375                 $binary_shortname, $main::op_time, $host);
3376}
3377
3378sub FetchDynamicProfile {
3379  my $binary_name = shift;
3380  my $profile_name = shift;
3381  my $fetch_name_only = shift;
3382  my $encourage_patience = shift;
3383
3384  if (!IsProfileURL($profile_name)) {
3385    return $profile_name;
3386  } else {
3387    my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
3388    if ($path eq "" || $path eq "/") {
3389      # Missing type specifier defaults to cpu-profile
3390      $path = $PROFILE_PAGE;
3391    }
3392
3393    my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
3394
3395    my $url = "$baseURL$path";
3396    my $fetch_timeout = undef;
3397    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) {
3398      if ($path =~ m/[?]/) {
3399        $url .= "&";
3400      } else {
3401        $url .= "?";
3402      }
3403      $url .= sprintf("seconds=%d", $main::opt_seconds);
3404      $fetch_timeout = $main::opt_seconds * 1.01 + 60;
3405    } else {
3406      # For non-CPU profiles, we add a type-extension to
3407      # the target profile file name.
3408      my $suffix = $path;
3409      $suffix =~ s,/,.,g;
3410      $profile_file .= $suffix;
3411    }
3412
3413    my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
3414    if (! -d $profile_dir) {
3415      mkdir($profile_dir)
3416          || die("Unable to create profile directory $profile_dir: $!\n");
3417    }
3418    my $tmp_profile = "$profile_dir/.tmp.$profile_file";
3419    my $real_profile = "$profile_dir/$profile_file";
3420
3421    if ($fetch_name_only > 0) {
3422      return $real_profile;
3423    }
3424
3425    my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER);
3426    my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile);
3427    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
3428      print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n  ${real_profile}\n";
3429      if ($encourage_patience) {
3430        print STDERR "Be patient...\n";
3431      }
3432    } else {
3433      print STDERR "Fetching $path profile from $url to\n  ${real_profile}\n";
3434    }
3435
3436    (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
3437    (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n");
3438    print STDERR "Wrote profile to $real_profile\n";
3439    $main::collected_profile = $real_profile;
3440    return $main::collected_profile;
3441  }
3442}
3443
3444# Collect profiles in parallel
3445sub FetchDynamicProfiles {
3446  my $items = scalar(@main::pfile_args);
3447  my $levels = log($items) / log(2);
3448
3449  if ($items == 1) {
3450    $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1);
3451  } else {
3452    # math rounding issues
3453    if ((2 ** $levels) < $items) {
3454     $levels++;
3455    }
3456    my $count = scalar(@main::pfile_args);
3457    for (my $i = 0; $i < $count; $i++) {
3458      $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0);
3459    }
3460    print STDERR "Fetching $count profiles, Be patient...\n";
3461    FetchDynamicProfilesRecurse($levels, 0, 0);
3462    $main::collected_profile = join(" \\\n    ", @main::profile_files);
3463  }
3464}
3465
3466# Recursively fork a process to get enough processes
3467# collecting profiles
3468sub FetchDynamicProfilesRecurse {
3469  my $maxlevel = shift;
3470  my $level = shift;
3471  my $position = shift;
3472
3473  if (my $pid = fork()) {
3474    $position = 0 | ($position << 1);
3475    TryCollectProfile($maxlevel, $level, $position);
3476    wait;
3477  } else {
3478    $position = 1 | ($position << 1);
3479    TryCollectProfile($maxlevel, $level, $position);
3480    cleanup();
3481    exit(0);
3482  }
3483}
3484
3485# Collect a single profile
3486sub TryCollectProfile {
3487  my $maxlevel = shift;
3488  my $level = shift;
3489  my $position = shift;
3490
3491  if ($level >= ($maxlevel - 1)) {
3492    if ($position < scalar(@main::pfile_args)) {
3493      FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0);
3494    }
3495  } else {
3496    FetchDynamicProfilesRecurse($maxlevel, $level+1, $position);
3497  }
3498}
3499
3500##### Parsing code #####
3501
3502# Provide a small streaming-read module to handle very large
3503# cpu-profile files.  Stream in chunks along a sliding window.
3504# Provides an interface to get one 'slot', correctly handling
3505# endian-ness differences.  A slot is one 32-bit or 64-bit word
3506# (depending on the input profile).  We tell endianness and bit-size
3507# for the profile by looking at the first 8 bytes: in cpu profiles,
3508# the second slot is always 3 (we'll accept anything that's not 0).
3509BEGIN {
3510  package CpuProfileStream;
3511
3512  sub new {
3513    my ($class, $file, $fname) = @_;
3514    my $self = { file        => $file,
3515                 base        => 0,
3516                 stride      => 512 * 1024,   # must be a multiple of bitsize/8
3517                 slots       => [],
3518                 unpack_code => "",           # N for big-endian, V for little
3519                 perl_is_64bit => 1,          # matters if profile is 64-bit
3520    };
3521    bless $self, $class;
3522    # Let unittests adjust the stride
3523    if ($main::opt_test_stride > 0) {
3524      $self->{stride} = $main::opt_test_stride;
3525    }
3526    # Read the first two slots to figure out bitsize and endianness.
3527    my $slots = $self->{slots};
3528    my $str;
3529    read($self->{file}, $str, 8);
3530    # Set the global $address_length based on what we see here.
3531    # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars).
3532    $address_length = ($str eq (chr(0)x8)) ? 16 : 8;
3533    if ($address_length == 8) {
3534      if (substr($str, 6, 2) eq chr(0)x2) {
3535        $self->{unpack_code} = 'V';  # Little-endian.
3536      } elsif (substr($str, 4, 2) eq chr(0)x2) {
3537        $self->{unpack_code} = 'N';  # Big-endian
3538      } else {
3539        ::error("$fname: header size >= 2**16\n");
3540      }
3541      @$slots = unpack($self->{unpack_code} . "*", $str);
3542    } else {
3543      # If we're a 64-bit profile, check if we're a 64-bit-capable
3544      # perl.  Otherwise, each slot will be represented as a float
3545      # instead of an int64, losing precision and making all the
3546      # 64-bit addresses wrong.  We won't complain yet, but will
3547      # later if we ever see a value that doesn't fit in 32 bits.
3548      my $has_q = 0;
3549      eval { $has_q = pack("Q", "1") ? 1 : 1; };
3550      if (!$has_q) {
3551        $self->{perl_is_64bit} = 0;
3552      }
3553      read($self->{file}, $str, 8);
3554      if (substr($str, 4, 4) eq chr(0)x4) {
3555        # We'd love to use 'Q', but it's a) not universal, b) not endian-proof.
3556        $self->{unpack_code} = 'V';  # Little-endian.
3557      } elsif (substr($str, 0, 4) eq chr(0)x4) {
3558        $self->{unpack_code} = 'N';  # Big-endian
3559      } else {
3560        ::error("$fname: header size >= 2**32\n");
3561      }
3562      my @pair = unpack($self->{unpack_code} . "*", $str);
3563      # Since we know one of the pair is 0, it's fine to just add them.
3564      @$slots = (0, $pair[0] + $pair[1]);
3565    }
3566    return $self;
3567  }
3568
3569  # Load more data when we access slots->get(X) which is not yet in memory.
3570  sub overflow {
3571    my ($self) = @_;
3572    my $slots = $self->{slots};
3573    $self->{base} += $#$slots + 1;   # skip over data we're replacing
3574    my $str;
3575    read($self->{file}, $str, $self->{stride});
3576    if ($address_length == 8) {      # the 32-bit case
3577      # This is the easy case: unpack provides 32-bit unpacking primitives.
3578      @$slots = unpack($self->{unpack_code} . "*", $str);
3579    } else {
3580      # We need to unpack 32 bits at a time and combine.
3581      my @b32_values = unpack($self->{unpack_code} . "*", $str);
3582      my @b64_values = ();
3583      for (my $i = 0; $i < $#b32_values; $i += 2) {
3584        # TODO(csilvers): if this is a 32-bit perl, the math below
3585        #    could end up in a too-large int, which perl will promote
3586        #    to a double, losing necessary precision.  Deal with that.
3587        #    Right now, we just die.
3588        my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
3589        if ($self->{unpack_code} eq 'N') {    # big-endian
3590          ($lo, $hi) = ($hi, $lo);
3591        }
3592        my $value = $lo + $hi * (2**32);
3593        if (!$self->{perl_is_64bit} &&   # check value is exactly represented
3594            (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
3595          ::error("Need a 64-bit perl to process this 64-bit profile.\n");
3596        }
3597        push(@b64_values, $value);
3598      }
3599      @$slots = @b64_values;
3600    }
3601  }
3602
3603  # Access the i-th long in the file (logically), or -1 at EOF.
3604  sub get {
3605    my ($self, $idx) = @_;
3606    my $slots = $self->{slots};
3607    while ($#$slots >= 0) {
3608      if ($idx < $self->{base}) {
3609        # The only time we expect a reference to $slots[$i - something]
3610        # after referencing $slots[$i] is reading the very first header.
3611        # Since $stride > |header|, that shouldn't cause any lookback
3612        # errors.  And everything after the header is sequential.
3613        print STDERR "Unexpected look-back reading CPU profile";
3614        return -1;   # shrug, don't know what better to return
3615      } elsif ($idx > $self->{base} + $#$slots) {
3616        $self->overflow();
3617      } else {
3618        return $slots->[$idx - $self->{base}];
3619      }
3620    }
3621    # If we get here, $slots is [], which means we've reached EOF
3622    return -1;  # unique since slots is supposed to hold unsigned numbers
3623  }
3624}
3625
3626# Reads the top, 'header' section of a profile, and returns the last
3627# line of the header, commonly called a 'header line'.  The header
3628# section of a profile consists of zero or more 'command' lines that
3629# are instructions to pprof, which pprof executes when reading the
3630# header.  All 'command' lines start with a %.  After the command
3631# lines is the 'header line', which is a profile-specific line that
3632# indicates what type of profile it is, and perhaps other global
3633# information about the profile.  For instance, here's a header line
3634# for a heap profile:
3635#   heap profile:     53:    38236 [  5525:  1284029] @ heapprofile
3636# For historical reasons, the CPU profile does not contain a text-
3637# readable header line.  If the profile looks like a CPU profile,
3638# this function returns "".  If no header line could be found, this
3639# function returns undef.
3640#
3641# The following commands are recognized:
3642#   %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:'
3643#
3644# The input file should be in binmode.
3645sub ReadProfileHeader {
3646  local *PROFILE = shift;
3647  my $firstchar = "";
3648  my $line = "";
3649  read(PROFILE, $firstchar, 1);
3650  seek(PROFILE, -1, 1);                    # unread the firstchar
3651  if ($firstchar !~ /[[:print:]]/) {       # is not a text character
3652    return "";
3653  }
3654  while (defined($line = <PROFILE>)) {
3655    $line =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
3656    if ($line =~ /^%warn\s+(.*)/) {        # 'warn' command
3657      # Note this matches both '%warn blah\n' and '%warn\n'.
3658      print STDERR "WARNING: $1\n";        # print the rest of the line
3659    } elsif ($line =~ /^%/) {
3660      print STDERR "Ignoring unknown command from profile header: $line";
3661    } else {
3662      # End of commands, must be the header line.
3663      return $line;
3664    }
3665  }
3666  return undef;     # got to EOF without seeing a header line
3667}
3668
3669sub IsSymbolizedProfileFile {
3670  my $file_name = shift;
3671  if (!(-e $file_name) || !(-r $file_name)) {
3672    return 0;
3673  }
3674  # Check if the file contains a symbol-section marker.
3675  open(TFILE, "<$file_name");
3676  binmode TFILE;
3677  my $firstline = ReadProfileHeader(*TFILE);
3678  close(TFILE);
3679  if (!$firstline) {
3680    return 0;
3681  }
3682  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3683  my $symbol_marker = $&;
3684  return $firstline =~ /^--- *$symbol_marker/;
3685}
3686
3687# Parse profile generated by common/profiler.cc and return a reference
3688# to a map:
3689#      $result->{version}     Version number of profile file
3690#      $result->{period}      Sampling period (in microseconds)
3691#      $result->{profile}     Profile object
3692#      $result->{map}         Memory map info from profile
3693#      $result->{pcs}         Hash of all PC values seen, key is hex address
3694sub ReadProfile {
3695  my $prog = shift;
3696  my $fname = shift;
3697  my $result;            # return value
3698
3699  $CONTENTION_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3700  my $contention_marker = $&;
3701  $GROWTH_PAGE  =~ m,[^/]+$,;    # matches everything after the last slash
3702  my $growth_marker = $&;
3703  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3704  my $symbol_marker = $&;
3705  $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3706  my $profile_marker = $&;
3707
3708  # Look at first line to see if it is a heap or a CPU profile.
3709  # CPU profile may start with no header at all, and just binary data
3710  # (starting with \0\0\0\0) -- in that case, don't try to read the
3711  # whole firstline, since it may be gigabytes(!) of data.
3712  open(PROFILE, "<$fname") || error("$fname: $!\n");
3713  binmode PROFILE;      # New perls do UTF-8 processing
3714  my $header = ReadProfileHeader(*PROFILE);
3715  if (!defined($header)) {   # means "at EOF"
3716    error("Profile is empty.\n");
3717  }
3718
3719  my $symbols;
3720  if ($header =~ m/^--- *$symbol_marker/o) {
3721    # Verify that the user asked for a symbolized profile
3722    if (!$main::use_symbolized_profile) {
3723      # we have both a binary and symbolized profiles, abort
3724      error("FATAL ERROR: Symbolized profile\n   $fname\ncannot be used with " .
3725            "a binary arg. Try again without passing\n   $prog\n");
3726    }
3727    # Read the symbol section of the symbolized profile file.
3728    $symbols = ReadSymbols(*PROFILE{IO});
3729    # Read the next line to get the header for the remaining profile.
3730    $header = ReadProfileHeader(*PROFILE) || "";
3731  }
3732
3733  $main::profile_type = '';
3734  if ($header =~ m/^heap profile:.*$growth_marker/o) {
3735    $main::profile_type = 'growth';
3736    $result =  ReadHeapProfile($prog, *PROFILE, $header);
3737  } elsif ($header =~ m/^heap profile:/) {
3738    $main::profile_type = 'heap';
3739    $result =  ReadHeapProfile($prog, *PROFILE, $header);
3740  } elsif ($header =~ m/^--- *$contention_marker/o) {
3741    $main::profile_type = 'contention';
3742    $result = ReadSynchProfile($prog, *PROFILE);
3743  } elsif ($header =~ m/^--- *Stacks:/) {
3744    print STDERR
3745      "Old format contention profile: mistakenly reports " .
3746      "condition variable signals as lock contentions.\n";
3747    $main::profile_type = 'contention';
3748    $result = ReadSynchProfile($prog, *PROFILE);
3749  } elsif ($header =~ m/^--- *$profile_marker/) {
3750    # the binary cpu profile data starts immediately after this line
3751    $main::profile_type = 'cpu';
3752    $result = ReadCPUProfile($prog, $fname, *PROFILE);
3753  } else {
3754    if (defined($symbols)) {
3755      # a symbolized profile contains a format we don't recognize, bail out
3756      error("$fname: Cannot recognize profile section after symbols.\n");
3757    }
3758    # no ascii header present -- must be a CPU profile
3759    $main::profile_type = 'cpu';
3760    $result = ReadCPUProfile($prog, $fname, *PROFILE);
3761  }
3762
3763  close(PROFILE);
3764
3765  # if we got symbols along with the profile, return those as well
3766  if (defined($symbols)) {
3767    $result->{symbols} = $symbols;
3768  }
3769
3770  return $result;
3771}
3772
3773# Subtract one from caller pc so we map back to call instr.
3774# However, don't do this if we're reading a symbolized profile
3775# file, in which case the subtract-one was done when the file
3776# was written.
3777#
3778# We apply the same logic to all readers, though ReadCPUProfile uses an
3779# independent implementation.
3780sub FixCallerAddresses {
3781  my $stack = shift;
3782  if ($main::use_symbolized_profile) {
3783    return $stack;
3784  } else {
3785    $stack =~ /(\s)/;
3786    my $delimiter = $1;
3787    my @addrs = split(' ', $stack);
3788    my @fixedaddrs;
3789    $#fixedaddrs = $#addrs;
3790    if ($#addrs >= 0) {
3791      $fixedaddrs[0] = $addrs[0];
3792    }
3793    for (my $i = 1; $i <= $#addrs; $i++) {
3794      $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1");
3795    }
3796    return join $delimiter, @fixedaddrs;
3797  }
3798}
3799
3800# CPU profile reader
3801sub ReadCPUProfile {
3802  my $prog = shift;
3803  my $fname = shift;       # just used for logging
3804  local *PROFILE = shift;
3805  my $version;
3806  my $period;
3807  my $i;
3808  my $profile = {};
3809  my $pcs = {};
3810
3811  # Parse string into array of slots.
3812  my $slots = CpuProfileStream->new(*PROFILE, $fname);
3813
3814  # Read header.  The current header version is a 5-element structure
3815  # containing:
3816  #   0: header count (always 0)
3817  #   1: header "words" (after this one: 3)
3818  #   2: format version (0)
3819  #   3: sampling period (usec)
3820  #   4: unused padding (always 0)
3821  if ($slots->get(0) != 0 ) {
3822    error("$fname: not a profile file, or old format profile file\n");
3823  }
3824  $i = 2 + $slots->get(1);
3825  $version = $slots->get(2);
3826  $period = $slots->get(3);
3827  # Do some sanity checking on these header values.
3828  if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) {
3829    error("$fname: not a profile file, or corrupted profile file\n");
3830  }
3831
3832  # Parse profile
3833  while ($slots->get($i) != -1) {
3834    my $n = $slots->get($i++);
3835    my $d = $slots->get($i++);
3836    if ($d > (2**16)) {  # TODO(csilvers): what's a reasonable max-stack-depth?
3837      my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8));
3838      print STDERR "At index $i (address $addr):\n";
3839      error("$fname: stack trace depth >= 2**32\n");
3840    }
3841    if ($slots->get($i) == 0) {
3842      # End of profile data marker
3843      $i += $d;
3844      last;
3845    }
3846
3847    # Make key out of the stack entries
3848    my @k = ();
3849    for (my $j = 0; $j < $d; $j++) {
3850      my $pc = $slots->get($i+$j);
3851      # Subtract one from caller pc so we map back to call instr.
3852      # However, don't do this if we're reading a symbolized profile
3853      # file, in which case the subtract-one was done when the file
3854      # was written.
3855      if ($j > 0 && !$main::use_symbolized_profile) {
3856        $pc--;
3857      }
3858      $pc = sprintf("%0*x", $address_length, $pc);
3859      $pcs->{$pc} = 1;
3860      push @k, $pc;
3861    }
3862
3863    AddEntry($profile, (join "\n", @k), $n);
3864    $i += $d;
3865  }
3866
3867  # Parse map
3868  my $map = '';
3869  seek(PROFILE, $i * 4, 0);
3870  read(PROFILE, $map, (stat PROFILE)[7]);
3871
3872  my $r = {};
3873  $r->{version} = $version;
3874  $r->{period} = $period;
3875  $r->{profile} = $profile;
3876  $r->{libs} = ParseLibraries($prog, $map, $pcs);
3877  $r->{pcs} = $pcs;
3878
3879  return $r;
3880}
3881
3882sub ReadHeapProfile {
3883  my $prog = shift;
3884  local *PROFILE = shift;
3885  my $header = shift;
3886
3887  my $index = 1;
3888  if ($main::opt_inuse_space) {
3889    $index = 1;
3890  } elsif ($main::opt_inuse_objects) {
3891    $index = 0;
3892  } elsif ($main::opt_alloc_space) {
3893    $index = 3;
3894  } elsif ($main::opt_alloc_objects) {
3895    $index = 2;
3896  }
3897
3898  # Find the type of this profile.  The header line looks like:
3899  #    heap profile:   1246:  8800744 [  1246:  8800744] @ <heap-url>/266053
3900  # There are two pairs <count: size>, the first inuse objects/space, and the
3901  # second allocated objects/space.  This is followed optionally by a profile
3902  # type, and if that is present, optionally by a sampling frequency.
3903  # For remote heap profiles (v1):
3904  # The interpretation of the sampling frequency is that the profiler, for
3905  # each sample, calculates a uniformly distributed random integer less than
3906  # the given value, and records the next sample after that many bytes have
3907  # been allocated.  Therefore, the expected sample interval is half of the
3908  # given frequency.  By default, if not specified, the expected sample
3909  # interval is 128KB.  Only remote-heap-page profiles are adjusted for
3910  # sample size.
3911  # For remote heap profiles (v2):
3912  # The sampling frequency is the rate of a Poisson process. This means that
3913  # the probability of sampling an allocation of size X with sampling rate Y
3914  # is 1 - exp(-X/Y)
3915  # For version 2, a typical header line might look like this:
3916  # heap profile:   1922: 127792360 [  1922: 127792360] @ <heap-url>_v2/524288
3917  # the trailing number (524288) is the sampling rate. (Version 1 showed
3918  # double the 'rate' here)
3919  my $sampling_algorithm = 0;
3920  my $sample_adjustment = 0;
3921  chomp($header);
3922  my $type = "unknown";
3923  if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
3924    if (defined($6) && ($6 ne '')) {
3925      $type = $6;
3926      my $sample_period = $8;
3927      # $type is "heapprofile" for profiles generated by the
3928      # heap-profiler, and either "heap" or "heap_v2" for profiles
3929      # generated by sampling directly within tcmalloc.  It can also
3930      # be "growth" for heap-growth profiles.  The first is typically
3931      # found for profiles generated locally, and the others for
3932      # remote profiles.
3933      if (($type eq "heapprofile") || ($type !~ /heap/) ) {
3934        # No need to adjust for the sampling rate with heap-profiler-derived data
3935        $sampling_algorithm = 0;
3936      } elsif ($type =~ /_v2/) {
3937        $sampling_algorithm = 2;     # version 2 sampling
3938        if (defined($sample_period) && ($sample_period ne '')) {
3939          $sample_adjustment = int($sample_period);
3940        }
3941      } else {
3942        $sampling_algorithm = 1;     # version 1 sampling
3943        if (defined($sample_period) && ($sample_period ne '')) {
3944          $sample_adjustment = int($sample_period)/2;
3945        }
3946      }
3947    } else {
3948      # We detect whether or not this is a remote-heap profile by checking
3949      # that the total-allocated stats ($n2,$s2) are exactly the
3950      # same as the in-use stats ($n1,$s1).  It is remotely conceivable
3951      # that a non-remote-heap profile may pass this check, but it is hard
3952      # to imagine how that could happen.
3953      # In this case it's so old it's guaranteed to be remote-heap version 1.
3954      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
3955      if (($n1 == $n2) && ($s1 == $s2)) {
3956        # This is likely to be a remote-heap based sample profile
3957        $sampling_algorithm = 1;
3958      }
3959    }
3960  }
3961
3962  if ($sampling_algorithm > 0) {
3963    # For remote-heap generated profiles, adjust the counts and sizes to
3964    # account for the sample rate (we sample once every 128KB by default).
3965    if ($sample_adjustment == 0) {
3966      # Turn on profile adjustment.
3967      $sample_adjustment = 128*1024;
3968      print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
3969    } else {
3970      printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
3971                     $sample_adjustment);
3972    }
3973    if ($sampling_algorithm > 1) {
3974      # We don't bother printing anything for the original version (version 1)
3975      printf STDERR "Heap version $sampling_algorithm\n";
3976    }
3977  }
3978
3979  my $profile = {};
3980  my $pcs = {};
3981  my $map = "";
3982
3983  while (<PROFILE>) {
3984    s/\r//g;         # turn windows-looking lines into unix-looking lines
3985    if (/^MAPPED_LIBRARIES:/) {
3986      # Read the /proc/self/maps data
3987      while (<PROFILE>) {
3988        s/\r//g;         # turn windows-looking lines into unix-looking lines
3989        $map .= $_;
3990      }
3991      last;
3992    }
3993
3994    if (/^--- Memory map:/) {
3995      # Read /proc/self/maps data as formatted by DumpAddressMap()
3996      my $buildvar = "";
3997      while (<PROFILE>) {
3998        s/\r//g;         # turn windows-looking lines into unix-looking lines
3999        # Parse "build=<dir>" specification if supplied
4000        if (m/^\s*build=(.*)\n/) {
4001          $buildvar = $1;
4002        }
4003
4004        # Expand "$build" variable if available
4005        $_ =~ s/\$build\b/$buildvar/g;
4006
4007        $map .= $_;
4008      }
4009      last;
4010    }
4011
4012    # Read entry of the form:
4013    #  <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an
4014    s/^\s*//;
4015    s/\s*$//;
4016    if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
4017      my $stack = $5;
4018      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
4019
4020      if ($sample_adjustment) {
4021        if ($sampling_algorithm == 2) {
4022          # Remote-heap version 2
4023          # The sampling frequency is the rate of a Poisson process.
4024          # This means that the probability of sampling an allocation of
4025          # size X with sampling rate Y is 1 - exp(-X/Y)
4026          if ($n1 != 0) {
4027            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
4028            my $scale_factor = 1/(1 - exp(-$ratio));
4029            $n1 *= $scale_factor;
4030            $s1 *= $scale_factor;
4031          }
4032          if ($n2 != 0) {
4033            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
4034            my $scale_factor = 1/(1 - exp(-$ratio));
4035            $n2 *= $scale_factor;
4036            $s2 *= $scale_factor;
4037          }
4038        } else {
4039          # Remote-heap version 1
4040          my $ratio;
4041          $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
4042          if ($ratio < 1) {
4043            $n1 /= $ratio;
4044            $s1 /= $ratio;
4045          }
4046          $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
4047          if ($ratio < 1) {
4048            $n2 /= $ratio;
4049            $s2 /= $ratio;
4050          }
4051        }
4052      }
4053
4054      my @counts = ($n1, $s1, $n2, $s2);
4055      AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
4056    }
4057  }
4058
4059  my $r = {};
4060  $r->{version} = "heap";
4061  $r->{period} = 1;
4062  $r->{profile} = $profile;
4063  $r->{libs} = ParseLibraries($prog, $map, $pcs);
4064  $r->{pcs} = $pcs;
4065  return $r;
4066}
4067
4068sub ReadSynchProfile {
4069  my $prog = shift;
4070  local *PROFILE = shift;
4071  my $header = shift;
4072
4073  my $map = '';
4074  my $profile = {};
4075  my $pcs = {};
4076  my $sampling_period = 1;
4077  my $cyclespernanosec = 2.8;   # Default assumption for old binaries
4078  my $seen_clockrate = 0;
4079  my $line;
4080
4081  my $index = 0;
4082  if ($main::opt_total_delay) {
4083    $index = 0;
4084  } elsif ($main::opt_contentions) {
4085    $index = 1;
4086  } elsif ($main::opt_mean_delay) {
4087    $index = 2;
4088  }
4089
4090  while ( $line = <PROFILE> ) {
4091    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
4092    if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
4093      my ($cycles, $count, $stack) = ($1, $2, $3);
4094
4095      # Convert cycles to nanoseconds
4096      $cycles /= $cyclespernanosec;
4097
4098      # Adjust for sampling done by application
4099      $cycles *= $sampling_period;
4100      $count *= $sampling_period;
4101
4102      my @values = ($cycles, $count, $cycles / $count);
4103      AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]);
4104
4105    } elsif ( $line =~ /^(slow release).*thread \d+  \@\s*(.*?)\s*$/ ||
4106              $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
4107      my ($cycles, $stack) = ($1, $2);
4108      if ($cycles !~ /^\d+$/) {
4109        next;
4110      }
4111
4112      # Convert cycles to nanoseconds
4113      $cycles /= $cyclespernanosec;
4114
4115      # Adjust for sampling done by application
4116      $cycles *= $sampling_period;
4117
4118      AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles);
4119
4120    } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) {
4121      my ($variable, $value) = ($1,$2);
4122      for ($variable, $value) {
4123        s/^\s+//;
4124        s/\s+$//;
4125      }
4126      if ($variable eq "cycles/second") {
4127        $cyclespernanosec = $value / 1e9;
4128        $seen_clockrate = 1;
4129      } elsif ($variable eq "sampling period") {
4130        $sampling_period = $value;
4131      } elsif ($variable eq "ms since reset") {
4132        # Currently nothing is done with this value in pprof
4133        # So we just silently ignore it for now
4134      } elsif ($variable eq "discarded samples") {
4135        # Currently nothing is done with this value in pprof
4136        # So we just silently ignore it for now
4137      } else {
4138        printf STDERR ("Ignoring unnknown variable in /contention output: " .
4139                       "'%s' = '%s'\n",$variable,$value);
4140      }
4141    } else {
4142      # Memory map entry
4143      $map .= $line;
4144    }
4145  }
4146
4147  if (!$seen_clockrate) {
4148    printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n",
4149                   $cyclespernanosec);
4150  }
4151
4152  my $r = {};
4153  $r->{version} = 0;
4154  $r->{period} = $sampling_period;
4155  $r->{profile} = $profile;
4156  $r->{libs} = ParseLibraries($prog, $map, $pcs);
4157  $r->{pcs} = $pcs;
4158  return $r;
4159}
4160
4161# Given a hex value in the form "0x1abcd" or "1abcd", return either
4162# "0001abcd" or "000000000001abcd", depending on the current (global)
4163# address length.
4164sub HexExtend {
4165  my $addr = shift;
4166
4167  $addr =~ s/^(0x)?0*//;
4168  my $zeros_needed = $address_length - length($addr);
4169  if ($zeros_needed < 0) {
4170    printf STDERR "Warning: address $addr is longer than address length $address_length\n";
4171    return $addr;
4172  }
4173  return ("0" x $zeros_needed) . $addr;
4174}
4175
4176##### Symbol extraction #####
4177
4178# Aggressively search the lib_prefix values for the given library
4179# If all else fails, just return the name of the library unmodified.
4180# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so"
4181# it will search the following locations in this order, until it finds a file:
4182#   /my/path/lib/dir/mylib.so
4183#   /other/path/lib/dir/mylib.so
4184#   /my/path/dir/mylib.so
4185#   /other/path/dir/mylib.so
4186#   /my/path/mylib.so
4187#   /other/path/mylib.so
4188#   /lib/dir/mylib.so              (returned as last resort)
4189sub FindLibrary {
4190  my $file = shift;
4191  my $suffix = $file;
4192
4193  # Search for the library as described above
4194  do {
4195    foreach my $prefix (@prefix_list) {
4196      my $fullpath = $prefix . $suffix;
4197      if (-e $fullpath) {
4198        return $fullpath;
4199      }
4200    }
4201  } while ($suffix =~ s|^/[^/]+/|/|);
4202  return $file;
4203}
4204
4205# Return path to library with debugging symbols.
4206# For libc libraries, the copy in /usr/lib/debug contains debugging symbols
4207sub DebuggingLibrary {
4208  my $file = shift;
4209  if ($file =~ m|^/|) {
4210      if (-f "/usr/lib/debug$file") {
4211        return "/usr/lib/debug$file";
4212      } elsif (-f "/usr/lib/debug$file.debug") {
4213        return "/usr/lib/debug$file.debug";
4214      }
4215  }
4216  return undef;
4217}
4218
4219# Parse text section header of a library using objdump
4220sub ParseTextSectionHeaderFromObjdump {
4221  my $lib = shift;
4222
4223  my $size = undef;
4224  my $vma;
4225  my $file_offset;
4226  # Get objdump output from the library file to figure out how to
4227  # map between mapped addresses and addresses in the library.
4228  my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib);
4229  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
4230  while (<OBJDUMP>) {
4231    s/\r//g;         # turn windows-looking lines into unix-looking lines
4232    # Idx Name          Size      VMA       LMA       File off  Algn
4233    #  10 .text         00104b2c  420156f0  420156f0  000156f0  2**4
4234    # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file
4235    # offset may still be 8.  But AddressSub below will still handle that.
4236    my @x = split;
4237    if (($#x >= 6) && ($x[1] eq '.text')) {
4238      $size = $x[2];
4239      $vma = $x[3];
4240      $file_offset = $x[5];
4241      last;
4242    }
4243  }
4244  close(OBJDUMP);
4245
4246  if (!defined($size)) {
4247    return undef;
4248  }
4249
4250  my $r = {};
4251  $r->{size} = $size;
4252  $r->{vma} = $vma;
4253  $r->{file_offset} = $file_offset;
4254
4255  return $r;
4256}
4257
4258# Parse text section header of a library using otool (on OS X)
4259sub ParseTextSectionHeaderFromOtool {
4260  my $lib = shift;
4261
4262  my $size = undef;
4263  my $vma = undef;
4264  my $file_offset = undef;
4265  # Get otool output from the library file to figure out how to
4266  # map between mapped addresses and addresses in the library.
4267  my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib);
4268  open(OTOOL, "$command |") || error("$command: $!\n");
4269  my $cmd = "";
4270  my $sectname = "";
4271  my $segname = "";
4272  foreach my $line (<OTOOL>) {
4273    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
4274    # Load command <#>
4275    #       cmd LC_SEGMENT
4276    # [...]
4277    # Section
4278    #   sectname __text
4279    #    segname __TEXT
4280    #       addr 0x000009f8
4281    #       size 0x00018b9e
4282    #     offset 2552
4283    #      align 2^2 (4)
4284    # We will need to strip off the leading 0x from the hex addresses,
4285    # and convert the offset into hex.
4286    if ($line =~ /Load command/) {
4287      $cmd = "";
4288      $sectname = "";
4289      $segname = "";
4290    } elsif ($line =~ /Section/) {
4291      $sectname = "";
4292      $segname = "";
4293    } elsif ($line =~ /cmd (\w+)/) {
4294      $cmd = $1;
4295    } elsif ($line =~ /sectname (\w+)/) {
4296      $sectname = $1;
4297    } elsif ($line =~ /segname (\w+)/) {
4298      $segname = $1;
4299    } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") &&
4300               $sectname eq "__text" &&
4301               $segname eq "__TEXT")) {
4302      next;
4303    } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) {
4304      $vma = $1;
4305    } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) {
4306      $size = $1;
4307    } elsif ($line =~ /\boffset ([0-9]+)/) {
4308      $file_offset = sprintf("%016x", $1);
4309    }
4310    if (defined($vma) && defined($size) && defined($file_offset)) {
4311      last;
4312    }
4313  }
4314  close(OTOOL);
4315
4316  if (!defined($vma) || !defined($size) || !defined($file_offset)) {
4317     return undef;
4318  }
4319
4320  my $r = {};
4321  $r->{size} = $size;
4322  $r->{vma} = $vma;
4323  $r->{file_offset} = $file_offset;
4324
4325  return $r;
4326}
4327
4328sub ParseTextSectionHeader {
4329  # obj_tool_map("otool") is only defined if we're in a Mach-O environment
4330  if (defined($obj_tool_map{"otool"})) {
4331    my $r = ParseTextSectionHeaderFromOtool(@_);
4332    if (defined($r)){
4333      return $r;
4334    }
4335  }
4336  # If otool doesn't work, or we don't have it, fall back to objdump
4337  return ParseTextSectionHeaderFromObjdump(@_);
4338}
4339
4340# Split /proc/pid/maps dump into a list of libraries
4341sub ParseLibraries {
4342  return if $main::use_symbol_page;  # We don't need libraries info.
4343  my $prog = shift;
4344  my $map = shift;
4345  my $pcs = shift;
4346
4347  my $result = [];
4348  my $h = "[a-f0-9]+";
4349  my $zero_offset = HexExtend("0");
4350
4351  my $buildvar = "";
4352  foreach my $l (split("\n", $map)) {
4353    if ($l =~ m/^\s*build=(.*)$/) {
4354      $buildvar = $1;
4355    }
4356
4357    my $start;
4358    my $finish;
4359    my $offset;
4360    my $lib;
4361    if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) {
4362      # Full line from /proc/self/maps.  Example:
4363      #   40000000-40015000 r-xp 00000000 03:01 12845071   /lib/ld-2.3.2.so
4364      $start = HexExtend($1);
4365      $finish = HexExtend($2);
4366      $offset = HexExtend($3);
4367      $lib = $4;
4368      $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
4369    } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
4370      # Cooked line from DumpAddressMap.  Example:
4371      #   40000000-40015000: /lib/ld-2.3.2.so
4372      $start = HexExtend($1);
4373      $finish = HexExtend($2);
4374      $offset = $zero_offset;
4375      $lib = $3;
4376    }
4377    # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in
4378    # function procfs_doprocmap (sys/fs/procfs/procfs_map.c)
4379    #
4380    # Example:
4381    # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s
4382    # o.1 NCH -1
4383    elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) {
4384      $start = HexExtend($1);
4385      $finish = HexExtend($2);
4386      $offset = $zero_offset;
4387      $lib = FindLibrary($5);
4388
4389    } else {
4390      next;
4391    }
4392
4393    # Expand "$build" variable if available
4394    $lib =~ s/\$build\b/$buildvar/g;
4395
4396    $lib = FindLibrary($lib);
4397
4398    # Check for pre-relocated libraries, which use pre-relocated symbol tables
4399    # and thus require adjusting the offset that we'll use to translate
4400    # VM addresses into symbol table addresses.
4401    # Only do this if we're not going to fetch the symbol table from a
4402    # debugging copy of the library.
4403    if (!DebuggingLibrary($lib)) {
4404      my $text = ParseTextSectionHeader($lib);
4405      if (defined($text)) {
4406         my $vma_offset = AddressSub($text->{vma}, $text->{file_offset});
4407         $offset = AddressAdd($offset, $vma_offset);
4408      }
4409    }
4410
4411    if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; }
4412    push(@{$result}, [$lib, $start, $finish, $offset]);
4413  }
4414
4415  # Append special entry for additional library (not relocated)
4416  if ($main::opt_lib ne "") {
4417    my $text = ParseTextSectionHeader($main::opt_lib);
4418    if (defined($text)) {
4419       my $start = $text->{vma};
4420       my $finish = AddressAdd($start, $text->{size});
4421
4422       push(@{$result}, [$main::opt_lib, $start, $finish, $start]);
4423    }
4424  }
4425
4426  # Append special entry for the main program.  This covers
4427  # 0..max_pc_value_seen, so that we assume pc values not found in one
4428  # of the library ranges will be treated as coming from the main
4429  # program binary.
4430  my $min_pc = HexExtend("0");
4431  my $max_pc = $min_pc;          # find the maximal PC value in any sample
4432  foreach my $pc (keys(%{$pcs})) {
4433    if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); }
4434  }
4435  push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]);
4436
4437  return $result;
4438}
4439
4440# Add two hex addresses of length $address_length.
4441# Run pprof --test for unit test if this is changed.
4442sub AddressAdd {
4443  my $addr1 = shift;
4444  my $addr2 = shift;
4445  my $sum;
4446
4447  if ($address_length == 8) {
4448    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
4449    $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16);
4450    return sprintf("%08x", $sum);
4451
4452  } else {
4453    # Do the addition in 7-nibble chunks to trivialize carry handling.
4454
4455    if ($main::opt_debug and $main::opt_test) {
4456      print STDERR "AddressAdd $addr1 + $addr2 = ";
4457    }
4458
4459    my $a1 = substr($addr1,-7);
4460    $addr1 = substr($addr1,0,-7);
4461    my $a2 = substr($addr2,-7);
4462    $addr2 = substr($addr2,0,-7);
4463    $sum = hex($a1) + hex($a2);
4464    my $c = 0;
4465    if ($sum > 0xfffffff) {
4466      $c = 1;
4467      $sum -= 0x10000000;
4468    }
4469    my $r = sprintf("%07x", $sum);
4470
4471    $a1 = substr($addr1,-7);
4472    $addr1 = substr($addr1,0,-7);
4473    $a2 = substr($addr2,-7);
4474    $addr2 = substr($addr2,0,-7);
4475    $sum = hex($a1) + hex($a2) + $c;
4476    $c = 0;
4477    if ($sum > 0xfffffff) {
4478      $c = 1;
4479      $sum -= 0x10000000;
4480    }
4481    $r = sprintf("%07x", $sum) . $r;
4482
4483    $sum = hex($addr1) + hex($addr2) + $c;
4484    if ($sum > 0xff) { $sum -= 0x100; }
4485    $r = sprintf("%02x", $sum) . $r;
4486
4487    if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; }
4488
4489    return $r;
4490  }
4491}
4492
4493
4494# Subtract two hex addresses of length $address_length.
4495# Run pprof --test for unit test if this is changed.
4496sub AddressSub {
4497  my $addr1 = shift;
4498  my $addr2 = shift;
4499  my $diff;
4500
4501  if ($address_length == 8) {
4502    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
4503    $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16);
4504    return sprintf("%08x", $diff);
4505
4506  } else {
4507    # Do the addition in 7-nibble chunks to trivialize borrow handling.
4508    # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; }
4509
4510    my $a1 = hex(substr($addr1,-7));
4511    $addr1 = substr($addr1,0,-7);
4512    my $a2 = hex(substr($addr2,-7));
4513    $addr2 = substr($addr2,0,-7);
4514    my $b = 0;
4515    if ($a2 > $a1) {
4516      $b = 1;
4517      $a1 += 0x10000000;
4518    }
4519    $diff = $a1 - $a2;
4520    my $r = sprintf("%07x", $diff);
4521
4522    $a1 = hex(substr($addr1,-7));
4523    $addr1 = substr($addr1,0,-7);
4524    $a2 = hex(substr($addr2,-7)) + $b;
4525    $addr2 = substr($addr2,0,-7);
4526    $b = 0;
4527    if ($a2 > $a1) {
4528      $b = 1;
4529      $a1 += 0x10000000;
4530    }
4531    $diff = $a1 - $a2;
4532    $r = sprintf("%07x", $diff) . $r;
4533
4534    $a1 = hex($addr1);
4535    $a2 = hex($addr2) + $b;
4536    if ($a2 > $a1) { $a1 += 0x100; }
4537    $diff = $a1 - $a2;
4538    $r = sprintf("%02x", $diff) . $r;
4539
4540    # if ($main::opt_debug) { print STDERR "$r\n"; }
4541
4542    return $r;
4543  }
4544}
4545
4546# Increment a hex addresses of length $address_length.
4547# Run pprof --test for unit test if this is changed.
4548sub AddressInc {
4549  my $addr = shift;
4550  my $sum;
4551
4552  if ($address_length == 8) {
4553    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
4554    $sum = (hex($addr)+1) % (0x10000000 * 16);
4555    return sprintf("%08x", $sum);
4556
4557  } else {
4558    # Do the addition in 7-nibble chunks to trivialize carry handling.
4559    # We are always doing this to step through the addresses in a function,
4560    # and will almost never overflow the first chunk, so we check for this
4561    # case and exit early.
4562
4563    # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; }
4564
4565    my $a1 = substr($addr,-7);
4566    $addr = substr($addr,0,-7);
4567    $sum = hex($a1) + 1;
4568    my $r = sprintf("%07x", $sum);
4569    if ($sum <= 0xfffffff) {
4570      $r = $addr . $r;
4571      # if ($main::opt_debug) { print STDERR "$r\n"; }
4572      return HexExtend($r);
4573    } else {
4574      $r = "0000000";
4575    }
4576
4577    $a1 = substr($addr,-7);
4578    $addr = substr($addr,0,-7);
4579    $sum = hex($a1) + 1;
4580    $r = sprintf("%07x", $sum) . $r;
4581    if ($sum <= 0xfffffff) {
4582      $r = $addr . $r;
4583      # if ($main::opt_debug) { print STDERR "$r\n"; }
4584      return HexExtend($r);
4585    } else {
4586      $r = "00000000000000";
4587    }
4588
4589    $sum = hex($addr) + 1;
4590    if ($sum > 0xff) { $sum -= 0x100; }
4591    $r = sprintf("%02x", $sum) . $r;
4592
4593    # if ($main::opt_debug) { print STDERR "$r\n"; }
4594    return $r;
4595  }
4596}
4597
4598# Extract symbols for all PC values found in profile
4599sub ExtractSymbols {
4600  my $libs = shift;
4601  my $pcset = shift;
4602
4603  my $symbols = {};
4604
4605  # Map each PC value to the containing library.  To make this faster,
4606  # we sort libraries by their starting pc value (highest first), and
4607  # advance through the libraries as we advance the pc.  Sometimes the
4608  # addresses of libraries may overlap with the addresses of the main
4609  # binary, so to make sure the libraries 'win', we iterate over the
4610  # libraries in reverse order (which assumes the binary doesn't start
4611  # in the middle of a library, which seems a fair assumption).
4612  my @pcs = (sort { $a cmp $b } keys(%{$pcset}));  # pcset is 0-extended strings
4613  foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
4614    my $libname = $lib->[0];
4615    my $start = $lib->[1];
4616    my $finish = $lib->[2];
4617    my $offset = $lib->[3];
4618
4619    # Use debug library if it exists
4620    my $debug_libname = DebuggingLibrary($libname);
4621    if ($debug_libname) {
4622        $libname = $debug_libname;
4623    }
4624
4625    # Get list of pcs that belong in this library.
4626    my $contained = [];
4627    my ($start_pc_index, $finish_pc_index);
4628    # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
4629    for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
4630         $finish_pc_index--) {
4631      last if $pcs[$finish_pc_index - 1] le $finish;
4632    }
4633    # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
4634    for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
4635         $start_pc_index--) {
4636      last if $pcs[$start_pc_index - 1] lt $start;
4637    }
4638    # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
4639    # in case there are overlaps in libraries and the main binary.
4640    @{$contained} = splice(@pcs, $start_pc_index,
4641                           $finish_pc_index - $start_pc_index);
4642    # Map to symbols
4643    MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
4644  }
4645
4646  return $symbols;
4647}
4648
4649# Map list of PC values to symbols for a given image
4650sub MapToSymbols {
4651  my $image = shift;
4652  my $offset = shift;
4653  my $pclist = shift;
4654  my $symbols = shift;
4655
4656  my $debug = 0;
4657
4658  # Ignore empty binaries
4659  if ($#{$pclist} < 0) { return; }
4660
4661  # Figure out the addr2line command to use
4662  my $addr2line = $obj_tool_map{"addr2line"};
4663  my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image);
4664  if (exists $obj_tool_map{"addr2line_pdb"}) {
4665    $addr2line = $obj_tool_map{"addr2line_pdb"};
4666    $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image);
4667  }
4668
4669  # If "addr2line" isn't installed on the system at all, just use
4670  # nm to get what info we can (function names, but not line numbers).
4671  if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) {
4672    MapSymbolsWithNM($image, $offset, $pclist, $symbols);
4673    return;
4674  }
4675
4676  # "addr2line -i" can produce a variable number of lines per input
4677  # address, with no separator that allows us to tell when data for
4678  # the next address starts.  So we find the address for a special
4679  # symbol (_fini) and interleave this address between all real
4680  # addresses passed to addr2line.  The name of this special symbol
4681  # can then be used as a separator.
4682  $sep_address = undef;  # May be filled in by MapSymbolsWithNM()
4683  my $nm_symbols = {};
4684  MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
4685  if (defined($sep_address)) {
4686    # Only add " -i" to addr2line if the binary supports it.
4687    # addr2line --help returns 0, but not if it sees an unknown flag first.
4688    if (system("$cmd -i --help >$dev_null 2>&1") == 0) {
4689      $cmd .= " -i";
4690    } else {
4691      $sep_address = undef;   # no need for sep_address if we don't support -i
4692    }
4693  }
4694
4695  # Make file with all PC values with intervening 'sep_address' so
4696  # that we can reliably detect the end of inlined function list
4697  open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
4698  if ($debug) { print("---- $image ---\n"); }
4699  for (my $i = 0; $i <= $#{$pclist}; $i++) {
4700    # addr2line always reads hex addresses, and does not need '0x' prefix.
4701    if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); }
4702    printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
4703    if (defined($sep_address)) {
4704      printf ADDRESSES ("%s\n", $sep_address);
4705    }
4706  }
4707  close(ADDRESSES);
4708  if ($debug) {
4709    print("----\n");
4710    system("cat", $main::tmpfile_sym);
4711    print("----\n");
4712    system("$cmd < " . ShellEscape($main::tmpfile_sym));
4713    print("----\n");
4714  }
4715
4716  open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |")
4717      || error("$cmd: $!\n");
4718  my $count = 0;   # Index in pclist
4719  while (<SYMBOLS>) {
4720    # Read fullfunction and filelineinfo from next pair of lines
4721    s/\r?\n$//g;
4722    my $fullfunction = $_;
4723    $_ = <SYMBOLS>;
4724    s/\r?\n$//g;
4725    my $filelinenum = $_;
4726
4727    if (defined($sep_address) && $fullfunction eq $sep_symbol) {
4728      # Terminating marker for data for this address
4729      $count++;
4730      next;
4731    }
4732
4733    $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths
4734
4735    my $pcstr = $pclist->[$count];
4736    my $function = ShortFunctionName($fullfunction);
4737    my $nms = $nm_symbols->{$pcstr};
4738    if (defined($nms)) {
4739      if ($fullfunction eq '??') {
4740        # nm found a symbol for us.
4741        $function = $nms->[0];
4742        $fullfunction = $nms->[2];
4743      } else {
4744	# MapSymbolsWithNM tags each routine with its starting address,
4745	# useful in case the image has multiple occurrences of this
4746	# routine.  (It uses a syntax that resembles template paramters,
4747	# that are automatically stripped out by ShortFunctionName().)
4748	# addr2line does not provide the same information.  So we check
4749	# if nm disambiguated our symbol, and if so take the annotated
4750	# (nm) version of the routine-name.  TODO(csilvers): this won't
4751	# catch overloaded, inlined symbols, which nm doesn't see.
4752	# Better would be to do a check similar to nm's, in this fn.
4753	if ($nms->[2] =~ m/^\Q$function\E/) {  # sanity check it's the right fn
4754	  $function = $nms->[0];
4755	  $fullfunction = $nms->[2];
4756	}
4757      }
4758    }
4759    
4760    # Prepend to accumulated symbols for pcstr
4761    # (so that caller comes before callee)
4762    my $sym = $symbols->{$pcstr};
4763    if (!defined($sym)) {
4764      $sym = [];
4765      $symbols->{$pcstr} = $sym;
4766    }
4767    unshift(@{$sym}, $function, $filelinenum, $fullfunction);
4768    if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
4769    if (!defined($sep_address)) {
4770      # Inlining is off, so this entry ends immediately
4771      $count++;
4772    }
4773  }
4774  close(SYMBOLS);
4775}
4776
4777# Use nm to map the list of referenced PCs to symbols.  Return true iff we
4778# are able to read procedure information via nm.
4779sub MapSymbolsWithNM {
4780  my $image = shift;
4781  my $offset = shift;
4782  my $pclist = shift;
4783  my $symbols = shift;
4784
4785  # Get nm output sorted by increasing address
4786  my $symbol_table = GetProcedureBoundaries($image, ".");
4787  if (!%{$symbol_table}) {
4788    return 0;
4789  }
4790  # Start addresses are already the right length (8 or 16 hex digits).
4791  my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] }
4792    keys(%{$symbol_table});
4793
4794  if ($#names < 0) {
4795    # No symbols: just use addresses
4796    foreach my $pc (@{$pclist}) {
4797      my $pcstr = "0x" . $pc;
4798      $symbols->{$pc} = [$pcstr, "?", $pcstr];
4799    }
4800    return 0;
4801  }
4802
4803  # Sort addresses so we can do a join against nm output
4804  my $index = 0;
4805  my $fullname = $names[0];
4806  my $name = ShortFunctionName($fullname);
4807  foreach my $pc (sort { $a cmp $b } @{$pclist}) {
4808    # Adjust for mapped offset
4809    my $mpc = AddressSub($pc, $offset);
4810    while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){
4811      $index++;
4812      $fullname = $names[$index];
4813      $name = ShortFunctionName($fullname);
4814    }
4815    if ($mpc lt $symbol_table->{$fullname}->[1]) {
4816      $symbols->{$pc} = [$name, "?", $fullname];
4817    } else {
4818      my $pcstr = "0x" . $pc;
4819      $symbols->{$pc} = [$pcstr, "?", $pcstr];
4820    }
4821  }
4822  return 1;
4823}
4824
4825sub ShortFunctionName {
4826  my $function = shift;
4827  while ($function =~ s/\([^()]*\)(\s*const)?//g) { }   # Argument types
4828  while ($function =~ s/<[^<>]*>//g)  { }    # Remove template arguments
4829  $function =~ s/^.*\s+(\w+::)/$1/;          # Remove leading type
4830  return $function;
4831}
4832
4833# Trim overly long symbols found in disassembler output
4834sub CleanDisassembly {
4835  my $d = shift;
4836  while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax)
4837  while ($d =~ s/(\w+)<[^<>]*>/$1/g)  { }       # Remove template arguments
4838  return $d;
4839}
4840
4841# Clean file name for display
4842sub CleanFileName {
4843  my ($f) = @_;
4844  $f =~ s|^/proc/self/cwd/||;
4845  $f =~ s|^\./||;
4846  return $f;
4847}
4848
4849# Make address relative to section and clean up for display
4850sub UnparseAddress {
4851  my ($offset, $address) = @_;
4852  $address = AddressSub($address, $offset);
4853  $address =~ s/^0x//;
4854  $address =~ s/^0*//;
4855  return $address;
4856}
4857
4858##### Miscellaneous #####
4859
4860# Find the right versions of the above object tools to use.  The
4861# argument is the program file being analyzed, and should be an ELF
4862# 32-bit or ELF 64-bit executable file.  The location of the tools
4863# is determined by considering the following options in this order:
4864#   1) --tools option, if set
4865#   2) PPROF_TOOLS environment variable, if set
4866#   3) the environment
4867sub ConfigureObjTools {
4868  my $prog_file = shift;
4869
4870  # Check for the existence of $prog_file because /usr/bin/file does not
4871  # predictably return error status in prod.
4872  (-e $prog_file)  || error("$prog_file does not exist.\n");
4873
4874  my $file_type = undef;
4875  if (-e "/usr/bin/file") {
4876    # Follow symlinks (at least for systems where "file" supports that).
4877    my $escaped_prog_file = ShellEscape($prog_file);
4878    $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null ||
4879                  /usr/bin/file $escaped_prog_file`;
4880  } elsif ($^O == "MSWin32") {
4881    $file_type = "MS Windows";
4882  } else {
4883    print STDERR "WARNING: Can't determine the file type of $prog_file";
4884  }
4885
4886  if ($file_type =~ /64-bit/) {
4887    # Change $address_length to 16 if the program file is ELF 64-bit.
4888    # We can't detect this from many (most?) heap or lock contention
4889    # profiles, since the actual addresses referenced are generally in low
4890    # memory even for 64-bit programs.
4891    $address_length = 16;
4892  }
4893
4894  if ($file_type =~ /MS Windows/) {
4895    # For windows, we provide a version of nm and addr2line as part of
4896    # the opensource release, which is capable of parsing
4897    # Windows-style PDB executables.  It should live in the path, or
4898    # in the same directory as pprof.
4899    $obj_tool_map{"nm_pdb"} = "nm-pdb";
4900    $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
4901  }
4902
4903  if ($file_type =~ /Mach-O/) {
4904    # OS X uses otool to examine Mach-O files, rather than objdump.
4905    $obj_tool_map{"otool"} = "otool";
4906    $obj_tool_map{"addr2line"} = "false";  # no addr2line
4907    $obj_tool_map{"objdump"} = "false";  # no objdump
4908  }
4909
4910  # Go fill in %obj_tool_map with the pathnames to use:
4911  foreach my $tool (keys %obj_tool_map) {
4912    $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool});
4913  }
4914}
4915
4916# Returns the path of a caller-specified object tool.  If --tools or
4917# PPROF_TOOLS are specified, then returns the full path to the tool
4918# with that prefix.  Otherwise, returns the path unmodified (which
4919# means we will look for it on PATH).
4920sub ConfigureTool {
4921  my $tool = shift;
4922  my $path;
4923
4924  # --tools (or $PPROF_TOOLS) is a comma separated list, where each
4925  # item is either a) a pathname prefix, or b) a map of the form
4926  # <tool>:<path>.  First we look for an entry of type (b) for our
4927  # tool.  If one is found, we use it.  Otherwise, we consider all the
4928  # pathname prefixes in turn, until one yields an existing file.  If
4929  # none does, we use a default path.
4930  my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
4931  if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
4932    $path = $2;
4933    # TODO(csilvers): sanity-check that $path exists?  Hard if it's relative.
4934  } elsif ($tools ne '') {
4935    foreach my $prefix (split(',', $tools)) {
4936      next if ($prefix =~ /:/);    # ignore "tool:fullpath" entries in the list
4937      if (-x $prefix . $tool) {
4938        $path = $prefix . $tool;
4939        last;
4940      }
4941    }
4942    if (!$path) {
4943      error("No '$tool' found with prefix specified by " .
4944            "--tools (or \$PPROF_TOOLS) '$tools'\n");
4945    }
4946  } else {
4947    # ... otherwise use the version that exists in the same directory as
4948    # pprof.  If there's nothing there, use $PATH.
4949    $0 =~ m,[^/]*$,;     # this is everything after the last slash
4950    my $dirname = $`;    # this is everything up to and including the last slash
4951    if (-x "$dirname$tool") {
4952      $path = "$dirname$tool";
4953    } else { 
4954      $path = $tool;
4955    }
4956  }
4957  if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; }
4958  return $path;
4959}
4960
4961sub ShellEscape {
4962  my @escaped_words = ();
4963  foreach my $word (@_) {
4964    my $escaped_word = $word;
4965    if ($word =~ m![^a-zA-Z0-9/.,_=-]!) {  # check for anything not in whitelist
4966      $escaped_word =~ s/'/'\\''/;
4967      $escaped_word = "'$escaped_word'";
4968    }
4969    push(@escaped_words, $escaped_word);
4970  }
4971  return join(" ", @escaped_words);
4972}
4973
4974sub cleanup {
4975  unlink($main::tmpfile_sym);
4976  unlink(keys %main::tempnames);
4977
4978  # We leave any collected profiles in $HOME/pprof in case the user wants
4979  # to look at them later.  We print a message informing them of this.
4980  if ((scalar(@main::profile_files) > 0) &&
4981      defined($main::collected_profile)) {
4982    if (scalar(@main::profile_files) == 1) {
4983      print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
4984    }
4985    print STDERR "If you want to investigate this profile further, you can do:\n";
4986    print STDERR "\n";
4987    print STDERR "  pprof \\\n";
4988    print STDERR "    $main::prog \\\n";
4989    print STDERR "    $main::collected_profile\n";
4990    print STDERR "\n";
4991  }
4992}
4993
4994sub sighandler {
4995  cleanup();
4996  exit(1);
4997}
4998
4999sub error {
5000  my $msg = shift;
5001  print STDERR $msg;
5002  cleanup();
5003  exit(1);
5004}
5005
5006
5007# Run $nm_command and get all the resulting procedure boundaries whose
5008# names match "$regexp" and returns them in a hashtable mapping from
5009# procedure name to a two-element vector of [start address, end address]
5010sub GetProcedureBoundariesViaNm {
5011  my $escaped_nm_command = shift;    # shell-escaped
5012  my $regexp = shift;
5013
5014  my $symbol_table = {};
5015  open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n");
5016  my $last_start = "0";
5017  my $routine = "";
5018  while (<NM>) {
5019    s/\r//g;         # turn windows-looking lines into unix-looking lines
5020    if (m/^\s*([0-9a-f]+) (.) (..*)/) {
5021      my $start_val = $1;
5022      my $type = $2;
5023      my $this_routine = $3;
5024
5025      # It's possible for two symbols to share the same address, if
5026      # one is a zero-length variable (like __start_google_malloc) or
5027      # one symbol is a weak alias to another (like __libc_malloc).
5028      # In such cases, we want to ignore all values except for the
5029      # actual symbol, which in nm-speak has type "T".  The logic
5030      # below does this, though it's a bit tricky: what happens when
5031      # we have a series of lines with the same address, is the first
5032      # one gets queued up to be processed.  However, it won't
5033      # *actually* be processed until later, when we read a line with
5034      # a different address.  That means that as long as we're reading
5035      # lines with the same address, we have a chance to replace that
5036      # item in the queue, which we do whenever we see a 'T' entry --
5037      # that is, a line with type 'T'.  If we never see a 'T' entry,
5038      # we'll just go ahead and process the first entry (which never
5039      # got touched in the queue), and ignore the others.
5040      if ($start_val eq $last_start && $type =~ /t/i) {
5041        # We are the 'T' symbol at this address, replace previous symbol.
5042        $routine = $this_routine;
5043        next;
5044      } elsif ($start_val eq $last_start) {
5045        # We're not the 'T' symbol at this address, so ignore us.
5046        next;
5047      }
5048
5049      if ($this_routine eq $sep_symbol) {
5050        $sep_address = HexExtend($start_val);
5051      }
5052
5053      # Tag this routine with the starting address in case the image
5054      # has multiple occurrences of this routine.  We use a syntax
5055      # that resembles template parameters that are automatically
5056      # stripped out by ShortFunctionName()
5057      $this_routine .= "<$start_val>";
5058
5059      if (defined($routine) && $routine =~ m/$regexp/) {
5060        $symbol_table->{$routine} = [HexExtend($last_start),
5061                                     HexExtend($start_val)];
5062      }
5063      $last_start = $start_val;
5064      $routine = $this_routine;
5065    } elsif (m/^Loaded image name: (.+)/) {
5066      # The win32 nm workalike emits information about the binary it is using.
5067      if ($main::opt_debug) { print STDERR "Using Image $1\n"; }
5068    } elsif (m/^PDB file name: (.+)/) {
5069      # The win32 nm workalike emits information about the pdb it is using.
5070      if ($main::opt_debug) { print STDERR "Using PDB $1\n"; }
5071    }
5072  }
5073  close(NM);
5074  # Handle the last line in the nm output.  Unfortunately, we don't know
5075  # how big this last symbol is, because we don't know how big the file
5076  # is.  For now, we just give it a size of 0.
5077  # TODO(csilvers): do better here.
5078  if (defined($routine) && $routine =~ m/$regexp/) {
5079    $symbol_table->{$routine} = [HexExtend($last_start),
5080                                 HexExtend($last_start)];
5081  }
5082  return $symbol_table;
5083}
5084
5085# Gets the procedure boundaries for all routines in "$image" whose names
5086# match "$regexp" and returns them in a hashtable mapping from procedure
5087# name to a two-element vector of [start address, end address].
5088# Will return an empty map if nm is not installed or not working properly.
5089sub GetProcedureBoundaries {
5090  my $image = shift;
5091  my $regexp = shift;
5092
5093  # If $image doesn't start with /, then put ./ in front of it.  This works
5094  # around an obnoxious bug in our probing of nm -f behavior.
5095  # "nm -f $image" is supposed to fail on GNU nm, but if:
5096  #
5097  # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
5098  # b. you have a.out in your current directory (a not uncommon occurence)
5099  #
5100  # then "nm -f $image" succeeds because -f only looks at the first letter of
5101  # the argument, which looks valid because it's [BbSsPp], and then since
5102  # there's no image provided, it looks for a.out and finds it.
5103  #
5104  # This regex makes sure that $image starts with . or /, forcing the -f
5105  # parsing to fail since . and / are not valid formats.
5106  $image =~ s#^[^/]#./$&#;
5107
5108  # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
5109  my $debugging = DebuggingLibrary($image);
5110  if ($debugging) {
5111    $image = $debugging;
5112  }
5113
5114  my $nm = $obj_tool_map{"nm"};
5115  my $cppfilt = $obj_tool_map{"c++filt"};
5116
5117  # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
5118  # binary doesn't support --demangle.  In addition, for OS X we need
5119  # to use the -f flag to get 'flat' nm output (otherwise we don't sort
5120  # properly and get incorrect results).  Unfortunately, GNU nm uses -f
5121  # in an incompatible way.  So first we test whether our nm supports
5122  # --demangle and -f.
5123  my $demangle_flag = "";
5124  my $cppfilt_flag = "";
5125  my $to_devnull = ">$dev_null 2>&1";
5126  if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) {
5127    # In this mode, we do "nm --demangle <foo>"
5128    $demangle_flag = "--demangle";
5129    $cppfilt_flag = "";
5130  } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) {
5131    # In this mode, we do "nm <foo> | c++filt"
5132    $cppfilt_flag = " | " . ShellEscape($cppfilt);
5133  };
5134  my $flatten_flag = "";
5135  if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) {
5136    $flatten_flag = "-f";
5137  }
5138
5139  # Finally, in the case $imagie isn't a debug library, we try again with
5140  # -D to at least get *exported* symbols.  If we can't use --demangle,
5141  # we use c++filt instead, if it exists on this system.
5142  my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag,
5143                                 $image) . " 2>$dev_null $cppfilt_flag",
5144                     ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag,
5145                                 $image) . " 2>$dev_null $cppfilt_flag",
5146                     # 6nm is for Go binaries
5147                     ShellEscape("6nm", "$image") . " 2>$dev_null | sort",
5148                     );
5149
5150  # If the executable is an MS Windows PDB-format executable, we'll
5151  # have set up obj_tool_map("nm_pdb").  In this case, we actually
5152  # want to use both unix nm and windows-specific nm_pdb, since
5153  # PDB-format executables can apparently include dwarf .o files.
5154  if (exists $obj_tool_map{"nm_pdb"}) {
5155    push(@nm_commands,
5156         ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image)
5157         . " 2>$dev_null");
5158  }
5159
5160  foreach my $nm_command (@nm_commands) {
5161    my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp);
5162    return $symbol_table if (%{$symbol_table});
5163  }
5164  my $symbol_table = {};
5165  return $symbol_table;
5166}
5167
5168
5169# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
5170# To make them more readable, we add underscores at interesting places.
5171# This routine removes the underscores, producing the canonical representation
5172# used by pprof to represent addresses, particularly in the tested routines.
5173sub CanonicalHex {
5174  my $arg = shift;
5175  return join '', (split '_',$arg);
5176}
5177
5178
5179# Unit test for AddressAdd:
5180sub AddressAddUnitTest {
5181  my $test_data_8 = shift;
5182  my $test_data_16 = shift;
5183  my $error_count = 0;
5184  my $fail_count = 0;
5185  my $pass_count = 0;
5186  # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n";
5187
5188  # First a few 8-nibble addresses.  Note that this implementation uses
5189  # plain old arithmetic, so a quick sanity check along with verifying what
5190  # happens to overflow (we want it to wrap):
5191  $address_length = 8;
5192  foreach my $row (@{$test_data_8}) {
5193    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5194    my $sum = AddressAdd ($row->[0], $row->[1]);
5195    if ($sum ne $row->[2]) {
5196      printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
5197             $row->[0], $row->[1], $row->[2];
5198      ++$fail_count;
5199    } else {
5200      ++$pass_count;
5201    }
5202  }
5203  printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n",
5204         $pass_count, $fail_count;
5205  $error_count = $fail_count;
5206  $fail_count = 0;
5207  $pass_count = 0;
5208
5209  # Now 16-nibble addresses.
5210  $address_length = 16;
5211  foreach my $row (@{$test_data_16}) {
5212    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5213    my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
5214    my $expected = join '', (split '_',$row->[2]);
5215    if ($sum ne CanonicalHex($row->[2])) {
5216      printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
5217             $row->[0], $row->[1], $row->[2];
5218      ++$fail_count;
5219    } else {
5220      ++$pass_count;
5221    }
5222  }
5223  printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n",
5224         $pass_count, $fail_count;
5225  $error_count += $fail_count;
5226
5227  return $error_count;
5228}
5229
5230
5231# Unit test for AddressSub:
5232sub AddressSubUnitTest {
5233  my $test_data_8 = shift;
5234  my $test_data_16 = shift;
5235  my $error_count = 0;
5236  my $fail_count = 0;
5237  my $pass_count = 0;
5238  # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n";
5239
5240  # First a few 8-nibble addresses.  Note that this implementation uses
5241  # plain old arithmetic, so a quick sanity check along with verifying what
5242  # happens to overflow (we want it to wrap):
5243  $address_length = 8;
5244  foreach my $row (@{$test_data_8}) {
5245    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5246    my $sum = AddressSub ($row->[0], $row->[1]);
5247    if ($sum ne $row->[3]) {
5248      printf STDERR "ERROR: %s != %s - %s = %s\n", $sum,
5249             $row->[0], $row->[1], $row->[3];
5250      ++$fail_count;
5251    } else {
5252      ++$pass_count;
5253    }
5254  }
5255  printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n",
5256         $pass_count, $fail_count;
5257  $error_count = $fail_count;
5258  $fail_count = 0;
5259  $pass_count = 0;
5260
5261  # Now 16-nibble addresses.
5262  $address_length = 16;
5263  foreach my $row (@{$test_data_16}) {
5264    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5265    my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
5266    if ($sum ne CanonicalHex($row->[3])) {
5267      printf STDERR "ERROR: %s != %s - %s = %s\n", $sum,
5268             $row->[0], $row->[1], $row->[3];
5269      ++$fail_count;
5270    } else {
5271      ++$pass_count;
5272    }
5273  }
5274  printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n",
5275         $pass_count, $fail_count;
5276  $error_count += $fail_count;
5277
5278  return $error_count;
5279}
5280
5281
5282# Unit test for AddressInc:
5283sub AddressIncUnitTest {
5284  my $test_data_8 = shift;
5285  my $test_data_16 = shift;
5286  my $error_count = 0;
5287  my $fail_count = 0;
5288  my $pass_count = 0;
5289  # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n";
5290
5291  # First a few 8-nibble addresses.  Note that this implementation uses
5292  # plain old arithmetic, so a quick sanity check along with verifying what
5293  # happens to overflow (we want it to wrap):
5294  $address_length = 8;
5295  foreach my $row (@{$test_data_8}) {
5296    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5297    my $sum = AddressInc ($row->[0]);
5298    if ($sum ne $row->[4]) {
5299      printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum,
5300             $row->[0], $row->[4];
5301      ++$fail_count;
5302    } else {
5303      ++$pass_count;
5304    }
5305  }
5306  printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n",
5307         $pass_count, $fail_count;
5308  $error_count = $fail_count;
5309  $fail_count = 0;
5310  $pass_count = 0;
5311
5312  # Now 16-nibble addresses.
5313  $address_length = 16;
5314  foreach my $row (@{$test_data_16}) {
5315    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5316    my $sum = AddressInc (CanonicalHex($row->[0]));
5317    if ($sum ne CanonicalHex($row->[4])) {
5318      printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum,
5319             $row->[0], $row->[4];
5320      ++$fail_count;
5321    } else {
5322      ++$pass_count;
5323    }
5324  }
5325  printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n",
5326         $pass_count, $fail_count;
5327  $error_count += $fail_count;
5328
5329  return $error_count;
5330}
5331
5332
5333# Driver for unit tests.
5334# Currently just the address add/subtract/increment routines for 64-bit.
5335sub RunUnitTests {
5336  my $error_count = 0;
5337
5338  # This is a list of tuples [a, b, a+b, a-b, a+1]
5339  my $unit_test_data_8 = [
5340    [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)],
5341    [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)],
5342    [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)],
5343    [qw(00000001 ffffffff 00000000 00000002 00000002)],
5344    [qw(00000001 fffffff0 fffffff1 00000011 00000002)],
5345  ];
5346  my $unit_test_data_16 = [
5347    # The implementation handles data in 7-nibble chunks, so those are the
5348    # interesting boundaries.
5349    [qw(aaaaaaaa 50505050
5350        00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)],
5351    [qw(50505050 aaaaaaaa
5352        00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)],
5353    [qw(ffffffff aaaaaaaa
5354        00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)],
5355    [qw(00000001 ffffffff
5356        00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)],
5357    [qw(00000001 fffffff0
5358        00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)],
5359
5360    [qw(00_a00000a_aaaaaaa 50505050
5361        00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)],
5362    [qw(0f_fff0005_0505050 aaaaaaaa
5363        0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)],
5364    [qw(00_000000f_fffffff 01_800000a_aaaaaaa
5365        01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)],
5366    [qw(00_0000000_0000001 ff_fffffff_fffffff
5367        00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)],
5368    [qw(00_0000000_0000001 ff_fffffff_ffffff0
5369        ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)],
5370  ];
5371
5372  $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16);
5373  $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16);
5374  $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16);
5375  if ($error_count > 0) {
5376    print STDERR $error_count, " errors: FAILED\n";
5377  } else {
5378    print STDERR "PASS\n";
5379  }
5380  exit ($error_count);
5381}
5382