//--------------------------------------------------------------------*/
//--- Massif: a heap profiling tool.                     ms_main.c ---*/
//--------------------------------------------------------------------*/

/*
   This file is part of Massif, a Valgrind tool for profiling memory
   usage of programs.

   Copyright (C) 2003-2013 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//---------------------------------------------------------------------------
// XXX:
//---------------------------------------------------------------------------
// Todo -- nice, but less critical:
// - do a graph-drawing test
// - make file format more generic.
Obstacles:
//   - unit prefixes are not generic
//   - preset column widths for stats are not generic
//   - preset column headers are not generic
//   - "Massif arguments:" line is not generic
// - do snapshots on client requests
//   - (Michael Meeks): have an interactive way to request a dump
//     (callgrind_control-style)
//     - "profile now"
//     - "show me the extra allocations since the last snapshot"
//     - "start/stop logging" (eg. quickly skip boring bits)
// - Add ability to draw multiple graphs, eg. heap-only, stack-only, total.
//   Give each graph a title.  (try to do it generically!)
// - allow truncation of long fnnames if the exact line number is
//   identified?  [hmm, could make getting the name of alloc-fns more
//   difficult] [could dump full names to file, truncate in ms_print]
// - make --show-below-main=no work
// - Options like --alloc-fn='operator new(unsigned, std::nothrow_t const&)'
//   don't work in a .valgrindrc file or in $VALGRIND_OPTS.
//   m_commandline.c:add_args_from_string() needs to respect single quotes.
// - With --stack=yes, want to add a stack trace for detailed snapshots so
//   it's clear where/why the peak is occurring. (Mattieu Castet)  Also,
//   possibly useful even with --stack=no?
(Andi Yin)
//
// Performance:
// - To run the benchmarks:
//
//     perl perf/vg_perf --tools=massif --reps=3 perf/{heap,tinycc} massif
//     time valgrind --tool=massif --depth=100 konqueror
//
// The other benchmarks don't do much allocation, and so give similar speeds
// to Nulgrind.
//
// Timing results on 'nevermore' (njn's machine) as of r7013:
//
//   heap      0.53s  ma:12.4s (23.5x, -----)
//   tinycc    0.46s  ma: 4.9s (10.7x, -----)
//   many-xpts 0.08s  ma: 2.0s (25.0x, -----)
//   konqueror 29.6s real  0:21.0s user
//
// [Introduction of --time-unit=i as the default slowed things down by
// roughly 0--20%.]
//
// - get_XCon accounts for about 9% of konqueror startup time.  Try
//   keeping XPt children sorted by 'ip' and use binary search in get_XCon.
//   Requires factoring out binary search code from various places into a
//   VG_(bsearch) function.
//
// Todo -- low priority:
// - In each XPt, record both bytes and the number of allocations, and
//   possibly the global number of allocations.
// - (Andy Lin) Give a stack trace on detailed snapshots?
// - (Artur Wisz) add a feature to Massif to ignore any heap blocks larger
//   than a certain size!  Because: "linux's malloc allows to set a
//   MMAP_THRESHOLD value, so we set it to 4096 - all blocks above that will
//   be handled directly by the kernel, and are guaranteed to be returned to
//   the system when freed.  So we needed to profile only blocks below this
//   limit."
//
// File format working notes:

#if 0
desc: --heap-admin=foo
cmd: date
time_unit: ms
#-----------
snapshot=0
#-----------
time=0
mem_heap_B=0
mem_heap_admin_B=0
mem_stacks_B=0
heap_tree=empty
#-----------
snapshot=1
#-----------
time=353
mem_heap_B=5
mem_heap_admin_B=0
mem_stacks_B=0
heap_tree=detailed
n1: 5 (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
 n1: 5 0x27F6E0: _nl_normalize_codeset (in /lib/libc-2.3.5.so)
  n1: 5 0x279DE6: _nl_load_locale_from_archive (in /lib/libc-2.3.5.so)
   n1: 5 0x278E97: _nl_find_locale (in /lib/libc-2.3.5.so)
    n1: 5 0x278871: setlocale (in /lib/libc-2.3.5.so)
     n1: 5 0x8049821: (within /bin/date)
      n0: 5 0x26ED5E: (below main) (in /lib/libc-2.3.5.so)


n_events: n  time(ms)  total(B)  useful-heap(B)  admin-heap(B)  stacks(B)
t_events: B
n 0 0 0 0 0
n 0 0 0 0 0
t1: 5 <string...>
 t1: 6 <string...>

Ideas:
- each snapshot specifies an x-axis value and one or more y-axis values.
- can display the y-axis values separately if you like
- can completely separate connection between snapshots and trees.

Challenges:
- how to specify and scale/abbreviate units on axes?
- how to combine multiple values into the y-axis?

--------------------------------------------------------------------------------
Command:            date
Massif arguments:   --heap-admin=foo
ms_print arguments: massif.out
--------------------------------------------------------------------------------
    KB
6.472^                                                       :#
     |                                                       :#  ::  .    .
     ...
     |   ::@  :@      :@ :@:::#  ::      :    ::::
   0 +-----------------------------------@---@---@-----@--@---#-------------->ms
     0                                                                   713

Number of snapshots: 50
 Detailed snapshots: [2, 11, 13, 19, 25, 32 (peak)]
--------------------------------------------------------------------------------
  n   time(ms)  total(B)  useful-heap(B)  admin-heap(B)  stacks(B)
--------------------------------------------------------------------------------
  0          0         0               0              0         0
  1        345         5               5              0         0
  2        353         5               5              0         0
100.00% (5B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
159->100.00% (5B) 0x27F6E0: _nl_normalize_codeset (in /lib/libc-2.3.5.so) 160#endif 161 162//--------------------------------------------------------------------------- 163 164#include "pub_tool_basics.h" 165#include "pub_tool_vki.h" 166#include "pub_tool_aspacemgr.h" 167#include "pub_tool_debuginfo.h" 168#include "pub_tool_hashtable.h" 169#include "pub_tool_libcbase.h" 170#include "pub_tool_libcassert.h" 171#include "pub_tool_libcfile.h" 172#include "pub_tool_libcprint.h" 173#include "pub_tool_libcproc.h" 174#include "pub_tool_machine.h" 175#include "pub_tool_mallocfree.h" 176#include "pub_tool_options.h" 177#include "pub_tool_replacemalloc.h" 178#include "pub_tool_stacktrace.h" 179#include "pub_tool_threadstate.h" 180#include "pub_tool_tooliface.h" 181#include "pub_tool_xarray.h" 182#include "pub_tool_clientstate.h" 183#include "pub_tool_gdbserver.h" 184 185#include "pub_tool_clreq.h" // For {MALLOC,FREE}LIKE_BLOCK 186 187//------------------------------------------------------------*/ 188//--- Overview of operation ---*/ 189//------------------------------------------------------------*/ 190 191// The size of the stacks and heap is tracked. The heap is tracked in a lot 192// of detail, enough to tell how many bytes each line of code is responsible 193// for, more or less. The main data structure is a tree representing the 194// call tree beneath all the allocation functions like malloc(). 195// (Alternatively, if --pages-as-heap=yes is specified, memory is tracked at 196// the page level, and each page is treated much like a heap block. We use 197// "heap" throughout below to cover this case because the concepts are all the 198// same.) 199// 200// "Snapshots" are recordings of the memory usage. There are two basic 201// kinds: 202// - Normal: these record the current time, total memory size, total heap 203// size, heap admin size and stack size. 
204// - Detailed: these record those things in a normal snapshot, plus a very 205// detailed XTree (see below) indicating how the heap is structured. 206// 207// Snapshots are taken every so often. There are two storage classes of 208// snapshots: 209// - Temporary: Massif does a temporary snapshot every so often. The idea 210// is to always have a certain number of temporary snapshots around. So 211// we take them frequently to begin with, but decreasingly often as the 212// program continues to run. Also, we remove some old ones after a while. 213// Overall it's a kind of exponential decay thing. Most of these are 214// normal snapshots, a small fraction are detailed snapshots. 215// - Permanent: Massif takes a permanent (detailed) snapshot in some 216// circumstances. They are: 217// - Peak snapshot: When the memory usage peak is reached, it takes a 218// snapshot. It keeps this, unless the peak is subsequently exceeded, 219// in which case it will overwrite the peak snapshot. 220// - User-requested snapshots: These are done in response to client 221// requests. They are always kept. 222 223// Used for printing things when clo_verbosity > 1. 224#define VERB(verb, format, args...) 
\ 225 if (VG_(clo_verbosity) > verb) { \ 226 VG_(dmsg)("Massif: " format, ##args); \ 227 } 228 229//------------------------------------------------------------// 230//--- Statistics ---// 231//------------------------------------------------------------// 232 233// Konqueror startup, to give an idea of the numbers involved with a biggish 234// program, with default depth: 235// 236// depth=3 depth=40 237// - 310,000 allocations 238// - 300,000 frees 239// - 15,000 XPts 800,000 XPts 240// - 1,800 top-XPts 241 242static UInt n_heap_allocs = 0; 243static UInt n_heap_reallocs = 0; 244static UInt n_heap_frees = 0; 245static UInt n_ignored_heap_allocs = 0; 246static UInt n_ignored_heap_frees = 0; 247static UInt n_ignored_heap_reallocs = 0; 248static UInt n_stack_allocs = 0; 249static UInt n_stack_frees = 0; 250static UInt n_xpts = 0; 251static UInt n_xpt_init_expansions = 0; 252static UInt n_xpt_later_expansions = 0; 253static UInt n_sxpt_allocs = 0; 254static UInt n_sxpt_frees = 0; 255static UInt n_skipped_snapshots = 0; 256static UInt n_real_snapshots = 0; 257static UInt n_detailed_snapshots = 0; 258static UInt n_peak_snapshots = 0; 259static UInt n_cullings = 0; 260static UInt n_XCon_redos = 0; 261 262//------------------------------------------------------------// 263//--- Globals ---// 264//------------------------------------------------------------// 265 266// Number of guest instructions executed so far. Only used with 267// --time-unit=i. 268static Long guest_instrs_executed = 0; 269 270static SizeT heap_szB = 0; // Live heap size 271static SizeT heap_extra_szB = 0; // Live heap extra size -- slop + admin bytes 272static SizeT stacks_szB = 0; // Live stacks size 273 274// This is the total size from the current peak snapshot, or 0 if no peak 275// snapshot has been taken yet. 
276static SizeT peak_snapshot_total_szB = 0; 277 278// Incremented every time memory is allocated/deallocated, by the 279// allocated/deallocated amount; includes heap, heap-admin and stack 280// memory. An alternative to milliseconds as a unit of program "time". 281static ULong total_allocs_deallocs_szB = 0; 282 283// When running with --heap=yes --pages-as-heap=no, we don't start taking 284// snapshots until the first basic block is executed, rather than doing it in 285// ms_post_clo_init (which is the obvious spot), for two reasons. 286// - It lets us ignore stack events prior to that, because they're not 287// really proper ones and just would screw things up. 288// - Because there's still some core initialisation to do, and so there 289// would be an artificial time gap between the first and second snapshots. 290// 291// When running with --heap=yes --pages-as-heap=yes, snapshots start much 292// earlier due to new_mem_startup so this isn't relevant. 293// 294static Bool have_started_executing_code = False; 295 296//------------------------------------------------------------// 297//--- Alloc fns ---// 298//------------------------------------------------------------// 299 300static XArray* alloc_fns; 301static XArray* ignore_fns; 302 303static void init_alloc_fns(void) 304{ 305 // Create the list, and add the default elements. 306 alloc_fns = VG_(newXA)(VG_(malloc), "ms.main.iaf.1", 307 VG_(free), sizeof(HChar*)); 308 #define DO(x) { const HChar* s = x; VG_(addToXA)(alloc_fns, &s); } 309 310 // Ordered roughly according to (presumed) frequency. 311 // Nb: The C++ "operator new*" ones are overloadable. We include them 312 // always anyway, because even if they're overloaded, it would be a 313 // prodigiously stupid overloading that caused them to not allocate 314 // memory. 
315 // 316 // XXX: because we don't look at the first stack entry (unless it's a 317 // custom allocation) there's not much point to having all these alloc 318 // functions here -- they should never appear anywhere (I think?) other 319 // than the top stack entry. The only exceptions are those that in 320 // vg_replace_malloc.c are partly or fully implemented in terms of another 321 // alloc function: realloc (which uses malloc); valloc, 322 // malloc_zone_valloc, posix_memalign and memalign_common (which use 323 // memalign). 324 // 325 DO("malloc" ); 326 DO("__builtin_new" ); 327 DO("operator new(unsigned)" ); 328 DO("operator new(unsigned long)" ); 329 DO("__builtin_vec_new" ); 330 DO("operator new[](unsigned)" ); 331 DO("operator new[](unsigned long)" ); 332 DO("calloc" ); 333 DO("realloc" ); 334 DO("memalign" ); 335 DO("posix_memalign" ); 336 DO("valloc" ); 337 DO("operator new(unsigned, std::nothrow_t const&)" ); 338 DO("operator new[](unsigned, std::nothrow_t const&)" ); 339 DO("operator new(unsigned long, std::nothrow_t const&)" ); 340 DO("operator new[](unsigned long, std::nothrow_t const&)"); 341#if defined(VGO_darwin) 342 DO("malloc_zone_malloc" ); 343 DO("malloc_zone_calloc" ); 344 DO("malloc_zone_realloc" ); 345 DO("malloc_zone_memalign" ); 346 DO("malloc_zone_valloc" ); 347#endif 348} 349 350static void init_ignore_fns(void) 351{ 352 // Create the (empty) list. 353 ignore_fns = VG_(newXA)(VG_(malloc), "ms.main.iif.1", 354 VG_(free), sizeof(HChar*)); 355} 356 357// Determines if the named function is a member of the XArray. 358static Bool is_member_fn(XArray* fns, const HChar* fnname) 359{ 360 HChar** fn_ptr; 361 Int i; 362 363 // Nb: It's a linear search through the list, because we're comparing 364 // strings rather than pointers to strings. 365 // Nb: This gets called a lot. It was an OSet, but they're quite slow to 366 // iterate through so it wasn't a good choice. 
   for (i = 0; i < VG_(sizeXA)(fns); i++) {
      fn_ptr = VG_(indexXA)(fns, i);
      if (VG_STREQ(fnname, *fn_ptr))
         return True;
   }
   return False;
}


//------------------------------------------------------------//
//--- Command line args                                    ---//
//------------------------------------------------------------//

// Hard upper bound for --depth (see also MAX_OVERESTIMATE / MAX_IPS below).
#define MAX_DEPTH    200

// The unit used for the x-axis ("time"): instructions, milliseconds, or
// bytes allocated+deallocated.
typedef enum { TimeI, TimeMS, TimeB } TimeUnit;

// Returns the short label for 'time_unit' as written to the output file.
// Asserts on an unrecognised value (no fall-through return).
static const HChar* TimeUnit_to_string(TimeUnit time_unit)
{
   switch (time_unit) {
   case TimeI:  return "i";
   case TimeMS: return "ms";
   case TimeB:  return "B";
   default:     tl_assert2(0, "TimeUnit_to_string: unrecognised TimeUnit");
   }
}

static Bool   clo_heap            = True;
   // clo_heap_admin is deliberately a word-sized type.  At one point it was
   // a UInt, but this caused problems on 64-bit machines when it was
   // multiplied by a small negative number and then promoted to a
   // word-sized type -- it ended up with a value of 4.2 billion.  Sigh.
static SSizeT clo_heap_admin      = 8;
static Bool   clo_pages_as_heap   = False;
static Bool   clo_stacks          = False;
static Int    clo_depth           = 30;
static double clo_threshold       = 1.0;  // percentage
static double clo_peak_inaccuracy = 1.0;  // percentage
static Int    clo_time_unit       = TimeI;
static Int    clo_detailed_freq   = 10;
static Int    clo_max_snapshots   = 100;
static const HChar* clo_massif_out_file = "massif.out.%p";

// Every option string seen, remembered so it can be echoed into the
// "Massif arguments:" line of the output file.
static XArray* args_for_massif;

// Parses one Massif command-line option.  Returns True if 'arg' was
// recognised (by Massif or by the replacement-malloc layer), False
// otherwise.  Range-checked options abort via VG_(fmsg_bad_option).
// Nb: the VG_*_CLO macros are conditions usable as `if VG_..._CLO(...)`,
// hence the unusual brace-less style below.
static Bool ms_process_cmd_line_option(const HChar* arg)
{
   const HChar* tmp_str;

   // Remember the arg for later use.
   VG_(addToXA)(args_for_massif, &arg);

   if      VG_BOOL_CLO(arg, "--heap",           clo_heap)   {}
   else if VG_BINT_CLO(arg, "--heap-admin",     clo_heap_admin, 0, 1024) {}

   else if VG_BOOL_CLO(arg, "--stacks",         clo_stacks) {}

   else if VG_BOOL_CLO(arg, "--pages-as-heap",  clo_pages_as_heap) {}

   else if VG_BINT_CLO(arg, "--depth",          clo_depth, 1, MAX_DEPTH) {}

   else if VG_STR_CLO(arg, "--alloc-fn",        tmp_str) {
      VG_(addToXA)(alloc_fns, &tmp_str);
   }
   else if VG_STR_CLO(arg, "--ignore-fn",       tmp_str) {
      VG_(addToXA)(ignore_fns, &tmp_str);
   }

   else if VG_DBL_CLO(arg, "--threshold",  clo_threshold) {
      if (clo_threshold < 0 || clo_threshold > 100) {
         VG_(fmsg_bad_option)(arg,
            "--threshold must be between 0.0 and 100.0\n");
      }
   }

   else if VG_DBL_CLO(arg, "--peak-inaccuracy", clo_peak_inaccuracy) {}

   else if VG_XACT_CLO(arg, "--time-unit=i",    clo_time_unit, TimeI)  {}
   else if VG_XACT_CLO(arg, "--time-unit=ms",   clo_time_unit, TimeMS) {}
   else if VG_XACT_CLO(arg, "--time-unit=B",    clo_time_unit, TimeB)  {}

   else if VG_BINT_CLO(arg, "--detailed-freq",  clo_detailed_freq, 1, 1000000) {}

   else if VG_BINT_CLO(arg, "--max-snapshots",  clo_max_snapshots, 10, 1000) {}

   else if VG_STR_CLO(arg, "--massif-out-file", clo_massif_out_file) {}

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}

// Prints Massif's option summary for --help.
static void ms_print_usage(void)
{
   VG_(printf)(
"    --heap=no|yes             profile heap blocks [yes]\n"
"    --heap-admin=<size>       average admin bytes per heap block;\n"
"                              ignored if --heap=no [8]\n"
"    --stacks=no|yes           profile stack(s) [no]\n"
"    --pages-as-heap=no|yes    profile memory at the page level [no]\n"
"    --depth=<number>          depth of contexts [30]\n"
"    --alloc-fn=<name>         specify <name> as an alloc function [empty]\n"
"    --ignore-fn=<name>        ignore heap allocations within <name> [empty]\n"
"    --threshold=<m.n>         significance threshold, as a percentage [1.0]\n"
"    --peak-inaccuracy=<m.n>   maximum peak inaccuracy, as a percentage [1.0]\n"
"    --time-unit=i|ms|B        time unit: instructions executed, milliseconds\n"
"                              or heap bytes alloc'd/dealloc'd [i]\n"
"    --detailed-freq=<N>       every Nth snapshot should be detailed [10]\n"
"    --max-snapshots=<N>       maximum number of snapshots recorded [100]\n"
"    --massif-out-file=<file>  output file name [massif.out.%%p]\n"
   );
}

// Printed for --help-debug; Massif has no debug-only options.
static void ms_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}


//------------------------------------------------------------//
//--- XPts, XTrees and XCons                               ---//
//------------------------------------------------------------//

// An XPt represents an "execution point", ie. a code address.  Each XPt is
// part of a tree of XPts (an "execution tree", or "XTree").  The details of
// the heap are represented by a single XTree.
//
// The root of the tree is 'alloc_xpt', which represents all allocation
// functions, eg:
// - malloc/calloc/realloc/memalign/new/new[];
// - user-specified allocation functions (using --alloc-fn);
// - custom allocation (MALLOCLIKE) points
// It's a bit of a fake XPt (ie. its 'ip' is zero), and is only used because
// it makes the code simpler.
//
// Any child of 'alloc_xpt' is called a "top-XPt".  The XPts at the bottom
// of an XTree (leaf nodes) are "bottom-XPTs".
//
// Each path from a top-XPt to a bottom-XPt through an XTree gives an
// execution context ("XCon"), ie. a stack trace.  (And sub-paths represent
// stack sub-traces.)  The number of XCons in an XTree is equal to the
// number of bottom-XPTs in that XTree.
//
//      alloc_xpt       XTrees are bi-directional.
//        |   ^
//        v   |
//     > parent <       Example: if child1() calls parent() and child2()
//    /    |     \      also calls parent(), and parent() calls malloc(),
//   |    / \     |     the XTree will look like this.
//   |   v   v    |
//    child1 child2
//
// (Note that malformed stack traces can lead to difficulties.  See the
// comment at the bottom of get_XCon.)
//
// XTrees and XPts are mirrored by SXTrees and SXPts, where the 'S' is short
// for "saved".  When the XTree is duplicated for a snapshot, we duplicate
// it as an SXTree, which is similar but omits some things it does not need,
// and aggregates up insignificant nodes.  This is important as an SXTree is
// typically much smaller than an XTree.

// XXX: make XPt and SXPt extensible arrays, to avoid having to do two
// allocations per Pt.

typedef struct _XPt XPt;
struct _XPt {
   Addr  ip;              // code address

   // Bottom-XPts: space for the precise context.
   // Other XPts: space of all the descendent bottom-XPts.
   // Nb: this value goes up and down as the program executes.
   SizeT szB;

   XPt*  parent;           // pointer to parent XPt

   // Children.
   // n_children and max_children are 32-bit integers.  16-bit integers
   // are too small -- a very big program might have more than 65536
   // allocation points (ie. top-XPts) -- Konqueror starting up has 1800.
   UInt  n_children;       // number of children
   UInt  max_children;     // capacity of children array
   XPt** children;         // pointers to children XPts
};

typedef
   enum {
      SigSXPt,
      InsigSXPt
   }
   SXPtTag;

typedef struct _SXPt SXPt;
struct _SXPt {
   SXPtTag tag;
   SizeT szB;              // memory size for the node, be it Sig or Insig
   union {
      // An SXPt representing a single significant code location.  Much like
      // an XPt, minus the fields that aren't necessary.
      struct {
         Addr   ip;
         UInt   n_children;
         SXPt** children;
      }
      Sig;

      // An SXPt representing one or more code locations, all below the
      // significance threshold.
      struct {
         Int   n_xpts;     // number of aggregated XPts
      }
      Insig;
   };
};

// Fake XPt representing all allocation functions like malloc().  Acts as
// parent node to all top-XPts.
static XPt* alloc_xpt;

// Allocates and initialises a childless XPt for code address 'ip' under
// 'parent'.  XPts live for the whole run (never freed).
static XPt* new_XPt(Addr ip, XPt* parent)
{
   // XPts are never freed, so we can use VG_(perm_malloc) to allocate them.
   // Note that we cannot use VG_(perm_malloc) for the 'children' array, because
   // that needs to be resizable.
   XPt* xpt          = VG_(perm_malloc)(sizeof(XPt), vg_alignof(XPt));
   xpt->ip           = ip;
   xpt->szB          = 0;
   xpt->parent       = parent;

   // We don't initially allocate any space for children.  We let that
   // happen on demand.  Many XPts (ie. all the bottom-XPts) don't have any
   // children anyway.
   xpt->n_children   = 0;
   xpt->max_children = 0;
   xpt->children     = NULL;

   // Update statistics
   n_xpts++;

   return xpt;
}

// Appends 'child' to 'parent's children array, growing it geometrically
// (4, then doubling) when full.
static void add_child_xpt(XPt* parent, XPt* child)
{
   // Expand 'children' if necessary.
   tl_assert(parent->n_children <= parent->max_children);
   if (parent->n_children == parent->max_children) {
      if (0 == parent->max_children) {
         parent->max_children = 4;
         parent->children = VG_(malloc)( "ms.main.acx.1",
                                         parent->max_children * sizeof(XPt*) );
         n_xpt_init_expansions++;
      } else {
         parent->max_children *= 2;    // Double size
         parent->children = VG_(realloc)( "ms.main.acx.2",
                                          parent->children,
                                          parent->max_children * sizeof(XPt*) );
         n_xpt_later_expansions++;
      }
   }

   // Insert new child XPt in parent's children list.
   parent->children[ parent->n_children++ ] = child;
}

// Reverse comparison for a reverse sort -- biggest to smallest.
static Int SXPt_revcmp_szB(const void* n1, const void* n2)
{
   const SXPt* sxpt1 = *(const SXPt *const *)n1;
   const SXPt* sxpt2 = *(const SXPt *const *)n2;
   return ( sxpt1->szB < sxpt2->szB ?  1
          : sxpt1->szB > sxpt2->szB ? -1
          :                            0);
}

//------------------------------------------------------------//
//--- XTree Operations                                     ---//
//------------------------------------------------------------//

// Duplicates an XTree as an SXTree.  Children whose size falls below
// clo_threshold percent of 'total_szB' are aggregated into one Insig node.
static SXPt* dup_XTree(XPt* xpt, SizeT total_szB)
{
   Int  i, n_sig_children, n_insig_children, n_child_sxpts;
   SizeT sig_child_threshold_szB;
   SXPt* sxpt;

   // Number of XPt children  Action for SXPT
   // ----------------------  ---------------
   // 0 sig, 0 insig          alloc 0 children
   // N sig, 0 insig          alloc N children, dup all
   // N sig, M insig          alloc N+1, dup first N, aggregate remaining M
   // 0 sig, M insig          alloc 1, aggregate M

   // Work out how big a child must be to be significant.  If the current
   // total_szB is zero, then we set it to 1, which means everything will be
   // judged insignificant -- this is sensible, as there's no point showing
   // any detail for this case.  Unless they used --threshold=0, in which
   // case we show them everything because that's what they asked for.
   //
   // Nb: We do this once now, rather than once per child, because if we do
   // that the cost of all the divisions adds up to something significant.
   if (0 == total_szB && 0 != clo_threshold) {
      sig_child_threshold_szB = 1;
   } else {
      sig_child_threshold_szB = (SizeT)((total_szB * clo_threshold) / 100);
   }

   // How many children are significant?  And do we need an aggregate SXPt?
   n_sig_children = 0;
   for (i = 0; i < xpt->n_children; i++) {
      if (xpt->children[i]->szB >= sig_child_threshold_szB) {
         n_sig_children++;
      }
   }
   n_insig_children = xpt->n_children - n_sig_children;
   n_child_sxpts = n_sig_children + ( n_insig_children > 0 ? 1 : 0 );

   // Duplicate the XPt.
   sxpt                 = VG_(malloc)("ms.main.dX.1", sizeof(SXPt));
   n_sxpt_allocs++;
   sxpt->tag            = SigSXPt;
   sxpt->szB            = xpt->szB;
   sxpt->Sig.ip         = xpt->ip;
   sxpt->Sig.n_children = n_child_sxpts;

   // Create the SXPt's children.
   if (n_child_sxpts > 0) {
      Int j;
      SizeT sig_children_szB = 0, insig_children_szB = 0;
      sxpt->Sig.children = VG_(malloc)("ms.main.dX.2",
                                       n_child_sxpts * sizeof(SXPt*));

      // Duplicate the significant children.  (Nb: sig_children_szB +
      // insig_children_szB doesn't necessarily equal xpt->szB.)
      j = 0;
      for (i = 0; i < xpt->n_children; i++) {
         if (xpt->children[i]->szB >= sig_child_threshold_szB) {
            sxpt->Sig.children[j++] = dup_XTree(xpt->children[i], total_szB);
            sig_children_szB += xpt->children[i]->szB;
         } else {
            insig_children_szB += xpt->children[i]->szB;
         }
      }

      // Create the SXPt for the insignificant children, if any, and put it
      // in the last child entry.
      if (n_insig_children > 0) {
         // Nb: We 'n_sxpt_allocs' here because creating an Insig SXPt
         // doesn't involve a call to dup_XTree().
         SXPt* insig_sxpt = VG_(malloc)("ms.main.dX.3", sizeof(SXPt));
         n_sxpt_allocs++;
         insig_sxpt->tag = InsigSXPt;
         insig_sxpt->szB = insig_children_szB;
         insig_sxpt->Insig.n_xpts = n_insig_children;
         sxpt->Sig.children[n_sig_children] = insig_sxpt;
      }
   } else {
      sxpt->Sig.children = NULL;
   }

   return sxpt;
}

// Recursively frees an SXTree: all descendants, each node's children
// array, then the node itself.  (Insig nodes own no child storage.)
static void free_SXTree(SXPt* sxpt)
{
   Int  i;
   tl_assert(sxpt != NULL);

   switch (sxpt->tag) {
    case SigSXPt:
      // Free all children SXPts, then the children array.
      for (i = 0; i < sxpt->Sig.n_children; i++) {
         free_SXTree(sxpt->Sig.children[i]);
         sxpt->Sig.children[i] = NULL;
      }
      VG_(free)(sxpt->Sig.children);  sxpt->Sig.children = NULL;
      break;

    case InsigSXPt:
      break;

    default: tl_assert2(0, "free_SXTree: unknown SXPt tag");
   }

   // Free the SXPt itself.
   VG_(free)(sxpt);  sxpt = NULL;
   n_sxpt_frees++;
}

// Sanity checking:  we periodically check the heap XTree with
// ms_expensive_sanity_check.
static void sanity_check_XTree(XPt* xpt, XPt* parent)
{
   tl_assert(xpt != NULL);

   // Check back-pointer.
   tl_assert2(xpt->parent == parent,
      "xpt->parent = %p, parent = %p\n", xpt->parent, parent);

   // Check children counts look sane.
   tl_assert(xpt->n_children <= xpt->max_children);

   // Unfortunately, xpt's size is not necessarily equal to the sum of xpt's
   // children's sizes.  See comment at the bottom of get_XCon.
}

// Sanity checking:  we check SXTrees (which are in snapshots) after
// snapshots are created, before they are deleted, and before they are
// printed.
static void sanity_check_SXTree(SXPt* sxpt)
{
   Int i;

   tl_assert(sxpt != NULL);

   // Check the sum of any children szBs equals the SXPt's szB.  Check the
   // children at the same time.
   switch (sxpt->tag) {
    case SigSXPt: {
      if (sxpt->Sig.n_children > 0) {
         for (i = 0; i < sxpt->Sig.n_children; i++) {
            sanity_check_SXTree(sxpt->Sig.children[i]);
         }
      }
      break;
    }
    case InsigSXPt:
      break;         // do nothing

    default: tl_assert2(0, "sanity_check_SXTree: unknown SXPt tag");
   }
}


//------------------------------------------------------------//
//--- XCon Operations                                      ---//
//------------------------------------------------------------//

// This is the limit on the number of removed alloc-fns that can be in a
// single XCon.
#define MAX_OVERESTIMATE   50
#define MAX_IPS            (MAX_DEPTH + MAX_OVERESTIMATE)

// This is used for various buffers which can hold function names/IP
// description.  Some C++ names can get really long so 1024 isn't big
// enough.
#define BUF_LEN   2048

// Determine if the given IP belongs to a function that should be ignored.
static Bool fn_should_be_ignored(Addr ip)
{
   static HChar buf[BUF_LEN];
   // If the function name can't be found for 'ip', the IP is not ignored.
   return
      ( VG_(get_fnname)(ip, buf, BUF_LEN) && is_member_fn(ignore_fns, buf)
      ? True : False );
}

// Get the stack trace for an XCon, filtering out uninteresting entries:
// alloc-fns and entries above alloc-fns, and entries below main-or-below-main.
//   Eg:       alloc-fn1 / alloc-fn2 / a / b / main / (below main) / c
//   becomes:  a / b / main
// Nb: it's possible to end up with an empty trace, eg. if 'main' is marked
// as an alloc-fn.  This is ok.
static
Int get_IPs( ThreadId tid, Bool exclude_first_entry, Addr ips[])
{
   static HChar buf[BUF_LEN];
   Int n_ips, i, n_alloc_fns_removed;
   Int overestimate;
   Bool redo;

   // We ask for a few more IPs than clo_depth suggests we need.  Then we
   // remove every entry that is an alloc-fn.  Depending on the
   // circumstances, we may need to redo it all, asking for more IPs.
   // Details:
   // - If the original stack trace is smaller than asked-for, redo=False
   // - Else if after filtering we have >= clo_depth IPs, redo=False
   // - Else redo=True
   // In other words, to redo, we'd have to get a stack trace as big as we
   // asked for and remove more than 'overestimate' alloc-fns.

   // Main loop.
   redo = True;      // Assume this to begin with.
   for (overestimate = 3; redo; overestimate += 6) {
      // This should never happen -- would require MAX_OVERESTIMATE
      // alloc-fns to be removed from the stack trace.
      if (overestimate > MAX_OVERESTIMATE)
         VG_(tool_panic)("get_IPs: ips[] too small, inc. MAX_OVERESTIMATE?");

      // Ask for more IPs than clo_depth suggests we need.
      n_ips = VG_(get_StackTrace)( tid, ips, clo_depth + overestimate,
                                   NULL/*array to dump SP values in*/,
                                   NULL/*array to dump FP values in*/,
                                   0/*first_ip_delta*/ );
      tl_assert(n_ips > 0);

      // If the original stack trace is smaller than asked-for, redo=False.
      if (n_ips < clo_depth + overestimate) { redo = False; }

      // Filter out alloc fns.  If requested, we automatically remove the
      // first entry (which presumably will be something like malloc or
      // __builtin_new that we're sure to filter out) without looking at it,
      // because VG_(get_fnname) is expensive.
      n_alloc_fns_removed = ( exclude_first_entry ? 1 : 0 );
      for (i = n_alloc_fns_removed; i < n_ips; i++) {
         if (VG_(get_fnname)(ips[i], buf, BUF_LEN)) {
            if (is_member_fn(alloc_fns, buf)) {
               n_alloc_fns_removed++;
            } else {
               break;
            }
         }
      }
      // Remove the alloc fns by shuffling the rest down over them.
      n_ips -= n_alloc_fns_removed;
      for (i = 0; i < n_ips; i++) {
         ips[i] = ips[i + n_alloc_fns_removed];
      }

      // If after filtering we have >= clo_depth IPs, redo=False
      if (n_ips >= clo_depth) {
         redo = False;
         n_ips = clo_depth;      // Ignore any IPs below --depth.
      }

      if (redo) {
         n_XCon_redos++;
      }
   }
   return n_ips;
}

// Gets an XCon and puts it in the tree.  Returns the XCon's bottom-XPt.
// Unless the allocation should be ignored, in which case we return NULL.
static XPt* get_XCon( ThreadId tid, Bool exclude_first_entry )
{
   static Addr ips[MAX_IPS];
   Int i;
   XPt* xpt = alloc_xpt;

   // After this call, the IPs we want are in ips[0]..ips[n_ips-1].
   Int n_ips = get_IPs(tid, exclude_first_entry, ips);

   // Should we ignore this allocation?  (Nb: n_ips can be zero, eg. if
   // 'main' is marked as an alloc-fn.)
   if (n_ips > 0 && fn_should_be_ignored(ips[0])) {
      return NULL;
   }

   // Now do the search/insertion of the XCon.  Walk from the root
   // (alloc_xpt) downwards, matching or creating one child per IP.
   for (i = 0; i < n_ips; i++) {
      Addr ip = ips[i];
      Int ch;
      // Look for IP in xpt's children.
      // Linear search, ugh -- about 10% of time for konqueror startup tried
      // caching last result, only hit about 4% for konqueror.
      // Nb: this search hits about 98% of the time for konqueror
      for (ch = 0; True; ch++) {
         if (ch == xpt->n_children) {
            // IP not found in the children.
            // Create and add new child XPt, then stop.
            XPt* new_child_xpt = new_XPt(ip, xpt);
            add_child_xpt(xpt, new_child_xpt);
            xpt = new_child_xpt;
            break;

         } else if (ip == xpt->children[ch]->ip) {
            // Found the IP in the children, stop.
            xpt = xpt->children[ch];
            break;
         }
      }
   }

   // [Note: several comments refer to this comment.  Do not delete it
   //  without updating them.]
   //
   // A complication... If all stack traces were well-formed, then the
   // returned xpt would always be a bottom-XPt.  As a consequence, an XPt's
   // size would always be equal to the sum of its children's sizes, which
   // is an excellent sanity check.
   //
   // Unfortunately, stack traces occasionally are malformed, ie. truncated.
   // This allows a stack trace to be a sub-trace of another, eg. a/b/c is a
   // sub-trace of a/b/c/d.  So we can't assume this xpt is a bottom-XPt;
   // nor can we do sanity check an XPt's size against its children's sizes.
   // This is annoying, but must be dealt with.  (Older versions of Massif
   // had this assertion in, and it was reported to fail by real users a
   // couple of times.)  Even more annoyingly, I can't come up with a simple
   // test case that exhibit such a malformed stack trace, so I can't
   // regression test it.  Sigh.
   //
   // However, we can print a warning, so that if it happens (unexpectedly)
   // in existing regression tests we'll know.  Also, it warns users that
   // the output snapshots may not add up the way they might expect.
   //
   //tl_assert(0 == xpt->n_children); // Must be bottom-XPt
   if (0 != xpt->n_children) {
      static Int n_moans = 0;
      if (n_moans < 3) {
         VG_(umsg)(
            "Warning: Malformed stack trace detected.  In Massif's output,\n");
         VG_(umsg)(
            "         the size of an entry's child entries may not sum up\n");
         VG_(umsg)(
            "         to the entry's size as they normally do.\n");
         n_moans++;
         if (3 == n_moans)
            VG_(umsg)(
            "         (And Massif now won't warn about this again.)\n");
      }
   }
   return xpt;
}

// Update 'szB' of every XPt in the XCon, by percolating upwards from 'xpt'
// to the root (alloc_xpt).  'space_delta' may be negative (deallocation);
// the asserts check no XPt's size can go negative.
static void update_XCon(XPt* xpt, SSizeT space_delta)
{
   tl_assert(clo_heap);
   tl_assert(NULL != xpt);

   if (0 == space_delta)
      return;

   while (xpt != alloc_xpt) {
      if (space_delta < 0) tl_assert(xpt->szB >= -space_delta);
      xpt->szB += space_delta;
      xpt = xpt->parent;
   }
   if (space_delta < 0) tl_assert(alloc_xpt->szB >= -space_delta);
   alloc_xpt->szB += space_delta;
}


//------------------------------------------------------------//
//--- Snapshots                                            ---//
//------------------------------------------------------------//

// Snapshots are done in a way so that we always have a reasonable number of
// them.  We start by taking them quickly.  Once we hit our limit, we cull
// some (eg. half), and start taking them more slowly.  Once we hit the
// limit again, we again cull and then take them even more slowly, and so
// on.

// Time is measured either in i or ms or bytes, depending on the --time-unit
// option.  It's a Long because it can exceed 32-bits reasonably easily, and
// because we need to allow negative values to represent unset times.
typedef Long Time;

#define UNUSED_SNAPSHOT_TIME  -333  // A conspicuous negative number.

typedef
   enum {
      Normal = 77,
      Peak,
      Unused
   }
   SnapshotKind;

typedef
   struct {
      SnapshotKind kind;
      Time  time;
      SizeT heap_szB;
      SizeT heap_extra_szB;// Heap slop + admin bytes.
      SizeT stacks_szB;
      SXPt* alloc_sxpt;    // Heap XTree root, if a detailed snapshot,
   }                       // otherwise NULL.
   Snapshot;

static UInt      next_snapshot_i = 0;  // Index of where next snapshot will go.
static Snapshot* snapshots;            // Array of snapshots.
                                       // (clo_max_snapshots entries, going by
                                       // sanity_check_snapshots_array.)

static Bool is_snapshot_in_use(Snapshot* snapshot)
{
   if (Unused == snapshot->kind) {
      // If snapshot is unused, check all the fields are unset.
      tl_assert(snapshot->time           == UNUSED_SNAPSHOT_TIME);
      tl_assert(snapshot->heap_extra_szB == 0);
      tl_assert(snapshot->heap_szB       == 0);
      tl_assert(snapshot->stacks_szB     == 0);
      tl_assert(snapshot->alloc_sxpt     == NULL);
      return False;
   } else {
      tl_assert(snapshot->time           != UNUSED_SNAPSHOT_TIME);
      return True;
   }
}

static Bool is_detailed_snapshot(Snapshot* snapshot)
{
   return (snapshot->alloc_sxpt ? True : False);
}

// A snapshot cannot be culled if it is the first, the last, or the peak.
static Bool is_uncullable_snapshot(Snapshot* snapshot)
{
   return &snapshots[0] == snapshot                   // First snapshot
       || &snapshots[next_snapshot_i-1] == snapshot   // Last snapshot
       || snapshot->kind == Peak;                     // Peak snapshot
}

static void sanity_check_snapshot(Snapshot* snapshot)
{
   if (snapshot->alloc_sxpt) {
      sanity_check_SXTree(snapshot->alloc_sxpt);
   }
}

// All the used entries should look used, all the unused ones should be clear.
static void sanity_check_snapshots_array(void)
{
   Int i;
   for (i = 0; i < next_snapshot_i; i++) {
      tl_assert( is_snapshot_in_use( & snapshots[i] ));
   }
   for (    ; i < clo_max_snapshots; i++) {
      tl_assert(!is_snapshot_in_use( & snapshots[i] ));
   }
}

// This zeroes all the fields in the snapshot, but does not free the heap
// XTree if present.  It also does a sanity check unless asked not to;  we
// can't sanity check at startup when clearing the initial snapshots because
// they're full of junk.
1093static void clear_snapshot(Snapshot* snapshot, Bool do_sanity_check) 1094{ 1095 if (do_sanity_check) sanity_check_snapshot(snapshot); 1096 snapshot->kind = Unused; 1097 snapshot->time = UNUSED_SNAPSHOT_TIME; 1098 snapshot->heap_extra_szB = 0; 1099 snapshot->heap_szB = 0; 1100 snapshot->stacks_szB = 0; 1101 snapshot->alloc_sxpt = NULL; 1102} 1103 1104// This zeroes all the fields in the snapshot, and frees the heap XTree if 1105// present. 1106static void delete_snapshot(Snapshot* snapshot) 1107{ 1108 // Nb: if there's an XTree, we free it after calling clear_snapshot, 1109 // because clear_snapshot does a sanity check which includes checking the 1110 // XTree. 1111 SXPt* tmp_sxpt = snapshot->alloc_sxpt; 1112 clear_snapshot(snapshot, /*do_sanity_check*/True); 1113 if (tmp_sxpt) { 1114 free_SXTree(tmp_sxpt); 1115 } 1116} 1117 1118static void VERB_snapshot(Int verbosity, const HChar* prefix, Int i) 1119{ 1120 Snapshot* snapshot = &snapshots[i]; 1121 const HChar* suffix; 1122 switch (snapshot->kind) { 1123 case Peak: suffix = "p"; break; 1124 case Normal: suffix = ( is_detailed_snapshot(snapshot) ? "d" : "." ); break; 1125 case Unused: suffix = "u"; break; 1126 default: 1127 tl_assert2(0, "VERB_snapshot: unknown snapshot kind: %d", snapshot->kind); 1128 } 1129 VERB(verbosity, "%s S%s%3d (t:%lld, hp:%ld, ex:%ld, st:%ld)\n", 1130 prefix, suffix, i, 1131 snapshot->time, 1132 snapshot->heap_szB, 1133 snapshot->heap_extra_szB, 1134 snapshot->stacks_szB 1135 ); 1136} 1137 1138// Cull half the snapshots; we choose those that represent the smallest 1139// time-spans, because that gives us the most even distribution of snapshots 1140// over time. (It's possible to lose interesting spikes, however.) 1141// 1142// Algorithm for N snapshots: We find the snapshot representing the smallest 1143// timeframe, and remove it. We repeat this until (N/2) snapshots are gone. 
// We have to do this one snapshot at a time, rather than finding the (N/2)
// smallest snapshots in one hit, because when a snapshot is removed, its
// neighbours immediately cover greater timespans.  So it's O(N^2), but N is
// small, and it's not done very often.
//
// Once we're done, we return the new smallest interval between snapshots.
// That becomes our minimum time interval.
// NOTE(review): the return type is UInt but timespans are Time (Long);
// this narrows -- presumably intervals always fit in 32 bits, TODO confirm.
static UInt cull_snapshots(void)
{
   Int  i, jp, j, jn, min_timespan_i;
   Int  n_deleted = 0;
   Time min_timespan;

   n_cullings++;

   // Sets j to the index of the first not-yet-removed snapshot at or after i
   #define FIND_SNAPSHOT(i, j) \
      for (j = i; \
           j < clo_max_snapshots && !is_snapshot_in_use(&snapshots[j]); \
           j++) { }

   VERB(2, "Culling...\n");

   // First we remove enough snapshots by clearing them in-place.  Once
   // that's done, we can slide the remaining ones down.
   for (i = 0; i < clo_max_snapshots/2; i++) {
      // Find the snapshot representing the smallest timespan.  The timespan
      // for snapshot n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
      // snapshot A and B.  We don't consider the first and last snapshots for
      // removal.
      Snapshot* min_snapshot;
      Int min_j;

      // Initial triple: (prev, curr, next) == (jp, j, jn)
      // Initial min_timespan is the first one.
      jp = 0;
      FIND_SNAPSHOT(1,   j);
      FIND_SNAPSHOT(j+1, jn);
      min_timespan = 0x7fffffffffffffffLL;   // "infinity" sentinel
      min_j = -1;
      while (jn < clo_max_snapshots) {
         Time timespan = snapshots[jn].time - snapshots[jp].time;
         tl_assert(timespan >= 0);
         // Nb: We never cull the peak snapshot.
         if (Peak != snapshots[j].kind && timespan < min_timespan) {
            min_timespan = timespan;
            min_j = j;
         }
         // Move on to next triple
         jp = j;
         j  = jn;
         FIND_SNAPSHOT(jn+1, jn);
      }
      // We've found the least important snapshot, now delete it.  First
      // print it if necessary.
      tl_assert(-1 != min_j);    // Check we found a minimum.
      min_snapshot = & snapshots[ min_j ];
      if (VG_(clo_verbosity) > 1) {
         HChar buf[64];
         VG_(snprintf)(buf, 64, " %3d (t-span = %lld)", i, min_timespan);
         VERB_snapshot(2, buf, min_j);
      }
      delete_snapshot(min_snapshot);
      n_deleted++;
   }

   // Slide down the remaining snapshots over the removed ones.  First set i
   // to point to the first empty slot, and j to the first full slot after
   // i.  Then slide everything down.
   for (i = 0;  is_snapshot_in_use( &snapshots[i] ); i++) { }
   for (j = i; !is_snapshot_in_use( &snapshots[j] ); j++) { }
   for (  ; j < clo_max_snapshots; j++) {
      if (is_snapshot_in_use( &snapshots[j] )) {
         snapshots[i++] = snapshots[j];
         clear_snapshot(&snapshots[j], /*do_sanity_check*/True);
      }
   }
   next_snapshot_i = i;

   // Check snapshots array looks ok after changes.
   sanity_check_snapshots_array();

   // Find the minimum timespan remaining;  that will be our new minimum
   // time interval.  Note that above we were finding timespans by measuring
   // two intervals around a snapshot that was under consideration for
   // deletion.  Here we only measure single intervals because all the
   // deletions have occurred.
   //
   // But we have to be careful -- some snapshots (eg. snapshot 0, and the
   // peak snapshot) are uncullable.  If two uncullable snapshots end up
   // next to each other, they'll never be culled (assuming the peak doesn't
   // change), and the time gap between them will not change.  However, the
   // time between the remaining cullable snapshots will grow ever larger.
   // This means that the min_timespan found will always be that between the
   // two uncullable snapshots, and it will be much smaller than it should
   // be.  To avoid this problem, when computing the minimum timespan, we
   // ignore any timespans between two uncullable snapshots.
   tl_assert(next_snapshot_i > 1);
   min_timespan = 0x7fffffffffffffffLL;
   min_timespan_i = -1;
   for (i = 1; i < next_snapshot_i; i++) {
      if (is_uncullable_snapshot(&snapshots[i]) &&
          is_uncullable_snapshot(&snapshots[i-1]))
      {
         VERB(2, "(Ignoring interval %d--%d when computing minimum)\n", i-1, i);
      } else {
         Time timespan = snapshots[i].time - snapshots[i-1].time;
         tl_assert(timespan >= 0);
         if (timespan < min_timespan) {
            min_timespan = timespan;
            min_timespan_i = i;
         }
      }
   }
   tl_assert(-1 != min_timespan_i);    // Check we found a minimum.

   // Print remaining snapshots, if necessary.
   if (VG_(clo_verbosity) > 1) {
      VERB(2, "Finished culling (%3d of %3d deleted)\n",
         n_deleted, clo_max_snapshots);
      for (i = 0; i < next_snapshot_i; i++) {
         VERB_snapshot(2, "  post-cull", i);
      }
      VERB(2, "New time interval = %lld (between snapshots %d and %d)\n",
         min_timespan, min_timespan_i-1, min_timespan_i);
   }

   return min_timespan;
}

// Get current time, in whatever time unit we're using (--time-unit):
// instructions executed, milliseconds, or bytes allocated+deallocated.
static Time get_time(void)
{
   // Get current time, in whatever time unit we're using.
   if (clo_time_unit == TimeI) {
      return guest_instrs_executed;
   } else if (clo_time_unit == TimeMS) {
      // Some stuff happens between the millisecond timer being initialised
      // to zero and us taking our first snapshot.  We determine that time
      // gap so we can subtract it from all subsequent times so that our
      // first snapshot is considered to be at t = 0ms.  Unfortunately, a
      // bunch of symbols get read after the first snapshot is taken but
      // before the second one (which is triggered by the first allocation),
      // so when the time-unit is 'ms' we always have a big gap between the
      // first two snapshots.  But at least users won't have to wonder why
      // the first snapshot isn't at t=0.
      static Bool is_first_get_time = True;
      static Time start_time_ms;
      if (is_first_get_time) {
         start_time_ms = VG_(read_millisecond_timer)();
         is_first_get_time = False;
         return 0;
      } else {
         return VG_(read_millisecond_timer)() - start_time_ms;
      }
   } else if (clo_time_unit == TimeB) {
      return total_allocs_deallocs_szB;
   } else {
      tl_assert2(0, "bad --time-unit value");
      // NOTE(review): no return after the assert -- assumes tl_assert2
      // never returns (i.e. its expansion is noreturn); verify.
   }
}

// Take a snapshot, and only that -- decisions on whether to take a
// snapshot, or what kind of snapshot, are made elsewhere.
// Nb: we call the arg "my_time" because "time" shadows a global declaration
// in /usr/include/time.h on Darwin.
static void
take_snapshot(Snapshot* snapshot, SnapshotKind kind, Time my_time,
              Bool is_detailed)
{
   tl_assert(!is_snapshot_in_use(snapshot));
   if (!clo_pages_as_heap) {
      tl_assert(have_started_executing_code);
   }

   // Heap and heap admin.
   if (clo_heap) {
      snapshot->heap_szB = heap_szB;
      if (is_detailed) {
         SizeT total_szB = heap_szB + heap_extra_szB + stacks_szB;
         snapshot->alloc_sxpt = dup_XTree(alloc_xpt, total_szB);
         tl_assert(          alloc_xpt->szB == heap_szB);
         tl_assert(snapshot->alloc_sxpt->szB == heap_szB);
      }
      snapshot->heap_extra_szB = heap_extra_szB;
   }

   // Stack(s).
   if (clo_stacks) {
      snapshot->stacks_szB = stacks_szB;
   }

   // Rest of snapshot.
   snapshot->kind = kind;
   snapshot->time = my_time;
   sanity_check_snapshot(snapshot);

   // Update stats.
   if (Peak == kind) n_peak_snapshots++;
   if (is_detailed)  n_detailed_snapshots++;
   n_real_snapshots++;
}


// Take a snapshot, if it's time, or if we've hit a peak.
static void
maybe_take_snapshot(SnapshotKind kind, const HChar* what)
{
   // 'min_time_interval' is the minimum time interval between snapshots.
   // If we try to take a snapshot and less than this much time has passed,
   // we don't take it.  It gets larger as the program runs longer.  It's
   // initialised to zero so that we begin by taking snapshots as quickly as
   // possible.
   static Time min_time_interval = 0;
   // Zero allows startup snapshot.
   static Time earliest_possible_time_of_next_snapshot = 0;
   static Int  n_snapshots_since_last_detailed         = 0;
   static Int  n_skipped_snapshots_since_last_snapshot = 0;

   Snapshot* snapshot;
   Bool      is_detailed;
   // Nb: we call this variable "my_time" because "time" shadows a global
   // declaration in /usr/include/time.h on Darwin.
   Time      my_time = get_time();

   switch (kind) {
    case Normal:
      // Only do a snapshot if it's time.
      if (my_time < earliest_possible_time_of_next_snapshot) {
         n_skipped_snapshots++;
         n_skipped_snapshots_since_last_snapshot++;
         return;
      }
      // Every clo_detailed_freq'th normal snapshot is detailed.
      is_detailed = (clo_detailed_freq-1 == n_snapshots_since_last_detailed);
      break;

    case Peak: {
      // Because we're about to do a deallocation, we're coming down from a
      // local peak.  If it is (a) actually a global peak, and (b) a certain
      // amount bigger than the previous peak, then we take a peak snapshot.
      // By not taking a snapshot for every peak, we save a lot of effort --
      // because many peaks remain peak only for a short time.
      SizeT total_szB = heap_szB + heap_extra_szB + stacks_szB;
      SizeT excess_szB_for_new_peak =
         (SizeT)((peak_snapshot_total_szB * clo_peak_inaccuracy) / 100);
      if (total_szB <= peak_snapshot_total_szB + excess_szB_for_new_peak) {
         return;
      }
      is_detailed = True;
      break;
    }

    default:
      tl_assert2(0, "maybe_take_snapshot: unrecognised snapshot kind");
   }

   // Take the snapshot.
   snapshot = & snapshots[next_snapshot_i];
   take_snapshot(snapshot, kind, my_time, is_detailed);

   // Record if it was detailed.
   if (is_detailed) {
      n_snapshots_since_last_detailed = 0;
   } else {
      n_snapshots_since_last_detailed++;
   }

   // Update peak data, if it's a Peak snapshot.
   if (Peak == kind) {
      Int i, number_of_peaks_snapshots_found = 0;

      // Sanity check the size, then update our recorded peak.
      SizeT snapshot_total_szB =
         snapshot->heap_szB + snapshot->heap_extra_szB + snapshot->stacks_szB;
      tl_assert2(snapshot_total_szB > peak_snapshot_total_szB,
         "%ld, %ld\n", snapshot_total_szB, peak_snapshot_total_szB);
      peak_snapshot_total_szB = snapshot_total_szB;

      // Find the old peak snapshot, if it exists, and mark it as normal.
      for (i = 0; i < next_snapshot_i; i++) {
         if (Peak == snapshots[i].kind) {
            snapshots[i].kind = Normal;
            number_of_peaks_snapshots_found++;
         }
      }
      tl_assert(number_of_peaks_snapshots_found <= 1);
   }

   // Finish up verbosity and stats stuff.
   if (n_skipped_snapshots_since_last_snapshot > 0) {
      VERB(2, "  (skipped %d snapshot%s)\n",
         n_skipped_snapshots_since_last_snapshot,
         ( 1 == n_skipped_snapshots_since_last_snapshot ? "" : "s") );
   }
   VERB_snapshot(2, what, next_snapshot_i);
   n_skipped_snapshots_since_last_snapshot = 0;

   // Cull the entries, if our snapshot table is full.
   next_snapshot_i++;
   if (clo_max_snapshots == next_snapshot_i) {
      min_time_interval = cull_snapshots();
   }

   // Work out the earliest time when the next snapshot can happen.
   earliest_possible_time_of_next_snapshot = my_time + min_time_interval;
}


//------------------------------------------------------------//
//--- Sanity checking                                      ---//
//------------------------------------------------------------//

static Bool ms_cheap_sanity_check ( void )
{
   return True;   // Nothing useful we can cheaply check.
}

static Bool ms_expensive_sanity_check ( void )
{
   sanity_check_XTree(alloc_xpt, /*parent*/NULL);
   sanity_check_snapshots_array();
   return True;
}


//------------------------------------------------------------//
//--- Heap management                                      ---//
//------------------------------------------------------------//

// Metadata for heap blocks.  Each one contains a pointer to a bottom-XPt,
// which is a foothold into the XCon at which it was allocated.  From
// HP_Chunks, XPt 'space' fields are incremented (at allocation) and
// decremented (at deallocation).
//
// Nb: first two fields must match core's VgHashNode.
typedef
   struct _HP_Chunk {
      struct _HP_Chunk* next;
      Addr              data;       // Ptr to actual block
      SizeT             req_szB;    // Size requested
      SizeT             slop_szB;   // Extra bytes given above those requested
      XPt*              where;      // Where allocated; bottom-XPt
   }
   HP_Chunk;

static VgHashTable malloc_list  = NULL;   // HP_Chunks

// Fold a size change (of either sign) into the running total of bytes
// allocated-plus-deallocated, which drives the TimeB time unit.
static void update_alloc_stats(SSizeT szB_delta)
{
   // Update total_allocs_deallocs_szB.
   if (szB_delta < 0) szB_delta = -szB_delta;
   total_allocs_deallocs_szB += szB_delta;
}

// Apply deltas to the global heap / heap-extra byte counters; asserts
// guard against either counter going negative.
static void update_heap_stats(SSizeT heap_szB_delta, Int heap_extra_szB_delta)
{
   if (heap_szB_delta < 0)
      tl_assert(heap_szB >= -heap_szB_delta);
   if (heap_extra_szB_delta < 0)
      tl_assert(heap_extra_szB >= -heap_extra_szB_delta);

   heap_extra_szB += heap_extra_szB_delta;
   heap_szB       += heap_szB_delta;

   update_alloc_stats(heap_szB_delta + heap_extra_szB_delta);
}

// Record a new heap block 'p' in malloc_list and, if heap profiling is on,
// attach it to its XCon (hc->where stays NULL for ignored allocations).
// Returns 'p' unchanged.
static
void* record_block( ThreadId tid, void* p, SizeT req_szB, SizeT slop_szB,
                    Bool exclude_first_entry, Bool maybe_snapshot )
{
   // Make new HP_Chunk node, add to malloc_list
   HP_Chunk* hc = VG_(malloc)("ms.main.rb.1", sizeof(HP_Chunk));
   hc->req_szB  = req_szB;
   hc->slop_szB = slop_szB;
   hc->data     = (Addr)p;
   hc->where    = NULL;
   VG_(HT_add_node)(malloc_list, hc);

   if (clo_heap) {
      VERB(3, "<<< record_block (%lu, %lu)\n", req_szB, slop_szB);

      hc->where = get_XCon( tid, exclude_first_entry );

      if (hc->where) {
         // Update statistics.
         n_heap_allocs++;

         // Update heap stats.
         update_heap_stats(req_szB, clo_heap_admin + slop_szB);

         // Update XTree.
         update_XCon(hc->where, req_szB);

         // Maybe take a snapshot.
         if (maybe_snapshot) {
            maybe_take_snapshot(Normal, "  alloc");
         }

      } else {
         // Ignored allocation.
         n_ignored_heap_allocs++;

         VERB(3, "(ignored)\n");
      }

      VERB(3, ">>>\n");
   }

   return p;
}

// Allocate a client block with VG_(cli_malloc) and record it.  Returns
// NULL for a failed allocation, or for an implausibly huge request (the
// size looks negative when viewed as signed).
static __inline__
void* alloc_and_record_block ( ThreadId tid, SizeT req_szB, SizeT req_alignB,
                               Bool is_zeroed )
{
   SizeT actual_szB, slop_szB;
   void* p;

   if ((SSizeT)req_szB < 0) return NULL;

   // Allocate and zero if necessary.
   p = VG_(cli_malloc)( req_alignB, req_szB );
   if (!p) {
      return NULL;
   }
   if (is_zeroed) VG_(memset)(p, 0, req_szB);
   actual_szB = VG_(malloc_usable_size)(p);
   tl_assert(actual_szB >= req_szB);
   slop_szB = actual_szB - req_szB;

   // Record block.
   record_block(tid, p, req_szB, slop_szB, /*exclude_first_entry*/True,
                /*maybe_snapshot*/True);

   return p;
}

// Remove the HP_Chunk for 'p' and undo its effect on the stats and XTree.
// Does not free the client block itself; callers do that.
static __inline__
void unrecord_block ( void* p, Bool maybe_snapshot )
{
   // Remove HP_Chunk from malloc_list
   HP_Chunk* hc = VG_(HT_remove)(malloc_list, (UWord)p);
   if (NULL == hc) {
      return;   // must have been a bogus free()
   }

   if (clo_heap) {
      VERB(3, "<<< unrecord_block\n");

      if (hc->where) {
         // Update statistics.
         n_heap_frees++;

         // Maybe take a peak snapshot, since it's a deallocation.
         if (maybe_snapshot) {
            maybe_take_snapshot(Peak, "de-PEAK");
         }

         // Update heap stats.
         update_heap_stats(-hc->req_szB, -clo_heap_admin - hc->slop_szB);

         // Update XTree.
         update_XCon(hc->where, -hc->req_szB);

         // Maybe take a snapshot.
         if (maybe_snapshot) {
            maybe_take_snapshot(Normal, "dealloc");
         }

      } else {
         n_ignored_heap_frees++;

         VERB(3, "(ignored)\n");
      }

      VERB(3, ">>> (-%lu, -%lu)\n", hc->req_szB, hc->slop_szB);
   }

   // Actually free the chunk, and the heap block (if necessary)
   VG_(free)( hc );  hc = NULL;
}

// Nb: --ignore-fn is tricky for realloc.  If the block's original alloc was
// ignored, but the realloc is not requested to be ignored, and we are
// shrinking the block, then we have to ignore the realloc -- otherwise we
// could end up with negative heap sizes.  This isn't a danger if we are
// growing such a block, but for consistency (it also simplifies things) we
// ignore such reallocs as well.
// Massif's realloc: removes the old HP_Chunk, possibly moves the client
// block, then re-records the (updated) chunk and adjusts stats/XTree.
// Mirrors realloc(3): returns NULL and leaves the old block intact if a
// needed allocation fails, or if p_old wasn't a recorded block.
static __inline__
void* realloc_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   HP_Chunk* hc;
   void*     p_new;
   SizeT     old_req_szB, old_slop_szB, new_slop_szB, new_actual_szB;
   XPt      *old_where, *new_where;
   Bool      is_ignored = False;

   // Remove the old block
   hc = VG_(HT_remove)(malloc_list, (UWord)p_old);
   if (hc == NULL) {
      return NULL;   // must have been a bogus realloc()
   }

   old_req_szB  = hc->req_szB;
   old_slop_szB = hc->slop_szB;

   tl_assert(!clo_pages_as_heap);  // Shouldn't be here if --pages-as-heap=yes.
   if (clo_heap) {
      VERB(3, "<<< realloc_block (%lu)\n", new_req_szB);

      if (hc->where) {
         // Update statistics.
         n_heap_reallocs++;

         // Maybe take a peak snapshot, if it's (effectively) a deallocation.
         if (new_req_szB < old_req_szB) {
            maybe_take_snapshot(Peak, "re-PEAK");
         }
      } else {
         // The original malloc was ignored, so we have to ignore the
         // realloc as well.
         is_ignored = True;
      }
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= old_req_szB + old_slop_szB) {
      // New size is smaller or same;  block not moved.
      p_new = p_old;
      new_slop_szB = old_slop_szB + (old_req_szB - new_req_szB);

   } else {
      // New size is bigger;  make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      VG_(memcpy)(p_new, p_old, old_req_szB + old_slop_szB);
      VG_(cli_free)(p_old);
      new_actual_szB = VG_(malloc_usable_size)(p_new);
      tl_assert(new_actual_szB >= new_req_szB);
      new_slop_szB = new_actual_szB - new_req_szB;
   }

   if (p_new) {
      // Update HP_Chunk.
      hc->data     = (Addr)p_new;
      hc->req_szB  = new_req_szB;
      hc->slop_szB = new_slop_szB;
      old_where    = hc->where;
      hc->where    = NULL;

      // Update XTree.
      if (clo_heap) {
         new_where = get_XCon( tid, /*exclude_first_entry*/True);
         if (!is_ignored && new_where) {
            hc->where = new_where;
            update_XCon(old_where, -old_req_szB);
            update_XCon(new_where,  new_req_szB);
         } else {
            // The realloc itself is ignored.
            is_ignored = True;

            // Update statistics.
            n_ignored_heap_reallocs++;
         }
      }
   }

   // Now insert the new hc (with a possibly new 'data' field) into
   // malloc_list.  If this realloc() did not increase the memory size, we
   // will have removed and then re-added hc unnecessarily.  But that's ok
   // because shrinking a block with realloc() is (presumably) much rarer
   // than growing it, and this way simplifies the growing case.
   VG_(HT_add_node)(malloc_list, hc);

   if (clo_heap) {
      if (!is_ignored) {
         // Update heap stats.
         update_heap_stats(new_req_szB - old_req_szB,
                           new_slop_szB - old_slop_szB);

         // Maybe take a snapshot.
         maybe_take_snapshot(Normal, "realloc");
      } else {

         VERB(3, "(ignored)\n");
      }

      VERB(3, ">>> (%ld, %ld)\n",
         new_req_szB - old_req_szB, new_slop_szB - old_slop_szB);
   }

   return p_new;
}


//------------------------------------------------------------//
//--- malloc() et al replacement wrappers                  ---//
//------------------------------------------------------------//

static void* ms_malloc ( ThreadId tid, SizeT szB )
{
   return alloc_and_record_block( tid, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* ms___builtin_new ( ThreadId tid, SizeT szB )
{
   return alloc_and_record_block( tid, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* ms___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return alloc_and_record_block( tid, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* ms_calloc ( ThreadId tid, SizeT m, SizeT szB )
{
   return alloc_and_record_block( tid, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
}

static void *ms_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return alloc_and_record_block( tid, szB, alignB, False );
}

static void ms_free ( ThreadId tid __attribute__((unused)), void* p )
{
   unrecord_block(p, /*maybe_snapshot*/True);
   VG_(cli_free)(p);
}

static void ms___builtin_delete ( ThreadId tid, void* p )
{
   unrecord_block(p, /*maybe_snapshot*/True);
   VG_(cli_free)(p);
}

static void ms___builtin_vec_delete ( ThreadId tid, void* p )
{
   unrecord_block(p, /*maybe_snapshot*/True);
   VG_(cli_free)(p);
}

static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
   return realloc_block(tid, p_old, new_szB);
}

// Report a recorded block's usable size (request + slop); 0 if unknown.
static SizeT ms_malloc_usable_size ( ThreadId tid, void* p )
{
   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );

   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
}

//------------------------------------------------------------//
//--- Page handling                                        ---//
//------------------------------------------------------------//

// Record a page-aligned region as one block per page (--pages-as-heap mode).
// Only the last page may trigger a snapshot, so a big mapping is one event.
static
void ms_record_page_mem ( Addr a, SizeT len )
{
   ThreadId tid = VG_(get_running_tid)();
   Addr end;
   tl_assert(VG_IS_PAGE_ALIGNED(len));
   tl_assert(len >= VKI_PAGE_SIZE);
   // Record the first N-1 pages as blocks, but don't do any snapshots.
   for (end = a + len - VKI_PAGE_SIZE; a < end; a += VKI_PAGE_SIZE) {
      record_block( tid, (void*)a, VKI_PAGE_SIZE, /*slop_szB*/0,
                    /*exclude_first_entry*/False, /*maybe_snapshot*/False );
   }
   // Record the last page as a block, and maybe do a snapshot afterwards.
   record_block( tid, (void*)a, VKI_PAGE_SIZE, /*slop_szB*/0,
                 /*exclude_first_entry*/False, /*maybe_snapshot*/True );
}

// Inverse of ms_record_page_mem: unrecord each page; only the last page
// may trigger a snapshot.
static
void ms_unrecord_page_mem( Addr a, SizeT len )
{
   Addr end;
   tl_assert(VG_IS_PAGE_ALIGNED(len));
   tl_assert(len >= VKI_PAGE_SIZE);
   for (end = a + len - VKI_PAGE_SIZE; a < end; a += VKI_PAGE_SIZE) {
      unrecord_block((void*)a, /*maybe_snapshot*/False);
   }
   unrecord_block((void*)a, /*maybe_snapshot*/True);
}

//------------------------------------------------------------//

static
void ms_new_mem_mmap ( Addr a, SizeT len,
                       Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   tl_assert(VG_IS_PAGE_ALIGNED(len));
   ms_record_page_mem(a, len);
}

static
void ms_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   // startup maps are always page-sized, except the trampoline page is
   // marked by the core as only being the size of the trampoline itself,
   // which is something like 57 bytes.  Round it up to page size.
   len = VG_PGROUNDUP(len);
   ms_record_page_mem(a, len);
}

static
void ms_new_mem_brk ( Addr a, SizeT len, ThreadId tid )
{
   // brk limit is not necessarily aligned on a page boundary.
   // If new memory being brk-ed implies to allocate a new page,
   // then call ms_record_page_mem with page aligned parameters
   // otherwise just ignore.
   Addr old_bottom_page = VG_PGROUNDDN(a - 1);
   Addr new_top_page = VG_PGROUNDDN(a + len - 1);
   if (old_bottom_page != new_top_page)
      ms_record_page_mem(VG_PGROUNDDN(a),
                         (new_top_page - old_bottom_page));
}

static
void ms_copy_mem_remap( Addr from, Addr to, SizeT len)
{
   tl_assert(VG_IS_PAGE_ALIGNED(len));
   ms_unrecord_page_mem(from, len);
   ms_record_page_mem(to, len);
}

static
void ms_die_mem_munmap( Addr a, SizeT len )
{
   tl_assert(VG_IS_PAGE_ALIGNED(len));
   ms_unrecord_page_mem(a, len);
}

static
void ms_die_mem_brk( Addr a, SizeT len )
{
   // Call ms_unrecord_page_mem only if one or more pages are de-allocated.
   // See ms_new_mem_brk for more details.
   Addr new_bottom_page = VG_PGROUNDDN(a - 1);
   Addr old_top_page = VG_PGROUNDDN(a + len - 1);
   if (old_top_page != new_bottom_page)
      ms_unrecord_page_mem(VG_PGROUNDDN(a),
                           (old_top_page - new_bottom_page));

}

//------------------------------------------------------------//
//--- Stacks                                               ---//
//------------------------------------------------------------//

// We really want the inlining to occur...
1903#define INLINE inline __attribute__((always_inline)) 1904 1905static void update_stack_stats(SSizeT stack_szB_delta) 1906{ 1907 if (stack_szB_delta < 0) tl_assert(stacks_szB >= -stack_szB_delta); 1908 stacks_szB += stack_szB_delta; 1909 1910 update_alloc_stats(stack_szB_delta); 1911} 1912 1913static INLINE void new_mem_stack_2(SizeT len, const HChar* what) 1914{ 1915 if (have_started_executing_code) { 1916 VERB(3, "<<< new_mem_stack (%ld)\n", len); 1917 n_stack_allocs++; 1918 update_stack_stats(len); 1919 maybe_take_snapshot(Normal, what); 1920 VERB(3, ">>>\n"); 1921 } 1922} 1923 1924static INLINE void die_mem_stack_2(SizeT len, const HChar* what) 1925{ 1926 if (have_started_executing_code) { 1927 VERB(3, "<<< die_mem_stack (%ld)\n", -len); 1928 n_stack_frees++; 1929 maybe_take_snapshot(Peak, "stkPEAK"); 1930 update_stack_stats(-len); 1931 maybe_take_snapshot(Normal, what); 1932 VERB(3, ">>>\n"); 1933 } 1934} 1935 1936static void new_mem_stack(Addr a, SizeT len) 1937{ 1938 new_mem_stack_2(len, "stk-new"); 1939} 1940 1941static void die_mem_stack(Addr a, SizeT len) 1942{ 1943 die_mem_stack_2(len, "stk-die"); 1944} 1945 1946static void new_mem_stack_signal(Addr a, SizeT len, ThreadId tid) 1947{ 1948 new_mem_stack_2(len, "sig-new"); 1949} 1950 1951static void die_mem_stack_signal(Addr a, SizeT len) 1952{ 1953 die_mem_stack_2(len, "sig-die"); 1954} 1955 1956 1957//------------------------------------------------------------// 1958//--- Client Requests ---// 1959//------------------------------------------------------------// 1960 1961static void print_monitor_help ( void ) 1962{ 1963 VG_(gdb_printf) ("\n"); 1964 VG_(gdb_printf) ("massif monitor commands:\n"); 1965 VG_(gdb_printf) (" snapshot [<filename>]\n"); 1966 VG_(gdb_printf) (" detailed_snapshot [<filename>]\n"); 1967 VG_(gdb_printf) (" takes a snapshot (or a detailed snapshot)\n"); 1968 VG_(gdb_printf) (" and saves it in <filename>\n"); 1969 VG_(gdb_printf) (" default <filename> is massif.vgdb.out\n"); 1970 
VG_(gdb_printf) ("\n"); 1971} 1972 1973 1974/* Forward declaration. 1975 return True if request recognised, False otherwise */ 1976static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req); 1977static Bool ms_handle_client_request ( ThreadId tid, UWord* argv, UWord* ret ) 1978{ 1979 switch (argv[0]) { 1980 case VG_USERREQ__MALLOCLIKE_BLOCK: { 1981 void* p = (void*)argv[1]; 1982 SizeT szB = argv[2]; 1983 record_block( tid, p, szB, /*slop_szB*/0, /*exclude_first_entry*/False, 1984 /*maybe_snapshot*/True ); 1985 *ret = 0; 1986 return True; 1987 } 1988 case VG_USERREQ__RESIZEINPLACE_BLOCK: { 1989 void* p = (void*)argv[1]; 1990 SizeT newSizeB = argv[3]; 1991 1992 unrecord_block(p, /*maybe_snapshot*/True); 1993 record_block(tid, p, newSizeB, /*slop_szB*/0, 1994 /*exclude_first_entry*/False, /*maybe_snapshot*/True); 1995 return True; 1996 } 1997 case VG_USERREQ__FREELIKE_BLOCK: { 1998 void* p = (void*)argv[1]; 1999 unrecord_block(p, /*maybe_snapshot*/True); 2000 *ret = 0; 2001 return True; 2002 } 2003 case VG_USERREQ__GDB_MONITOR_COMMAND: { 2004 Bool handled = handle_gdb_monitor_command (tid, (HChar*)argv[1]); 2005 if (handled) 2006 *ret = 1; 2007 else 2008 *ret = 0; 2009 return handled; 2010 } 2011 2012 default: 2013 *ret = 0; 2014 return False; 2015 } 2016} 2017 2018//------------------------------------------------------------// 2019//--- Instrumentation ---// 2020//------------------------------------------------------------// 2021 2022static void add_counter_update(IRSB* sbOut, Int n) 2023{ 2024 #if defined(VG_BIGENDIAN) 2025 # define END Iend_BE 2026 #elif defined(VG_LITTLEENDIAN) 2027 # define END Iend_LE 2028 #else 2029 # error "Unknown endianness" 2030 #endif 2031 // Add code to increment 'guest_instrs_executed' by 'n', like this: 2032 // WrTmp(t1, Load64(&guest_instrs_executed)) 2033 // WrTmp(t2, Add64(RdTmp(t1), Const(n))) 2034 // Store(&guest_instrs_executed, t2) 2035 IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64); 2036 IRTemp t2 = 
      newIRTemp(sbOut->tyenv, Ity_I64);
   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&guest_instrs_executed );

   IRStmt* st1 = IRStmt_WrTmp(t1, IRExpr_Load(END, Ity_I64, counter_addr));
   IRStmt* st2 =
      IRStmt_WrTmp(t2,
                   IRExpr_Binop(Iop_Add64, IRExpr_RdTmp(t1),
                                IRExpr_Const(IRConst_U64(n))));
   IRStmt* st3 = IRStmt_Store(END, counter_addr, IRExpr_RdTmp(t2));

   addStmtToIRSB( sbOut, st1 );
   addStmtToIRSB( sbOut, st2 );
   addStmtToIRSB( sbOut, st3 );
}

// Instrument a superblock so that guest_instrs_executed is kept accurate:
// one IMark == one guest instruction.
static IRSB* ms_instrument2( IRSB* sbIn )
{
   Int   i, n = 0;
   IRSB* sbOut;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   for (i = 0; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      if (st->tag == Ist_IMark) {
         n++;
      } else if (st->tag == Ist_Exit) {
         if (n > 0) {
            // Add an increment before the Exit statement, then reset 'n'.
            add_counter_update(sbOut, n);
            n = 0;
         }
      }
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}

// Tool instrumentation entry point.  Only --time-unit=i needs real
// instrumentation; the other time units pass the superblock through.
static
IRSB* ms_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   if (! have_started_executing_code) {
      // Do an initial sample to guarantee that we have at least one.
      // We use 'maybe_take_snapshot' instead of 'take_snapshot' to ensure
      // 'maybe_take_snapshot's internal static variables are initialised.
      have_started_executing_code = True;
      maybe_take_snapshot(Normal, "startup");
   }

   if      (clo_time_unit == TimeI)  { return ms_instrument2(sbIn); }
   else if (clo_time_unit == TimeMS) { return sbIn; }
   else if (clo_time_unit == TimeB)  { return sbIn; }
   else    { tl_assert2(0, "bad --time-unit value"); }
}


//------------------------------------------------------------//
//--- Writing snapshots                                    ---//
//------------------------------------------------------------//

// Scratch buffer shared by the FP macro and pp_snapshot_SXPt.
HChar FP_buf[BUF_LEN];

// XXX: implement f{,n}printf in m_libcprint.c eventually, and use it here.
// Then change Cachegrind to use it too.
// Nb: FP relies on a variable named 'fd' being in scope at the use site.
#define FP(format, args...) ({ \
   VG_(snprintf)(FP_buf, BUF_LEN, format, ##args); \
   FP_buf[BUF_LEN-1] = '\0';  /* Make sure the string is terminated. */ \
   VG_(write)(fd, (void*)FP_buf, VG_(strlen)(FP_buf)); \
})

// Nb: uses a static buffer, each call trashes the last string returned.
static const HChar* make_perc(double x)
{
   static HChar mbuf[32];

   VG_(percentify)((ULong)(x * 100), 10000, 2, 6, mbuf);
   // XXX: this is bogus if the denominator was zero -- resulting string is
   // something like "0 --%")
   if (' ' == mbuf[0]) mbuf[0] = '0';
   return mbuf;
}

// Recursively print one SXPt (snapshot XTree node) and its children to
// 'fd', indenting with 'depth_str'.
static void pp_snapshot_SXPt(Int fd, SXPt* sxpt, Int depth, HChar* depth_str,
                             Int depth_str_len,
                             SizeT snapshot_heap_szB, SizeT snapshot_total_szB)
{
   Int   i, j, n_insig_children_sxpts;
   SXPt* child = NULL;

   // Used for printing function names.  Is made static to keep it out
   // of the stack frame -- this function is recursive.  Obviously this
   // now means its contents are trashed across the recursive call.
   static HChar ip_desc_array[BUF_LEN];
   const HChar* ip_desc = ip_desc_array;

   switch (sxpt->tag) {
    case SigSXPt:
      // Print the SXPt itself.
      if (0 == depth) {
         if (clo_heap) {
            // The root node gets a synthetic description instead of an IP.
            ip_desc =
               ( clo_pages_as_heap
               ? "(page allocation syscalls) mmap/mremap/brk, --alloc-fns, etc."
               : "(heap allocation functions) malloc/new/new[], --alloc-fns, etc."
               );
         } else {
            // XXX: --alloc-fns?

            // Nick thinks this case cannot happen. ip_desc_array would be
            // conceptually uninitialised here. Therefore:
            tl_assert2(0, "pp_snapshot_SXPt: unexpected");
         }
      } else {
         // If it's main-or-below-main, we (if appropriate) ignore everything
         // below it by pretending it has no children.
         if ( ! VG_(clo_show_below_main) ) {
            Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(sxpt->Sig.ip);
            if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind) {
               sxpt->Sig.n_children = 0;
            }
         }

         // We need the -1 to get the line number right, But I'm not sure why.
         ip_desc = VG_(describe_IP)(sxpt->Sig.ip-1, ip_desc_array, BUF_LEN);
      }

      // Do the non-ip_desc part first...
      FP("%sn%d: %lu ", depth_str, sxpt->Sig.n_children, sxpt->szB);

      // For ip_descs beginning with "0xABCD...:" addresses, we first
      // measure the length of the "0xabcd: " address at the start of the
      // ip_desc.
      j = 0;
      if ('0' == ip_desc[0] && 'x' == ip_desc[1]) {
         j = 2;
         while (True) {
            if (ip_desc[j]) {
               if (':' == ip_desc[j]) break;
               j++;
            } else {
               // Hit the terminating NUL before any ':' -- malformed.
               tl_assert2(0, "ip_desc has unexpected form: %s\n", ip_desc);
            }
         }
      }
      // Nb: We treat this specially (ie. we don't use FP) so that if the
      // ip_desc is too long (eg. due to a long C++ function name), it'll
      // get truncated, but the '\n' is still there so its a valid file.
      // (At one point we were truncating without adding the '\n', which
      // caused bug #155929.)
      //
      // Also, we account for the length of the address in ip_desc when
      // truncating.  (The longest address we could have is 18 chars: "0x"
      // plus 16 address digits.)  This ensures that the truncated function
      // name always has the same length, which makes truncation
      // deterministic and thus makes testing easier.
      tl_assert(j <= 18);
      VG_(snprintf)(FP_buf, BUF_LEN, "%s\n", ip_desc);
      FP_buf[BUF_LEN-18+j-5] = '.';    // "..." at the end make the
      FP_buf[BUF_LEN-18+j-4] = '.';    //   truncation more obvious.
      FP_buf[BUF_LEN-18+j-3] = '.';
      FP_buf[BUF_LEN-18+j-2] = '\n';   // The last char is '\n'.
      FP_buf[BUF_LEN-18+j-1] = '\0';   // The string is terminated.
      VG_(write)(fd, (void*)FP_buf, VG_(strlen)(FP_buf));

      // Indent.
      tl_assert(depth+1 < depth_str_len-1);    // -1 for end NUL char
      depth_str[depth+0] = ' ';
      depth_str[depth+1] = '\0';

      // Sort SXPt's children by szB (reverse order: biggest to smallest).
      // Nb: we sort them here, rather than earlier (eg. in dup_XTree), for
      // two reasons.  First, if we do it during dup_XTree, it can get
      // expensive (eg. 15% of execution time for konqueror
      // startup/shutdown).  Second, this way we get the Insig SXPt (if one
      // is present) in its sorted position, not at the end.
      VG_(ssort)(sxpt->Sig.children, sxpt->Sig.n_children, sizeof(SXPt*),
                 SXPt_revcmp_szB);

      // Print the SXPt's children.  They should already be in sorted order.
      n_insig_children_sxpts = 0;
      for (i = 0; i < sxpt->Sig.n_children; i++) {
         child = sxpt->Sig.children[i];

         if (InsigSXPt == child->tag)
            n_insig_children_sxpts++;

         // Ok, print the child.  NB: contents of ip_desc_array will be
         // trashed by this recursive call.  Doesn't matter currently,
         // but worth noting.
         pp_snapshot_SXPt(fd, child, depth+1, depth_str, depth_str_len,
                          snapshot_heap_szB, snapshot_total_szB);
      }

      // Unindent.
      depth_str[depth+0] = '\0';
      depth_str[depth+1] = '\0';

      // There should be 0 or 1 Insig children SXPts.
      tl_assert(n_insig_children_sxpts <= 1);
      break;

    case InsigSXPt: {
      // Aggregated below-threshold entries: print a single summary line.
      const HChar* s = ( 1 == sxpt->Insig.n_xpts ? "," : "s, all" );
      FP("%sn0: %lu in %d place%s below massif's threshold (%s)\n",
         depth_str, sxpt->szB, sxpt->Insig.n_xpts, s,
         make_perc(clo_threshold));
      break;
    }

    default:
      tl_assert2(0, "pp_snapshot_SXPt: unrecognised SXPt tag");
   }
}

// Write one snapshot record to 'fd' in the massif output format; detailed
// snapshots additionally dump the heap tree.
static void pp_snapshot(Int fd, Snapshot* snapshot, Int snapshot_n)
{
   sanity_check_snapshot(snapshot);

   FP("#-----------\n");
   FP("snapshot=%d\n", snapshot_n);
   FP("#-----------\n");
   FP("time=%lld\n",            snapshot->time);
   FP("mem_heap_B=%lu\n",       snapshot->heap_szB);
   FP("mem_heap_extra_B=%lu\n", snapshot->heap_extra_szB);
   FP("mem_stacks_B=%lu\n",     snapshot->stacks_szB);

   if (is_detailed_snapshot(snapshot)) {
      // Detailed snapshot -- print heap tree.
      Int   depth_str_len = clo_depth + 3;
      HChar* depth_str = VG_(malloc)("ms.main.pps.1",
                                     sizeof(HChar) * depth_str_len);
      SizeT snapshot_total_szB =
         snapshot->heap_szB + snapshot->heap_extra_szB + snapshot->stacks_szB;
      depth_str[0] = '\0';   // Initialise depth_str to "".

      FP("heap_tree=%s\n", ( Peak == snapshot->kind ?
                             "peak" : "detailed" ));
      pp_snapshot_SXPt(fd, snapshot->alloc_sxpt, 0, depth_str,
                       depth_str_len, snapshot->heap_szB,
                       snapshot_total_szB);

      VG_(free)(depth_str);

   } else {
      FP("heap_tree=empty\n");
   }
}

// Write the output header ("desc:", "cmd:", "time_unit:") followed by all
// 'nr_elements' snapshots to 'massif_out_file'.  On open failure the
// results are simply dropped (with a message).
static void write_snapshots_to_file(const HChar* massif_out_file,
                                    Snapshot snapshots_array[],
                                    Int nr_elements)
{
   Int i, fd;
   SysRes sres;

   sres = VG_(open)(massif_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                     VKI_S_IRUSR|VKI_S_IWUSR);
   if (sr_isError(sres)) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open output file '%s'\n", massif_out_file );
      VG_(umsg)(" ... so profiling results will be missing.\n");
      return;
   } else {
      fd = sr_Res(sres);
   }

   // Print massif-specific options that were used.
   // XXX: is it worth having a "desc:" line?  Could just call it "options:"
   // -- this file format isn't as generic as Cachegrind's, so the
   // implied genericity of "desc:" is bogus.
   FP("desc:");
   for (i = 0; i < VG_(sizeXA)(args_for_massif); i++) {
      HChar* arg = *(HChar**)VG_(indexXA)(args_for_massif, i);
      FP(" %s", arg);
   }
   if (0 == i) FP(" (none)");
   FP("\n");

   // Print "cmd:" line.
   FP("cmd: ");
   if (VG_(args_the_exename)) {
      FP("%s", VG_(args_the_exename));
      for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
         HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
         if (arg)
            FP(" %s", arg);
      }
   } else {
      FP(" ???");
   }
   FP("\n");

   FP("time_unit: %s\n", TimeUnit_to_string(clo_time_unit));

   for (i = 0; i < nr_elements; i++) {
      Snapshot* snapshot = & snapshots_array[i];
      pp_snapshot(fd, snapshot, i);     // Detailed snapshot!
   }
   VG_(close) (fd);
}

// Write the global snapshots array to the --massif-out-file destination.
static void write_snapshots_array_to_file(void)
{
   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file;  this
   // happened in 3.3.0.
   HChar* massif_out_file =
      VG_(expand_file_name)("--massif-out-file", clo_massif_out_file);
   write_snapshots_to_file (massif_out_file, snapshots, next_snapshot_i);
   VG_(free)(massif_out_file);
}

// vgdb "snapshot"/"detailed_snapshot" implementation: take a one-off
// snapshot and write it to 'filename' (or massif.vgdb.out if NULL).
static void handle_snapshot_monitor_command (const HChar *filename,
                                             Bool detailed)
{
   Snapshot snapshot;

   if (!clo_pages_as_heap && !have_started_executing_code) {
      // See comments of variable have_started_executing_code.
      VG_(gdb_printf)
         ("error: cannot take snapshot before execution has started\n");
      return;
   }

   clear_snapshot(&snapshot, /* do_sanity_check */ False);
   take_snapshot(&snapshot, Normal, get_time(), detailed);
   write_snapshots_to_file ((filename == NULL) ?
                            "massif.vgdb.out" : filename,
                            &snapshot,
                            1);
   delete_snapshot(&snapshot);
}

// Parse and dispatch a vgdb monitor command line.  Returns True iff the
// first keyword was recognised (even if ambiguous).
static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
{
   HChar* wcmd;
   HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
   HChar *ssaveptr;

   VG_(strcpy) (s, req);

   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   switch (VG_(keyword_id) ("help snapshot detailed_snapshot",
                            wcmd, kwd_report_duplicated_matches)) {
   case -2: /* multiple matches */
      return True;
   case -1: /* not found */
      return False;
   case  0: /* help */
      print_monitor_help();
      return True;
   case  1: { /* snapshot */
      HChar* filename;
      filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
      handle_snapshot_monitor_command (filename, False /* detailed */);
      return True;
   }
   case  2: { /* detailed_snapshot */
      HChar* filename;
      filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
      handle_snapshot_monitor_command (filename, True /* detailed */);
      return True;
   }
   default:
      tl_assert(0);
      return False;
   }
}

// Dump all the tool counters via VG_(dmsg); used by --stats=yes and the
// core's print-stats need.
static void ms_print_stats (void)
{
#define STATS(format, args...) \
   VG_(dmsg)("Massif: " format, ##args)

   STATS("heap allocs: %u\n", n_heap_allocs);
   STATS("heap reallocs: %u\n", n_heap_reallocs);
   STATS("heap frees: %u\n", n_heap_frees);
   STATS("ignored heap allocs: %u\n", n_ignored_heap_allocs);
   STATS("ignored heap frees: %u\n", n_ignored_heap_frees);
   STATS("ignored heap reallocs: %u\n", n_ignored_heap_reallocs);
   STATS("stack allocs: %u\n", n_stack_allocs);
   STATS("stack frees: %u\n", n_stack_frees);
   STATS("XPts: %u\n", n_xpts);
   STATS("top-XPts: %u (%d%%)\n",
         alloc_xpt->n_children,
         ( n_xpts ? alloc_xpt->n_children * 100 / n_xpts : 0));
   STATS("XPt init expansions: %u\n", n_xpt_init_expansions);
   STATS("XPt later expansions: %u\n", n_xpt_later_expansions);
   STATS("SXPt allocs: %u\n", n_sxpt_allocs);
   STATS("SXPt frees: %u\n", n_sxpt_frees);
   STATS("skipped snapshots: %u\n", n_skipped_snapshots);
   STATS("real snapshots: %u\n", n_real_snapshots);
   STATS("detailed snapshots: %u\n", n_detailed_snapshots);
   STATS("peak snapshots: %u\n", n_peak_snapshots);
   STATS("cullings: %u\n", n_cullings);
   STATS("XCon redos: %u\n", n_XCon_redos);
#undef STATS
}

//------------------------------------------------------------//
//--- Finalisation                                         ---//
//------------------------------------------------------------//

// Tool finalisation: write all accumulated snapshots, then optionally the
// stats.  'exit_status' is unused.
static void ms_fini(Int exit_status)
{
   // Output.
   write_snapshots_array_to_file();

   // Stats
   tl_assert(n_xpts > 0);  // always have alloc_xpt

   if (VG_(clo_stats))
      ms_print_stats();
}


//------------------------------------------------------------//
//--- Initialisation                                       ---//
//------------------------------------------------------------//

// Post-command-line-option initialisation: validate option combinations,
// neutralise the malloc-replacement preload for --pages-as-heap, register
// memory-event callbacks, and set up the snapshots array.
static void ms_post_clo_init(void)
{
   Int i;
   HChar* LD_PRELOAD_val;
   HChar* s;
   HChar* s2;

   // Check options.
   if (clo_pages_as_heap) {
      if (clo_stacks) {
         VG_(fmsg_bad_option)(
            "--pages-as-heap=yes together with --stacks=yes", "");
      }
   }
   if (!clo_heap) {
      clo_pages_as_heap = False;
   }

   // If --pages-as-heap=yes we don't want malloc replacement to occur.  So we
   // disable vgpreload_massif-$PLATFORM.so by removing it from LD_PRELOAD (or
   // platform-equivalent).  We replace it entirely with spaces because then
   // the linker doesn't complain (it does complain if we just change the name
   // to a bogus file).
   // This is a bit of a hack, but LD_PRELOAD is setup well
   // before tool initialisation, so this seems the best way to do it.
   if (clo_pages_as_heap) {
      clo_heap_admin = 0;     // No heap admin on pages.

      LD_PRELOAD_val = VG_(getenv)( VG_(LD_PRELOAD_var_name) );
      tl_assert(LD_PRELOAD_val);

      // Make sure the vgpreload_core-$PLATFORM entry is there, for sanity.
      s2 = VG_(strstr)(LD_PRELOAD_val, "vgpreload_core");
      tl_assert(s2);

      // Now find the vgpreload_massif-$PLATFORM entry.
      s2 = VG_(strstr)(LD_PRELOAD_val, "vgpreload_massif");
      tl_assert(s2);

      // Blank out everything to the previous ':', which must be there because
      // of the preceding vgpreload_core-$PLATFORM entry.
      for (s = s2; *s != ':'; s--) {
         *s = ' ';
      }

      // Blank out everything to the end of the entry, which will be '\0' if
      // LD_PRELOAD was empty before Valgrind started, or ':' otherwise.
      for (s = s2; *s != ':' && *s != '\0'; s++) {
         *s = ' ';
      }
   }

   // Print alloc-fns and ignore-fns, if necessary.
   if (VG_(clo_verbosity) > 1) {
      VERB(1, "alloc-fns:\n");
      for (i = 0; i < VG_(sizeXA)(alloc_fns); i++) {
         HChar** fn_ptr = VG_(indexXA)(alloc_fns, i);
         VERB(1, " %s\n", *fn_ptr);
      }

      VERB(1, "ignore-fns:\n");
      if (0 == VG_(sizeXA)(ignore_fns)) {
         VERB(1, " <empty>\n");
      }
      for (i = 0; i < VG_(sizeXA)(ignore_fns); i++) {
         HChar** fn_ptr = VG_(indexXA)(ignore_fns, i);
         VERB(1, " %d: %s\n", i, *fn_ptr);
      }
   }

   // Events to track.
   if (clo_stacks) {
      VG_(track_new_mem_stack)        ( new_mem_stack        );
      VG_(track_die_mem_stack)        ( die_mem_stack        );
      VG_(track_new_mem_stack_signal) ( new_mem_stack_signal );
      VG_(track_die_mem_stack_signal) ( die_mem_stack_signal );
   }

   if (clo_pages_as_heap) {
      VG_(track_new_mem_startup) ( ms_new_mem_startup );
      VG_(track_new_mem_brk)     ( ms_new_mem_brk     );
      VG_(track_new_mem_mmap)    ( ms_new_mem_mmap    );

      VG_(track_copy_mem_remap)  ( ms_copy_mem_remap  );

      VG_(track_die_mem_brk)     ( ms_die_mem_brk     );
      VG_(track_die_mem_munmap)  ( ms_die_mem_munmap  );
   }

   // Initialise snapshot array, and sanity-check it.
   snapshots = VG_(malloc)("ms.main.mpoci.1",
                           sizeof(Snapshot) * clo_max_snapshots);
   // We don't want to do snapshot sanity checks here, because they're
   // currently uninitialised.
   for (i = 0; i < clo_max_snapshots; i++) {
      clear_snapshot( & snapshots[i], /*do_sanity_check*/False );
   }
   sanity_check_snapshots_array();
}

// Pre-command-line-option initialisation: declare tool details, register
// needs/callbacks with the core, and build the initial data structures.
static void ms_pre_clo_init(void)
{
   VG_(details_name)            ("Massif");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a heap profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2003-2013, and GNU GPL'd, by Nicholas Nethercote");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);

   VG_(details_avg_translation_sizeB) ( 330 );

   VG_(clo_vex_control).iropt_register_updates
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   // Basic functions.
   VG_(basic_tool_funcs)          (ms_post_clo_init,
                                   ms_instrument,
                                   ms_fini);

   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_command_line_options)(ms_process_cmd_line_option,
                                   ms_print_usage,
                                   ms_print_debug_usage);
   VG_(needs_client_requests)     (ms_handle_client_request);
   VG_(needs_sanity_checks)       (ms_cheap_sanity_check,
                                   ms_expensive_sanity_check);
   VG_(needs_print_stats)         (ms_print_stats);
   VG_(needs_malloc_replacement)  (ms_malloc,
                                   ms___builtin_new,
                                   ms___builtin_vec_new,
                                   ms_memalign,
                                   ms_calloc,
                                   ms_free,
                                   ms___builtin_delete,
                                   ms___builtin_vec_delete,
                                   ms_realloc,
                                   ms_malloc_usable_size,
                                   0 );

   // HP_Chunks.
   malloc_list = VG_(HT_construct)( "Massif's malloc list" );

   // Dummy node at top of the context structure.
   alloc_xpt = new_XPt(/*ip*/0, /*parent*/NULL);

   // Initialise alloc_fns and ignore_fns.
   init_alloc_fns();
   init_ignore_fns();

   // Initialise args_for_massif.
   args_for_massif = VG_(newXA)(VG_(malloc), "ms.main.mprci.1",
                                VG_(free), sizeof(HChar*));
}

VG_DETERMINE_INTERFACE_VERSION(ms_pre_clo_init)

//--------------------------------------------------------------------//
//--- end                                                          ---//
//--------------------------------------------------------------------//