1
2//--------------------------------------------------------------------*/
3//--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---*/
4//--------------------------------------------------------------------*/
5
6/*
7   This file is part of DHAT, a Valgrind tool for profiling the
8   heap usage of programs.
9
10   Copyright (C) 2010-2017 Mozilla Inc
11
12   This program is free software; you can redistribute it and/or
13   modify it under the terms of the GNU General Public License as
14   published by the Free Software Foundation; either version 2 of the
15   License, or (at your option) any later version.
16
17   This program is distributed in the hope that it will be useful, but
18   WITHOUT ANY WARRANTY; without even the implied warranty of
19   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20   General Public License for more details.
21
22   You should have received a copy of the GNU General Public License
23   along with this program; if not, write to the Free Software
24   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25   02111-1307, USA.
26
27   The GNU General Public License is contained in the file COPYING.
28*/
29
30/* Contributed by Julian Seward <jseward@acm.org> */
31
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcbase.h"
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
38#include "pub_tool_mallocfree.h"
39#include "pub_tool_options.h"
40#include "pub_tool_replacemalloc.h"
41#include "pub_tool_tooliface.h"
42#include "pub_tool_wordfm.h"
43
44#define HISTOGRAM_SIZE_LIMIT 1024
45
46
47//------------------------------------------------------------//
48//--- Globals                                              ---//
49//------------------------------------------------------------//
50
// Number of guest instructions executed so far.  This is
// incremented directly from the generated code (see
// add_counter_update below).
static ULong g_guest_instrs_executed = 0;

// Summary statistics for the entire run.
static ULong g_tot_blocks = 0;   // total blocks allocated
static ULong g_tot_bytes  = 0;   // total bytes allocated

static ULong g_cur_blocks_live = 0; // curr # blocks live
static ULong g_cur_bytes_live  = 0; // curr # bytes live

// Snapshot of the block/byte counts taken at the point of
// maximum byte liveness (the "max residency" point).
static ULong g_max_blocks_live = 0;
static ULong g_max_bytes_live  = 0;
64
65
66//------------------------------------------------------------//
67//--- an Interval Tree of live blocks                      ---//
68//------------------------------------------------------------//
69
/* Tracks information about one live heap block. */
typedef
   struct {
      Addr        payload;   // start of the client-visible payload
      SizeT       req_szB;   // requested size; always > 0 once in the tree
      ExeContext* ap;        // allocation point (stack trace at alloc)
      ULong       allocd_at; // g_guest_instrs_executed at allocation time
      ULong       n_reads;   // bytes read from this block so far
      ULong       n_writes;  // bytes written to this block so far
      /* Approx histogram, one 16-bit counter per payload byte.  Counts
         saturate at 0xFFFF.  Can be NULL if the block is resized or if
         the block is larger than HISTOGRAM_SIZE_LIMIT. */
      UShort*     histoW; /* [0 .. req_szB-1] */
   }
   Block;

/* All live blocks, keyed by Block*.  May not contain zero-sized
   blocks.  May not contain overlapping blocks. */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
89
90/* Here's the comparison function.  Since the tree is required
91to contain non-zero sized, non-overlapping blocks, it's good
92enough to consider any overlap as a match. */
93static Word interval_tree_Cmp ( UWord k1, UWord k2 )
94{
95   Block* b1 = (Block*)k1;
96   Block* b2 = (Block*)k2;
97   tl_assert(b1->req_szB > 0);
98   tl_assert(b2->req_szB > 0);
99   if (b1->payload + b1->req_szB <= b2->payload) return -1;
100   if (b2->payload + b2->req_szB <= b1->payload) return  1;
101   return 0;
102}
103
// 2-entry cache for find_Block_containing; fbc_cache0 is the
// most-recently-used entry.  Both entries must be invalidated
// (set to NULL) whenever the interval tree changes.
static Block* fbc_cache0 = NULL;
static Block* fbc_cache1 = NULL;

// Hit/miss statistics for the cache above.
static UWord stats__n_fBc_cached = 0;
static UWord stats__n_fBc_uncached = 0;
static UWord stats__n_fBc_notfound = 0;
111
112static Block* find_Block_containing ( Addr a )
113{
114   if (LIKELY(fbc_cache0
115              && fbc_cache0->payload <= a
116              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
117      // found at 0
118      stats__n_fBc_cached++;
119      return fbc_cache0;
120   }
121   if (LIKELY(fbc_cache1
122              && fbc_cache1->payload <= a
123              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
124      // found at 1; swap 0 and 1
125      Block* tmp = fbc_cache0;
126      fbc_cache0 = fbc_cache1;
127      fbc_cache1 = tmp;
128      stats__n_fBc_cached++;
129      return fbc_cache0;
130   }
131   Block fake;
132   fake.payload = a;
133   fake.req_szB = 1;
134   UWord foundkey = 1;
135   UWord foundval = 1;
136   Bool found = VG_(lookupFM)( interval_tree,
137                               &foundkey, &foundval, (UWord)&fake );
138   if (!found) {
139      stats__n_fBc_notfound++;
140      return NULL;
141   }
142   tl_assert(foundval == 0); // we don't store vals in the interval tree
143   tl_assert(foundkey != 1);
144   Block* res = (Block*)foundkey;
145   tl_assert(res != &fake);
146   // put at the top position
147   fbc_cache1 = fbc_cache0;
148   fbc_cache0 = res;
149   stats__n_fBc_uncached++;
150   return res;
151}
152
153// delete a block; asserts if not found.  (viz, 'a' must be
154// known to be present.)
155static void delete_Block_starting_at ( Addr a )
156{
157   Block fake;
158   fake.payload = a;
159   fake.req_szB = 1;
160   Bool found = VG_(delFromFM)( interval_tree,
161                                NULL, NULL, (Addr)&fake );
162   tl_assert(found);
163   fbc_cache0 = fbc_cache1 = NULL;
164}
165
166
167//------------------------------------------------------------//
168//--- a FM of allocation points (APs)                      ---//
169//------------------------------------------------------------//
170
/* Per-allocation-point summary statistics. */
typedef
   struct {
      // the allocation point that we're summarising stats for
      ExeContext* ap;
      // used when printing results
      Bool shown;
      // The current number of blocks and bytes live for this AP
      ULong cur_blocks_live;
      ULong cur_bytes_live;
      // The number of blocks and bytes live at the max-liveness
      // point.  Note this is a bit subtle.  max_blocks_live is not
      // the maximum number of live blocks, but rather the number of
      // blocks live at the point of maximum byte liveness.  These are
      // not necessarily the same thing.
      ULong max_blocks_live;
      ULong max_bytes_live;
      // Total number of blocks and bytes allocated by this AP.
      ULong tot_blocks;
      ULong tot_bytes;
      // Sum of death ages (instruction counts between allocation and
      // free) for all blocks allocated by this AP that have
      // subsequently been freed, plus the count of such deaths.
      ULong death_ages_sum;
      ULong deaths;
      // Total number of reads and writes in all blocks allocated
      // by this AP.
      ULong n_reads;
      ULong n_writes;
      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this AP, but only if:
         - this AP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this AP
         has only ever allocated blocks of one size.

         3 states:
            Unknown          because no retirement yet
            Exactly xsize    all retiring blocks are of this size
            Mixed            multiple different sizes seen
      */
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;   // non-zero only in the Exactly state
      UInt* histo;   /* [0 .. xsize-1]; only ever non-NULL in Exactly */
   }
   APInfo;

/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
   .ap field in the values. */
static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
219
220
221/* 'bk' is being introduced (has just been allocated).  Find the
222   relevant APInfo entry for it, or create one, based on the block's
223   allocation EC.  Then, update the APInfo to the extent that we
224   actually can, to reflect the allocation. */
225static void intro_Block ( Block* bk )
226{
227   tl_assert(bk);
228   tl_assert(bk->ap);
229
230   APInfo* api   = NULL;
231   UWord   keyW  = 0;
232   UWord   valW  = 0;
233   Bool    found = VG_(lookupFM)( apinfo,
234                                  &keyW, &valW, (UWord)bk->ap );
235   if (found) {
236      api = (APInfo*)valW;
237      tl_assert(keyW == (UWord)bk->ap);
238   } else {
239      api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
240      VG_(memset)(api, 0, sizeof(*api));
241      api->ap = bk->ap;
242      Bool present = VG_(addToFM)( apinfo,
243                                   (UWord)bk->ap, (UWord)api );
244      tl_assert(!present);
245      // histo stuff
246      tl_assert(api->deaths == 0);
247      api->xsize_tag = Unknown;
248      api->xsize = 0;
249      if (0) VG_(printf)("api %p   -->  Unknown\n", api);
250   }
251
252   tl_assert(api->ap == bk->ap);
253
254   /* So: update stats to reflect an allocation */
255
256   // # live blocks
257   api->cur_blocks_live++;
258
259   // # live bytes
260   api->cur_bytes_live += bk->req_szB;
261   if (api->cur_bytes_live > api->max_bytes_live) {
262      api->max_bytes_live  = api->cur_bytes_live;
263      api->max_blocks_live = api->cur_blocks_live;
264   }
265
266   // total blocks and bytes allocated here
267   api->tot_blocks++;
268   api->tot_bytes += bk->req_szB;
269
270   // update summary globals
271   g_tot_blocks++;
272   g_tot_bytes += bk->req_szB;
273
274   g_cur_blocks_live++;
275   g_cur_bytes_live += bk->req_szB;
276   if (g_cur_bytes_live > g_max_bytes_live) {
277      g_max_bytes_live = g_cur_bytes_live;
278      g_max_blocks_live = g_cur_blocks_live;
279   }
280}
281
282
283/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
284   it, which must already exist.  Then, fold info from 'bk' into that
285   entry.  'because_freed' is True if the block is retiring because
286   the client has freed it.  If it is False then the block is retiring
287   because the program has finished, in which case we want to skip the
288   updates of the total blocks live etc for this AP, but still fold in
289   the access counts and histo data that have so far accumulated for
290   the block. */
291static void retire_Block ( Block* bk, Bool because_freed )
292{
293   tl_assert(bk);
294   tl_assert(bk->ap);
295
296   APInfo* api   = NULL;
297   UWord   keyW  = 0;
298   UWord   valW  = 0;
299   Bool    found = VG_(lookupFM)( apinfo,
300                                  &keyW, &valW, (UWord)bk->ap );
301
302   tl_assert(found);
303   api = (APInfo*)valW;
304   tl_assert(api->ap == bk->ap);
305
306   // update stats following this free.
307   if (0)
308   VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
309               bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
310
311   // update total blocks live etc for this AP
312   if (because_freed) {
313      tl_assert(api->cur_blocks_live >= 1);
314      tl_assert(api->cur_bytes_live >= bk->req_szB);
315      api->cur_blocks_live--;
316      api->cur_bytes_live -= bk->req_szB;
317
318      api->deaths++;
319
320      tl_assert(bk->allocd_at <= g_guest_instrs_executed);
321      api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
322
323      // update global summary stats
324      tl_assert(g_cur_blocks_live > 0);
325      g_cur_blocks_live--;
326      tl_assert(g_cur_bytes_live >= bk->req_szB);
327      g_cur_bytes_live -= bk->req_szB;
328   }
329
330   // access counts
331   api->n_reads  += bk->n_reads;
332   api->n_writes += bk->n_writes;
333
334   // histo stuff.  First, do state transitions for xsize/xsize_tag.
335   switch (api->xsize_tag) {
336
337      case Unknown:
338         tl_assert(api->xsize == 0);
339         tl_assert(api->deaths == 1 || api->deaths == 0);
340         tl_assert(!api->histo);
341         api->xsize_tag = Exactly;
342         api->xsize = bk->req_szB;
343         if (0) VG_(printf)("api %p   -->  Exactly(%lu)\n", api, api->xsize);
344         // and allocate the histo
345         if (bk->histoW) {
346            api->histo = VG_(malloc)("dh.main.retire_Block.1",
347                                     api->xsize * sizeof(UInt));
348            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
349         }
350         break;
351
352      case Exactly:
353         //tl_assert(api->deaths > 1);
354         if (bk->req_szB != api->xsize) {
355            if (0) VG_(printf)("api %p   -->  Mixed(%lu -> %lu)\n",
356                               api, api->xsize, bk->req_szB);
357            api->xsize_tag = Mixed;
358            api->xsize = 0;
359            // deallocate the histo, if any
360            if (api->histo) {
361               VG_(free)(api->histo);
362               api->histo = NULL;
363            }
364         }
365         break;
366
367      case Mixed:
368         //tl_assert(api->deaths > 1);
369         break;
370
371      default:
372        tl_assert(0);
373   }
374
375   // See if we can fold the histo data from this block into
376   // the data for the AP
377   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
378      tl_assert(api->xsize == bk->req_szB);
379      UWord i;
380      for (i = 0; i < api->xsize; i++) {
381         // FIXME: do something better in case of overflow of api->histo[..]
382         // Right now, at least don't let it overflow/wrap around
383         if (api->histo[i] <= 0xFFFE0000)
384            api->histo[i] += (UInt)bk->histoW[i];
385      }
386      if (0) VG_(printf)("fold in, AP = %p\n", api);
387   }
388
389
390
391#if 0
392   if (bk->histoB) {
393      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
394      UWord i;
395      for (i = 0; i < bk->req_szB; i++)
396        VG_(printf)("%u ", (UInt)bk->histoB[i]);
397      VG_(printf)("\n");
398   } else {
399      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
400   }
401#endif
402}
403
404/* This handles block resizing.  When a block with AP 'ec' has a
405   size change of 'delta', call here to update the APInfo. */
406static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
407{
408   APInfo* api   = NULL;
409   UWord   keyW  = 0;
410   UWord   valW  = 0;
411   Bool    found = VG_(lookupFM)( apinfo,
412                                  &keyW, &valW, (UWord)ec );
413
414   tl_assert(found);
415   api = (APInfo*)valW;
416   tl_assert(api->ap == ec);
417
418   if (delta < 0) {
419      tl_assert(api->cur_bytes_live >= -delta);
420      tl_assert(g_cur_bytes_live >= -delta);
421   }
422
423   // adjust current live size
424   api->cur_bytes_live += delta;
425   g_cur_bytes_live += delta;
426
427   if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
428      api->max_bytes_live  = api->cur_bytes_live;
429      api->max_blocks_live = api->cur_blocks_live;
430   }
431
432   // update global summary stats
433   if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
434      g_max_bytes_live = g_cur_bytes_live;
435      g_max_blocks_live = g_cur_blocks_live;
436   }
437   if (delta > 0)
438      g_tot_bytes += delta;
439
440   // adjust total allocation size
441   if (delta > 0)
442      api->tot_bytes += delta;
443}
444
445
446//------------------------------------------------------------//
447//--- update both Block and APInfos after {m,re}alloc/free ---//
448//------------------------------------------------------------//
449
450static
451void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
452                  Bool is_zeroed )
453{
454   tl_assert(p == NULL); // don't handle custom allocators right now
455   SizeT actual_szB /*, slop_szB*/;
456
457   if ((SSizeT)req_szB < 0) return NULL;
458
459   if (req_szB == 0)
460      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
461
462   // Allocate and zero if necessary
463   if (!p) {
464      p = VG_(cli_malloc)( req_alignB, req_szB );
465      if (!p) {
466         return NULL;
467      }
468      if (is_zeroed) VG_(memset)(p, 0, req_szB);
469      actual_szB = VG_(cli_malloc_usable_size)(p);
470      tl_assert(actual_szB >= req_szB);
471      /* slop_szB = actual_szB - req_szB; */
472   } else {
473      /* slop_szB = 0; */
474   }
475
476   // Make new HP_Chunk node, add to malloc_list
477   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
478   bk->payload   = (Addr)p;
479   bk->req_szB   = req_szB;
480   bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
481   bk->allocd_at = g_guest_instrs_executed;
482   bk->n_reads   = 0;
483   bk->n_writes  = 0;
484   // set up histogram array, if the block isn't too large
485   bk->histoW = NULL;
486   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
487      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
488      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
489   }
490
491   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
492   tl_assert(!present);
493   fbc_cache0 = fbc_cache1 = NULL;
494
495   intro_Block(bk);
496
497   if (0) VG_(printf)("ALLOC %lu -> %p\n", req_szB, p);
498
499   return p;
500}
501
502static
503void die_block ( void* p, Bool custom_free )
504{
505   tl_assert(!custom_free);  // at least for now
506
507   Block* bk = find_Block_containing( (Addr)p );
508
509   if (!bk) {
510     return; // bogus free
511   }
512
513   tl_assert(bk->req_szB > 0);
514   // assert the block finder is behaving sanely
515   tl_assert(bk->payload <= (Addr)p);
516   tl_assert( (Addr)p < bk->payload + bk->req_szB );
517
518   if (bk->payload != (Addr)p) {
519      return; // bogus free
520   }
521
522   if (0) VG_(printf)(" FREE %p %llu\n",
523                      p, g_guest_instrs_executed - bk->allocd_at);
524
525   retire_Block(bk, True/*because_freed*/);
526
527   VG_(cli_free)( (void*)bk->payload );
528   delete_Block_starting_at( bk->payload );
529   if (bk->histoW) {
530      VG_(free)( bk->histoW );
531      bk->histoW = NULL;
532   }
533   VG_(free)( bk );
534}
535
536
537static
538void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
539{
540   if (0) VG_(printf)("REALL %p %lu\n", p_old, new_req_szB);
541   void* p_new = NULL;
542
543   tl_assert(new_req_szB > 0); // map 0 to 1
544
545   // Find the old block.
546   Block* bk = find_Block_containing( (Addr)p_old );
547   if (!bk) {
548      return NULL;   // bogus realloc
549   }
550
551   tl_assert(bk->req_szB > 0);
552   // assert the block finder is behaving sanely
553   tl_assert(bk->payload <= (Addr)p_old);
554   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
555
556   if (bk->payload != (Addr)p_old) {
557      return NULL; // bogus realloc
558   }
559
560   // Keeping the histogram alive in any meaningful way across
561   // block resizing is too darn complicated.  Just throw it away.
562   if (bk->histoW) {
563      VG_(free)(bk->histoW);
564      bk->histoW = NULL;
565   }
566
567   // Actually do the allocation, if necessary.
568   if (new_req_szB <= bk->req_szB) {
569
570      // New size is smaller or same; block not moved.
571      apinfo_change_cur_bytes_live(bk->ap,
572                                   (Long)new_req_szB - (Long)bk->req_szB);
573      bk->req_szB = new_req_szB;
574      return p_old;
575
576   } else {
577
578      // New size is bigger;  make new block, copy shared contents, free old.
579      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
580      if (!p_new) {
581         // Nb: if realloc fails, NULL is returned but the old block is not
582         // touched.  What an awful function.
583         return NULL;
584      }
585      tl_assert(p_new != p_old);
586
587      VG_(memcpy)(p_new, p_old, bk->req_szB);
588      VG_(cli_free)(p_old);
589
590      // Since the block has moved, we need to re-insert it into the
591      // interval tree at the new place.  Do this by removing
592      // and re-adding it.
593      delete_Block_starting_at( (Addr)p_old );
594      // now 'bk' is no longer in the tree, but the Block itself
595      // is still alive
596
597      // Update the metadata.
598      apinfo_change_cur_bytes_live(bk->ap,
599                                   (Long)new_req_szB - (Long)bk->req_szB);
600      bk->payload = (Addr)p_new;
601      bk->req_szB = new_req_szB;
602
603      // and re-add
604      Bool present
605         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
606      tl_assert(!present);
607      fbc_cache0 = fbc_cache1 = NULL;
608
609      return p_new;
610   }
611   /*NOTREACHED*/
612   tl_assert(0);
613}
614
615
616//------------------------------------------------------------//
617//--- malloc() et al replacement wrappers                  ---//
618//------------------------------------------------------------//
619
// Replacement for malloc().
static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
624
// Replacement for C++ operator new.
static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
629
// Replacement for C++ operator new[].
static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
634
635static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
636{
637   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
638}
639
// Replacement for memalign(); honours the caller-specified alignment.
static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, False );
}
644
// Replacement for free().
static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block( p, /*custom_free*/False );
}
649
// Replacement for C++ operator delete.
static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False);
}
654
// Replacement for C++ operator delete[].
static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False );
}
659
660static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
661{
662   if (p_old == NULL) {
663      return dh_malloc(tid, new_szB);
664   }
665   if (new_szB == 0) {
666      dh_free(tid, p_old);
667      return NULL;
668   }
669   return renew_block(tid, p_old, new_szB);
670}
671
// Replacement for malloc_usable_size().  Note: this reports the
// *requested* size of the block, not the (possibly larger) size the
// underlying allocator actually handed out.
static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
{
   Block* bk = find_Block_containing( (Addr)p );
   return bk ? bk->req_szB : 0;
}
677
678
679//------------------------------------------------------------//
680//--- memory references                                    ---//
681//------------------------------------------------------------//
682
683static
684void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
685{
686   UWord i, offMin, offMax1;
687   offMin = addr - bk->payload;
688   tl_assert(offMin < bk->req_szB);
689   offMax1 = offMin + szB;
690   if (offMax1 > bk->req_szB)
691      offMax1 = bk->req_szB;
692   //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
693   for (i = offMin; i < offMax1; i++) {
694      UShort n = bk->histoW[i];
695      if (n < 0xFFFF) n++;
696      bk->histoW[i] = n;
697   }
698}
699
700static VG_REGPARM(2)
701void dh_handle_write ( Addr addr, UWord szB )
702{
703   Block* bk = find_Block_containing(addr);
704   if (bk) {
705      bk->n_writes += szB;
706      if (bk->histoW)
707         inc_histo_for_block(bk, addr, szB);
708   }
709}
710
711static VG_REGPARM(2)
712void dh_handle_read ( Addr addr, UWord szB )
713{
714   Block* bk = find_Block_containing(addr);
715   if (bk) {
716      bk->n_reads += szB;
717      if (bk->histoW)
718         inc_histo_for_block(bk, addr, szB);
719   }
720}
721
722
723// Handle reads and writes by syscalls (read == kernel
724// reads user space, write == kernel writes user space).
725// Assumes no such read or write spans a heap block
726// boundary and so we can treat it just as one giant
727// read or write.
728static
729void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
730                              Addr base, SizeT size )
731{
732   switch (part) {
733      case Vg_CoreSysCall:
734         dh_handle_read(base, size);
735         break;
736      case Vg_CoreSysCallArgInMem:
737         break;
738      case Vg_CoreTranslate:
739         break;
740      default:
741         tl_assert(0);
742   }
743}
744
745static
746void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
747                               Addr base, SizeT size )
748{
749   switch (part) {
750      case Vg_CoreSysCall:
751         dh_handle_write(base, size);
752         break;
753      case Vg_CoreSignal:
754         break;
755      default:
756         tl_assert(0);
757   }
758}
759
760
761//------------------------------------------------------------//
762//--- Instrumentation                                      ---//
763//------------------------------------------------------------//
764
// Shorthand for building VEX IR expressions and statements.
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
770
771static
772void add_counter_update(IRSB* sbOut, Int n)
773{
774   #if defined(VG_BIGENDIAN)
775   # define END Iend_BE
776   #elif defined(VG_LITTLEENDIAN)
777   # define END Iend_LE
778   #else
779   # error "Unknown endianness"
780   #endif
781   // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
782   //   WrTmp(t1, Load64(&g_guest_instrs_executed))
783   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
784   //   Store(&g_guest_instrs_executed, t2)
785   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
786   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
787   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
788
789   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
790   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
791   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
792
793   addStmtToIRSB( sbOut, st1 );
794   addStmtToIRSB( sbOut, st2 );
795   addStmtToIRSB( sbOut, st3 );
796}
797
798static
799void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
800                 Int goff_sp)
801{
802   IRType   tyAddr   = Ity_INVALID;
803   const HChar* hName= NULL;
804   void*    hAddr    = NULL;
805   IRExpr** argv     = NULL;
806   IRDirty* di       = NULL;
807
808   const Int THRESH = 4096 * 4; // somewhat arbitrary
809   const Int rz_szB = VG_STACK_REDZONE_SZB;
810
811   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
812   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
813
814   if (isWrite) {
815      hName = "dh_handle_write";
816      hAddr = &dh_handle_write;
817   } else {
818      hName = "dh_handle_read";
819      hAddr = &dh_handle_read;
820   }
821
822   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
823
824   /* Add the helper. */
825   tl_assert(hName);
826   tl_assert(hAddr);
827   tl_assert(argv);
828   di = unsafeIRDirty_0_N( 2/*regparms*/,
829                           hName, VG_(fnptr_to_fnentry)( hAddr ),
830                           argv );
831
832   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
833      some arbitrary N.  If that fails then addr is in the range (SP -
834      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
835      addr is within a page of SP and so can't possibly be a heap
836      access, and so can be skipped. */
837   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
838   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
839
840   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
841   addStmtToIRSB(
842      sbOut,
843      assign(sp_minus_rz,
844             tyAddr == Ity_I32
845                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
846                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
847   );
848
849   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
850   addStmtToIRSB(
851      sbOut,
852      assign(diff,
853             tyAddr == Ity_I32
854                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
855                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
856   );
857
858   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
859   addStmtToIRSB(
860      sbOut,
861      assign(guard,
862             tyAddr == Ity_I32
863                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
864                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
865   );
866   di->guard = mkexpr(guard);
867
868   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
869}
870
871static
872IRSB* dh_instrument ( VgCallbackClosure* closure,
873                      IRSB* sbIn,
874                      const VexGuestLayout* layout,
875                      const VexGuestExtents* vge,
876                      const VexArchInfo* archinfo_host,
877                      IRType gWordTy, IRType hWordTy )
878{
879   Int   i, n = 0;
880   IRSB* sbOut;
881   IRTypeEnv* tyenv = sbIn->tyenv;
882
883   const Int goff_sp = layout->offset_SP;
884
885   // We increment the instruction count in two places:
886   // - just before any Ist_Exit statements;
887   // - just before the IRSB's end.
888   // In the former case, we zero 'n' and then continue instrumenting.
889
890   sbOut = deepCopyIRSBExceptStmts(sbIn);
891
892   // Copy verbatim any IR preamble preceding the first IMark
893   i = 0;
894   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
895      addStmtToIRSB( sbOut, sbIn->stmts[i] );
896      i++;
897   }
898
899   for (/*use current i*/; i < sbIn->stmts_used; i++) {
900      IRStmt* st = sbIn->stmts[i];
901
902      if (!st || st->tag == Ist_NoOp) continue;
903
904      switch (st->tag) {
905
906         case Ist_IMark: {
907            n++;
908            break;
909         }
910
911         case Ist_Exit: {
912            if (n > 0) {
913               // Add an increment before the Exit statement, then reset 'n'.
914               add_counter_update(sbOut, n);
915               n = 0;
916            }
917            break;
918         }
919
920         case Ist_WrTmp: {
921            IRExpr* data = st->Ist.WrTmp.data;
922            if (data->tag == Iex_Load) {
923               IRExpr* aexpr = data->Iex.Load.addr;
924               // Note also, endianness info is ignored.  I guess
925               // that's not interesting.
926               addMemEvent( sbOut, False/*!isWrite*/,
927                            sizeofIRType(data->Iex.Load.ty),
928                            aexpr, goff_sp );
929            }
930            break;
931         }
932
933         case Ist_Store: {
934            IRExpr* data  = st->Ist.Store.data;
935            IRExpr* aexpr = st->Ist.Store.addr;
936            addMemEvent( sbOut, True/*isWrite*/,
937                         sizeofIRType(typeOfIRExpr(tyenv, data)),
938                         aexpr, goff_sp );
939            break;
940         }
941
942         case Ist_Dirty: {
943            Int      dataSize;
944            IRDirty* d = st->Ist.Dirty.details;
945            if (d->mFx != Ifx_None) {
946               /* This dirty helper accesses memory.  Collect the details. */
947               tl_assert(d->mAddr != NULL);
948               tl_assert(d->mSize != 0);
949               dataSize = d->mSize;
950               // Large (eg. 28B, 108B, 512B on x86) data-sized
951               // instructions will be done inaccurately, but they're
952               // very rare and this avoids errors from hitting more
953               // than two cache lines in the simulation.
954               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
955                  addMemEvent( sbOut, False/*!isWrite*/,
956                               dataSize, d->mAddr, goff_sp );
957               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
958                  addMemEvent( sbOut, True/*isWrite*/,
959                               dataSize, d->mAddr, goff_sp );
960            } else {
961               tl_assert(d->mAddr == NULL);
962               tl_assert(d->mSize == 0);
963            }
964            break;
965         }
966
967         case Ist_CAS: {
968            /* We treat it as a read and a write of the location.  I
969               think that is the same behaviour as it was before IRCAS
970               was introduced, since prior to that point, the Vex
971               front ends would translate a lock-prefixed instruction
972               into a (normal) read followed by a (normal) write. */
973            Int    dataSize;
974            IRCAS* cas = st->Ist.CAS.details;
975            tl_assert(cas->addr != NULL);
976            tl_assert(cas->dataLo != NULL);
977            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
978            if (cas->dataHi != NULL)
979               dataSize *= 2; /* since it's a doubleword-CAS */
980            addMemEvent( sbOut, False/*!isWrite*/,
981                         dataSize, cas->addr, goff_sp );
982            addMemEvent( sbOut, True/*isWrite*/,
983                         dataSize, cas->addr, goff_sp );
984            break;
985         }
986
987         case Ist_LLSC: {
988            IRType dataTy;
989            if (st->Ist.LLSC.storedata == NULL) {
990               /* LL */
991               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
992               addMemEvent( sbOut, False/*!isWrite*/,
993                            sizeofIRType(dataTy),
994                            st->Ist.LLSC.addr, goff_sp );
995            } else {
996               /* SC */
997               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
998               addMemEvent( sbOut, True/*isWrite*/,
999                            sizeofIRType(dataTy),
1000                            st->Ist.LLSC.addr, goff_sp );
1001            }
1002            break;
1003         }
1004
1005         default:
1006            break;
1007      }
1008
1009      addStmtToIRSB( sbOut, st );
1010   }
1011
1012   if (n > 0) {
1013      // Add an increment before the SB end.
1014      add_counter_update(sbOut, n);
1015   }
1016   return sbOut;
1017}
1018
1019#undef binop
1020#undef mkexpr
1021#undef mkU32
1022#undef mkU64
1023#undef assign
1024
1025
1026//------------------------------------------------------------//
1027//--- Command line args                                    ---//
1028//------------------------------------------------------------//
1029
// FORWARDS
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name );

// How many allocation points to show (--show-top-n).
static Int    clo_show_top_n = 10;
// Sort metric name (--sort-by); validated by identify_metric during
// command line processing.
static const HChar *clo_sort_by = "max-bytes-live";
1037
// Process one DHAT command line option.  Returns True iff 'arg' was
// recognised, either here or by the malloc-replacement machinery.
// Note: the VG_*_CLO macros expand to a complete parenthesised
// condition, hence the unusual "if VG_..." layout without extra
// parentheses.
static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}

   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
       ULong (*dummyFn)(APInfo*);
       Bool dummyB;
       // Validate the metric name immediately so a bad --sort-by is
       // rejected at startup; the results are discarded here.
       Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
       if (!ok)
          return False;
       // otherwise it's OK, in which case leave it alone.
       // show_top_n_apinfos will later convert the string by a
       // second call to identify_metric.
   }

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}
1058
1059
// Print the summary of DHAT-specific command line options, shown by
// --help.  Keep the text in sync with dh_process_cmd_line_option and
// identify_metric.
static void dh_print_usage(void)
{
   VG_(printf)(
"    --show-top-n=number       show the top <number> alloc points [10]\n"
"    --sort-by=string\n"
"            sort the allocation points by the metric\n"
"            defined by <string>, thusly:\n"
"                max-bytes-live    maximum live bytes [default]\n"
"                tot-bytes-allocd  bytes allocated in total (turnover)\n"
"                max-blocks-live   maximum live blocks\n"
"                tot-blocks-allocd blocks allocated in total (turnover)\n"
   );
}
1073
// Print the debug-option summary (--help-debug).  DHAT has no
// debug-only options.
static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
1080
1081
1082//------------------------------------------------------------//
1083//--- Finalisation                                         ---//
1084//------------------------------------------------------------//
1085
1086static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
1087{
1088   ULong nK = n / 100;
1089   ULong nR = n % 100;
1090   VG_(sprintf)(buf, "%llu.%s%llu", nK,
1091                nR < 10 ? "0" : "",
1092                nR);
1093}
1094
1095static void show_APInfo ( APInfo* api )
1096{
1097   HChar bufA[80];   // large enough
1098   VG_(memset)(bufA, 0, sizeof(bufA));
1099   if (api->tot_blocks > 0) {
1100      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
1101                              / (ULong)api->tot_blocks );
1102   } else {
1103      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
1104   }
1105
1106   VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
1107             api->max_bytes_live, api->max_blocks_live);
1108   VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
1109             api->tot_bytes, api->tot_blocks, bufA);
1110
1111   tl_assert(api->tot_blocks >= api->max_blocks_live);
1112   tl_assert(api->tot_bytes >= api->max_bytes_live);
1113
1114   if (api->deaths > 0) {
1115      // Average Age at Death
1116      ULong aad = api->deaths == 0
1117                  ? 0 : (api->death_ages_sum / api->deaths);
1118      // AAD as a fraction of the total program lifetime (so far)
1119      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
1120      // complete lifetime of the program.
1121      ULong aad_frac_10k
1122         = g_guest_instrs_executed == 0
1123           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
1124      HChar buf[80];  // large enough
1125      show_N_div_100(buf, aad_frac_10k);
1126      VG_(umsg)("deaths:      %'llu, at avg age %'llu "
1127                "(%s%% of prog lifetime)\n",
1128                api->deaths, aad, buf );
1129   } else {
1130      VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
1131   }
1132
1133   HChar bufR[80], bufW[80];   // large enough
1134   VG_(memset)(bufR, 0, sizeof(bufR));
1135   VG_(memset)(bufW, 0, sizeof(bufW));
1136   if (api->tot_bytes > 0) {
1137      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
1138      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
1139   } else {
1140      VG_(strcat)(bufR, "Inf");
1141      VG_(strcat)(bufW, "Inf");
1142   }
1143
1144   VG_(umsg)("acc-ratios:  %s rd, %s wr "
1145             " (%'llu b-read, %'llu b-written)\n",
1146             bufR, bufW,
1147             api->n_reads, api->n_writes);
1148
1149   VG_(pp_ExeContext)(api->ap);
1150
1151   if (api->histo && api->xsize_tag == Exactly) {
1152      VG_(umsg)("\nAggregated access counts by offset:\n");
1153      VG_(umsg)("\n");
1154      UWord i;
1155      if (api->xsize > 0)
1156         VG_(umsg)("[   0]  ");
1157      for (i = 0; i < api->xsize; i++) {
1158         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
1159            VG_(umsg)("\n");
1160            VG_(umsg)("[%4lu]  ", i);
1161         }
1162         VG_(umsg)("%u ", api->histo[i]);
1163      }
1164      VG_(umsg)("\n");
1165   }
1166}
1167
1168
/* Metric-access functions for APInfos.  Each returns one sortable
   counter from an APInfo; they are selected via identify_metric and
   used as the sort key in show_top_n_apinfos. */
static ULong get_metric__max_bytes_live ( APInfo* api ) {
   return api->max_bytes_live;
}
static ULong get_metric__tot_bytes ( APInfo* api ) {
   return api->tot_bytes;
}
static ULong get_metric__max_blocks_live ( APInfo* api ) {
   return api->max_blocks_live;
}
static ULong get_metric__tot_blocks ( APInfo* api ) {
   return api->tot_blocks;
}
1182
1183/* Given a string, return the metric-access function and also a Bool
1184   indicating whether we want increasing or decreasing values of the
1185   metric.  This is used twice, once in command line processing, and
1186   then again in show_top_n_apinfos.  Returns False if the given
1187   string could not be identified.*/
1188static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1189                              /*OUT*/Bool* increasingP,
1190                              const HChar* metric_name )
1191{
1192   if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
1193      *get_metricP = get_metric__max_bytes_live;
1194      *increasingP = False;
1195      return True;
1196   }
1197   if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
1198      *get_metricP = get_metric__tot_bytes;
1199      *increasingP = False;
1200      return True;
1201   }
1202   if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
1203      *get_metricP = get_metric__max_blocks_live;
1204      *increasingP = False;
1205      return True;
1206   }
1207   if (0 == VG_(strcmp)(metric_name, "tot-blocks-allocd")) {
1208      *get_metricP = get_metric__tot_blocks;
1209      *increasingP = False;
1210      return True;
1211   }
1212   return False;
1213}
1214
1215
// Print the top clo_show_top_n allocation points, ordered by the
// --sort-by metric.  Selection is by repeated linear scans over the
// 'apinfo' map; each printed entry is marked via its 'shown' flag so
// it is skipped on subsequent scans.
static void show_top_n_apinfos ( void )
{
   Int   i;
   UWord keyW, valW;
   ULong (*get_metric)(APInfo*);
   Bool  increasing;

   const HChar* metric_name = clo_sort_by;
   tl_assert(metric_name); // ensured by clo processing

   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   tl_assert(ok); // ensured by clo processing

   VG_(umsg)("\n");
   VG_(umsg)("======== ORDERED BY %s \"%s\": "
             "top %d allocators ========\n",
             increasing ? "increasing" : "decreasing",
             metric_name, clo_show_top_n );

   // Clear all .shown bits
   VG_(initIterFM)( apinfo );
   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
      APInfo* api = (APInfo*)valW;
      // Keys are the allocation ExeContexts; values are the APInfos.
      tl_assert(api && api->ap == (ExeContext*)keyW);
      api->shown = False;
   }
   VG_(doneIterFM)( apinfo );

   // Now print the top N entries.  Each one requires a
   // complete scan of the set.  Duh.
   for (i = 0; i < clo_show_top_n; i++) {
      // Sentinel start value: when 'increasing', search for the
      // minimum (start from all-ones); otherwise for the maximum
      // (start from zero).
      ULong   best_metric = increasing ? ~0ULL : 0ULL;
      APInfo* best_api    = NULL;

      VG_(initIterFM)( apinfo );
      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
         APInfo* api = (APInfo*)valW;
         if (api->shown)
            continue;
         ULong metric = get_metric(api);
         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
            best_metric = metric;
            best_api = api;
         }
      }
      VG_(doneIterFM)( apinfo );

      if (!best_api)
         break; // all APIs have been shown.  Stop.

      VG_(umsg)("\n");
      VG_(umsg)("-------------------- %d of %d --------------------\n",
                i+1, clo_show_top_n );
      show_APInfo(best_api);
      best_api->shown = True;
   }

   VG_(umsg)("\n");
}
1275
1276
// Tool finalisation: harvest still-live blocks, then print the
// summary statistics, the top-N allocation points, usage hints, and
// (with --stats) internal counters.  'exit_status' is part of the
// tool-interface signature but is not used here.
static void dh_fini(Int exit_status)
{
   // Before printing statistics, we must harvest access counts for
   // all the blocks that are still alive.  Not doing so gives
   // access ratios which are too low (zero, in the worst case)
   // for such blocks, since the accesses that do get made will
   // (if we skip this step) not get folded into the AP summaries.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      // interval_tree is used as a set: keys are Blocks, values 0.
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // show results
   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   VG_(umsg)("\n");
   VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
   VG_(umsg)("\n");
   VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
             g_max_bytes_live, g_max_blocks_live);
   VG_(umsg)("\n");
   VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
             g_tot_bytes, g_tot_blocks);
   VG_(umsg)("\n");
   // Guard against division by zero when nothing was allocated.
   if (g_tot_bytes > 0) {
      VG_(umsg)("insns per allocated byte: %'llu\n",
                g_guest_instrs_executed / g_tot_bytes);
      VG_(umsg)("\n");
   }

   show_top_n_apinfos();

   VG_(umsg)("\n");
   VG_(umsg)("\n");
   VG_(umsg)("==============================================================\n");
   VG_(umsg)("\n");
   VG_(umsg)("Some hints: (see --help for command line option details):\n");
   VG_(umsg)("\n");
   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
   VG_(umsg)("                 You probably want to set it much higher than\n");
   VG_(umsg)("                 the default value (10)\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --sort-by=     specifies the sort key for output.\n");
   VG_(umsg)("                 See --help for details.\n");
   VG_(umsg)("\n");
   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   VG_(umsg)("\n");

   // Internal find_Block_containing cache statistics, only with
   // --stats=yes.
   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }
}
1344
1345
1346//------------------------------------------------------------//
1347//--- Initialisation                                       ---//
1348//------------------------------------------------------------//
1349
// Called by the core after command line processing.  DHAT needs no
// post-clo initialisation; all setup happens in dh_pre_clo_init.
static void dh_post_clo_init(void)
{
}
1353
// Tool registration entry point: declares DHAT's details, callbacks,
// and needs to the Valgrind core, and creates the two global maps
// (interval_tree for live blocks, apinfo for allocation points).
static void dh_pre_clo_init(void)
{
   VG_(details_name)            ("DHAT");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2017, and GNU GPL'd, by Mozilla Inc");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs)          (dh_post_clo_init,
                                   dh_instrument,
                                   dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_cxx_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz   VG_(needs_client_requests)     (dh_handle_client_request);
//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//zz                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement)  (dh_malloc,
                                   dh___builtin_new,
                                   dh___builtin_vec_new,
                                   dh_memalign,
                                   dh_calloc,
                                   dh_free,
                                   dh___builtin_delete,
                                   dh___builtin_vec_delete,
                                   dh_realloc,
                                   dh_malloc_usable_size,
                                   0 );

   // Track non-instruction accesses (eg. from syscalls) too.
   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );

   // The maps must not already exist; they are created exactly once,
   // here.
   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.main.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.main.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
1407
1408VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1409
1410//--------------------------------------------------------------------//
1411//--- end                                                dh_main.c ---//
1412//--------------------------------------------------------------------//
1413