dh_main.c revision ca503be79ca377d89b1692c37c97a06ed14dad3b
1
2//--------------------------------------------------------------------*/
3//--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---*/
4//--------------------------------------------------------------------*/
5
6/*
7   This file is part of DHAT, a Valgrind tool for profiling the
8   heap usage of programs.
9
10   Copyright (C) 2010-2012 Mozilla Inc
11
12   This program is free software; you can redistribute it and/or
13   modify it under the terms of the GNU General Public License as
14   published by the Free Software Foundation; either version 2 of the
15   License, or (at your option) any later version.
16
17   This program is distributed in the hope that it will be useful, but
18   WITHOUT ANY WARRANTY; without even the implied warranty of
19   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20   General Public License for more details.
21
22   You should have received a copy of the GNU General Public License
23   along with this program; if not, write to the Free Software
24   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25   02111-1307, USA.
26
27   The GNU General Public License is contained in the file COPYING.
28*/
29
30/* Contributed by Julian Seward <jseward@acm.org> */
31
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcbase.h"
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
38#include "pub_tool_mallocfree.h"
39#include "pub_tool_options.h"
40#include "pub_tool_replacemalloc.h"
41#include "pub_tool_tooliface.h"
42#include "pub_tool_wordfm.h"
43
44#define HISTOGRAM_SIZE_LIMIT 1024
45
46
47//------------------------------------------------------------//
48//--- Globals                                              ---//
49//------------------------------------------------------------//
50
51// Number of guest instructions executed so far.  This is
52// incremented directly from the generated code.
53static ULong g_guest_instrs_executed = 0;
54
55// Summary statistics for the entire run.
56static ULong g_tot_blocks = 0;   // total blocks allocated
57static ULong g_tot_bytes  = 0;   // total bytes allocated
58
59static ULong g_cur_blocks_live = 0; // curr # blocks live
60static ULong g_cur_bytes_live  = 0; // curr # bytes live
61
62static ULong g_max_blocks_live = 0; // bytes and blocks at
63static ULong g_max_bytes_live  = 0; // the max residency point
64
65
66//------------------------------------------------------------//
67//--- an Interval Tree of live blocks                      ---//
68//------------------------------------------------------------//
69
70/* Tracks information about live blocks. */
71typedef
72   struct {
73      Addr        payload;
74      SizeT       req_szB;
75      ExeContext* ap;  /* allocation ec */
76      ULong       allocd_at; /* instruction number */
77      ULong       n_reads;
78      ULong       n_writes;
79      /* Approx histogram, one byte per payload byte.  Counts latch up
80         therefore at 0xFFFF.  Can be NULL if the block is resized or if
81         the block is larger than HISTOGRAM_SIZE_LIMIT. */
82      UShort*     histoW; /* [0 .. req_szB-1] */
83   }
84   Block;
85
86/* May not contain zero-sized blocks.  May not contain
87   overlapping blocks. */
88static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
89
90/* Here's the comparison function.  Since the tree is required
91to contain non-zero sized, non-overlapping blocks, it's good
92enough to consider any overlap as a match. */
93static Word interval_tree_Cmp ( UWord k1, UWord k2 )
94{
95   Block* b1 = (Block*)k1;
96   Block* b2 = (Block*)k2;
97   tl_assert(b1->req_szB > 0);
98   tl_assert(b2->req_szB > 0);
99   if (b1->payload + b1->req_szB <= b2->payload) return -1;
100   if (b2->payload + b2->req_szB <= b1->payload) return  1;
101   return 0;
102}
103
104// 2-entry cache for find_Block_containing
105static Block* fbc_cache0 = NULL;
106static Block* fbc_cache1 = NULL;
107
108static UWord stats__n_fBc_cached = 0;
109static UWord stats__n_fBc_uncached = 0;
110static UWord stats__n_fBc_notfound = 0;
111
112static Block* find_Block_containing ( Addr a )
113{
114   if (LIKELY(fbc_cache0
115              && fbc_cache0->payload <= a
116              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
117      // found at 0
118      stats__n_fBc_cached++;
119      return fbc_cache0;
120   }
121   if (LIKELY(fbc_cache1
122              && fbc_cache1->payload <= a
123              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
124      // found at 1; swap 0 and 1
125      Block* tmp = fbc_cache0;
126      fbc_cache0 = fbc_cache1;
127      fbc_cache1 = tmp;
128      stats__n_fBc_cached++;
129      return fbc_cache0;
130   }
131   Block fake;
132   fake.payload = a;
133   fake.req_szB = 1;
134   UWord foundkey = 1;
135   UWord foundval = 1;
136   Bool found = VG_(lookupFM)( interval_tree,
137                               &foundkey, &foundval, (UWord)&fake );
138   if (!found) {
139      stats__n_fBc_notfound++;
140      return NULL;
141   }
142   tl_assert(foundval == 0); // we don't store vals in the interval tree
143   tl_assert(foundkey != 1);
144   Block* res = (Block*)foundkey;
145   tl_assert(res != &fake);
146   // put at the top position
147   fbc_cache1 = fbc_cache0;
148   fbc_cache0 = res;
149   stats__n_fBc_uncached++;
150   return res;
151}
152
153// delete a block; asserts if not found.  (viz, 'a' must be
154// known to be present.)
155static void delete_Block_starting_at ( Addr a )
156{
157   Block fake;
158   fake.payload = a;
159   fake.req_szB = 1;
160   Bool found = VG_(delFromFM)( interval_tree,
161                                NULL, NULL, (Addr)&fake );
162   tl_assert(found);
163   fbc_cache0 = fbc_cache1 = NULL;
164}
165
166
167//------------------------------------------------------------//
168//--- a FM of allocation points (APs)                      ---//
169//------------------------------------------------------------//
170
171typedef
172   struct {
173      // the allocation point that we're summarising stats for
174      ExeContext* ap;
175      // used when printing results
176      Bool shown;
177      // The current number of blocks and bytes live for this AP
178      ULong cur_blocks_live;
179      ULong cur_bytes_live;
180      // The number of blocks and bytes live at the max-liveness
181      // point.  Note this is a bit subtle.  max_blocks_live is not
182      // the maximum number of live blocks, but rather the number of
183      // blocks live at the point of maximum byte liveness.  These are
184      // not necessarily the same thing.
185      ULong max_blocks_live;
186      ULong max_bytes_live;
187      // Total number of blocks and bytes allocated by this AP.
188      ULong tot_blocks;
189      ULong tot_bytes;
190      // Sum of death ages for all blocks allocated by this AP,
191      // that have subsequently been freed.
192      ULong death_ages_sum;
193      ULong deaths;
194      // Total number of reads and writes in all blocks allocated
195      // by this AP.
196      ULong n_reads;
197      ULong n_writes;
198      /* Histogram information.  We maintain a histogram aggregated for
199         all retiring Blocks allocated by this AP, but only if:
200         - this AP has only ever allocated objects of one size
201         - that size is <= HISTOGRAM_SIZE_LIMIT
202         What we need therefore is a mechanism to see if this AP
203         has only ever allocated blocks of one size.
204
205         3 states:
206            Unknown          because no retirement yet
207            Exactly xsize    all retiring blocks are of this size
208            Mixed            multiple different sizes seen
209      */
210      enum { Unknown=999, Exactly, Mixed } xsize_tag;
211      SizeT xsize;
212      UInt* histo; /* [0 .. xsize-1] */
213   }
214   APInfo;
215
216/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
217   .ap field in the values. */
218static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
219
220
221/* 'bk' is being introduced (has just been allocated).  Find the
222   relevant APInfo entry for it, or create one, based on the block's
223   allocation EC.  Then, update the APInfo to the extent that we
224   actually can, to reflect the allocation. */
225static void intro_Block ( Block* bk )
226{
227   tl_assert(bk);
228   tl_assert(bk->ap);
229
230   APInfo* api   = NULL;
231   UWord   keyW  = 0;
232   UWord   valW  = 0;
233   Bool    found = VG_(lookupFM)( apinfo,
234                                  &keyW, &valW, (UWord)bk->ap );
235   if (found) {
236      api = (APInfo*)valW;
237      tl_assert(keyW == (UWord)bk->ap);
238   } else {
239      api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
240      VG_(memset)(api, 0, sizeof(*api));
241      api->ap = bk->ap;
242      Bool present = VG_(addToFM)( apinfo,
243                                   (UWord)bk->ap, (UWord)api );
244      tl_assert(!present);
245      // histo stuff
246      tl_assert(api->deaths == 0);
247      api->xsize_tag = Unknown;
248      api->xsize = 0;
249      if (0) VG_(printf)("api %p   -->  Unknown\n", api);
250   }
251
252   tl_assert(api->ap == bk->ap);
253
254   /* So: update stats to reflect an allocation */
255
256   // # live blocks
257   api->cur_blocks_live++;
258
259   // # live bytes
260   api->cur_bytes_live += bk->req_szB;
261   if (api->cur_bytes_live > api->max_bytes_live) {
262      api->max_bytes_live  = api->cur_bytes_live;
263      api->max_blocks_live = api->cur_blocks_live;
264   }
265
266   // total blocks and bytes allocated here
267   api->tot_blocks++;
268   api->tot_bytes += bk->req_szB;
269
270   // update summary globals
271   g_tot_blocks++;
272   g_tot_bytes += bk->req_szB;
273
274   g_cur_blocks_live++;
275   g_cur_bytes_live += bk->req_szB;
276   if (g_cur_bytes_live > g_max_bytes_live) {
277      g_max_bytes_live = g_cur_bytes_live;
278      g_max_blocks_live = g_cur_blocks_live;
279   }
280}
281
282
283/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
284   it, which must already exist.  Then, fold info from 'bk' into that
285   entry.  'because_freed' is True if the block is retiring because
286   the client has freed it.  If it is False then the block is retiring
287   because the program has finished, in which case we want to skip the
288   updates of the total blocks live etc for this AP, but still fold in
289   the access counts and histo data that have so far accumulated for
290   the block. */
291static void retire_Block ( Block* bk, Bool because_freed )
292{
293   tl_assert(bk);
294   tl_assert(bk->ap);
295
296   APInfo* api   = NULL;
297   UWord   keyW  = 0;
298   UWord   valW  = 0;
299   Bool    found = VG_(lookupFM)( apinfo,
300                                  &keyW, &valW, (UWord)bk->ap );
301
302   tl_assert(found);
303   api = (APInfo*)valW;
304   tl_assert(api->ap == bk->ap);
305
306   // update stats following this free.
307   if (0)
308   VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
309               bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
310
311   // update total blocks live etc for this AP
312   if (because_freed) {
313      tl_assert(api->cur_blocks_live >= 1);
314      tl_assert(api->cur_bytes_live >= bk->req_szB);
315      api->cur_blocks_live--;
316      api->cur_bytes_live -= bk->req_szB;
317
318      api->deaths++;
319
320      tl_assert(bk->allocd_at <= g_guest_instrs_executed);
321      api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
322
323      // update global summary stats
324      tl_assert(g_cur_blocks_live > 0);
325      g_cur_blocks_live--;
326      tl_assert(g_cur_bytes_live >= bk->req_szB);
327      g_cur_bytes_live -= bk->req_szB;
328   }
329
330   // access counts
331   api->n_reads  += bk->n_reads;
332   api->n_writes += bk->n_writes;
333
334   // histo stuff.  First, do state transitions for xsize/xsize_tag.
335   switch (api->xsize_tag) {
336
337      case Unknown:
338         tl_assert(api->xsize == 0);
339         tl_assert(api->deaths == 1 || api->deaths == 0);
340         tl_assert(!api->histo);
341         api->xsize_tag = Exactly;
342         api->xsize = bk->req_szB;
343         if (0) VG_(printf)("api %p   -->  Exactly(%lu)\n", api, api->xsize);
344         // and allocate the histo
345         if (bk->histoW) {
346            api->histo = VG_(malloc)("dh.main.retire_Block.1",
347                                     api->xsize * sizeof(UInt));
348            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
349         }
350         break;
351
352      case Exactly:
353         //tl_assert(api->deaths > 1);
354         if (bk->req_szB != api->xsize) {
355            if (0) VG_(printf)("api %p   -->  Mixed(%lu -> %lu)\n",
356                               api, api->xsize, bk->req_szB);
357            api->xsize_tag = Mixed;
358            api->xsize = 0;
359            // deallocate the histo, if any
360            if (api->histo) {
361               VG_(free)(api->histo);
362               api->histo = NULL;
363            }
364         }
365         break;
366
367      case Mixed:
368         //tl_assert(api->deaths > 1);
369         break;
370
371      default:
372        tl_assert(0);
373   }
374
375   // See if we can fold the histo data from this block into
376   // the data for the AP
377   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
378      tl_assert(api->xsize == bk->req_szB);
379      UWord i;
380      for (i = 0; i < api->xsize; i++) {
381         // FIXME: do something better in case of overflow of api->histo[..]
382         // Right now, at least don't let it overflow/wrap around
383         if (api->histo[i] <= 0xFFFE0000)
384            api->histo[i] += (UInt)bk->histoW[i];
385      }
386      if (0) VG_(printf)("fold in, AP = %p\n", api);
387   }
388
389
390
391#if 0
392   if (bk->histoB) {
393      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
394      UWord i;
395      for (i = 0; i < bk->req_szB; i++)
396        VG_(printf)("%u ", (UInt)bk->histoB[i]);
397      VG_(printf)("\n");
398   } else {
399      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
400   }
401#endif
402}
403
404/* This handles block resizing.  When a block with AP 'ec' has a
405   size change of 'delta', call here to update the APInfo. */
406static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
407{
408   APInfo* api   = NULL;
409   UWord   keyW  = 0;
410   UWord   valW  = 0;
411   Bool    found = VG_(lookupFM)( apinfo,
412                                  &keyW, &valW, (UWord)ec );
413
414   tl_assert(found);
415   api = (APInfo*)valW;
416   tl_assert(api->ap == ec);
417
418   if (delta < 0) {
419      tl_assert(api->cur_bytes_live >= -delta);
420      tl_assert(g_cur_bytes_live >= -delta);
421   }
422
423   // adjust current live size
424   api->cur_bytes_live += delta;
425   g_cur_bytes_live += delta;
426
427   if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
428      api->max_bytes_live  = api->cur_bytes_live;
429      api->max_blocks_live = api->cur_blocks_live;
430   }
431
432   // update global summary stats
433   if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
434      g_max_bytes_live = g_cur_bytes_live;
435      g_max_blocks_live = g_cur_blocks_live;
436   }
437   if (delta > 0)
438      g_tot_bytes += delta;
439
440   // adjust total allocation size
441   if (delta > 0)
442      api->tot_bytes += delta;
443}
444
445
446//------------------------------------------------------------//
447//--- update both Block and APInfos after {m,re}alloc/free ---//
448//------------------------------------------------------------//
449
450static
451void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
452                  Bool is_zeroed )
453{
454   tl_assert(p == NULL); // don't handle custom allocators right now
455   SizeT actual_szB /*, slop_szB*/;
456
457   if ((SSizeT)req_szB < 0) return NULL;
458
459   if (req_szB == 0)
460      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
461
462   // Allocate and zero if necessary
463   if (!p) {
464      p = VG_(cli_malloc)( req_alignB, req_szB );
465      if (!p) {
466         return NULL;
467      }
468      if (is_zeroed) VG_(memset)(p, 0, req_szB);
469      actual_szB = VG_(malloc_usable_size)(p);
470      tl_assert(actual_szB >= req_szB);
471      /* slop_szB = actual_szB - req_szB; */
472   } else {
473      /* slop_szB = 0; */
474   }
475
476   // Make new HP_Chunk node, add to malloc_list
477   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
478   bk->payload   = (Addr)p;
479   bk->req_szB   = req_szB;
480   bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
481   bk->allocd_at = g_guest_instrs_executed;
482   bk->n_reads   = 0;
483   bk->n_writes  = 0;
484   // set up histogram array, if the block isn't too large
485   bk->histoW = NULL;
486   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
487      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
488      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
489   }
490
491   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
492   tl_assert(!present);
493   fbc_cache0 = fbc_cache1 = NULL;
494
495   intro_Block(bk);
496
497   if (0) VG_(printf)("ALLOC %ld -> %p\n", req_szB, p);
498
499   return p;
500}
501
502static
503void die_block ( void* p, Bool custom_free )
504{
505   tl_assert(!custom_free);  // at least for now
506
507   Block* bk = find_Block_containing( (Addr)p );
508
509   if (!bk) {
510     return; // bogus free
511   }
512
513   tl_assert(bk->req_szB > 0);
514   // assert the block finder is behaving sanely
515   tl_assert(bk->payload <= (Addr)p);
516   tl_assert( (Addr)p < bk->payload + bk->req_szB );
517
518   if (bk->payload != (Addr)p) {
519      return; // bogus free
520   }
521
522   if (0) VG_(printf)(" FREE %p %llu\n",
523                      p, g_guest_instrs_executed - bk->allocd_at);
524
525   retire_Block(bk, True/*because_freed*/);
526
527   VG_(cli_free)( (void*)bk->payload );
528   delete_Block_starting_at( bk->payload );
529   if (bk->histoW) {
530      VG_(free)( bk->histoW );
531      bk->histoW = NULL;
532   }
533   VG_(free)( bk );
534}
535
536
537static
538void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
539{
540   if (0) VG_(printf)("REALL %p %ld\n", p_old, new_req_szB);
541   void* p_new = NULL;
542
543   tl_assert(new_req_szB > 0); // map 0 to 1
544
545   // Find the old block.
546   Block* bk = find_Block_containing( (Addr)p_old );
547   if (!bk) {
548      return NULL;   // bogus realloc
549   }
550
551   tl_assert(bk->req_szB > 0);
552   // assert the block finder is behaving sanely
553   tl_assert(bk->payload <= (Addr)p_old);
554   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
555
556   if (bk->payload != (Addr)p_old) {
557      return NULL; // bogus realloc
558   }
559
560   // Keeping the histogram alive in any meaningful way across
561   // block resizing is too darn complicated.  Just throw it away.
562   if (bk->histoW) {
563      VG_(free)(bk->histoW);
564      bk->histoW = NULL;
565   }
566
567   // Actually do the allocation, if necessary.
568   if (new_req_szB <= bk->req_szB) {
569
570      // New size is smaller or same; block not moved.
571      apinfo_change_cur_bytes_live(bk->ap,
572                                   (Long)new_req_szB - (Long)bk->req_szB);
573      bk->req_szB = new_req_szB;
574      return p_old;
575
576   } else {
577
578      // New size is bigger;  make new block, copy shared contents, free old.
579      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
580      if (!p_new) {
581         // Nb: if realloc fails, NULL is returned but the old block is not
582         // touched.  What an awful function.
583         return NULL;
584      }
585      tl_assert(p_new != p_old);
586
587      VG_(memcpy)(p_new, p_old, bk->req_szB);
588      VG_(cli_free)(p_old);
589
590      // Since the block has moved, we need to re-insert it into the
591      // interval tree at the new place.  Do this by removing
592      // and re-adding it.
593      delete_Block_starting_at( (Addr)p_old );
594      // now 'bk' is no longer in the tree, but the Block itself
595      // is still alive
596
597      // Update the metadata.
598      apinfo_change_cur_bytes_live(bk->ap,
599                                   (Long)new_req_szB - (Long)bk->req_szB);
600      bk->payload = (Addr)p_new;
601      bk->req_szB = new_req_szB;
602
603      // and re-add
604      Bool present
605         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
606      tl_assert(!present);
607      fbc_cache0 = fbc_cache1 = NULL;
608
609      return p_new;
610   }
611   /*NOTREACHED*/
612   tl_assert(0);
613}
614
615
616//------------------------------------------------------------//
617//--- malloc() et al replacement wrappers                  ---//
618//------------------------------------------------------------//
619
620static void* dh_malloc ( ThreadId tid, SizeT szB )
621{
622   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
623}
624
625static void* dh___builtin_new ( ThreadId tid, SizeT szB )
626{
627   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
628}
629
630static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
631{
632   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
633}
634
635static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
636{
637   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
638}
639
640static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
641{
642   return new_block( tid, NULL, szB, alignB, False );
643}
644
645static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
646{
647   die_block( p, /*custom_free*/False );
648}
649
650static void dh___builtin_delete ( ThreadId tid, void* p )
651{
652   die_block( p, /*custom_free*/False);
653}
654
655static void dh___builtin_vec_delete ( ThreadId tid, void* p )
656{
657   die_block( p, /*custom_free*/False );
658}
659
660static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
661{
662   if (p_old == NULL) {
663      return dh_malloc(tid, new_szB);
664   }
665   if (new_szB == 0) {
666      dh_free(tid, p_old);
667      return NULL;
668   }
669   return renew_block(tid, p_old, new_szB);
670}
671
672static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
673{
674   tl_assert(0);
675//zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
676//zz
677//zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
678}
679
680//------------------------------------------------------------//
681//--- memory references                                    ---//
682//------------------------------------------------------------//
683
684static
685void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
686{
687   UWord i, offMin, offMax1;
688   offMin = addr - bk->payload;
689   tl_assert(offMin < bk->req_szB);
690   offMax1 = offMin + szB;
691   if (offMax1 > bk->req_szB)
692      offMax1 = bk->req_szB;
693   //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
694   for (i = offMin; i < offMax1; i++) {
695      UShort n = bk->histoW[i];
696      if (n < 0xFFFF) n++;
697      bk->histoW[i] = n;
698   }
699}
700
701static VG_REGPARM(2)
702void dh_handle_write ( Addr addr, UWord szB )
703{
704   Block* bk = find_Block_containing(addr);
705   if (bk) {
706      bk->n_writes += szB;
707      if (bk->histoW)
708         inc_histo_for_block(bk, addr, szB);
709   }
710}
711
712static VG_REGPARM(2)
713void dh_handle_read ( Addr addr, UWord szB )
714{
715   Block* bk = find_Block_containing(addr);
716   if (bk) {
717      bk->n_reads += szB;
718      if (bk->histoW)
719         inc_histo_for_block(bk, addr, szB);
720   }
721}
722
723
724// Handle reads and writes by syscalls (read == kernel
725// reads user space, write == kernel writes user space).
726// Assumes no such read or write spans a heap block
727// boundary and so we can treat it just as one giant
728// read or write.
729static
730void dh_handle_noninsn_read ( CorePart part, ThreadId tid, Char* s,
731                              Addr base, SizeT size )
732{
733   switch (part) {
734      case Vg_CoreSysCall:
735         dh_handle_read(base, size);
736         break;
737      case Vg_CoreSysCallArgInMem:
738         break;
739      case Vg_CoreTranslate:
740         break;
741      default:
742         tl_assert(0);
743   }
744}
745
746static
747void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
748                               Addr base, SizeT size )
749{
750   switch (part) {
751      case Vg_CoreSysCall:
752         dh_handle_write(base, size);
753         break;
754      case Vg_CoreSignal:
755         break;
756      default:
757         tl_assert(0);
758   }
759}
760
761
762//------------------------------------------------------------//
763//--- Instrumentation                                      ---//
764//------------------------------------------------------------//
765
766#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
767#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
768#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
769#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
770#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
771
772static
773void add_counter_update(IRSB* sbOut, Int n)
774{
775   #if defined(VG_BIGENDIAN)
776   # define END Iend_BE
777   #elif defined(VG_LITTLEENDIAN)
778   # define END Iend_LE
779   #else
780   # error "Unknown endianness"
781   #endif
782   // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
783   //   WrTmp(t1, Load64(&g_guest_instrs_executed))
784   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
785   //   Store(&g_guest_instrs_executed, t2)
786   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
787   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
788   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
789
790   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
791   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
792   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
793
794   addStmtToIRSB( sbOut, st1 );
795   addStmtToIRSB( sbOut, st2 );
796   addStmtToIRSB( sbOut, st3 );
797}
798
799static
800void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
801                 Int goff_sp)
802{
803   IRType   tyAddr   = Ity_INVALID;
804   HChar*   hName    = NULL;
805   void*    hAddr    = NULL;
806   IRExpr** argv     = NULL;
807   IRDirty* di       = NULL;
808
809   const Int THRESH = 4096 * 4; // somewhat arbitrary
810   const Int rz_szB = VG_STACK_REDZONE_SZB;
811
812   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
813   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
814
815   if (isWrite) {
816      hName = "dh_handle_write";
817      hAddr = &dh_handle_write;
818   } else {
819      hName = "dh_handle_read";
820      hAddr = &dh_handle_read;
821   }
822
823   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
824
825   /* Add the helper. */
826   tl_assert(hName);
827   tl_assert(hAddr);
828   tl_assert(argv);
829   di = unsafeIRDirty_0_N( 2/*regparms*/,
830                           hName, VG_(fnptr_to_fnentry)( hAddr ),
831                           argv );
832
833   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
834      some arbitrary N.  If that fails then addr is in the range (SP -
835      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
836      addr is within a page of SP and so can't possibly be a heap
837      access, and so can be skipped. */
838   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
839   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
840
841   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
842   addStmtToIRSB(
843      sbOut,
844      assign(sp_minus_rz,
845             tyAddr == Ity_I32
846                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
847                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
848   );
849
850   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
851   addStmtToIRSB(
852      sbOut,
853      assign(diff,
854             tyAddr == Ity_I32
855                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
856                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
857   );
858
859   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
860   addStmtToIRSB(
861      sbOut,
862      assign(guard,
863             tyAddr == Ity_I32
864                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
865                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
866   );
867   di->guard = mkexpr(guard);
868
869   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
870}
871
872static
873IRSB* dh_instrument ( VgCallbackClosure* closure,
874                      IRSB* sbIn,
875                      VexGuestLayout* layout,
876                      VexGuestExtents* vge,
877                      VexArchInfo* archinfo_host,
878                      IRType gWordTy, IRType hWordTy )
879{
880   Int   i, n = 0;
881   IRSB* sbOut;
882   IRTypeEnv* tyenv = sbIn->tyenv;
883
884   const Int goff_sp = layout->offset_SP;
885
886   // We increment the instruction count in two places:
887   // - just before any Ist_Exit statements;
888   // - just before the IRSB's end.
889   // In the former case, we zero 'n' and then continue instrumenting.
890
891   sbOut = deepCopyIRSBExceptStmts(sbIn);
892
893   // Copy verbatim any IR preamble preceding the first IMark
894   i = 0;
895   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
896      addStmtToIRSB( sbOut, sbIn->stmts[i] );
897      i++;
898   }
899
900   for (/*use current i*/; i < sbIn->stmts_used; i++) {
901      IRStmt* st = sbIn->stmts[i];
902
903      if (!st || st->tag == Ist_NoOp) continue;
904
905      switch (st->tag) {
906
907         case Ist_IMark: {
908            n++;
909            break;
910         }
911
912         case Ist_Exit: {
913            if (n > 0) {
914               // Add an increment before the Exit statement, then reset 'n'.
915               add_counter_update(sbOut, n);
916               n = 0;
917            }
918            break;
919         }
920
921         case Ist_WrTmp: {
922            IRExpr* data = st->Ist.WrTmp.data;
923            if (data->tag == Iex_Load) {
924               IRExpr* aexpr = data->Iex.Load.addr;
925               // Note also, endianness info is ignored.  I guess
926               // that's not interesting.
927               addMemEvent( sbOut, False/*!isWrite*/,
928                            sizeofIRType(data->Iex.Load.ty),
929                            aexpr, goff_sp );
930            }
931            break;
932         }
933
934         case Ist_Store: {
935            IRExpr* data  = st->Ist.Store.data;
936            IRExpr* aexpr = st->Ist.Store.addr;
937            addMemEvent( sbOut, True/*isWrite*/,
938                         sizeofIRType(typeOfIRExpr(tyenv, data)),
939                         aexpr, goff_sp );
940            break;
941         }
942
943         case Ist_Dirty: {
944            Int      dataSize;
945            IRDirty* d = st->Ist.Dirty.details;
946            if (d->mFx != Ifx_None) {
947               /* This dirty helper accesses memory.  Collect the details. */
948               tl_assert(d->mAddr != NULL);
949               tl_assert(d->mSize != 0);
950               dataSize = d->mSize;
951               // Large (eg. 28B, 108B, 512B on x86) data-sized
952               // instructions will be done inaccurately, but they're
953               // very rare and this avoids errors from hitting more
954               // than two cache lines in the simulation.
955               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
956                  addMemEvent( sbOut, False/*!isWrite*/,
957                               dataSize, d->mAddr, goff_sp );
958               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
959                  addMemEvent( sbOut, True/*isWrite*/,
960                               dataSize, d->mAddr, goff_sp );
961            } else {
962               tl_assert(d->mAddr == NULL);
963               tl_assert(d->mSize == 0);
964            }
965            break;
966         }
967
968         case Ist_CAS: {
969            /* We treat it as a read and a write of the location.  I
970               think that is the same behaviour as it was before IRCAS
971               was introduced, since prior to that point, the Vex
972               front ends would translate a lock-prefixed instruction
973               into a (normal) read followed by a (normal) write. */
974            Int    dataSize;
975            IRCAS* cas = st->Ist.CAS.details;
976            tl_assert(cas->addr != NULL);
977            tl_assert(cas->dataLo != NULL);
978            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
979            if (cas->dataHi != NULL)
980               dataSize *= 2; /* since it's a doubleword-CAS */
981            addMemEvent( sbOut, False/*!isWrite*/,
982                         dataSize, cas->addr, goff_sp );
983            addMemEvent( sbOut, True/*isWrite*/,
984                         dataSize, cas->addr, goff_sp );
985            break;
986         }
987
988         case Ist_LLSC: {
989            IRType dataTy;
990            if (st->Ist.LLSC.storedata == NULL) {
991               /* LL */
992               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
993               addMemEvent( sbOut, False/*!isWrite*/,
994                            sizeofIRType(dataTy),
995                            st->Ist.LLSC.addr, goff_sp );
996            } else {
997               /* SC */
998               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
999               addMemEvent( sbOut, True/*isWrite*/,
1000                            sizeofIRType(dataTy),
1001                            st->Ist.LLSC.addr, goff_sp );
1002            }
1003            break;
1004         }
1005
1006         default:
1007            break;
1008      }
1009
1010      addStmtToIRSB( sbOut, st );
1011   }
1012
1013   if (n > 0) {
1014      // Add an increment before the SB end.
1015      add_counter_update(sbOut, n);
1016   }
1017   return sbOut;
1018}
1019
1020#undef binop
1021#undef mkexpr
1022#undef mkU32
1023#undef mkU64
1024#undef assign
1025
1026
1027//------------------------------------------------------------//
1028//--- Command line args                                    ---//
1029//------------------------------------------------------------//
1030
1031// FORWARDS
1032static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1033                              /*OUT*/Bool* increasingP,
1034                              Char* metric_name );
1035
1036static Int    clo_show_top_n = 10;
1037static HChar* clo_sort_by    = "max-bytes-live";
1038
1039static Bool dh_process_cmd_line_option(Char* arg)
1040{
1041   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}
1042
1043   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
1044       ULong (*dummyFn)(APInfo*);
1045       Bool dummyB;
1046       Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
1047       if (!ok)
1048          return False;
1049       // otherwise it's OK, in which case leave it alone.
1050       // show_top_n_apinfos will later convert the string by a
1051       // second call to identify_metric.
1052   }
1053
1054   else
1055      return VG_(replacement_malloc_process_cmd_line_option)(arg);
1056
1057   return True;
1058}
1059
1060
1061static void dh_print_usage(void)
1062{
1063   VG_(printf)(
1064"    --show-top-n=number       show the top <number> alloc points [10]\n"
1065"    --sort-by=string\n"
1066"            sort the allocation points by the metric\n"
1067"            defined by <string>, thusly:\n"
1068"                max-bytes-live    maximum live bytes [default]\n"
1069"                tot-bytes-allocd  total allocation (turnover)\n"
1070"                max-blocks-live   maximum live blocks\n"
1071   );
1072}
1073
1074static void dh_print_debug_usage(void)
1075{
1076   VG_(printf)(
1077"    (none)\n"
1078   );
1079}
1080
1081
1082//------------------------------------------------------------//
1083//--- Finalisation                                         ---//
1084//------------------------------------------------------------//
1085
1086static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
1087{
1088   ULong nK = n / 100;
1089   ULong nR = n % 100;
1090   VG_(sprintf)(buf, "%llu.%s%llu", nK,
1091                nR < 10 ? "0" : "",
1092                nR);
1093}
1094
1095static void show_APInfo ( APInfo* api )
1096{
1097   HChar bufA[80];
1098   VG_(memset)(bufA, 0, sizeof(bufA));
1099   if (api->tot_blocks > 0) {
1100      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
1101                              / (ULong)api->tot_blocks );
1102   } else {
1103      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
1104   }
1105
1106   VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
1107             api->max_bytes_live, api->max_blocks_live);
1108   VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
1109             api->tot_bytes, api->tot_blocks, bufA);
1110
1111   tl_assert(api->tot_blocks >= api->max_blocks_live);
1112   tl_assert(api->tot_bytes >= api->max_bytes_live);
1113
1114   if (api->deaths > 0) {
1115      // Average Age at Death
1116      ULong aad = api->deaths == 0
1117                  ? 0 : (api->death_ages_sum / api->deaths);
1118      // AAD as a fraction of the total program lifetime (so far)
1119      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
1120      // complete lifetime of the program.
1121      ULong aad_frac_10k
1122         = g_guest_instrs_executed == 0
1123           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
1124      HChar buf[16];
1125      show_N_div_100(buf, aad_frac_10k);
1126      VG_(umsg)("deaths:      %'llu, at avg age %'llu "
1127                "(%s%% of prog lifetime)\n",
1128                api->deaths, aad, buf );
1129   } else {
1130      VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
1131   }
1132
1133   HChar bufR[80], bufW[80];
1134   VG_(memset)(bufR, 0, sizeof(bufR));
1135   VG_(memset)(bufW, 0, sizeof(bufW));
1136   if (api->tot_bytes > 0) {
1137      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
1138      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
1139   } else {
1140      VG_(strcat)(bufR, "Inf");
1141      VG_(strcat)(bufW, "Inf");
1142   }
1143
1144   VG_(umsg)("acc-ratios:  %s rd, %s wr "
1145             " (%'llu b-read, %'llu b-written)\n",
1146             bufR, bufW,
1147             api->n_reads, api->n_writes);
1148
1149   VG_(pp_ExeContext)(api->ap);
1150
1151   if (api->histo && api->xsize_tag == Exactly) {
1152      VG_(umsg)("\nAggregated access counts by offset:\n");
1153      VG_(umsg)("\n");
1154      UWord i;
1155      if (api->xsize > 0)
1156         VG_(umsg)("[   0]  ");
1157      for (i = 0; i < api->xsize; i++) {
1158         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
1159            VG_(umsg)("\n");
1160            VG_(umsg)("[%4lu]  ", i);
1161         }
1162         VG_(umsg)("%u ", api->histo[i]);
1163      }
1164      VG_(umsg)("\n");
1165   }
1166}
1167
1168
1169/* Metric-access functions for APInfos. */
1170static ULong get_metric__max_bytes_live ( APInfo* api ) {
1171   return api->max_bytes_live;
1172}
1173static ULong get_metric__tot_bytes ( APInfo* api ) {
1174   return api->tot_bytes;
1175}
1176static ULong get_metric__max_blocks_live ( APInfo* api ) {
1177   return api->max_blocks_live;
1178}
1179
1180/* Given a string, return the metric-access function and also a Bool
1181   indicating whether we want increasing or decreasing values of the
1182   metric.  This is used twice, once in command line processing, and
1183   then again in show_top_n_apinfos.  Returns False if the given
1184   string could not be identified.*/
1185static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1186                              /*OUT*/Bool* increasingP,
1187                              Char* metric_name )
1188{
1189   if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
1190      *get_metricP = get_metric__max_bytes_live;
1191      *increasingP = False;
1192      return True;
1193   }
1194   if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
1195      *get_metricP = get_metric__tot_bytes;
1196      *increasingP = False;
1197      return True;
1198   }
1199   if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
1200      *get_metricP = get_metric__max_blocks_live;
1201      *increasingP = False;
1202      return True;
1203   }
1204   return False;
1205}
1206
1207
1208static void show_top_n_apinfos ( void )
1209{
1210   Int   i;
1211   UWord keyW, valW;
1212   ULong (*get_metric)(APInfo*);
1213   Bool  increasing;
1214
1215   HChar* metric_name = clo_sort_by;
1216   tl_assert(metric_name); // ensured by clo processing
1217
1218   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
1219   tl_assert(ok); // ensured by clo processing
1220
1221   VG_(umsg)("\n");
1222   VG_(umsg)("======== ORDERED BY %s \"%s\": "
1223             "top %d allocators ========\n",
1224             increasing ? "increasing" : "decreasing",
1225             metric_name, clo_show_top_n );
1226
1227   // Clear all .shown bits
1228   VG_(initIterFM)( apinfo );
1229   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
1230      APInfo* api = (APInfo*)valW;
1231      tl_assert(api && api->ap == (ExeContext*)keyW);
1232      api->shown = False;
1233   }
1234   VG_(doneIterFM)( apinfo );
1235
1236   // Now print the top N entries.  Each one requires a
1237   // complete scan of the set.  Duh.
1238   for (i = 0; i < clo_show_top_n; i++) {
1239      ULong   best_metric = increasing ? ~0ULL : 0ULL;
1240      APInfo* best_api    = NULL;
1241
1242      VG_(initIterFM)( apinfo );
1243      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
1244         APInfo* api = (APInfo*)valW;
1245         if (api->shown)
1246            continue;
1247         ULong metric = get_metric(api);
1248         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
1249            best_metric = metric;
1250            best_api = api;
1251         }
1252      }
1253      VG_(doneIterFM)( apinfo );
1254
1255      if (!best_api)
1256         break; // all APIs have been shown.  Stop.
1257
1258      VG_(umsg)("\n");
1259      VG_(umsg)("-------------------- %d of %d --------------------\n",
1260                i+1, clo_show_top_n );
1261      show_APInfo(best_api);
1262      best_api->shown = True;
1263   }
1264
1265   VG_(umsg)("\n");
1266}
1267
1268
1269static void dh_fini(Int exit_status)
1270{
1271   // Before printing statistics, we must harvest access counts for
1272   // all the blocks that are still alive.  Not doing so gives
1273   // access ratios which are too low (zero, in the worst case)
1274   // for such blocks, since the accesses that do get made will
1275   // (if we skip this step) not get folded into the AP summaries.
1276   UWord keyW, valW;
1277   VG_(initIterFM)( interval_tree );
1278   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
1279      Block* bk = (Block*)keyW;
1280      tl_assert(valW == 0);
1281      tl_assert(bk);
1282      retire_Block(bk, False/*!because_freed*/);
1283   }
1284   VG_(doneIterFM)( interval_tree );
1285
1286   // show results
1287   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
1288   VG_(umsg)("\n");
1289   VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
1290   VG_(umsg)("\n");
1291   VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
1292             g_max_bytes_live, g_max_blocks_live);
1293   VG_(umsg)("\n");
1294   VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
1295             g_tot_bytes, g_tot_blocks);
1296   VG_(umsg)("\n");
1297   if (g_tot_bytes > 0) {
1298      VG_(umsg)("insns per allocated byte: %'llu\n",
1299                g_guest_instrs_executed / g_tot_bytes);
1300      VG_(umsg)("\n");
1301   }
1302
1303   show_top_n_apinfos();
1304
1305   VG_(umsg)("\n");
1306   VG_(umsg)("\n");
1307   VG_(umsg)("==============================================================\n");
1308   VG_(umsg)("\n");
1309   VG_(umsg)("Some hints: (see --help for command line option details):\n");
1310   VG_(umsg)("\n");
1311   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
1312   VG_(umsg)("\n");
1313   VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
1314   VG_(umsg)("                 You probably want to set it much higher than\n");
1315   VG_(umsg)("                 the default value (10)\n");
1316   VG_(umsg)("\n");
1317   VG_(umsg)("* --sort-by=     specifies the sort key for output.\n");
1318   VG_(umsg)("                 See --help for details.\n");
1319   VG_(umsg)("\n");
1320   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
1321   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
1322   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
1323   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
1324   VG_(umsg)("\n");
1325
1326   if (VG_(clo_stats)) {
1327      VG_(dmsg)(" dhat: find_Block_containing:\n");
1328      VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
1329                stats__n_fBc_cached + stats__n_fBc_uncached,
1330                stats__n_fBc_cached,
1331                stats__n_fBc_uncached);
1332      VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
1333      VG_(dmsg)("\n");
1334   }
1335}
1336
1337
1338//------------------------------------------------------------//
1339//--- Initialisation                                       ---//
1340//------------------------------------------------------------//
1341
1342static void dh_post_clo_init(void)
1343{
1344}
1345
1346static void dh_pre_clo_init(void)
1347{
1348   VG_(details_name)            ("DHAT");
1349   VG_(details_version)         (NULL);
1350   VG_(details_description)     ("a dynamic heap analysis tool");
1351   VG_(details_copyright_author)(
1352      "Copyright (C) 2010-2012, and GNU GPL'd, by Mozilla Inc");
1353   VG_(details_bug_reports_to)  (VG_BUGS_TO);
1354
1355   // Basic functions.
1356   VG_(basic_tool_funcs)          (dh_post_clo_init,
1357                                   dh_instrument,
1358                                   dh_fini);
1359//zz
1360   // Needs.
1361   VG_(needs_libc_freeres)();
1362   VG_(needs_command_line_options)(dh_process_cmd_line_option,
1363                                   dh_print_usage,
1364                                   dh_print_debug_usage);
1365//zz   VG_(needs_client_requests)     (dh_handle_client_request);
1366//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
1367//zz                                   dh_expensive_sanity_check);
1368   VG_(needs_malloc_replacement)  (dh_malloc,
1369                                   dh___builtin_new,
1370                                   dh___builtin_vec_new,
1371                                   dh_memalign,
1372                                   dh_calloc,
1373                                   dh_free,
1374                                   dh___builtin_delete,
1375                                   dh___builtin_vec_delete,
1376                                   dh_realloc,
1377                                   dh_malloc_usable_size,
1378                                   0 );
1379
1380   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
1381   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
1382   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );
1383
1384   tl_assert(!interval_tree);
1385   tl_assert(!fbc_cache0);
1386   tl_assert(!fbc_cache1);
1387
1388   interval_tree = VG_(newFM)( VG_(malloc),
1389                               "dh.main.interval_tree.1",
1390                               VG_(free),
1391                               interval_tree_Cmp );
1392
1393   apinfo = VG_(newFM)( VG_(malloc),
1394                        "dh.main.apinfo.1",
1395                        VG_(free),
1396                        NULL/*unboxedcmp*/ );
1397}
1398
1399VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1400
1401//--------------------------------------------------------------------//
1402//--- end                                                dh_main.c ---//
1403//--------------------------------------------------------------------//
1404