1
2//--------------------------------------------------------------------*/
3//--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---*/
4//--------------------------------------------------------------------*/
5
6/*
7   This file is part of DHAT, a Valgrind tool for profiling the
8   heap usage of programs.
9
10   Copyright (C) 2010-2011 Mozilla Inc
11
12   This program is free software; you can redistribute it and/or
13   modify it under the terms of the GNU General Public License as
14   published by the Free Software Foundation; either version 2 of the
15   License, or (at your option) any later version.
16
17   This program is distributed in the hope that it will be useful, but
18   WITHOUT ANY WARRANTY; without even the implied warranty of
19   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20   General Public License for more details.
21
22   You should have received a copy of the GNU General Public License
23   along with this program; if not, write to the Free Software
24   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25   02111-1307, USA.
26
27   The GNU General Public License is contained in the file COPYING.
28*/
29
30/* Contributed by Julian Seward <jseward@acm.org> */
31
32
33#include "pub_tool_basics.h"
34#include "pub_tool_libcbase.h"
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
38#include "pub_tool_mallocfree.h"
39#include "pub_tool_options.h"
40#include "pub_tool_replacemalloc.h"
41#include "pub_tool_tooliface.h"
42#include "pub_tool_wordfm.h"
43
44#define HISTOGRAM_SIZE_LIMIT 1024
45
46
47//------------------------------------------------------------//
48//--- Globals                                              ---//
49//------------------------------------------------------------//
50
51// Number of guest instructions executed so far.  This is
52// incremented directly from the generated code.
53static ULong g_guest_instrs_executed = 0;
54
55// Summary statistics for the entire run.
56static ULong g_tot_blocks = 0;   // total blocks allocated
57static ULong g_tot_bytes  = 0;   // total bytes allocated
58
59static ULong g_cur_blocks_live = 0; // curr # blocks live
60static ULong g_cur_bytes_live  = 0; // curr # bytes live
61
62static ULong g_max_blocks_live = 0; // bytes and blocks at
63static ULong g_max_bytes_live  = 0; // the max residency point
64
65
66//------------------------------------------------------------//
67//--- an Interval Tree of live blocks                      ---//
68//------------------------------------------------------------//
69
70/* Tracks information about live blocks. */
71typedef
72   struct {
73      Addr        payload;
74      SizeT       req_szB;
75      ExeContext* ap;  /* allocation ec */
76      ULong       allocd_at; /* instruction number */
77      ULong       n_reads;
78      ULong       n_writes;
79      /* Approx histogram, one byte per payload byte.  Counts latch up
80         therefore at 0xFFFF.  Can be NULL if the block is resized or if
81         the block is larger than HISTOGRAM_SIZE_LIMIT. */
82      UShort*     histoW; /* [0 .. req_szB-1] */
83   }
84   Block;
85
86/* May not contain zero-sized blocks.  May not contain
87   overlapping blocks. */
88static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
89
90/* Here's the comparison function.  Since the tree is required
91to contain non-zero sized, non-overlapping blocks, it's good
92enough to consider any overlap as a match. */
93static Word interval_tree_Cmp ( UWord k1, UWord k2 )
94{
95   Block* b1 = (Block*)k1;
96   Block* b2 = (Block*)k2;
97   tl_assert(b1->req_szB > 0);
98   tl_assert(b2->req_szB > 0);
99   if (b1->payload + b1->req_szB <= b2->payload) return -1;
100   if (b2->payload + b2->req_szB <= b1->payload) return  1;
101   return 0;
102}
103
104// 2-entry cache for find_Block_containing
105static Block* fbc_cache0 = NULL;
106static Block* fbc_cache1 = NULL;
107
108static UWord stats__n_fBc_cached = 0;
109static UWord stats__n_fBc_uncached = 0;
110static UWord stats__n_fBc_notfound = 0;
111
112static Block* find_Block_containing ( Addr a )
113{
114   if (LIKELY(fbc_cache0
115              && fbc_cache0->payload <= a
116              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
117      // found at 0
118      stats__n_fBc_cached++;
119      return fbc_cache0;
120   }
121   if (LIKELY(fbc_cache1
122              && fbc_cache1->payload <= a
123              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
124      // found at 1; swap 0 and 1
125      Block* tmp = fbc_cache0;
126      fbc_cache0 = fbc_cache1;
127      fbc_cache1 = tmp;
128      stats__n_fBc_cached++;
129      return fbc_cache0;
130   }
131   Block fake;
132   fake.payload = a;
133   fake.req_szB = 1;
134   UWord foundkey = 1;
135   UWord foundval = 1;
136   Bool found = VG_(lookupFM)( interval_tree,
137                               &foundkey, &foundval, (UWord)&fake );
138   if (!found) {
139      stats__n_fBc_notfound++;
140      return NULL;
141   }
142   tl_assert(foundval == 0); // we don't store vals in the interval tree
143   tl_assert(foundkey != 1);
144   Block* res = (Block*)foundkey;
145   tl_assert(res != &fake);
146   // put at the top position
147   fbc_cache1 = fbc_cache0;
148   fbc_cache0 = res;
149   stats__n_fBc_uncached++;
150   return res;
151}
152
153// delete a block; asserts if not found.  (viz, 'a' must be
154// known to be present.)
155static void delete_Block_starting_at ( Addr a )
156{
157   Block fake;
158   fake.payload = a;
159   fake.req_szB = 1;
160   Bool found = VG_(delFromFM)( interval_tree,
161                                NULL, NULL, (Addr)&fake );
162   tl_assert(found);
163   fbc_cache0 = fbc_cache1 = NULL;
164}
165
166
167//------------------------------------------------------------//
168//--- a FM of allocation points (APs)                      ---//
169//------------------------------------------------------------//
170
171typedef
172   struct {
173      // the allocation point that we're summarising stats for
174      ExeContext* ap;
175      // used when printing results
176      Bool shown;
177      // The current number of blocks and bytes live for this AP
178      ULong cur_blocks_live;
179      ULong cur_bytes_live;
180      // The number of blocks and bytes live at the max-liveness
181      // point.  Note this is a bit subtle.  max_blocks_live is not
182      // the maximum number of live blocks, but rather the number of
183      // blocks live at the point of maximum byte liveness.  These are
184      // not necessarily the same thing.
185      ULong max_blocks_live;
186      ULong max_bytes_live;
187      // Total number of blocks and bytes allocated by this AP.
188      ULong tot_blocks;
189      ULong tot_bytes;
190      // Sum of death ages for all blocks allocated by this AP,
191      // that have subsequently been freed.
192      ULong death_ages_sum;
193      ULong deaths;
194      // Total number of reads and writes in all blocks allocated
195      // by this AP.
196      ULong n_reads;
197      ULong n_writes;
198      /* Histogram information.  We maintain a histogram aggregated for
199         all retiring Blocks allocated by this AP, but only if:
200         - this AP has only ever allocated objects of one size
201         - that size is <= HISTOGRAM_SIZE_LIMIT
202         What we need therefore is a mechanism to see if this AP
203         has only ever allocated blocks of one size.
204
205         3 states:
206            Unknown          because no retirement yet
207            Exactly xsize    all retiring blocks are of this size
208            Mixed            multiple different sizes seen
209      */
210      enum { Unknown=999, Exactly, Mixed } xsize_tag;
211      SizeT xsize;
212      UInt* histo; /* [0 .. xsize-1] */
213   }
214   APInfo;
215
216/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
217   .ap field in the values. */
218static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
219
220
221/* 'bk' is being introduced (has just been allocated).  Find the
222   relevant APInfo entry for it, or create one, based on the block's
223   allocation EC.  Then, update the APInfo to the extent that we
224   actually can, to reflect the allocation. */
225static void intro_Block ( Block* bk )
226{
227   tl_assert(bk);
228   tl_assert(bk->ap);
229
230   APInfo* api   = NULL;
231   UWord   keyW  = 0;
232   UWord   valW  = 0;
233   Bool    found = VG_(lookupFM)( apinfo,
234                                  &keyW, &valW, (UWord)bk->ap );
235   if (found) {
236      api = (APInfo*)valW;
237      tl_assert(keyW == (UWord)bk->ap);
238   } else {
239      api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
240      VG_(memset)(api, 0, sizeof(*api));
241      api->ap = bk->ap;
242      Bool present = VG_(addToFM)( apinfo,
243                                   (UWord)bk->ap, (UWord)api );
244      tl_assert(!present);
245      // histo stuff
246      tl_assert(api->deaths == 0);
247      api->xsize_tag = Unknown;
248      api->xsize = 0;
249      if (0) VG_(printf)("api %p   -->  Unknown\n", api);
250   }
251
252   tl_assert(api->ap == bk->ap);
253
254   /* So: update stats to reflect an allocation */
255
256   // # live blocks
257   api->cur_blocks_live++;
258
259   // # live bytes
260   api->cur_bytes_live += bk->req_szB;
261   if (api->cur_bytes_live > api->max_bytes_live) {
262      api->max_bytes_live  = api->cur_bytes_live;
263      api->max_blocks_live = api->cur_blocks_live;
264   }
265
266   // total blocks and bytes allocated here
267   api->tot_blocks++;
268   api->tot_bytes += bk->req_szB;
269
270   // update summary globals
271   g_tot_blocks++;
272   g_tot_bytes += bk->req_szB;
273
274   g_cur_blocks_live++;
275   g_cur_bytes_live += bk->req_szB;
276   if (g_cur_bytes_live > g_max_bytes_live) {
277      g_max_bytes_live = g_cur_bytes_live;
278      g_max_blocks_live = g_cur_blocks_live;
279   }
280}
281
282
283/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
284   it, which must already exist.  Then, fold info from 'bk' into that
285   entry.  'because_freed' is True if the block is retiring because
286   the client has freed it.  If it is False then the block is retiring
287   because the program has finished, in which case we want to skip the
288   updates of the total blocks live etc for this AP, but still fold in
289   the access counts and histo data that have so far accumulated for
290   the block. */
291static void retire_Block ( Block* bk, Bool because_freed )
292{
293   tl_assert(bk);
294   tl_assert(bk->ap);
295
296   APInfo* api   = NULL;
297   UWord   keyW  = 0;
298   UWord   valW  = 0;
299   Bool    found = VG_(lookupFM)( apinfo,
300                                  &keyW, &valW, (UWord)bk->ap );
301
302   tl_assert(found);
303   api = (APInfo*)valW;
304   tl_assert(api->ap == bk->ap);
305
306   // update stats following this free.
307   if (0)
308   VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
309               bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
310
311   // update total blocks live etc for this AP
312   if (because_freed) {
313      tl_assert(api->cur_blocks_live >= 1);
314      tl_assert(api->cur_bytes_live >= bk->req_szB);
315      api->cur_blocks_live--;
316      api->cur_bytes_live -= bk->req_szB;
317
318      api->deaths++;
319
320      tl_assert(bk->allocd_at <= g_guest_instrs_executed);
321      api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
322
323      // update global summary stats
324      tl_assert(g_cur_blocks_live > 0);
325      g_cur_blocks_live--;
326      tl_assert(g_cur_bytes_live >= bk->req_szB);
327      g_cur_bytes_live -= bk->req_szB;
328   }
329
330   // access counts
331   api->n_reads  += bk->n_reads;
332   api->n_writes += bk->n_writes;
333
334   // histo stuff.  First, do state transitions for xsize/xsize_tag.
335   switch (api->xsize_tag) {
336
337      case Unknown:
338         tl_assert(api->xsize == 0);
339         tl_assert(api->deaths == 1 || api->deaths == 0);
340         tl_assert(!api->histo);
341         api->xsize_tag = Exactly;
342         api->xsize = bk->req_szB;
343         if (0) VG_(printf)("api %p   -->  Exactly(%lu)\n", api, api->xsize);
344         // and allocate the histo
345         if (bk->histoW) {
346            api->histo = VG_(malloc)("dh.main.retire_Block.1",
347                                     api->xsize * sizeof(UInt));
348            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
349         }
350         break;
351
352      case Exactly:
353         //tl_assert(api->deaths > 1);
354         if (bk->req_szB != api->xsize) {
355            if (0) VG_(printf)("api %p   -->  Mixed(%lu -> %lu)\n",
356                               api, api->xsize, bk->req_szB);
357            api->xsize_tag = Mixed;
358            api->xsize = 0;
359            // deallocate the histo, if any
360            if (api->histo) {
361               VG_(free)(api->histo);
362               api->histo = NULL;
363            }
364         }
365         break;
366
367      case Mixed:
368         //tl_assert(api->deaths > 1);
369         break;
370
371      default:
372        tl_assert(0);
373   }
374
375   // See if we can fold the histo data from this block into
376   // the data for the AP
377   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
378      tl_assert(api->xsize == bk->req_szB);
379      UWord i;
380      for (i = 0; i < api->xsize; i++) {
381         // FIXME: do something better in case of overflow of api->histo[..]
382         // Right now, at least don't let it overflow/wrap around
383         if (api->histo[i] <= 0xFFFE0000)
384            api->histo[i] += (UInt)bk->histoW[i];
385      }
386      if (0) VG_(printf)("fold in, AP = %p\n", api);
387   }
388
389
390
391#if 0
392   if (bk->histoB) {
393      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
394      UWord i;
395      for (i = 0; i < bk->req_szB; i++)
396        VG_(printf)("%u ", (UInt)bk->histoB[i]);
397      VG_(printf)("\n");
398   } else {
399      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
400   }
401#endif
402}
403
404/* This handles block resizing.  When a block with AP 'ec' has a
405   size change of 'delta', call here to update the APInfo. */
406static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
407{
408   APInfo* api   = NULL;
409   UWord   keyW  = 0;
410   UWord   valW  = 0;
411   Bool    found = VG_(lookupFM)( apinfo,
412                                  &keyW, &valW, (UWord)ec );
413
414   tl_assert(found);
415   api = (APInfo*)valW;
416   tl_assert(api->ap == ec);
417
418   if (delta < 0) {
419      tl_assert(api->cur_bytes_live >= -delta);
420      tl_assert(g_cur_bytes_live >= -delta);
421   }
422
423   // adjust current live size
424   api->cur_bytes_live += delta;
425   g_cur_bytes_live += delta;
426
427   if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
428      api->max_bytes_live  = api->cur_bytes_live;
429      api->max_blocks_live = api->cur_blocks_live;
430   }
431
432   // update global summary stats
433   if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
434      g_max_bytes_live = g_cur_bytes_live;
435      g_max_blocks_live = g_cur_blocks_live;
436   }
437   if (delta > 0)
438      g_tot_bytes += delta;
439
440   // adjust total allocation size
441   if (delta > 0)
442      api->tot_bytes += delta;
443}
444
445
446//------------------------------------------------------------//
447//--- update both Block and APInfos after {m,re}alloc/free ---//
448//------------------------------------------------------------//
449
450static
451void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
452                  Bool is_zeroed )
453{
454   tl_assert(p == NULL); // don't handle custom allocators right now
455   SizeT actual_szB /*, slop_szB*/;
456
457   if ((SSizeT)req_szB < 0) return NULL;
458
459   if (req_szB == 0)
460      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
461
462   // Allocate and zero if necessary
463   if (!p) {
464      p = VG_(cli_malloc)( req_alignB, req_szB );
465      if (!p) {
466         return NULL;
467      }
468      if (is_zeroed) VG_(memset)(p, 0, req_szB);
469      actual_szB = VG_(malloc_usable_size)(p);
470      tl_assert(actual_szB >= req_szB);
471      /* slop_szB = actual_szB - req_szB; */
472   } else {
473      /* slop_szB = 0; */
474   }
475
476   // Make new HP_Chunk node, add to malloc_list
477   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
478   bk->payload   = (Addr)p;
479   bk->req_szB   = req_szB;
480   bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
481   bk->allocd_at = g_guest_instrs_executed;
482   bk->n_reads   = 0;
483   bk->n_writes  = 0;
484   // set up histogram array, if the block isn't too large
485   bk->histoW = NULL;
486   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
487      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
488      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
489   }
490
491   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
492   tl_assert(!present);
493   fbc_cache0 = fbc_cache1 = NULL;
494
495   intro_Block(bk);
496
497   if (0) VG_(printf)("ALLOC %ld -> %p\n", req_szB, p);
498
499   return p;
500}
501
502static
503void die_block ( void* p, Bool custom_free )
504{
505   tl_assert(!custom_free);  // at least for now
506
507   Block* bk = find_Block_containing( (Addr)p );
508
509   if (!bk) {
510     return; // bogus free
511   }
512
513   tl_assert(bk->req_szB > 0);
514   // assert the block finder is behaving sanely
515   tl_assert(bk->payload <= (Addr)p);
516   tl_assert( (Addr)p < bk->payload + bk->req_szB );
517
518   if (bk->payload != (Addr)p) {
519      return; // bogus free
520   }
521
522   if (0) VG_(printf)(" FREE %p %llu\n",
523                      p, g_guest_instrs_executed - bk->allocd_at);
524
525   retire_Block(bk, True/*because_freed*/);
526
527   VG_(cli_free)( (void*)bk->payload );
528   delete_Block_starting_at( bk->payload );
529   if (bk->histoW) {
530      VG_(free)( bk->histoW );
531      bk->histoW = NULL;
532   }
533   VG_(free)( bk );
534}
535
536
537static
538void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
539{
540   if (0) VG_(printf)("REALL %p %ld\n", p_old, new_req_szB);
541   void* p_new = NULL;
542
543   tl_assert(new_req_szB > 0); // map 0 to 1
544
545   // Find the old block.
546   Block* bk = find_Block_containing( (Addr)p_old );
547   if (!bk) {
548      return NULL;   // bogus realloc
549   }
550
551   tl_assert(bk->req_szB > 0);
552   // assert the block finder is behaving sanely
553   tl_assert(bk->payload <= (Addr)p_old);
554   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
555
556   if (bk->payload != (Addr)p_old) {
557      return NULL; // bogus realloc
558   }
559
560   // Keeping the histogram alive in any meaningful way across
561   // block resizing is too darn complicated.  Just throw it away.
562   if (bk->histoW) {
563      VG_(free)(bk->histoW);
564      bk->histoW = NULL;
565   }
566
567   // Actually do the allocation, if necessary.
568   if (new_req_szB <= bk->req_szB) {
569
570      // New size is smaller or same; block not moved.
571      apinfo_change_cur_bytes_live(bk->ap,
572                                   (Long)new_req_szB - (Long)bk->req_szB);
573      bk->req_szB = new_req_szB;
574      return p_old;
575
576   } else {
577
578      // New size is bigger;  make new block, copy shared contents, free old.
579      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
580      if (!p_new) {
581         // Nb: if realloc fails, NULL is returned but the old block is not
582         // touched.  What an awful function.
583         return NULL;
584      }
585      tl_assert(p_new != p_old);
586
587      VG_(memcpy)(p_new, p_old, bk->req_szB);
588      VG_(cli_free)(p_old);
589
590      // Since the block has moved, we need to re-insert it into the
591      // interval tree at the new place.  Do this by removing
592      // and re-adding it.
593      delete_Block_starting_at( (Addr)p_old );
594      // now 'bk' is no longer in the tree, but the Block itself
595      // is still alive
596
597      // Update the metadata.
598      apinfo_change_cur_bytes_live(bk->ap,
599                                   (Long)new_req_szB - (Long)bk->req_szB);
600      bk->payload = (Addr)p_new;
601      bk->req_szB = new_req_szB;
602
603      // and re-add
604      Bool present
605         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
606      tl_assert(!present);
607      fbc_cache0 = fbc_cache1 = NULL;
608
609      return p_new;
610   }
611   /*NOTREACHED*/
612   tl_assert(0);
613}
614
615
616//------------------------------------------------------------//
617//--- malloc() et al replacement wrappers                  ---//
618//------------------------------------------------------------//
619
620static void* dh_malloc ( ThreadId tid, SizeT szB )
621{
622   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
623}
624
625static void* dh___builtin_new ( ThreadId tid, SizeT szB )
626{
627   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
628}
629
630static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
631{
632   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
633}
634
635static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
636{
637   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
638}
639
640static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
641{
642   return new_block( tid, NULL, szB, alignB, False );
643}
644
645static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
646{
647   die_block( p, /*custom_free*/False );
648}
649
650static void dh___builtin_delete ( ThreadId tid, void* p )
651{
652   die_block( p, /*custom_free*/False);
653}
654
655static void dh___builtin_vec_delete ( ThreadId tid, void* p )
656{
657   die_block( p, /*custom_free*/False );
658}
659
660static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
661{
662   if (p_old == NULL) {
663      return dh_malloc(tid, new_szB);
664   }
665   if (new_szB == 0) {
666      dh_free(tid, p_old);
667      return NULL;
668   }
669   return renew_block(tid, p_old, new_szB);
670}
671
672static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
673{
674   tl_assert(0);
675//zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
676//zz
677//zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
678}
679
680//------------------------------------------------------------//
681//--- memory references                                    ---//
682//------------------------------------------------------------//
683
684static
685void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
686{
687   UWord i, offMin, offMax1;
688   offMin = addr - bk->payload;
689   tl_assert(offMin < bk->req_szB);
690   offMax1 = offMin + szB;
691   if (offMax1 > bk->req_szB)
692      offMax1 = bk->req_szB;
693   //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
694   for (i = offMin; i < offMax1; i++) {
695      UShort n = bk->histoW[i];
696      if (n < 0xFFFF) n++;
697      bk->histoW[i] = n;
698   }
699}
700
701static VG_REGPARM(2)
702void dh_handle_write ( Addr addr, UWord szB )
703{
704   Block* bk = find_Block_containing(addr);
705   if (bk) {
706      bk->n_writes += szB;
707      if (bk->histoW)
708         inc_histo_for_block(bk, addr, szB);
709   }
710}
711
712static VG_REGPARM(2)
713void dh_handle_read ( Addr addr, UWord szB )
714{
715   Block* bk = find_Block_containing(addr);
716   if (bk) {
717      bk->n_reads += szB;
718      if (bk->histoW)
719         inc_histo_for_block(bk, addr, szB);
720   }
721}
722
723
724// Handle reads and writes by syscalls (read == kernel
725// reads user space, write == kernel writes user space).
726// Assumes no such read or write spans a heap block
727// boundary and so we can treat it just as one giant
728// read or write.
729static
730void dh_handle_noninsn_read ( CorePart part, ThreadId tid, Char* s,
731                              Addr base, SizeT size )
732{
733   switch (part) {
734      case Vg_CoreSysCall:
735         dh_handle_read(base, size);
736         break;
737      case Vg_CoreSysCallArgInMem:
738         break;
739      case Vg_CoreTranslate:
740         break;
741      default:
742         tl_assert(0);
743   }
744}
745
746static
747void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
748                               Addr base, SizeT size )
749{
750   switch (part) {
751      case Vg_CoreSysCall:
752         dh_handle_write(base, size);
753         break;
754      case Vg_CoreSignal:
755         break;
756      default:
757         tl_assert(0);
758   }
759}
760
761
762//------------------------------------------------------------//
763//--- Instrumentation                                      ---//
764//------------------------------------------------------------//
765
766#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
767#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
768#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
769#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
770#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
771
772static
773void add_counter_update(IRSB* sbOut, Int n)
774{
775   #if defined(VG_BIGENDIAN)
776   # define END Iend_BE
777   #elif defined(VG_LITTLEENDIAN)
778   # define END Iend_LE
779   #else
780   # error "Unknown endianness"
781   #endif
782   // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
783   //   WrTmp(t1, Load64(&g_guest_instrs_executed))
784   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
785   //   Store(&g_guest_instrs_executed, t2)
786   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
787   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
788   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
789
790   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
791   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
792   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
793
794   addStmtToIRSB( sbOut, st1 );
795   addStmtToIRSB( sbOut, st2 );
796   addStmtToIRSB( sbOut, st3 );
797}
798
799static
800void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
801                 Int goff_sp)
802{
803   IRType   tyAddr   = Ity_INVALID;
804   HChar*   hName    = NULL;
805   void*    hAddr    = NULL;
806   IRExpr** argv     = NULL;
807   IRDirty* di       = NULL;
808
809   const Int THRESH = 4096 * 4; // somewhat arbitrary
810   const Int rz_szB = VG_STACK_REDZONE_SZB;
811
812   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
813   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
814
815   if (isWrite) {
816      hName = "dh_handle_write";
817      hAddr = &dh_handle_write;
818   } else {
819      hName = "dh_handle_read";
820      hAddr = &dh_handle_read;
821   }
822
823   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
824
825   /* Add the helper. */
826   tl_assert(hName);
827   tl_assert(hAddr);
828   tl_assert(argv);
829   di = unsafeIRDirty_0_N( 2/*regparms*/,
830                           hName, VG_(fnptr_to_fnentry)( hAddr ),
831                           argv );
832
833   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
834      some arbitrary N.  If that fails then addr is in the range (SP -
835      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
836      addr is within a page of SP and so can't possibly be a heap
837      access, and so can be skipped. */
838   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
839   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
840
841   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
842   addStmtToIRSB(
843      sbOut,
844      assign(sp_minus_rz,
845             tyAddr == Ity_I32
846                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
847                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
848   );
849
850   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
851   addStmtToIRSB(
852      sbOut,
853      assign(diff,
854             tyAddr == Ity_I32
855                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
856                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
857   );
858
859   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
860   addStmtToIRSB(
861      sbOut,
862      assign(guard,
863             tyAddr == Ity_I32
864                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
865                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
866   );
867   di->guard = mkexpr(guard);
868
869   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
870}
871
872static
873IRSB* dh_instrument ( VgCallbackClosure* closure,
874                      IRSB* sbIn,
875                      VexGuestLayout* layout,
876                      VexGuestExtents* vge,
877                      IRType gWordTy, IRType hWordTy )
878{
879   Int   i, n = 0;
880   IRSB* sbOut;
881   IRTypeEnv* tyenv = sbIn->tyenv;
882
883   const Int goff_sp = layout->offset_SP;
884
885   // We increment the instruction count in two places:
886   // - just before any Ist_Exit statements;
887   // - just before the IRSB's end.
888   // In the former case, we zero 'n' and then continue instrumenting.
889
890   sbOut = deepCopyIRSBExceptStmts(sbIn);
891
892   // Copy verbatim any IR preamble preceding the first IMark
893   i = 0;
894   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
895      addStmtToIRSB( sbOut, sbIn->stmts[i] );
896      i++;
897   }
898
899   for (/*use current i*/; i < sbIn->stmts_used; i++) {
900      IRStmt* st = sbIn->stmts[i];
901
902      if (!st || st->tag == Ist_NoOp) continue;
903
904      switch (st->tag) {
905
906         case Ist_IMark: {
907            n++;
908            break;
909         }
910
911         case Ist_Exit: {
912            if (n > 0) {
913               // Add an increment before the Exit statement, then reset 'n'.
914               add_counter_update(sbOut, n);
915               n = 0;
916            }
917            break;
918         }
919
920         case Ist_WrTmp: {
921            IRExpr* data = st->Ist.WrTmp.data;
922            if (data->tag == Iex_Load) {
923               IRExpr* aexpr = data->Iex.Load.addr;
924               // Note also, endianness info is ignored.  I guess
925               // that's not interesting.
926               addMemEvent( sbOut, False/*!isWrite*/,
927                            sizeofIRType(data->Iex.Load.ty),
928                            aexpr, goff_sp );
929            }
930            break;
931         }
932
933         case Ist_Store: {
934            IRExpr* data  = st->Ist.Store.data;
935            IRExpr* aexpr = st->Ist.Store.addr;
936            addMemEvent( sbOut, True/*isWrite*/,
937                         sizeofIRType(typeOfIRExpr(tyenv, data)),
938                         aexpr, goff_sp );
939            break;
940         }
941
942         case Ist_Dirty: {
943            Int      dataSize;
944            IRDirty* d = st->Ist.Dirty.details;
945            if (d->mFx != Ifx_None) {
946               /* This dirty helper accesses memory.  Collect the details. */
947               tl_assert(d->mAddr != NULL);
948               tl_assert(d->mSize != 0);
949               dataSize = d->mSize;
950               // Large (eg. 28B, 108B, 512B on x86) data-sized
951               // instructions will be done inaccurately, but they're
952               // very rare and this avoids errors from hitting more
953               // than two cache lines in the simulation.
954               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
955                  addMemEvent( sbOut, False/*!isWrite*/,
956                               dataSize, d->mAddr, goff_sp );
957               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
958                  addMemEvent( sbOut, True/*isWrite*/,
959                               dataSize, d->mAddr, goff_sp );
960            } else {
961               tl_assert(d->mAddr == NULL);
962               tl_assert(d->mSize == 0);
963            }
964            break;
965         }
966
967         case Ist_CAS: {
968            /* We treat it as a read and a write of the location.  I
969               think that is the same behaviour as it was before IRCAS
970               was introduced, since prior to that point, the Vex
971               front ends would translate a lock-prefixed instruction
972               into a (normal) read followed by a (normal) write. */
973            Int    dataSize;
974            IRCAS* cas = st->Ist.CAS.details;
975            tl_assert(cas->addr != NULL);
976            tl_assert(cas->dataLo != NULL);
977            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
978            if (cas->dataHi != NULL)
979               dataSize *= 2; /* since it's a doubleword-CAS */
980            addMemEvent( sbOut, False/*!isWrite*/,
981                         dataSize, cas->addr, goff_sp );
982            addMemEvent( sbOut, True/*isWrite*/,
983                         dataSize, cas->addr, goff_sp );
984            break;
985         }
986
987         case Ist_LLSC: {
988            IRType dataTy;
989            if (st->Ist.LLSC.storedata == NULL) {
990               /* LL */
991               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
992               addMemEvent( sbOut, False/*!isWrite*/,
993                            sizeofIRType(dataTy),
994                            st->Ist.LLSC.addr, goff_sp );
995            } else {
996               /* SC */
997               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
998               addMemEvent( sbOut, True/*isWrite*/,
999                            sizeofIRType(dataTy),
1000                            st->Ist.LLSC.addr, goff_sp );
1001            }
1002            break;
1003         }
1004
1005         default:
1006            break;
1007      }
1008
1009      addStmtToIRSB( sbOut, st );
1010   }
1011
1012   if (n > 0) {
1013      // Add an increment before the SB end.
1014      add_counter_update(sbOut, n);
1015   }
1016   return sbOut;
1017}
1018
1019#undef binop
1020#undef mkexpr
1021#undef mkU32
1022#undef mkU64
1023#undef assign
1024
1025
1026//------------------------------------------------------------//
1027//--- Command line args                                    ---//
1028//------------------------------------------------------------//
1029
1030// FORWARDS
1031static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1032                              /*OUT*/Bool* increasingP,
1033                              Char* metric_name );
1034
1035static Int    clo_show_top_n = 10;
1036static HChar* clo_sort_by    = "max-bytes-live";
1037
1038static Bool dh_process_cmd_line_option(Char* arg)
1039{
1040   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}
1041
1042   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
1043       ULong (*dummyFn)(APInfo*);
1044       Bool dummyB;
1045       Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
1046       if (!ok)
1047          return False;
1048       // otherwise it's OK, in which case leave it alone.
1049       // show_top_n_apinfos will later convert the string by a
1050       // second call to identify_metric.
1051   }
1052
1053   else
1054      return VG_(replacement_malloc_process_cmd_line_option)(arg);
1055
1056   return True;
1057}
1058
1059
1060static void dh_print_usage(void)
1061{
1062   VG_(printf)(
1063"    --show-top-n=number       show the top <number> alloc points [10]\n"
1064"    --sort-by=string\n"
1065"            sort the allocation points by the metric\n"
1066"            defined by <string>, thusly:\n"
1067"                max-bytes-live    maximum live bytes [default]\n"
1068"                tot-bytes-allocd  total allocation (turnover)\n"
1069"                max-blocks-live   maximum live blocks\n"
1070   );
1071}
1072
1073static void dh_print_debug_usage(void)
1074{
1075   VG_(printf)(
1076"    (none)\n"
1077   );
1078}
1079
1080
1081//------------------------------------------------------------//
1082//--- Finalisation                                         ---//
1083//------------------------------------------------------------//
1084
1085static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
1086{
1087   ULong nK = n / 100;
1088   ULong nR = n % 100;
1089   VG_(sprintf)(buf, "%llu.%s%llu", nK,
1090                nR < 10 ? "0" : "",
1091                nR);
1092}
1093
1094static void show_APInfo ( APInfo* api )
1095{
1096   HChar bufA[80];
1097   VG_(memset)(bufA, 0, sizeof(bufA));
1098   if (api->tot_blocks > 0) {
1099      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
1100                              / (ULong)api->tot_blocks );
1101   } else {
1102      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
1103   }
1104
1105   VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
1106             api->max_bytes_live, api->max_blocks_live);
1107   VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
1108             api->tot_bytes, api->tot_blocks, bufA);
1109
1110   tl_assert(api->tot_blocks >= api->max_blocks_live);
1111   tl_assert(api->tot_bytes >= api->max_bytes_live);
1112
1113   if (api->deaths > 0) {
1114      // Average Age at Death
1115      ULong aad = api->deaths == 0
1116                  ? 0 : (api->death_ages_sum / api->deaths);
1117      // AAD as a fraction of the total program lifetime (so far)
1118      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
1119      // complete lifetime of the program.
1120      ULong aad_frac_10k
1121         = g_guest_instrs_executed == 0
1122           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
1123      HChar buf[16];
1124      show_N_div_100(buf, aad_frac_10k);
1125      VG_(umsg)("deaths:      %'llu, at avg age %'llu "
1126                "(%s%% of prog lifetime)\n",
1127                api->deaths, aad, buf );
1128   } else {
1129      VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
1130   }
1131
1132   HChar bufR[80], bufW[80];
1133   VG_(memset)(bufR, 0, sizeof(bufR));
1134   VG_(memset)(bufW, 0, sizeof(bufW));
1135   if (api->tot_bytes > 0) {
1136      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
1137      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
1138   } else {
1139      VG_(strcat)(bufR, "Inf");
1140      VG_(strcat)(bufW, "Inf");
1141   }
1142
1143   VG_(umsg)("acc-ratios:  %s rd, %s wr "
1144             " (%'llu b-read, %'llu b-written)\n",
1145             bufR, bufW,
1146             api->n_reads, api->n_writes);
1147
1148   VG_(pp_ExeContext)(api->ap);
1149
1150   if (api->histo && api->xsize_tag == Exactly) {
1151      VG_(umsg)("\nAggregated access counts by offset:\n");
1152      VG_(umsg)("\n");
1153      UWord i;
1154      if (api->xsize > 0)
1155         VG_(umsg)("[   0]  ");
1156      for (i = 0; i < api->xsize; i++) {
1157         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
1158            VG_(umsg)("\n");
1159            VG_(umsg)("[%4lu]  ", i);
1160         }
1161         VG_(umsg)("%u ", api->histo[i]);
1162      }
1163      VG_(umsg)("\n");
1164   }
1165}
1166
1167
1168/* Metric-access functions for APInfos. */
1169static ULong get_metric__max_bytes_live ( APInfo* api ) {
1170   return api->max_bytes_live;
1171}
1172static ULong get_metric__tot_bytes ( APInfo* api ) {
1173   return api->tot_bytes;
1174}
1175static ULong get_metric__max_blocks_live ( APInfo* api ) {
1176   return api->max_blocks_live;
1177}
1178
1179/* Given a string, return the metric-access function and also a Bool
1180   indicating whether we want increasing or decreasing values of the
1181   metric.  This is used twice, once in command line processing, and
1182   then again in show_top_n_apinfos.  Returns False if the given
1183   string could not be identified.*/
1184static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1185                              /*OUT*/Bool* increasingP,
1186                              Char* metric_name )
1187{
1188   if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
1189      *get_metricP = get_metric__max_bytes_live;
1190      *increasingP = False;
1191      return True;
1192   }
1193   if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
1194      *get_metricP = get_metric__tot_bytes;
1195      *increasingP = False;
1196      return True;
1197   }
1198   if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
1199      *get_metricP = get_metric__max_blocks_live;
1200      *increasingP = False;
1201      return True;
1202   }
1203   return False;
1204}
1205
1206
1207static void show_top_n_apinfos ( void )
1208{
1209   Int   i;
1210   UWord keyW, valW;
1211   ULong (*get_metric)(APInfo*);
1212   Bool  increasing;
1213
1214   HChar* metric_name = clo_sort_by;
1215   tl_assert(metric_name); // ensured by clo processing
1216
1217   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
1218   tl_assert(ok); // ensured by clo processing
1219
1220   VG_(umsg)("\n");
1221   VG_(umsg)("======== ORDERED BY %s \"%s\": "
1222             "top %d allocators ========\n",
1223             increasing ? "increasing" : "decreasing",
1224             metric_name, clo_show_top_n );
1225
1226   // Clear all .shown bits
1227   VG_(initIterFM)( apinfo );
1228   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
1229      APInfo* api = (APInfo*)valW;
1230      tl_assert(api && api->ap == (ExeContext*)keyW);
1231      api->shown = False;
1232   }
1233   VG_(doneIterFM)( apinfo );
1234
1235   // Now print the top N entries.  Each one requires a
1236   // complete scan of the set.  Duh.
1237   for (i = 0; i < clo_show_top_n; i++) {
1238      ULong   best_metric = increasing ? ~0ULL : 0ULL;
1239      APInfo* best_api    = NULL;
1240
1241      VG_(initIterFM)( apinfo );
1242      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
1243         APInfo* api = (APInfo*)valW;
1244         if (api->shown)
1245            continue;
1246         ULong metric = get_metric(api);
1247         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
1248            best_metric = metric;
1249            best_api = api;
1250         }
1251      }
1252      VG_(doneIterFM)( apinfo );
1253
1254      if (!best_api)
1255         break; // all APIs have been shown.  Stop.
1256
1257      VG_(umsg)("\n");
1258      VG_(umsg)("-------------------- %d of %d --------------------\n",
1259                i+1, clo_show_top_n );
1260      show_APInfo(best_api);
1261      best_api->shown = True;
1262   }
1263
1264   VG_(umsg)("\n");
1265}
1266
1267
1268static void dh_fini(Int exit_status)
1269{
1270   // Before printing statistics, we must harvest access counts for
1271   // all the blocks that are still alive.  Not doing so gives
1272   // access ratios which are too low (zero, in the worst case)
1273   // for such blocks, since the accesses that do get made will
1274   // (if we skip this step) not get folded into the AP summaries.
1275   UWord keyW, valW;
1276   VG_(initIterFM)( interval_tree );
1277   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
1278      Block* bk = (Block*)keyW;
1279      tl_assert(valW == 0);
1280      tl_assert(bk);
1281      retire_Block(bk, False/*!because_freed*/);
1282   }
1283   VG_(doneIterFM)( interval_tree );
1284
1285   // show results
1286   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
1287   VG_(umsg)("\n");
1288   VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
1289   VG_(umsg)("\n");
1290   VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
1291             g_max_bytes_live, g_max_blocks_live);
1292   VG_(umsg)("\n");
1293   VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
1294             g_tot_bytes, g_tot_blocks);
1295   VG_(umsg)("\n");
1296   if (g_tot_bytes > 0) {
1297      VG_(umsg)("insns per allocated byte: %'llu\n",
1298                g_guest_instrs_executed / g_tot_bytes);
1299      VG_(umsg)("\n");
1300   }
1301
1302   show_top_n_apinfos();
1303
1304   VG_(umsg)("\n");
1305   VG_(umsg)("\n");
1306   VG_(umsg)("==============================================================\n");
1307   VG_(umsg)("\n");
1308   VG_(umsg)("Some hints: (see --help for command line option details):\n");
1309   VG_(umsg)("\n");
1310   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
1311   VG_(umsg)("\n");
1312   VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
1313   VG_(umsg)("                 You probably want to set it much higher than\n");
1314   VG_(umsg)("                 the default value (10)\n");
1315   VG_(umsg)("\n");
1316   VG_(umsg)("* --sort-by=     specifies the sort key for output.\n");
1317   VG_(umsg)("                 See --help for details.\n");
1318   VG_(umsg)("\n");
1319   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
1320   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
1321   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
1322   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
1323   VG_(umsg)("\n");
1324
1325   if (VG_(clo_stats)) {
1326      VG_(dmsg)(" dhat: find_Block_containing:\n");
1327      VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
1328                stats__n_fBc_cached + stats__n_fBc_uncached,
1329                stats__n_fBc_cached,
1330                stats__n_fBc_uncached);
1331      VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
1332      VG_(dmsg)("\n");
1333   }
1334}
1335
1336
1337//------------------------------------------------------------//
1338//--- Initialisation                                       ---//
1339//------------------------------------------------------------//
1340
1341static void dh_post_clo_init(void)
1342{
1343}
1344
1345static void dh_pre_clo_init(void)
1346{
1347   VG_(details_name)            ("DHAT");
1348   VG_(details_version)         (NULL);
1349   VG_(details_description)     ("a dynamic heap analysis tool");
1350   VG_(details_copyright_author)(
1351      "Copyright (C) 2010-2011, and GNU GPL'd, by Mozilla Inc");
1352   VG_(details_bug_reports_to)  (VG_BUGS_TO);
1353
1354   // Basic functions.
1355   VG_(basic_tool_funcs)          (dh_post_clo_init,
1356                                   dh_instrument,
1357                                   dh_fini);
1358//zz
1359   // Needs.
1360   VG_(needs_libc_freeres)();
1361   VG_(needs_command_line_options)(dh_process_cmd_line_option,
1362                                   dh_print_usage,
1363                                   dh_print_debug_usage);
1364//zz   VG_(needs_client_requests)     (dh_handle_client_request);
1365//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
1366//zz                                   dh_expensive_sanity_check);
1367   VG_(needs_malloc_replacement)  (dh_malloc,
1368                                   dh___builtin_new,
1369                                   dh___builtin_vec_new,
1370                                   dh_memalign,
1371                                   dh_calloc,
1372                                   dh_free,
1373                                   dh___builtin_delete,
1374                                   dh___builtin_vec_delete,
1375                                   dh_realloc,
1376                                   dh_malloc_usable_size,
1377                                   0 );
1378
1379   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
1380   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
1381   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );
1382
1383   tl_assert(!interval_tree);
1384   tl_assert(!fbc_cache0);
1385   tl_assert(!fbc_cache1);
1386
1387   interval_tree = VG_(newFM)( VG_(malloc),
1388                               "dh.main.interval_tree.1",
1389                               VG_(free),
1390                               interval_tree_Cmp );
1391
1392   apinfo = VG_(newFM)( VG_(malloc),
1393                        "dh.main.apinfo.1",
1394                        VG_(free),
1395                        NULL/*unboxedcmp*/ );
1396}
1397
1398VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1399
1400//--------------------------------------------------------------------//
1401//--- end                                                dh_main.c ---//
1402//--------------------------------------------------------------------//
1403