
/*--------------------------------------------------------------------*/
/*--- An example Valgrind tool.                          lk_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Lackey, an example Valgrind tool that does
   some simple program measurement and tracing.

   Copyright (C) 2002-2007 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

// This tool shows how to do some basic instrumentation.
//
// There are four kinds of instrumentation it can do.  They can be turned
// on/off independently with command line options:
//
// * --basic-counts   : do basic counts, eg. number of instructions
//                      executed, jumps executed, etc.
// * --detailed-counts: do more detailed counts:  number of loads, stores
//                      and ALU operations of different sizes.
// * --trace-mem=yes:   trace all (data) memory accesses.
// * --trace-superblocks=yes:
//                      trace all superblock entries.  Mostly of interest
//                      to the Valgrind developers.
//
// The code for each kind of instrumentation is guarded by a clo_* variable:
// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
//
// If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
// If you're not interested in the other kinds of instrumentation you can
// remove them.  If you want to do more complex modifications, please read
// VEX/pub/libvex_ir.h to understand the intermediate representation.
//
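// As a quick orientation, a typical invocation combines any subset of the
// options above, for example:
//
//    valgrind --tool=lackey --basic-counts=yes --fnname=main ./myprog
//
// where "./myprog" and "main" are of course just placeholders for your own
// program and function name.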
//
// Specific Details about --trace-mem=yes
// --------------------------------------
// Lackey's --trace-mem code is a good starting point for building Valgrind
// tools that act on memory loads and stores.  It could also be used as-is,
// with its output used as input to a post-mortem processing step.  However,
// because memory traces can be very large, online analysis is generally
// better.
//
// It prints memory data access traces that look like this:
//
//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
//   I  0023C792,5
//    S BE80199C,4  # data store at 0xBE80199C of size 4
//   I  0025242B,3
//    L BE801950,4  # data load at 0xBE801950 of size 4
//   I  0023D476,7
//    M 0025747C,1  # data modify at 0x0025747C of size 1
//   I  0023DC20,2
//    L 00254962,1
//    L BE801FB3,1
//   I  00252305,1
//    L 00254AEB,1
//    S 00257998,1
//
// Every instruction executed has an "instr" event representing it.
// Instructions that do memory accesses are followed by one or more "load",
// "store" or "modify" events.  Some instructions do more than one load or
// store, as in the last two examples in the above trace.
//
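// Because the format is so simple, a post-mortem consumer of a saved trace
// can be tiny.  The following stand-alone sketch (not part of Lackey; plain
// libc, no Valgrind headers) just tallies how many events of each kind a
// trace fed to it on stdin contains:
//
//    #include <stdio.h>
//    int main(void)
//    {
//       char kind; unsigned long addr; int size;
//       unsigned long counts[256] = {0};
//       while (scanf(" %c %lx,%d", &kind, &addr, &size) == 3)
//          counts[(unsigned char)kind]++;
//       printf("I=%lu L=%lu S=%lu M=%lu\n",
//              counts['I'], counts['L'], counts['S'], counts['M']);
//       return 0;
//    }
//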
// Here are some examples of x86 instructions that do different combinations
// of loads, stores, and modifies.
//
//    Instruction          Memory accesses                  Event sequence
//    -----------          ---------------                  --------------
//    add %eax, %ebx       No loads or stores               instr
//
//    movl (%eax), %ebx    loads (%eax)                     instr, load
//
//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
//
//    incl (%ecx)          modifies (%ecx)                  instr, modify
//
//    cmpsb                loads (%esi), loads (%edi)       instr, load, load
//
//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
//    movsw                loads (%esi), stores (%edi)      instr, load, store
//
// Instructions using x86 "rep" prefixes are traced as if they are repeated
// N times.
//
// Lackey with --trace-mem gives good traces, but they are not perfect, for
// the following reasons:
//
// - It does not trace into the OS kernel, so system calls and other kernel
//   operations (eg. some scheduling and signal handling code) are ignored.
//
// - It could model loads and stores done at the system call boundary using
//   the pre_mem_read/post_mem_write events.  For example, if you call
//   fstat() you know that the passed-in buffer has been written.  But it
//   currently does not do this.
//
// - Valgrind replaces some code (not much) with its own, notably parts of
//   code for scheduling operations and signal handling.  This code is not
//   traced.
//
// - There is no consideration of virtual-to-physical address mapping.
//   This may not matter for many purposes.
//
// - Valgrind modifies the instruction stream in some very minor ways.  For
//   example, on x86 the bts, btc, btr instructions are incorrectly
//   considered to always touch memory (this is a consequence of these
//   instructions being very difficult to simulate).
//
// - Valgrind tools lay out memory differently from normal programs, so the
//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
//   tools) is suitable for getting relative memory traces -- eg. if you
//   want to analyse locality of memory accesses -- but is not good if
//   absolute addresses are important.
//
// Despite all these warnings, Lackey's results should be good enough for a
// wide range of purposes.  For example, Cachegrind shares all the above
// shortcomings and it is still useful.
//
//
// For further inspiration, you should look at cachegrind/cg_main.c which
// uses the same basic technique for tracing memory accesses, but also groups
// events together for processing into twos and threes so that fewer C calls
// are made and things run faster.
// Specific Details about --trace-superblocks=yes
// ----------------------------------------------
// Valgrind splits code up into single entry, multiple exit blocks
// known as superblocks.  By itself, --trace-superblocks=yes just
// prints a message as each superblock is run:
//
//  SB 04013170
//  SB 04013177
//  SB 04013173
//  SB 04013177
//
// The hex number is the address of the first instruction in the
// superblock.  You can see the relationship more obviously if you use
// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
// message at address X is immediately followed by an "I" (instruction)
// line for that address, as the first instruction in the block is
// executed, for example:
//
//  SB 04014073
//  I  04014073,3
//   L 7FEFFF7F8,8
//  I  04014076,4
//  I  0401407A,3
//  I  0401407D,3
//  I  04014080,3
//  I  04014083,6


#include "pub_tool_basics.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_options.h"
#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)

/*------------------------------------------------------------*/
/*--- Command line options                                 ---*/
/*------------------------------------------------------------*/

/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;
static Bool clo_detailed_counts = False;
static Bool clo_trace_mem       = False;
static Bool clo_trace_sbs       = False;

/* The name of the function whose calls are to be counted (under
 * --basic-counts=yes), with default.  Override with the command line
 * option --fnname. */
static Char* clo_fnname = "_dl_runtime_resolve";

static Bool lk_process_cmd_line_option(Char* arg)
{
   VG_STR_CLO(arg, "--fnname", clo_fnname)
   else VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts)
   else VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts)
   else VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem)
   else VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs)
   else
      return False;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
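
/* For reference: a new switch would follow exactly the same pattern as the
   options above -- the "--trace-foo" name below is purely illustrative and
   not a real Lackey option:

      static Bool clo_trace_foo = False;   // alongside the other clo_* vars
      ...
      else VG_BOOL_CLO(arg, "--trace-foo", clo_trace_foo)

   plus a matching line in lk_print_usage() below. */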

static void lk_print_usage(void)
{
   VG_(printf)(
"    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
"    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
"    --trace-mem=no|yes        trace all loads and stores [no]\n"
"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
"    --fnname=<name>           count calls to <name> (only used if\n"
"                              --basic-counts=yes)  [_dl_runtime_resolve]\n"
   );
}

static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}

/*------------------------------------------------------------*/
/*--- Stuff for --basic-counts                             ---*/
/*------------------------------------------------------------*/

/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;
static ULong n_SBs_entered   = 0;
static ULong n_SBs_completed = 0;
static ULong n_IRStmts       = 0;
static ULong n_guest_instrs  = 0;
static ULong n_Jccs          = 0;
static ULong n_Jccs_untaken  = 0;

static void add_one_func_call(void)
{
   n_func_calls++;
}

static void add_one_SB_entered(void)
{
   n_SBs_entered++;
}

static void add_one_SB_completed(void)
{
   n_SBs_completed++;
}

static void add_one_IRStmt(void)
{
   n_IRStmts++;
}

static void add_one_guest_instr(void)
{
   n_guest_instrs++;
}

static void add_one_Jcc(void)
{
   n_Jccs++;
}

static void add_one_Jcc_untaken(void)
{
   n_Jccs_untaken++;
}

/*------------------------------------------------------------*/
/*--- Stuff for --detailed-counts                          ---*/
/*------------------------------------------------------------*/

/* --- Operations --- */

typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

#define N_TYPES 9

static Int type2index ( IRType ty )
{
   switch (ty) {
      case Ity_I1:      return 0;
      case Ity_I8:      return 1;
      case Ity_I16:     return 2;
      case Ity_I32:     return 3;
      case Ity_I64:     return 4;
      case Ity_I128:    return 5;
      case Ity_F32:     return 6;
      case Ity_F64:     return 7;
      case Ity_V128:    return 8;
      default: tl_assert(0); break;
   }
}

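/* So, for example, type2index(Ity_F64) is 7 and nameOfTypeIndex(7) hands
   back "F64"; these two mappings have to be kept in sync with each other
   and with N_TYPES. */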
static HChar* nameOfTypeIndex ( Int i )
{
   switch (i) {
      case 0: return "I1";   break;
      case 1: return "I8";   break;
      case 2: return "I16";  break;
      case 3: return "I32";  break;
      case 4: return "I64";  break;
      case 5: return "I128"; break;
      case 6: return "F32";  break;
      case 7: return "F64";  break;
      case 8: return "V128"; break;
      default: tl_assert(0); break;
   }
}


/* --- Counts --- */

static ULong detailCounts[N_OPS][N_TYPES];

/* The helper that is called from the instrumented code. */
static VG_REGPARM(1)
void increment_detail(ULong* detail)
{
   (*detail)++;
}

/* A helper that adds the instrumentation for a detail. */
static void instrument_detail(IRSB* sb, Op op, IRType type)
{
   IRDirty* di;
   IRExpr** argv;
   const UInt typeIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(typeIx < N_TYPES);

   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
   di = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              argv);
   addStmtToIRSB( sb, IRStmt_Dirty(di) );
}
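
/* For instance, a call instrument_detail(sb, OpLoad, Ity_I32) emits IR that
   bumps detailCounts[OpLoad][3] by one each time that point in the
   superblock is reached (3 being type2index(Ity_I32)); print_details()
   below then reports that cell on the "I32" row under "Loads". */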

/* Summarize and print the details. */
static void print_details ( void )
{
   Int typeIx;
   VG_(message)(Vg_UserMsg,
                "   Type        Loads       Stores       AluOps");
   VG_(message)(Vg_UserMsg,
                "   -------------------------------------------");
   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
      VG_(message)(Vg_UserMsg,
                   "   %4s %,12llu %,12llu %,12llu",
                   nameOfTypeIndex( typeIx ),
                   detailCounts[OpLoad ][typeIx],
                   detailCounts[OpStore][typeIx],
                   detailCounts[OpAlu  ][typeIx]
      );
   }
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-mem                                ---*/
/*------------------------------------------------------------*/

#define MAX_DSIZE    512

typedef
   IRExpr
   IRAtom;

typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

typedef
   struct {
      EventKind  ekind;
      IRAtom*    addr;
      Int        size;
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recently
   queued (and thus still unnotified) event where possible (a Dw
   immediately following a Dr with the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we required the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */
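
/* Worked example of the merge: for "incl (%ecx)" (see the instruction table
   near the top of this file) the load side first queues a Dr for (%ecx) via
   addEvent_Dr(); when the store side is seen, addEvent_Dw() notices that the
   most recently queued event is a Dr with the same size and address and
   simply rewrites it to a Dm, so the flushed trace shows a single "M" line
   rather than an "L"/"S" pair. */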

static Event events[N_EVENTS];
static Int   events_used = 0;


static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}


static void flushEvents(IRSB* sb)
{
   Int        i;
   Char*      helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRDirty*   di;
   Event*     ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}

// WARNING:  If you aren't interested in instruction reads, you can omit the
// code that adds calls to trace_instr() in flushEvents().  However, you
// must still call this function, addEvent_Ir() -- it is necessary to add
// the Ir events to the events list so that merging of paired load/store
// events into modify events works correctly.
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
            || VG_CLREQ_SZB == isize );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Ir;
   evt->addr  = iaddr;
   evt->size  = isize;
   events_used++;
}

static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dr;
   evt->addr  = daddr;
   evt->size  = dsize;
   events_used++;
}

static
void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);

   // Is it possible to merge this write with the preceding read?
   // (Only look at the last queued event if there is one.)
   if (events_used > 0) {
      lastEvt = &events[events_used-1];
      if (lastEvt->ekind == Event_Dr
          && lastEvt->size  == dsize
          && eqIRAtom(lastEvt->addr, daddr))
      {
         lastEvt->ekind = Event_Dm;
         return;
      }
   }

   // No.  Add as normal.
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->size  = dsize;
   evt->addr  = daddr;
   events_used++;
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-superblocks                        ---*/
/*------------------------------------------------------------*/

static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}


/*------------------------------------------------------------*/
/*--- Basic tool functions                                 ---*/
/*------------------------------------------------------------*/

static void lk_post_clo_init(void)
{
   Int op, tyIx;

   if (clo_detailed_counts) {
      for (op = 0; op < N_OPS; op++)
         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
            detailCounts[op][tyIx] = 0;
   }
}

static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   Char       fnname[100];
   IRType     type;
   IRTypeEnv* tyenv = sbIn->tyenv;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }
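   /* (Nothing in this preamble belongs to a guest instruction -- there is
      no IMark for it -- so there is nothing meaningful to count or trace
      here; it is simply passed through unchanged.) */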

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      events_used = 0;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                    mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MFence:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                                          VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
                                          mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination. This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase-thresh.
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
                                            fnname, sizeof(fnname))
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                             VG_(fnptr_to_fnentry)( &add_one_func_call ),
                             mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads.  See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type );
                     break;
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_Mux0X:
                     instrument_detail( sbOut, OpAlu, type );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store:
            if (clo_trace_mem) {
               IRExpr* data  = st->Ist.Store.data;
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(typeOfIRExpr(tyenv, data)) );
            }
            if (clo_detailed_counts) {
               type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
               tl_assert(type != Ity_INVALID);
               instrument_detail( sbOut, OpStore, type );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               /* Count Jcc */
               di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
                                          mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            if (clo_basic_counts) {
               /* Count non-taken Jcc */
               di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_Jcc_untaken ),
                                          mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this superblock as completed. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of sbIn.  Flush any outstanding events. */
      flushEvents(sbOut);
   }

   return sbOut;
}

static void lk_fini(Int exitcode)
{
   char percentify_buf[4]; /* Two digits, '%' and 0. */
   const int percentify_size = sizeof(percentify_buf);
   const int percentify_decs = 0;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      VG_(message)(Vg_UserMsg,
         "Counted %,llu calls to %s()", n_func_calls, clo_fnname);

      VG_(message)(Vg_UserMsg, "");
      VG_(message)(Vg_UserMsg, "Jccs:");
      VG_(message)(Vg_UserMsg, "  total:         %,llu", n_Jccs);
      VG_(percentify)((n_Jccs - n_Jccs_untaken), (n_Jccs ? n_Jccs : 1),
         percentify_decs, percentify_size, percentify_buf);
      VG_(message)(Vg_UserMsg, "  taken:         %,llu (%s)",
         (n_Jccs - n_Jccs_untaken), percentify_buf);

      VG_(message)(Vg_UserMsg, "");
      VG_(message)(Vg_UserMsg, "Executed:");
      VG_(message)(Vg_UserMsg, "  SBs entered:   %,llu", n_SBs_entered);
      VG_(message)(Vg_UserMsg, "  SBs completed: %,llu", n_SBs_completed);
      VG_(message)(Vg_UserMsg, "  guest instrs:  %,llu", n_guest_instrs);
      VG_(message)(Vg_UserMsg, "  IRStmts:       %,llu", n_IRStmts);

      VG_(message)(Vg_UserMsg, "");
      VG_(message)(Vg_UserMsg, "Ratios:");
      tl_assert(n_SBs_entered); // Paranoia time.
      VG_(message)(Vg_UserMsg, "  guest instrs : SB entered  = %3u : 10",
         (UInt)(10 * n_guest_instrs / n_SBs_entered));
      VG_(message)(Vg_UserMsg, "       IRStmts : SB entered  = %3u : 10",
         (UInt)(10 * n_IRStmts / n_SBs_entered));
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(message)(Vg_UserMsg, "       IRStmts : guest instr = %3u : 10",
         (UInt)(10 * n_IRStmts / n_guest_instrs));
   }

   if (clo_detailed_counts) {
      VG_(message)(Vg_UserMsg, "");
      VG_(message)(Vg_UserMsg, "IR-level counts by type:");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(message)(Vg_UserMsg, "");
      VG_(message)(Vg_UserMsg, "Exit code:       %d", exitcode);
   }
}

static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2007, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}
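
/* The calls above wire Lackey into the core: lk_post_clo_init() runs once
   after command line processing, lk_instrument() is called for every
   superblock as it is translated, lk_fini() runs at program exit, and the
   lk_process_cmd_line_option/lk_print_usage/lk_print_debug_usage trio
   handles the --options described at the top of this file. */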

VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                lk_main.c ---*/
/*--------------------------------------------------------------------*/
