lk_main.c revision 83df0b67a14425c484d8dda42b53f3ff0b598894
1
2/*--------------------------------------------------------------------*/
3/*--- An example Valgrind tool.                          lk_main.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Lackey, an example Valgrind tool that does
8   some simple program measurement and tracing.
9
10   Copyright (C) 2002-2008 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31// This tool shows how to do some basic instrumentation.
32//
33// There are four kinds of instrumentation it can do.  They can be turned
34// on/off independently with command line options:
35//
36// * --basic-counts   : do basic counts, eg. number of instructions
37//                      executed, jumps executed, etc.
38// * --detailed-counts: do more detailed counts:  number of loads, stores
39//                      and ALU operations of different sizes.
40// * --trace-mem=yes:   trace all (data) memory accesses.
41// * --trace-superblocks=yes:
42//                      trace all superblock entries.  Mostly of interest
43//                      to the Valgrind developers.
44//
45// The code for each kind of instrumentation is guarded by a clo_* variable:
46// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47//
48// If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
50// If you're not interested in the other kinds of instrumentation you can
51// remove them.  If you want to do more complex modifications, please read
52// VEX/pub/libvex_ir.h to understand the intermediate representation.
53//
54//
55// Specific Details about --trace-mem=yes
56// --------------------------------------
57// Lackey's --trace-mem code is a good starting point for building Valgrind
58// tools that act on memory loads and stores.  It also could be used as is,
59// with its output used as input to a post-mortem processing step.  However,
60// because memory traces can be very large, online analysis is generally
61// better.
62//
63// It prints memory data access traces that look like this:
64//
65//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66//   I  0023C792,5
67//    S BE80199C,4  # data store at 0xBE80199C of size 4
68//   I  0025242B,3
69//    L BE801950,4  # data load at 0xBE801950 of size 4
70//   I  0023D476,7
71//    M 0025747C,1  # data modify at 0x0025747C of size 1
72//   I  0023DC20,2
73//    L 00254962,1
74//    L BE801FB3,1
75//   I  00252305,1
76//    L 00254AEB,1
77//    S 00257998,1
78//
79// Every instruction executed has an "instr" event representing it.
80// Instructions that do memory accesses are followed by one or more "load",
81// "store" or "modify" events.  Some instructions do more than one load or
82// store, as in the last two examples in the above trace.
83//
84// Here are some examples of x86 instructions that do different combinations
85// of loads, stores, and modifies.
86//
87//    Instruction          Memory accesses                  Event sequence
88//    -----------          ---------------                  --------------
89//    add %eax, %ebx       No loads or stores               instr
90//
91//    movl (%eax), %ebx    loads (%eax)                     instr, load
92//
93//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94//
95//    incl (%ecx)          modifies (%ecx)                  instr, modify
96//
97//    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98//
99//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101//    movsw                loads (%esi), stores (%edi)      instr, load, store
102//
103// Instructions using x86 "rep" prefixes are traced as if they are repeated
104// N times.
105//
106// Lackey with --trace-mem gives good traces, but they are not perfect, for
107// the following reasons:
108//
109// - It does not trace into the OS kernel, so system calls and other kernel
110//   operations (eg. some scheduling and signal handling code) are ignored.
111//
112// - It could model loads and stores done at the system call boundary using
113//   the pre_mem_read/post_mem_write events.  For example, if you call
114//   fstat() you know that the passed in buffer has been written.  But it
115//   currently does not do this.
116//
117// - Valgrind replaces some code (not much) with its own, notably parts of
118//   code for scheduling operations and signal handling.  This code is not
119//   traced.
120//
121// - There is no consideration of virtual-to-physical address mapping.
122//   This may not matter for many purposes.
123//
124// - Valgrind modifies the instruction stream in some very minor ways.  For
125//   example, on x86 the bts, btc, btr instructions are incorrectly
126//   considered to always touch memory (this is a consequence of these
127//   instructions being very difficult to simulate).
128//
129// - Valgrind tools layout memory differently to normal programs, so the
130//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
131//   tools) is suitable for getting relative memory traces -- eg. if you
132//   want to analyse locality of memory accesses -- but is not good if
133//   absolute addresses are important.
134//
// Despite all these warnings, Lackey's results should be good enough for a
136// wide range of purposes.  For example, Cachegrind shares all the above
137// shortcomings and it is still useful.
138//
139// For further inspiration, you should look at cachegrind/cg_main.c which
140// uses the same basic technique for tracing memory accesses, but also groups
141// events together for processing into twos and threes so that fewer C calls
142// are made and things run faster.
143//
144// Specific Details about --trace-superblocks=yes
145// ----------------------------------------------
146// Valgrind splits code up into single entry, multiple exit blocks
147// known as superblocks.  By itself, --trace-superblocks=yes just
148// prints a message as each superblock is run:
149//
150//  SB 04013170
151//  SB 04013177
152//  SB 04013173
153//  SB 04013177
154//
155// The hex number is the address of the first instruction in the
156// superblock.  You can see the relationship more obviously if you use
157// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
// message at address X is immediately followed by an "I" message
159// for that address, as the first instruction in the block is
160// executed, for example:
161//
162//  SB 04014073
163//  I  04014073,3
164//   L 7FEFFF7F8,8
165//  I  04014076,4
166//  I  0401407A,3
167//  I  0401407D,3
168//  I  04014080,3
169//  I  04014083,6
170
171
172#include "pub_tool_basics.h"
173#include "pub_tool_tooliface.h"
174#include "pub_tool_libcassert.h"
175#include "pub_tool_libcprint.h"
176#include "pub_tool_debuginfo.h"
177#include "pub_tool_libcbase.h"
178#include "pub_tool_options.h"
179#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180
181/*------------------------------------------------------------*/
182/*--- Command line options                                 ---*/
183/*------------------------------------------------------------*/
184
185/* Command line options controlling instrumentation kinds, as described at
186 * the top of this file. */
187static Bool clo_basic_counts    = True;
188static Bool clo_detailed_counts = False;
189static Bool clo_trace_mem       = False;
190static Bool clo_trace_sbs       = False;
191
192/* The name of the function of which the number of calls (under
193 * --basic-counts=yes) is to be counted, with default. Override with command
194 * line option --fnname. */
195static Char* clo_fnname = "_dl_runtime_resolve";
196
197static Bool lk_process_cmd_line_option(Char* arg)
198{
199   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
200   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
201   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
202   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
203   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
204   else
205      return False;
206
207   tl_assert(clo_fnname);
208   tl_assert(clo_fnname[0]);
209   return True;
210}
211
212static void lk_print_usage(void)
213{
214   VG_(printf)(
215"    --basic-counts=no|yes     count instructions, jumps, etc. [no]\n"
216"    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217"    --trace-mem=no|yes        trace all loads and stores [no]\n"
218"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219"    --fnname=<name>           count calls to <name> (only used if\n"
220"                              --basic-count=yes)  [_dl_runtime_resolve]\n"
221   );
222}
223
224static void lk_print_debug_usage(void)
225{
226   VG_(printf)(
227"    (none)\n"
228   );
229}
230
231/*------------------------------------------------------------*/
232/*--- Stuff for --basic-counts                             ---*/
233/*------------------------------------------------------------*/
234
235/* Nb: use ULongs because the numbers can get very big */
236static ULong n_func_calls    = 0;
237static ULong n_SBs_entered   = 0;
238static ULong n_SBs_completed = 0;
239static ULong n_IRStmts       = 0;
240static ULong n_guest_instrs  = 0;
241static ULong n_Jccs          = 0;
242static ULong n_Jccs_untaken  = 0;
243static ULong n_IJccs         = 0;
244static ULong n_IJccs_untaken = 0;
245
246static void add_one_func_call(void)
247{
248   n_func_calls++;
249}
250
251static void add_one_SB_entered(void)
252{
253   n_SBs_entered++;
254}
255
256static void add_one_SB_completed(void)
257{
258   n_SBs_completed++;
259}
260
261static void add_one_IRStmt(void)
262{
263   n_IRStmts++;
264}
265
266static void add_one_guest_instr(void)
267{
268   n_guest_instrs++;
269}
270
271static void add_one_Jcc(void)
272{
273   n_Jccs++;
274}
275
276static void add_one_Jcc_untaken(void)
277{
278   n_Jccs_untaken++;
279}
280
281static void add_one_inverted_Jcc(void)
282{
283   n_IJccs++;
284}
285
286static void add_one_inverted_Jcc_untaken(void)
287{
288   n_IJccs_untaken++;
289}
290
291/*------------------------------------------------------------*/
292/*--- Stuff for --detailed-counts                          ---*/
293/*------------------------------------------------------------*/
294
295/* --- Operations --- */
296
/* The kinds of operation counted by --detailed-counts.  The values are
   used as the first index into detailCounts[][]. */
typedef
   enum {
      OpLoad  = 0,
      OpStore = 1,
      OpAlu   = 2
   }
   Op;

/* Number of enumerators in Op. */
#define N_OPS 3
300
301
302/* --- Types --- */
303
304#define N_TYPES 9
305
306static Int type2index ( IRType ty )
307{
308   switch (ty) {
309      case Ity_I1:      return 0;
310      case Ity_I8:      return 1;
311      case Ity_I16:     return 2;
312      case Ity_I32:     return 3;
313      case Ity_I64:     return 4;
314      case Ity_I128:    return 5;
315      case Ity_F32:     return 6;
316      case Ity_F64:     return 7;
317      case Ity_V128:    return 8;
318      default: tl_assert(0);
319   }
320}
321
322static HChar* nameOfTypeIndex ( Int i )
323{
324   switch (i) {
325      case 0: return "I1";   break;
326      case 1: return "I8";   break;
327      case 2: return "I16";  break;
328      case 3: return "I32";  break;
329      case 4: return "I64";  break;
330      case 5: return "I128"; break;
331      case 6: return "F32";  break;
332      case 7: return "F64";  break;
333      case 8: return "V128"; break;
334      default: tl_assert(0);
335   }
336}
337
338
339/* --- Counts --- */
340
341static ULong detailCounts[N_OPS][N_TYPES];
342
343/* The helper that is called from the instrumented code. */
344static VG_REGPARM(1)
345void increment_detail(ULong* detail)
346{
347   (*detail)++;
348}
349
350/* A helper that adds the instrumentation for a detail. */
351static void instrument_detail(IRSB* sb, Op op, IRType type)
352{
353   IRDirty* di;
354   IRExpr** argv;
355   const UInt typeIx = type2index(type);
356
357   tl_assert(op < N_OPS);
358   tl_assert(typeIx < N_TYPES);
359
360   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
361   di = unsafeIRDirty_0_N( 1, "increment_detail",
362                              VG_(fnptr_to_fnentry)( &increment_detail ),
363                              argv);
364   addStmtToIRSB( sb, IRStmt_Dirty(di) );
365}
366
367/* Summarize and print the details. */
368static void print_details ( void )
369{
370   Int typeIx;
371   VG_(message)(Vg_UserMsg,
372                "   Type        Loads       Stores       AluOps");
373   VG_(message)(Vg_UserMsg,
374                "   -------------------------------------------");
375   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
376      VG_(message)(Vg_UserMsg,
377                   "   %4s %'12llu %'12llu %'12llu",
378                   nameOfTypeIndex( typeIx ),
379                   detailCounts[OpLoad ][typeIx],
380                   detailCounts[OpStore][typeIx],
381                   detailCounts[OpAlu  ][typeIx]
382      );
383   }
384}
385
386
387/*------------------------------------------------------------*/
388/*--- Stuff for --trace-mem                                ---*/
389/*------------------------------------------------------------*/
390
391#define MAX_DSIZE    512
392
393typedef
394   IRExpr
395   IRAtom;
396
/* What a pending event represents; flushEvents() picks the trace_*
   helper from this. */
typedef enum {
   Event_Ir,   /* instruction read -> trace_instr()  */
   Event_Dr,   /* data read        -> trace_load()   */
   Event_Dw,   /* data write       -> trace_store()  */
   Event_Dm    /* data modify      -> trace_modify() */
} EventKind;
400
401typedef
402   struct {
403      EventKind  ekind;
404      IRAtom*    addr;
405      Int        size;
406   }
407   Event;
408
/* Capacity of the pending-event list.  It has to be at least two so
   that a read and a write to the same address can be merged into a
   modify.  Making it bigger than necessary only risks more spilling,
   because the address temporaries then stay live for longer. */
#define N_EVENTS 4
414
415/* Maintain an ordered list of memory events which are outstanding, in
416   the sense that no IR has yet been generated to do the relevant
417   helper calls.  The SB is scanned top to bottom and memory events
418   are added to the end of the list, merging with the most recent
419   notified event where possible (Dw immediately following Dr and
420   having the same size and EA can be merged).
421
422   This merging is done so that for architectures which have
423   load-op-store instructions (x86, amd64), the instr is treated as if
424   it makes just one memory reference (a modify), rather than two (a
425   read followed by a write at the same address).
426
427   At various points the list will need to be flushed, that is, IR
428   generated from it.  That must happen before any possible exit from
429   the block (the end, or an IRStmt_Exit).  Flushing also takes place
430   when there is no space to add a new event.
431
432   If we require the simulation statistics to be up to date with
433   respect to possible memory exceptions, then the list would have to
434   be flushed before each memory reference.  That's a pain so we don't
435   bother.
436
437   Flushing the list consists of walking it start to end and emitting
438   instrumentation IR for each event, in the order in which they
439   appear. */
440
441static Event events[N_EVENTS];
442static Int   events_used = 0;
443
444
445static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
446{
447   VG_(printf)("I  %08lx,%lu\n", addr, size);
448}
449
450static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
451{
452   VG_(printf)(" L %08lx,%lu\n", addr, size);
453}
454
455static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
456{
457   VG_(printf)(" S %08lx,%lu\n", addr, size);
458}
459
460static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
461{
462   VG_(printf)(" M %08lx,%lu\n", addr, size);
463}
464
465
466static void flushEvents(IRSB* sb)
467{
468   Int        i;
469   Char*      helperName;
470   void*      helperAddr;
471   IRExpr**   argv;
472   IRDirty*   di;
473   Event*     ev;
474
475   for (i = 0; i < events_used; i++) {
476
477      ev = &events[i];
478
479      // Decide on helper fn to call and args to pass it.
480      switch (ev->ekind) {
481         case Event_Ir: helperName = "trace_instr";
482                        helperAddr =  trace_instr;  break;
483
484         case Event_Dr: helperName = "trace_load";
485                        helperAddr =  trace_load;   break;
486
487         case Event_Dw: helperName = "trace_store";
488                        helperAddr =  trace_store;  break;
489
490         case Event_Dm: helperName = "trace_modify";
491                        helperAddr =  trace_modify; break;
492         default:
493            tl_assert(0);
494      }
495
496      // Add the helper.
497      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
498      di   = unsafeIRDirty_0_N( /*regparms*/2,
499                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
500                                argv );
501      addStmtToIRSB( sb, IRStmt_Dirty(di) );
502   }
503
504   events_used = 0;
505}
506
507// WARNING:  If you aren't interested in instruction reads, you can omit the
508// code that adds calls to trace_instr() in flushEvents().  However, you
509// must still call this function, addEvent_Ir() -- it is necessary to add
510// the Ir events to the events list so that merging of paired load/store
511// events into modify events works correctly.
512static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
513{
514   Event* evt;
515   tl_assert(clo_trace_mem);
516   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
517            || VG_CLREQ_SZB == isize );
518   if (events_used == N_EVENTS)
519      flushEvents(sb);
520   tl_assert(events_used >= 0 && events_used < N_EVENTS);
521   evt = &events[events_used];
522   evt->ekind = Event_Ir;
523   evt->addr  = iaddr;
524   evt->size  = isize;
525   events_used++;
526}
527
528static
529void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
530{
531   Event* evt;
532   tl_assert(clo_trace_mem);
533   tl_assert(isIRAtom(daddr));
534   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
535   if (events_used == N_EVENTS)
536      flushEvents(sb);
537   tl_assert(events_used >= 0 && events_used < N_EVENTS);
538   evt = &events[events_used];
539   evt->ekind = Event_Dr;
540   evt->addr  = daddr;
541   evt->size  = dsize;
542   events_used++;
543}
544
545static
546void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
547{
548   Event* lastEvt;
549   Event* evt;
550   tl_assert(clo_trace_mem);
551   tl_assert(isIRAtom(daddr));
552   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
553
554   // Is it possible to merge this write with the preceding read?
555   lastEvt = &events[events_used-1];
556   if (events_used > 0
557    && lastEvt->ekind == Event_Dr
558    && lastEvt->size  == dsize
559    && eqIRAtom(lastEvt->addr, daddr))
560   {
561      lastEvt->ekind = Event_Dm;
562      return;
563   }
564
565   // No.  Add as normal.
566   if (events_used == N_EVENTS)
567      flushEvents(sb);
568   tl_assert(events_used >= 0 && events_used < N_EVENTS);
569   evt = &events[events_used];
570   evt->ekind = Event_Dw;
571   evt->size  = dsize;
572   evt->addr  = daddr;
573   events_used++;
574}
575
576
577/*------------------------------------------------------------*/
578/*--- Stuff for --trace-superblocks                        ---*/
579/*------------------------------------------------------------*/
580
581static void trace_superblock(Addr addr)
582{
583   VG_(printf)("SB %08lx\n", addr);
584}
585
586
587/*------------------------------------------------------------*/
588/*--- Basic tool functions                                 ---*/
589/*------------------------------------------------------------*/
590
591static void lk_post_clo_init(void)
592{
593   Int op, tyIx;
594
595   if (clo_detailed_counts) {
596      for (op = 0; op < N_OPS; op++)
597         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
598            detailCounts[op][tyIx] = 0;
599   }
600}
601
602static
603IRSB* lk_instrument ( VgCallbackClosure* closure,
604                      IRSB* sbIn,
605                      VexGuestLayout* layout,
606                      VexGuestExtents* vge,
607                      IRType gWordTy, IRType hWordTy )
608{
609   IRDirty*   di;
610   Int        i;
611   IRSB*      sbOut;
612   Char       fnname[100];
613   IRType     type;
614   IRTypeEnv* tyenv = sbIn->tyenv;
615   Addr       iaddr = 0, dst;
616   UInt       ilen = 0;
617   Bool       condition_inverted = False;
618
619   if (gWordTy != hWordTy) {
620      /* We don't currently support this case. */
621      VG_(tool_panic)("host/guest word size mismatch");
622   }
623
624   /* Set up SB */
625   sbOut = deepCopyIRSBExceptStmts(sbIn);
626
627   // Copy verbatim any IR preamble preceding the first IMark
628   i = 0;
629   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
630      addStmtToIRSB( sbOut, sbIn->stmts[i] );
631      i++;
632   }
633
634   if (clo_basic_counts) {
635      /* Count this superblock. */
636      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
637                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
638                                 mkIRExprVec_0() );
639      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
640   }
641
642   if (clo_trace_sbs) {
643      /* Print this superblock's address. */
644      di = unsafeIRDirty_0_N(
645              0, "trace_superblock",
646              VG_(fnptr_to_fnentry)( &trace_superblock ),
647              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
648           );
649      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
650   }
651
652   if (clo_trace_mem) {
653      events_used = 0;
654   }
655
656   for (/*use current i*/; i < sbIn->stmts_used; i++) {
657      IRStmt* st = sbIn->stmts[i];
658      if (!st || st->tag == Ist_NoOp) continue;
659
660      if (clo_basic_counts) {
661         /* Count one VEX statement. */
662         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
663                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
664                                    mkIRExprVec_0() );
665         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
666      }
667
668      switch (st->tag) {
669         case Ist_NoOp:
670         case Ist_AbiHint:
671         case Ist_Put:
672         case Ist_PutI:
673         case Ist_MBE:
674            addStmtToIRSB( sbOut, st );
675            break;
676
677         case Ist_IMark:
678            if (clo_basic_counts) {
679               /* Needed to be able to check for inverted condition in Ist_Exit */
680               iaddr = st->Ist.IMark.addr;
681               ilen  = st->Ist.IMark.len;
682
683               /* Count guest instruction. */
684               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
685                                          VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
686                                          mkIRExprVec_0() );
687               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
688
689               /* An unconditional branch to a known destination in the
690                * guest's instructions can be represented, in the IRSB to
691                * instrument, by the VEX statements that are the
692                * translation of that known destination. This feature is
693                * called 'SB chasing' and can be influenced by command
694                * line option --vex-guest-chase-thresh.
695                *
696                * To get an accurate count of the calls to a specific
697                * function, taking SB chasing into account, we need to
698                * check for each guest instruction (Ist_IMark) if it is
699                * the entry point of a function.
700                */
701               tl_assert(clo_fnname);
702               tl_assert(clo_fnname[0]);
703               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
704                                            fnname, sizeof(fnname))
705                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
706                  di = unsafeIRDirty_0_N(
707                          0, "add_one_func_call",
708                             VG_(fnptr_to_fnentry)( &add_one_func_call ),
709                             mkIRExprVec_0() );
710                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
711               }
712            }
713            if (clo_trace_mem) {
714               // WARNING: do not remove this function call, even if you
715               // aren't interested in instruction reads.  See the comment
716               // above the function itself for more detail.
717               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
718                            st->Ist.IMark.len );
719            }
720            addStmtToIRSB( sbOut, st );
721            break;
722
723         case Ist_WrTmp:
724            // Add a call to trace_load() if --trace-mem=yes.
725            if (clo_trace_mem) {
726               IRExpr* data = st->Ist.WrTmp.data;
727               if (data->tag == Iex_Load) {
728                  addEvent_Dr( sbOut, data->Iex.Load.addr,
729                               sizeofIRType(data->Iex.Load.ty) );
730               }
731            }
732            if (clo_detailed_counts) {
733               IRExpr* expr = st->Ist.WrTmp.data;
734               type = typeOfIRExpr(sbOut->tyenv, expr);
735               tl_assert(type != Ity_INVALID);
736               switch (expr->tag) {
737                  case Iex_Load:
738                     instrument_detail( sbOut, OpLoad, type );
739                     break;
740                  case Iex_Unop:
741                  case Iex_Binop:
742                  case Iex_Triop:
743                  case Iex_Qop:
744                  case Iex_Mux0X:
745                     instrument_detail( sbOut, OpAlu, type );
746                     break;
747                  default:
748                     break;
749               }
750            }
751            addStmtToIRSB( sbOut, st );
752            break;
753
754         case Ist_Store:
755            if (clo_trace_mem) {
756               IRExpr* data  = st->Ist.Store.data;
757               addEvent_Dw( sbOut, st->Ist.Store.addr,
758                            sizeofIRType(typeOfIRExpr(tyenv, data)) );
759            }
760            if (clo_detailed_counts) {
761               type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
762               tl_assert(type != Ity_INVALID);
763               instrument_detail( sbOut, OpStore, type );
764            }
765            addStmtToIRSB( sbOut, st );
766            break;
767
768         case Ist_Dirty: {
769            if (clo_trace_mem) {
770               Int      dsize;
771               IRDirty* d = st->Ist.Dirty.details;
772               if (d->mFx != Ifx_None) {
773                  // This dirty helper accesses memory.  Collect the details.
774                  tl_assert(d->mAddr != NULL);
775                  tl_assert(d->mSize != 0);
776                  dsize = d->mSize;
777                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
778                     addEvent_Dr( sbOut, d->mAddr, dsize );
779                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
780                     addEvent_Dw( sbOut, d->mAddr, dsize );
781               } else {
782                  tl_assert(d->mAddr == NULL);
783                  tl_assert(d->mSize == 0);
784               }
785            }
786            addStmtToIRSB( sbOut, st );
787            break;
788         }
789
790         case Ist_Exit:
791            if (clo_basic_counts) {
792               // The condition of a branch was inverted by VEX if a taken
793               // branch is in fact a fall trough according to client address
794               tl_assert(iaddr != 0);
795               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
796                                           st->Ist.Exit.dst->Ico.U64;
797               condition_inverted = (dst == iaddr + ilen);
798
799               /* Count Jcc */
800               if (!condition_inverted)
801                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
802                                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
803                                          mkIRExprVec_0() );
804               else
805                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
806                                          VG_(fnptr_to_fnentry)( &add_one_inverted_Jcc ),
807                                          mkIRExprVec_0() );
808
809               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
810            }
811            if (clo_trace_mem) {
812               flushEvents(sbOut);
813            }
814
815            addStmtToIRSB( sbOut, st );      // Original statement
816
817            if (clo_basic_counts) {
818               /* Count non-taken Jcc */
819               if (!condition_inverted)
820                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
821                                          VG_(fnptr_to_fnentry)(
822                                             &add_one_Jcc_untaken ),
823                                          mkIRExprVec_0() );
824               else
825                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
826                                          VG_(fnptr_to_fnentry)(
827                                             &add_one_inverted_Jcc_untaken ),
828                                          mkIRExprVec_0() );
829
830               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
831            }
832            break;
833
834         default:
835            tl_assert(0);
836      }
837   }
838
839   if (clo_basic_counts) {
840      /* Count this basic block. */
841      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
842                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
843                                 mkIRExprVec_0() );
844      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
845   }
846
847   if (clo_trace_mem) {
848      /* At the end of the sbIn.  Flush outstandings. */
849      flushEvents(sbOut);
850   }
851
852   return sbOut;
853}
854
855static void lk_fini(Int exitcode)
856{
857   char percentify_buf[4]; /* Two digits, '%' and 0. */
858   const int percentify_size = sizeof(percentify_buf);
859   const int percentify_decs = 0;
860
861   tl_assert(clo_fnname);
862   tl_assert(clo_fnname[0]);
863
864   if (clo_basic_counts) {
865      ULong total_Jccs = n_Jccs + n_IJccs;
866      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;
867
868      VG_(message)(Vg_UserMsg,
869         "Counted %'llu calls to %s()", n_func_calls, clo_fnname);
870
871      VG_(message)(Vg_UserMsg, "");
872      VG_(message)(Vg_UserMsg, "Jccs:");
873      VG_(message)(Vg_UserMsg, "  total:         %'llu", total_Jccs);
874      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
875         percentify_decs, percentify_size, percentify_buf);
876      VG_(message)(Vg_UserMsg, "  taken:         %'llu (%s)",
877         taken_Jccs, percentify_buf);
878
879      VG_(message)(Vg_UserMsg, "");
880      VG_(message)(Vg_UserMsg, "Executed:");
881      VG_(message)(Vg_UserMsg, "  SBs entered:   %'llu", n_SBs_entered);
882      VG_(message)(Vg_UserMsg, "  SBs completed: %'llu", n_SBs_completed);
883      VG_(message)(Vg_UserMsg, "  guest instrs:  %'llu", n_guest_instrs);
884      VG_(message)(Vg_UserMsg, "  IRStmts:       %'llu", n_IRStmts);
885
886      VG_(message)(Vg_UserMsg, "");
887      VG_(message)(Vg_UserMsg, "Ratios:");
888      tl_assert(n_SBs_entered); // Paranoia time.
889      VG_(message)(Vg_UserMsg, "  guest instrs : SB entered  = %3llu : 10",
890         10 * n_guest_instrs / n_SBs_entered);
891      VG_(message)(Vg_UserMsg, "       IRStmts : SB entered  = %3llu : 10",
892         10 * n_IRStmts / n_SBs_entered);
893      tl_assert(n_guest_instrs); // Paranoia time.
894      VG_(message)(Vg_UserMsg, "       IRStmts : guest instr = %3llu : 10",
895         10 * n_IRStmts / n_guest_instrs);
896   }
897
898   if (clo_detailed_counts) {
899      VG_(message)(Vg_UserMsg, "");
900      VG_(message)(Vg_UserMsg, "IR-level counts by type:");
901      print_details();
902   }
903
904   if (clo_basic_counts) {
905      VG_(message)(Vg_UserMsg, "");
906      VG_(message)(Vg_UserMsg, "Exit code:       %d", exitcode);
907   }
908}
909
910static void lk_pre_clo_init(void)
911{
912   VG_(details_name)            ("Lackey");
913   VG_(details_version)         (NULL);
914   VG_(details_description)     ("an example Valgrind tool");
915   VG_(details_copyright_author)(
916      "Copyright (C) 2002-2008, and GNU GPL'd, by Nicholas Nethercote.");
917   VG_(details_bug_reports_to)  (VG_BUGS_TO);
918   VG_(details_avg_translation_sizeB) ( 200 );
919
920   VG_(basic_tool_funcs)          (lk_post_clo_init,
921                                   lk_instrument,
922                                   lk_fini);
923   VG_(needs_command_line_options)(lk_process_cmd_line_option,
924                                   lk_print_usage,
925                                   lk_print_debug_usage);
926}
927
928VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
929
930/*--------------------------------------------------------------------*/
931/*--- end                                                lk_main.c ---*/
932/*--------------------------------------------------------------------*/
933