1
2/*--------------------------------------------------------------------*/
3/*--- An example Valgrind tool.                          lk_main.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Lackey, an example Valgrind tool that does
8   some simple program measurement and tracing.
9
10   Copyright (C) 2002-2011 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31// This tool shows how to do some basic instrumentation.
32//
33// There are four kinds of instrumentation it can do.  They can be turned
34// on/off independently with command line options:
35//
36// * --basic-counts   : do basic counts, eg. number of instructions
37//                      executed, jumps executed, etc.
38// * --detailed-counts: do more detailed counts:  number of loads, stores
39//                      and ALU operations of different sizes.
40// * --trace-mem=yes:   trace all (data) memory accesses.
41// * --trace-superblocks=yes:
42//                      trace all superblock entries.  Mostly of interest
43//                      to the Valgrind developers.
44//
45// The code for each kind of instrumentation is guarded by a clo_* variable:
46// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47//
48// If you want to modify any of the instrumentation code, look for the code
49// that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
50// If you're not interested in the other kinds of instrumentation you can
51// remove them.  If you want to do more complex modifications, please read
52// VEX/pub/libvex_ir.h to understand the intermediate representation.
53//
54//
55// Specific Details about --trace-mem=yes
56// --------------------------------------
57// Lackey's --trace-mem code is a good starting point for building Valgrind
58// tools that act on memory loads and stores.  It also could be used as is,
59// with its output used as input to a post-mortem processing step.  However,
60// because memory traces can be very large, online analysis is generally
61// better.
62//
63// It prints memory data access traces that look like this:
64//
65//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66//   I  0023C792,5
67//    S BE80199C,4  # data store at 0xBE80199C of size 4
68//   I  0025242B,3
69//    L BE801950,4  # data load at 0xBE801950 of size 4
70//   I  0023D476,7
71//    M 0025747C,1  # data modify at 0x0025747C of size 1
72//   I  0023DC20,2
73//    L 00254962,1
74//    L BE801FB3,1
75//   I  00252305,1
76//    L 00254AEB,1
77//    S 00257998,1
78//
79// Every instruction executed has an "instr" event representing it.
80// Instructions that do memory accesses are followed by one or more "load",
81// "store" or "modify" events.  Some instructions do more than one load or
82// store, as in the last two examples in the above trace.
83//
84// Here are some examples of x86 instructions that do different combinations
85// of loads, stores, and modifies.
86//
87//    Instruction          Memory accesses                  Event sequence
88//    -----------          ---------------                  --------------
89//    add %eax, %ebx       No loads or stores               instr
90//
91//    movl (%eax), %ebx    loads (%eax)                     instr, load
92//
93//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94//
95//    incl (%ecx)          modifies (%ecx)                  instr, modify
96//
97//    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98//
99//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101//    movsw                loads (%esi), stores (%edi)      instr, load, store
102//
103// Instructions using x86 "rep" prefixes are traced as if they are repeated
104// N times.
105//
106// Lackey with --trace-mem gives good traces, but they are not perfect, for
107// the following reasons:
108//
109// - It does not trace into the OS kernel, so system calls and other kernel
110//   operations (eg. some scheduling and signal handling code) are ignored.
111//
112// - It could model loads and stores done at the system call boundary using
113//   the pre_mem_read/post_mem_write events.  For example, if you call
114//   fstat() you know that the passed in buffer has been written.  But it
115//   currently does not do this.
116//
117// - Valgrind replaces some code (not much) with its own, notably parts of
118//   code for scheduling operations and signal handling.  This code is not
119//   traced.
120//
121// - There is no consideration of virtual-to-physical address mapping.
122//   This may not matter for many purposes.
123//
124// - Valgrind modifies the instruction stream in some very minor ways.  For
125//   example, on x86 the bts, btc, btr instructions are incorrectly
126//   considered to always touch memory (this is a consequence of these
127//   instructions being very difficult to simulate).
128//
// - Valgrind tools lay out memory differently from normal programs, so the
130//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
131//   tools) is suitable for getting relative memory traces -- eg. if you
132//   want to analyse locality of memory accesses -- but is not good if
133//   absolute addresses are important.
134//
135// Despite all these warnings, Lackey's results should be good enough for a
136// wide range of purposes.  For example, Cachegrind shares all the above
137// shortcomings and it is still useful.
138//
139// For further inspiration, you should look at cachegrind/cg_main.c which
140// uses the same basic technique for tracing memory accesses, but also groups
141// events together for processing into twos and threes so that fewer C calls
142// are made and things run faster.
143//
144// Specific Details about --trace-superblocks=yes
145// ----------------------------------------------
146// Valgrind splits code up into single entry, multiple exit blocks
147// known as superblocks.  By itself, --trace-superblocks=yes just
148// prints a message as each superblock is run:
149//
150//  SB 04013170
151//  SB 04013177
152//  SB 04013173
153//  SB 04013177
154//
155// The hex number is the address of the first instruction in the
156// superblock.  You can see the relationship more obviously if you use
157// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
158// message at address X is immediately followed by an "instr:" message
159// for that address, as the first instruction in the block is
160// executed, for example:
161//
162//  SB 04014073
163//  I  04014073,3
164//   L 7FEFFF7F8,8
165//  I  04014076,4
166//  I  0401407A,3
167//  I  0401407D,3
168//  I  04014080,3
169//  I  04014083,6
170
171
172#include "pub_tool_basics.h"
173#include "pub_tool_tooliface.h"
174#include "pub_tool_libcassert.h"
175#include "pub_tool_libcprint.h"
176#include "pub_tool_debuginfo.h"
177#include "pub_tool_libcbase.h"
178#include "pub_tool_options.h"
179#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180
181/*------------------------------------------------------------*/
182/*--- Command line options                                 ---*/
183/*------------------------------------------------------------*/
184
/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;   /* --basic-counts (on by default) */
static Bool clo_detailed_counts = False;  /* --detailed-counts */
static Bool clo_trace_mem       = False;  /* --trace-mem */
static Bool clo_trace_sbs       = False;  /* --trace-superblocks */

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default. Override with command
 * line option --fnname. */
static Char* clo_fnname = "main";
196
/* Parse one command line option.  Returns True iff the option was
   recognised, in which case the corresponding clo_* variable has been
   updated by the VG_*_CLO macro. */
static Bool lk_process_cmd_line_option(Char* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   /* --fnname must always be a non-empty string. */
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
211
212static void lk_print_usage(void)
213{
214   VG_(printf)(
215"    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
216"    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217"    --trace-mem=no|yes        trace all loads and stores [no]\n"
218"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219"    --fnname=<name>           count calls to <name> (only used if\n"
220"                              --basic-count=yes)  [main]\n"
221   );
222}
223
224static void lk_print_debug_usage(void)
225{
226   VG_(printf)(
227"    (none)\n"
228   );
229}
230
231/*------------------------------------------------------------*/
232/*--- Stuff for --basic-counts                             ---*/
233/*------------------------------------------------------------*/
234
/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;  /* calls to the --fnname function */
static ULong n_SBs_entered   = 0;  /* superblocks entered */
static ULong n_SBs_completed = 0;  /* superblocks that ran to the end */
static ULong n_IRStmts       = 0;  /* IR statements executed */
static ULong n_guest_instrs  = 0;  /* guest instructions executed */
static ULong n_Jccs          = 0;  /* conditional jumps executed */
static ULong n_Jccs_untaken  = 0;  /* ... of which fell through */
static ULong n_IJccs         = 0;  /* inverted conditional jumps executed */
static ULong n_IJccs_untaken = 0;  /* ... of which fell through */
245
/* The following helpers each bump exactly one of the counters above.
   They are not called directly from this file's code paths at translate
   time; instead lk_instrument emits dirty calls to them, so they run
   each time the instrumented guest code executes. */

/* Called on entry to the function named by --fnname. */
static void add_one_func_call(void)
{
   n_func_calls++;
}

/* Called at the top of every superblock. */
static void add_one_SB_entered(void)
{
   n_SBs_entered++;
}

/* Called at the very end of a superblock, i.e. only if no side exit
   was taken. */
static void add_one_SB_completed(void)
{
   n_SBs_completed++;
}

/* Called once per original IR statement. */
static void add_one_IRStmt(void)
{
   n_IRStmts++;
}

/* Called once per guest instruction (per IMark). */
static void add_one_guest_instr(void)
{
   n_guest_instrs++;
}

/* Called before a conditional-exit statement. */
static void add_one_Jcc(void)
{
   n_Jccs++;
}

/* Called after a conditional-exit statement, i.e. only if the branch
   was not taken. */
static void add_one_Jcc_untaken(void)
{
   n_Jccs_untaken++;
}

/* As add_one_Jcc, but for branches whose condition VEX inverted. */
static void add_one_inverted_Jcc(void)
{
   n_IJccs++;
}

/* As add_one_Jcc_untaken, but for inverted-condition branches. */
static void add_one_inverted_Jcc_untaken(void)
{
   n_IJccs_untaken++;
}
290
291/*------------------------------------------------------------*/
292/*--- Stuff for --detailed-counts                          ---*/
293/*------------------------------------------------------------*/
294
/* --- Operations --- */

/* The three operation kinds distinguished by --detailed-counts; used as
   the first index into detailCounts. */
typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

/* Number of IRTypes handled by type2index/nameOfTypeIndex; used as the
   second index into detailCounts. */
#define N_TYPES 10
305
306static Int type2index ( IRType ty )
307{
308   switch (ty) {
309      case Ity_I1:      return 0;
310      case Ity_I8:      return 1;
311      case Ity_I16:     return 2;
312      case Ity_I32:     return 3;
313      case Ity_I64:     return 4;
314      case Ity_I128:    return 5;
315      case Ity_F32:     return 6;
316      case Ity_F64:     return 7;
317      case Ity_F128:    return 8;
318      case Ity_V128:    return 9;
319      default: tl_assert(0);
320   }
321}
322
323static HChar* nameOfTypeIndex ( Int i )
324{
325   switch (i) {
326      case 0: return "I1";   break;
327      case 1: return "I8";   break;
328      case 2: return "I16";  break;
329      case 3: return "I32";  break;
330      case 4: return "I64";  break;
331      case 5: return "I128"; break;
332      case 6: return "F32";  break;
333      case 7: return "F64";  break;
334      case 8: return "F128";  break;
335      case 9: return "V128"; break;
336      default: tl_assert(0);
337   }
338}
339
340
/* --- Counts --- */

/* detailCounts[op][tyIx] is the number of executed operations of kind
   'op' on values of the type with index 'tyIx' (see type2index). */
static ULong detailCounts[N_OPS][N_TYPES];
344
345/* The helper that is called from the instrumented code. */
346static VG_REGPARM(1)
347void increment_detail(ULong* detail)
348{
349   (*detail)++;
350}
351
352/* A helper that adds the instrumentation for a detail. */
353static void instrument_detail(IRSB* sb, Op op, IRType type)
354{
355   IRDirty* di;
356   IRExpr** argv;
357   const UInt typeIx = type2index(type);
358
359   tl_assert(op < N_OPS);
360   tl_assert(typeIx < N_TYPES);
361
362   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
363   di = unsafeIRDirty_0_N( 1, "increment_detail",
364                              VG_(fnptr_to_fnentry)( &increment_detail ),
365                              argv);
366   addStmtToIRSB( sb, IRStmt_Dirty(di) );
367}
368
369/* Summarize and print the details. */
370static void print_details ( void )
371{
372   Int typeIx;
373   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
374   VG_(umsg)("   -------------------------------------------\n");
375   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
376      VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
377                nameOfTypeIndex( typeIx ),
378                detailCounts[OpLoad ][typeIx],
379                detailCounts[OpStore][typeIx],
380                detailCounts[OpAlu  ][typeIx]
381      );
382   }
383}
384
385
386/*------------------------------------------------------------*/
387/*--- Stuff for --trace-mem                                ---*/
388/*------------------------------------------------------------*/
389
/* Largest data access size (in bytes) that the addEvent_* functions
   will accept; anything bigger fails their sanity asserts. */
#define MAX_DSIZE    512

/* An IRAtom is an IRExpr with no sub-expressions (a constant or a
   temporary); the addresses stored in events are required to be flat. */
typedef
   IRExpr
   IRAtom;

/* The four kinds of memory event: instruction fetch, data read, data
   write, and data modify (a read+write of the same address). */
typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

/* One outstanding memory event: its kind, its (flat) address
   expression, and its size in bytes. */
typedef
   struct {
      EventKind  ekind;
      IRAtom*    addr;
      Int        size;
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4
413
414/* Maintain an ordered list of memory events which are outstanding, in
415   the sense that no IR has yet been generated to do the relevant
416   helper calls.  The SB is scanned top to bottom and memory events
417   are added to the end of the list, merging with the most recent
418   notified event where possible (Dw immediately following Dr and
419   having the same size and EA can be merged).
420
421   This merging is done so that for architectures which have
422   load-op-store instructions (x86, amd64), the instr is treated as if
423   it makes just one memory reference (a modify), rather than two (a
424   read followed by a write at the same address).
425
426   At various points the list will need to be flushed, that is, IR
427   generated from it.  That must happen before any possible exit from
428   the block (the end, or an IRStmt_Exit).  Flushing also takes place
429   when there is no space to add a new event.
430
431   If we require the simulation statistics to be up to date with
432   respect to possible memory exceptions, then the list would have to
433   be flushed before each memory reference.  That's a pain so we don't
434   bother.
435
436   Flushing the list consists of walking it start to end and emitting
437   instrumentation IR for each event, in the order in which they
438   appear. */
439
/* The outstanding-events list itself, and how many of its slots are
   currently in use.  See the big comment above for its discipline. */
static Event events[N_EVENTS];
static Int   events_used = 0;
442
443
/* The tracing helpers, called via dirty calls from the instrumented
   code.  Each prints one line in the format documented at the top of
   this file ("I"/"L"/"S"/"M", address, size). */

static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}
463
464
/* Flush the outstanding-events list: for each queued event, in order,
   append to 'sb' a dirty call to the matching trace_* helper, then
   empty the list.  Must be called before any possible exit from the
   superblock (see the big comment above). */
static void flushEvents(IRSB* sb)
{
   Int        i;
   Char*      helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRDirty*   di;
   Event*     ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.  Arguments are the (flat) address expression
      // and the access size as a constant word.
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}
505
// WARNING:  If you aren't interested in instruction reads, you can omit the
// code that adds calls to trace_instr() in flushEvents().  However, you
// must still call this function, addEvent_Ir() -- it is necessary to add
// the Ir events to the events list so that merging of paired load/store
// events into modify events works correctly.

/* Queue an instruction-fetch event at (flat) address 'iaddr' of size
   'isize', flushing the list first if it is full. */
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   /* Either a real instruction size, or the special client-request size. */
   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
            || VG_CLREQ_SZB == isize );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Ir;
   evt->addr  = iaddr;
   evt->size  = isize;
   events_used++;
}

/* Queue a data-read event at (flat) address 'daddr' of size 'dsize',
   flushing the list first if it is full. */
static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dr;
   evt->addr  = daddr;
   evt->size  = dsize;
   events_used++;
}
543
544static
545void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
546{
547   Event* lastEvt;
548   Event* evt;
549   tl_assert(clo_trace_mem);
550   tl_assert(isIRAtom(daddr));
551   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
552
553   // Is it possible to merge this write with the preceding read?
554   lastEvt = &events[events_used-1];
555   if (events_used > 0
556    && lastEvt->ekind == Event_Dr
557    && lastEvt->size  == dsize
558    && eqIRAtom(lastEvt->addr, daddr))
559   {
560      lastEvt->ekind = Event_Dm;
561      return;
562   }
563
564   // No.  Add as normal.
565   if (events_used == N_EVENTS)
566      flushEvents(sb);
567   tl_assert(events_used >= 0 && events_used < N_EVENTS);
568   evt = &events[events_used];
569   evt->ekind = Event_Dw;
570   evt->size  = dsize;
571   evt->addr  = daddr;
572   events_used++;
573}
574
575
576/*------------------------------------------------------------*/
577/*--- Stuff for --trace-superblocks                        ---*/
578/*------------------------------------------------------------*/
579
/* Called (via a dirty call) at the start of each superblock when
   --trace-superblocks=yes: print the superblock's start address. */
static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
584
585
586/*------------------------------------------------------------*/
587/*--- Basic tool functions                                 ---*/
588/*------------------------------------------------------------*/
589
590static void lk_post_clo_init(void)
591{
592   Int op, tyIx;
593
594   if (clo_detailed_counts) {
595      for (op = 0; op < N_OPS; op++)
596         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
597            detailCounts[op][tyIx] = 0;
598   }
599}
600
/* The instrumentation function.  Walks the incoming superblock 'sbIn'
   statement by statement, copies each statement into a new superblock
   'sbOut', and interleaves the dirty calls required by whichever of
   the four clo_* instrumentation kinds are enabled.  Returns sbOut. */
static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   Char       fnname[100];        /* function name looked up per IMark */
   IRType     type;
   IRTypeEnv* tyenv = sbIn->tyenv;
   Addr       iaddr = 0, dst;     /* current instruction addr / exit dst */
   UInt       ilen = 0;           /* current instruction length */
   Bool       condition_inverted = False;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* Start each superblock with an empty outstanding-events list. */
      events_used = 0;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                    mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         /* Statements with no memory semantics: copy through unchanged. */
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Needed to be able to check for inverted condition in Ist_Exit */
               iaddr = st->Ist.IMark.addr;
               ilen  = st->Ist.IMark.len;

               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                                          VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
                                          mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination. This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase-thresh.
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
                                            fnname, sizeof(fnname))
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                             VG_(fnptr_to_fnentry)( &add_one_func_call ),
                             mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads.  See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type );
                     break;
                  /* All the arithmetic expression kinds count as ALU ops. */
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_Mux0X:
                     instrument_detail( sbOut, OpAlu, type );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store:
            if (clo_trace_mem) {
               IRExpr* data  = st->Ist.Store.data;
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(typeOfIRExpr(tyenv, data)) );
            }
            if (clo_detailed_counts) {
               type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
               tl_assert(type != Ity_INVALID);
               instrument_detail( sbOut, OpStore, type );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRType dataTy;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
            dataSize = sizeofIRType(dataTy);
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            if (clo_trace_mem) {
               addEvent_Dr( sbOut, cas->addr, dataSize );
               addEvent_Dw( sbOut, cas->addr, dataSize );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpLoad, dataTy );
               instrument_detail( sbOut, OpStore, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               if (clo_trace_mem)
                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
                                      sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpLoad, dataTy );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               if (clo_trace_mem)
                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
                                      sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall through according to client address
               tl_assert(iaddr != 0);
               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
                                           st->Ist.Exit.dst->Ico.U64;
               condition_inverted = (dst == iaddr + ilen);

               /* Count Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
                                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_inverted_Jcc ),
                                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               /* The events list must be flushed before any possible
                  exit from the superblock. */
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            /* The statements below only run if the exit was NOT taken. */
            if (clo_basic_counts) {
               /* Count non-taken Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_Jcc_untaken ),
                                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_inverted_Jcc_untaken ),
                                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this superblock's completion (only reached if no side
         exit was taken). */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush outstandings. */
      flushEvents(sbOut);
   }

   return sbOut;
}
908
/* Tool finalisation callback (registered via VG_(basic_tool_funcs) in
   lk_pre_clo_init).  Prints the summaries selected by the command-line
   options, plus the client's exit code.  'exitcode' is the client
   program's exit status. */
static void lk_fini(Int exitcode)
{
   /* Scratch buffer for VG_(percentify): holds up to "100%" plus the
      trailing NUL, hence 5 bytes. */
   char percentify_buf[5];
   const int percentify_size = sizeof(percentify_buf) - 1;
   const int percentify_decs = 0;  /* no decimal places in percentages */

   /* clo_fnname must have been set to a non-empty string (it has a
      default, so this should always hold). */
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      /* n_IJccs counts conditional branches whose condition VEX inverted
         (see the Ist_Exit handling in lk_instrument).  For those, an
         IR-level "untaken" branch is a taken branch from the client's
         point of view, so the inverted-untaken count is *added* here. */
      ULong total_Jccs = n_Jccs + n_IJccs;
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      /* Guard against division by zero when no conditional jumps ran. */
      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
         percentify_decs, percentify_size, percentify_buf);
      VG_(umsg)("  taken:         %'llu (%s)\n",
         taken_Jccs, percentify_buf);

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      /* If any code ran at all, at least one SB must have been entered;
         the ratios below divide by these counts. */
      tl_assert(n_SBs_entered); // Paranoia time.
      /* Ratios are scaled by 10 to get one "decimal place" using
         integer arithmetic only. */
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
         10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
         10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
         10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}
963
/* Tool initialisation, run before command-line option processing.
   Registers the tool's identity strings and its three core callbacks
   (post-CLO init, instrumentation, finalisation) with the Valgrind
   core, plus the handlers for the tool-specific command-line options. */
static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   /* Hint to the core about how much each translation grows under this
      tool's instrumentation, for translation-cache sizing. */
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}
981
982VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
983
984/*--------------------------------------------------------------------*/
985/*--- end                                                lk_main.c ---*/
986/*--------------------------------------------------------------------*/
987