1
2/*--------------------------------------------------------------------*/
3/*--- An example Valgrind tool.                          lk_main.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Lackey, an example Valgrind tool that does
8   some simple program measurement and tracing.
9
10   Copyright (C) 2002-2013 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31// This tool shows how to do some basic instrumentation.
32//
33// There are four kinds of instrumentation it can do.  They can be turned
34// on/off independently with command line options:
35//
36// * --basic-counts   : do basic counts, eg. number of instructions
37//                      executed, jumps executed, etc.
38// * --detailed-counts: do more detailed counts:  number of loads, stores
39//                      and ALU operations of different sizes.
40// * --trace-mem=yes:   trace all (data) memory accesses.
41// * --trace-superblocks=yes:
42//                      trace all superblock entries.  Mostly of interest
43//                      to the Valgrind developers.
44//
45// The code for each kind of instrumentation is guarded by a clo_* variable:
46// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47//
48// If you want to modify any of the instrumentation code, look for the code
49// that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
50// If you're not interested in the other kinds of instrumentation you can
51// remove them.  If you want to do more complex modifications, please read
52// VEX/pub/libvex_ir.h to understand the intermediate representation.
53//
54//
55// Specific Details about --trace-mem=yes
56// --------------------------------------
57// Lackey's --trace-mem code is a good starting point for building Valgrind
58// tools that act on memory loads and stores.  It also could be used as is,
59// with its output used as input to a post-mortem processing step.  However,
60// because memory traces can be very large, online analysis is generally
61// better.
62//
63// It prints memory data access traces that look like this:
64//
65//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66//   I  0023C792,5
67//    S BE80199C,4  # data store at 0xBE80199C of size 4
68//   I  0025242B,3
69//    L BE801950,4  # data load at 0xBE801950 of size 4
70//   I  0023D476,7
71//    M 0025747C,1  # data modify at 0x0025747C of size 1
72//   I  0023DC20,2
73//    L 00254962,1
74//    L BE801FB3,1
75//   I  00252305,1
76//    L 00254AEB,1
77//    S 00257998,1
78//
79// Every instruction executed has an "instr" event representing it.
80// Instructions that do memory accesses are followed by one or more "load",
81// "store" or "modify" events.  Some instructions do more than one load or
82// store, as in the last two examples in the above trace.
83//
84// Here are some examples of x86 instructions that do different combinations
85// of loads, stores, and modifies.
86//
87//    Instruction          Memory accesses                  Event sequence
88//    -----------          ---------------                  --------------
89//    add %eax, %ebx       No loads or stores               instr
90//
91//    movl (%eax), %ebx    loads (%eax)                     instr, load
92//
93//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94//
95//    incl (%ecx)          modifies (%ecx)                  instr, modify
96//
97//    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98//
99//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101//    movsw                loads (%esi), stores (%edi)      instr, load, store
102//
103// Instructions using x86 "rep" prefixes are traced as if they are repeated
104// N times.
105//
106// Lackey with --trace-mem gives good traces, but they are not perfect, for
107// the following reasons:
108//
109// - It does not trace into the OS kernel, so system calls and other kernel
110//   operations (eg. some scheduling and signal handling code) are ignored.
111//
112// - It could model loads and stores done at the system call boundary using
113//   the pre_mem_read/post_mem_write events.  For example, if you call
114//   fstat() you know that the passed in buffer has been written.  But it
115//   currently does not do this.
116//
117// - Valgrind replaces some code (not much) with its own, notably parts of
118//   code for scheduling operations and signal handling.  This code is not
119//   traced.
120//
121// - There is no consideration of virtual-to-physical address mapping.
122//   This may not matter for many purposes.
123//
124// - Valgrind modifies the instruction stream in some very minor ways.  For
125//   example, on x86 the bts, btc, btr instructions are incorrectly
126//   considered to always touch memory (this is a consequence of these
127//   instructions being very difficult to simulate).
128//
// - Valgrind tools lay out memory differently to normal programs, so the
//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
//   tools) is suitable for getting relative memory traces -- eg. if you
//   want to analyse locality of memory accesses -- but is not good if
//   absolute addresses are important.
134//
135// Despite all these warnings, Lackey's results should be good enough for a
136// wide range of purposes.  For example, Cachegrind shares all the above
137// shortcomings and it is still useful.
138//
139// For further inspiration, you should look at cachegrind/cg_main.c which
140// uses the same basic technique for tracing memory accesses, but also groups
141// events together for processing into twos and threes so that fewer C calls
142// are made and things run faster.
143//
144// Specific Details about --trace-superblocks=yes
145// ----------------------------------------------
146// Valgrind splits code up into single entry, multiple exit blocks
147// known as superblocks.  By itself, --trace-superblocks=yes just
148// prints a message as each superblock is run:
149//
150//  SB 04013170
151//  SB 04013177
152//  SB 04013173
153//  SB 04013177
154//
155// The hex number is the address of the first instruction in the
156// superblock.  You can see the relationship more obviously if you use
157// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
158// message at address X is immediately followed by an "instr:" message
159// for that address, as the first instruction in the block is
160// executed, for example:
161//
162//  SB 04014073
163//  I  04014073,3
164//   L 7FEFFF7F8,8
165//  I  04014076,4
166//  I  0401407A,3
167//  I  0401407D,3
168//  I  04014080,3
169//  I  04014083,6
170
171
172#include "pub_tool_basics.h"
173#include "pub_tool_tooliface.h"
174#include "pub_tool_libcassert.h"
175#include "pub_tool_libcprint.h"
176#include "pub_tool_debuginfo.h"
177#include "pub_tool_libcbase.h"
178#include "pub_tool_options.h"
179#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180
181/*------------------------------------------------------------*/
182/*--- Command line options                                 ---*/
183/*------------------------------------------------------------*/
184
/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;   // --basic-counts
static Bool clo_detailed_counts = False;  // --detailed-counts
static Bool clo_trace_mem       = False;  // --trace-mem
static Bool clo_trace_sbs       = False;  // --trace-superblocks

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default. Override with command
 * line option --fnname. */
static const HChar* clo_fnname = "main";
196
/* Parse one Lackey-specific command line option.  Returns True iff 'arg'
   was recognised (the matching clo_* variable is updated as a side
   effect); returning False lets the core report an unknown option.
   Note: the VG_*_CLO macros expand to conditions, hence the unusual
   "if VG_STR_CLO(...) {}" form. */
static Bool lk_process_cmd_line_option(const HChar* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   /* --fnname must never be NULL or empty. */
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
211
212static void lk_print_usage(void)
213{
214   VG_(printf)(
215"    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
216"    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217"    --trace-mem=no|yes        trace all loads and stores [no]\n"
218"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219"    --fnname=<name>           count calls to <name> (only used if\n"
220"                              --basic-count=yes)  [main]\n"
221   );
222}
223
/* Print usage text for debug-only options; Lackey has none. */
static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
230
231/*------------------------------------------------------------*/
232/*--- Stuff for --basic-counts                             ---*/
233/*------------------------------------------------------------*/
234
/* Nb: use ULongs because the numbers can get very big */
/* Counters maintained under --basic-counts; each is bumped by the
   matching add_one_*() dirty helper below. */
static ULong n_func_calls    = 0;  // calls to the function named by clo_fnname
static ULong n_SBs_entered   = 0;  // superblocks entered
static ULong n_SBs_completed = 0;  // superblocks run to completion
static ULong n_IRStmts       = 0;  // VEX IR statements
static ULong n_guest_instrs  = 0;  // guest instructions
static ULong n_Jccs          = 0;  // conditional jumps
static ULong n_Jccs_untaken  = 0;  // ...of which untaken
static ULong n_IJccs         = 0;  // inverted conditional jumps
static ULong n_IJccs_untaken = 0;  // ...of which untaken
245
246static void add_one_func_call(void)
247{
248   n_func_calls++;
249}
250
251static void add_one_SB_entered(void)
252{
253   n_SBs_entered++;
254}
255
256static void add_one_SB_completed(void)
257{
258   n_SBs_completed++;
259}
260
261static void add_one_IRStmt(void)
262{
263   n_IRStmts++;
264}
265
266static void add_one_guest_instr(void)
267{
268   n_guest_instrs++;
269}
270
271static void add_one_Jcc(void)
272{
273   n_Jccs++;
274}
275
276static void add_one_Jcc_untaken(void)
277{
278   n_Jccs_untaken++;
279}
280
281static void add_one_inverted_Jcc(void)
282{
283   n_IJccs++;
284}
285
286static void add_one_inverted_Jcc_untaken(void)
287{
288   n_IJccs_untaken++;
289}
290
291/*------------------------------------------------------------*/
292/*--- Stuff for --detailed-counts                          ---*/
293/*------------------------------------------------------------*/
294
// Shorthand: an IRExpr used where a "flat" (atomic) expression is
// expected -- see the isIRAtom() assertions below.
typedef
   IRExpr
   IRAtom;

/* --- Operations --- */

// The three operation categories counted under --detailed-counts.
typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3   // number of Op values


/* --- Types --- */

#define N_TYPES 11   // number of IRTypes distinguished by type2index()
309
310static Int type2index ( IRType ty )
311{
312   switch (ty) {
313      case Ity_I1:      return 0;
314      case Ity_I8:      return 1;
315      case Ity_I16:     return 2;
316      case Ity_I32:     return 3;
317      case Ity_I64:     return 4;
318      case Ity_I128:    return 5;
319      case Ity_F32:     return 6;
320      case Ity_F64:     return 7;
321      case Ity_F128:    return 8;
322      case Ity_V128:    return 9;
323      case Ity_V256:    return 10;
324      default: tl_assert(0);
325   }
326}
327
328static const HChar* nameOfTypeIndex ( Int i )
329{
330   switch (i) {
331      case 0: return "I1";   break;
332      case 1: return "I8";   break;
333      case 2: return "I16";  break;
334      case 3: return "I32";  break;
335      case 4: return "I64";  break;
336      case 5: return "I128"; break;
337      case 6: return "F32";  break;
338      case 7: return "F64";  break;
339      case 8: return "F128";  break;
340      case 9: return "V128"; break;
341      case 10: return "V256"; break;
342      default: tl_assert(0);
343   }
344}
345
346
/* --- Counts --- */

// detailCounts[op][type2index(ty)] is the dynamic count for that
// (operation, type) pair; bumped via increment_detail(), printed by
// print_details().
static ULong detailCounts[N_OPS][N_TYPES];
350
/* The helper that is called from the instrumented code. */
/* 'detail' points at one slot of detailCounts[][]. */
static VG_REGPARM(1)
void increment_detail(ULong* detail)
{
   (*detail)++;
}
357
/* A helper that adds the instrumentation for a detail.  guard ::
   Ity_I1 is the guarding condition for the event.  If NULL it is
   assumed to mean "always True". */
static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
{
   IRDirty* di;
   IRExpr** argv;
   const UInt typeIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(typeIx < N_TYPES);

   /* The dirty call passes the address of the relevant counter slot
      to increment_detail(). */
   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
   di = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              argv);
   if (guard) di->guard = guard;
   addStmtToIRSB( sb, IRStmt_Dirty(di) );
}
377
378/* Summarize and print the details. */
379static void print_details ( void )
380{
381   Int typeIx;
382   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
383   VG_(umsg)("   -------------------------------------------\n");
384   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
385      VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
386                nameOfTypeIndex( typeIx ),
387                detailCounts[OpLoad ][typeIx],
388                detailCounts[OpStore][typeIx],
389                detailCounts[OpAlu  ][typeIx]
390      );
391   }
392}
393
394
395/*------------------------------------------------------------*/
396/*--- Stuff for --trace-mem                                ---*/
397/*------------------------------------------------------------*/
398
// Sanity bound on the size of a single data access.
#define MAX_DSIZE    512

// Kinds of traced memory event: instruction read, data read, data
// write, and data modify (a read merged with a following write of the
// same size and address -- see addEvent_Dw).
typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

typedef
   struct {
      EventKind  ekind;
      IRAtom*    addr;  /* access address, as an IR atom */
      Int        size;  /* access size in bytes */
      IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4
419
420/* Maintain an ordered list of memory events which are outstanding, in
421   the sense that no IR has yet been generated to do the relevant
422   helper calls.  The SB is scanned top to bottom and memory events
423   are added to the end of the list, merging with the most recent
424   notified event where possible (Dw immediately following Dr and
425   having the same size and EA can be merged).
426
427   This merging is done so that for architectures which have
428   load-op-store instructions (x86, amd64), the instr is treated as if
429   it makes just one memory reference (a modify), rather than two (a
430   read followed by a write at the same address).
431
432   At various points the list will need to be flushed, that is, IR
433   generated from it.  That must happen before any possible exit from
434   the block (the end, or an IRStmt_Exit).  Flushing also takes place
435   when there is no space to add a new event, and before entering a
436   RMW (read-modify-write) section on processors supporting LL/SC.
437
438   If we require the simulation statistics to be up to date with
439   respect to possible memory exceptions, then the list would have to
440   be flushed before each memory reference.  That's a pain so we don't
441   bother.
442
443   Flushing the list consists of walking it start to end and emitting
444   instrumentation IR for each event, in the order in which they
445   appear. */
446
// The outstanding-event queue (see the big comment above) and the
// number of entries currently in use.
static Event events[N_EVENTS];
static Int   events_used = 0;
449
450
/* Dirty helper: print one instruction-read trace line. */
static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}
455
/* Dirty helper: print one data-load trace line. */
static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}
460
/* Dirty helper: print one data-store trace line. */
static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}
465
/* Dirty helper: print one data-modify (load+store) trace line. */
static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}
470
471
472static void flushEvents(IRSB* sb)
473{
474   Int        i;
475   const HChar* helperName;
476   void*      helperAddr;
477   IRExpr**   argv;
478   IRDirty*   di;
479   Event*     ev;
480
481   for (i = 0; i < events_used; i++) {
482
483      ev = &events[i];
484
485      // Decide on helper fn to call and args to pass it.
486      switch (ev->ekind) {
487         case Event_Ir: helperName = "trace_instr";
488                        helperAddr =  trace_instr;  break;
489
490         case Event_Dr: helperName = "trace_load";
491                        helperAddr =  trace_load;   break;
492
493         case Event_Dw: helperName = "trace_store";
494                        helperAddr =  trace_store;  break;
495
496         case Event_Dm: helperName = "trace_modify";
497                        helperAddr =  trace_modify; break;
498         default:
499            tl_assert(0);
500      }
501
502      // Add the helper.
503      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
504      di   = unsafeIRDirty_0_N( /*regparms*/2,
505                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
506                                argv );
507      if (ev->guard) {
508         di->guard = ev->guard;
509      }
510      addStmtToIRSB( sb, IRStmt_Dirty(di) );
511   }
512
513   events_used = 0;
514}
515
516// WARNING:  If you aren't interested in instruction reads, you can omit the
517// code that adds calls to trace_instr() in flushEvents().  However, you
518// must still call this function, addEvent_Ir() -- it is necessary to add
519// the Ir events to the events list so that merging of paired load/store
520// events into modify events works correctly.
521static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
522{
523   Event* evt;
524   tl_assert(clo_trace_mem);
525   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
526            || VG_CLREQ_SZB == isize );
527   if (events_used == N_EVENTS)
528      flushEvents(sb);
529   tl_assert(events_used >= 0 && events_used < N_EVENTS);
530   evt = &events[events_used];
531   evt->ekind = Event_Ir;
532   evt->addr  = iaddr;
533   evt->size  = isize;
534   evt->guard = NULL;
535   events_used++;
536}
537
538/* Add a guarded read event. */
539static
540void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
541{
542   Event* evt;
543   tl_assert(clo_trace_mem);
544   tl_assert(isIRAtom(daddr));
545   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
546   if (events_used == N_EVENTS)
547      flushEvents(sb);
548   tl_assert(events_used >= 0 && events_used < N_EVENTS);
549   evt = &events[events_used];
550   evt->ekind = Event_Dr;
551   evt->addr  = daddr;
552   evt->size  = dsize;
553   evt->guard = guard;
554   events_used++;
555}
556
/* Add an ordinary read event, by adding a guarded read event with an
   always-true guard. */
static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   addEvent_Dr_guarded(sb, daddr, dsize, NULL);
}
564
565/* Add a guarded write event. */
566static
567void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
568{
569   Event* evt;
570   tl_assert(clo_trace_mem);
571   tl_assert(isIRAtom(daddr));
572   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
573   if (events_used == N_EVENTS)
574      flushEvents(sb);
575   tl_assert(events_used >= 0 && events_used < N_EVENTS);
576   evt = &events[events_used];
577   evt->ekind = Event_Dw;
578   evt->addr  = daddr;
579   evt->size  = dsize;
580   evt->guard = guard;
581   events_used++;
582}
583
584/* Add an ordinary write event.  Try to merge it with an immediately
585   preceding ordinary read event of the same size to the same
586   address. */
587static
588void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
589{
590   Event* lastEvt;
591   Event* evt;
592   tl_assert(clo_trace_mem);
593   tl_assert(isIRAtom(daddr));
594   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
595
596   // Is it possible to merge this write with the preceding read?
597   lastEvt = &events[events_used-1];
598   if (events_used > 0
599       && lastEvt->ekind == Event_Dr
600       && lastEvt->size  == dsize
601       && lastEvt->guard == NULL
602       && eqIRAtom(lastEvt->addr, daddr))
603   {
604      lastEvt->ekind = Event_Dm;
605      return;
606   }
607
608   // No.  Add as normal.
609   if (events_used == N_EVENTS)
610      flushEvents(sb);
611   tl_assert(events_used >= 0 && events_used < N_EVENTS);
612   evt = &events[events_used];
613   evt->ekind = Event_Dw;
614   evt->size  = dsize;
615   evt->addr  = daddr;
616   evt->guard = NULL;
617   events_used++;
618}
619
620
621/*------------------------------------------------------------*/
622/*--- Stuff for --trace-superblocks                        ---*/
623/*------------------------------------------------------------*/
624
/* Dirty helper: print the address of a superblock as it is entered
   (used under --trace-superblocks=yes). */
static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
629
630
631/*------------------------------------------------------------*/
632/*--- Basic tool functions                                 ---*/
633/*------------------------------------------------------------*/
634
635static void lk_post_clo_init(void)
636{
637   Int op, tyIx;
638
639   if (clo_detailed_counts) {
640      for (op = 0; op < N_OPS; op++)
641         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
642            detailCounts[op][tyIx] = 0;
643   }
644}
645
646static
647IRSB* lk_instrument ( VgCallbackClosure* closure,
648                      IRSB* sbIn,
649                      VexGuestLayout* layout,
650                      VexGuestExtents* vge,
651                      VexArchInfo* archinfo_host,
652                      IRType gWordTy, IRType hWordTy )
653{
654   IRDirty*   di;
655   Int        i;
656   IRSB*      sbOut;
657   HChar      fnname[100];
658   IRTypeEnv* tyenv = sbIn->tyenv;
659   Addr       iaddr = 0, dst;
660   UInt       ilen = 0;
661   Bool       condition_inverted = False;
662
663   if (gWordTy != hWordTy) {
664      /* We don't currently support this case. */
665      VG_(tool_panic)("host/guest word size mismatch");
666   }
667
668   /* Set up SB */
669   sbOut = deepCopyIRSBExceptStmts(sbIn);
670
671   // Copy verbatim any IR preamble preceding the first IMark
672   i = 0;
673   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
674      addStmtToIRSB( sbOut, sbIn->stmts[i] );
675      i++;
676   }
677
678   if (clo_basic_counts) {
679      /* Count this superblock. */
680      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
681                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
682                                 mkIRExprVec_0() );
683      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
684   }
685
686   if (clo_trace_sbs) {
687      /* Print this superblock's address. */
688      di = unsafeIRDirty_0_N(
689              0, "trace_superblock",
690              VG_(fnptr_to_fnentry)( &trace_superblock ),
691              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
692           );
693      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
694   }
695
696   if (clo_trace_mem) {
697      events_used = 0;
698   }
699
700   for (/*use current i*/; i < sbIn->stmts_used; i++) {
701      IRStmt* st = sbIn->stmts[i];
702      if (!st || st->tag == Ist_NoOp) continue;
703
704      if (clo_basic_counts) {
705         /* Count one VEX statement. */
706         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
707                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
708                                    mkIRExprVec_0() );
709         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
710      }
711
712      switch (st->tag) {
713         case Ist_NoOp:
714         case Ist_AbiHint:
715         case Ist_Put:
716         case Ist_PutI:
717         case Ist_MBE:
718            addStmtToIRSB( sbOut, st );
719            break;
720
721         case Ist_IMark:
722            if (clo_basic_counts) {
723               /* Needed to be able to check for inverted condition in Ist_Exit */
724               iaddr = st->Ist.IMark.addr;
725               ilen  = st->Ist.IMark.len;
726
727               /* Count guest instruction. */
728               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
729                                          VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
730                                          mkIRExprVec_0() );
731               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
732
733               /* An unconditional branch to a known destination in the
734                * guest's instructions can be represented, in the IRSB to
735                * instrument, by the VEX statements that are the
736                * translation of that known destination. This feature is
737                * called 'SB chasing' and can be influenced by command
738                * line option --vex-guest-chase-thresh.
739                *
740                * To get an accurate count of the calls to a specific
741                * function, taking SB chasing into account, we need to
742                * check for each guest instruction (Ist_IMark) if it is
743                * the entry point of a function.
744                */
745               tl_assert(clo_fnname);
746               tl_assert(clo_fnname[0]);
747               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
748                                            fnname, sizeof(fnname))
749                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
750                  di = unsafeIRDirty_0_N(
751                          0, "add_one_func_call",
752                             VG_(fnptr_to_fnentry)( &add_one_func_call ),
753                             mkIRExprVec_0() );
754                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
755               }
756            }
757            if (clo_trace_mem) {
758               // WARNING: do not remove this function call, even if you
759               // aren't interested in instruction reads.  See the comment
760               // above the function itself for more detail.
761               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
762                            st->Ist.IMark.len );
763            }
764            addStmtToIRSB( sbOut, st );
765            break;
766
767         case Ist_WrTmp:
768            // Add a call to trace_load() if --trace-mem=yes.
769            if (clo_trace_mem) {
770               IRExpr* data = st->Ist.WrTmp.data;
771               if (data->tag == Iex_Load) {
772                  addEvent_Dr( sbOut, data->Iex.Load.addr,
773                               sizeofIRType(data->Iex.Load.ty) );
774               }
775            }
776            if (clo_detailed_counts) {
777               IRExpr* expr = st->Ist.WrTmp.data;
778               IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
779               tl_assert(type != Ity_INVALID);
780               switch (expr->tag) {
781                  case Iex_Load:
782                    instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
783                     break;
784                  case Iex_Unop:
785                  case Iex_Binop:
786                  case Iex_Triop:
787                  case Iex_Qop:
788                  case Iex_ITE:
789                     instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
790                     break;
791                  default:
792                     break;
793               }
794            }
795            addStmtToIRSB( sbOut, st );
796            break;
797
798         case Ist_Store: {
799            IRExpr* data = st->Ist.Store.data;
800            IRType  type = typeOfIRExpr(tyenv, data);
801            tl_assert(type != Ity_INVALID);
802            if (clo_trace_mem) {
803               addEvent_Dw( sbOut, st->Ist.Store.addr,
804                            sizeofIRType(type) );
805            }
806            if (clo_detailed_counts) {
807               instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
808            }
809            addStmtToIRSB( sbOut, st );
810            break;
811         }
812
813         case Ist_StoreG: {
814            IRStoreG* sg   = st->Ist.StoreG.details;
815            IRExpr*   data = sg->data;
816            IRType    type = typeOfIRExpr(tyenv, data);
817            tl_assert(type != Ity_INVALID);
818            if (clo_trace_mem) {
819               addEvent_Dw_guarded( sbOut, sg->addr,
820                                    sizeofIRType(type), sg->guard );
821            }
822            if (clo_detailed_counts) {
823               instrument_detail( sbOut, OpStore, type, sg->guard );
824            }
825            addStmtToIRSB( sbOut, st );
826            break;
827         }
828
829         case Ist_LoadG: {
830            IRLoadG* lg       = st->Ist.LoadG.details;
831            IRType   type     = Ity_INVALID; /* loaded type */
832            IRType   typeWide = Ity_INVALID; /* after implicit widening */
833            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
834            tl_assert(type != Ity_INVALID);
835            if (clo_trace_mem) {
836               addEvent_Dr_guarded( sbOut, lg->addr,
837                                    sizeofIRType(type), lg->guard );
838            }
839            if (clo_detailed_counts) {
840               instrument_detail( sbOut, OpLoad, type, lg->guard );
841            }
842            addStmtToIRSB( sbOut, st );
843            break;
844         }
845
846         case Ist_Dirty: {
847            if (clo_trace_mem) {
848               Int      dsize;
849               IRDirty* d = st->Ist.Dirty.details;
850               if (d->mFx != Ifx_None) {
851                  // This dirty helper accesses memory.  Collect the details.
852                  tl_assert(d->mAddr != NULL);
853                  tl_assert(d->mSize != 0);
854                  dsize = d->mSize;
855                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
856                     addEvent_Dr( sbOut, d->mAddr, dsize );
857                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
858                     addEvent_Dw( sbOut, d->mAddr, dsize );
859               } else {
860                  tl_assert(d->mAddr == NULL);
861                  tl_assert(d->mSize == 0);
862               }
863            }
864            addStmtToIRSB( sbOut, st );
865            break;
866         }
867
868         case Ist_CAS: {
869            /* We treat it as a read and a write of the location.  I
870               think that is the same behaviour as it was before IRCAS
871               was introduced, since prior to that point, the Vex
872               front ends would translate a lock-prefixed instruction
873               into a (normal) read followed by a (normal) write. */
874            Int    dataSize;
875            IRType dataTy;
876            IRCAS* cas = st->Ist.CAS.details;
877            tl_assert(cas->addr != NULL);
878            tl_assert(cas->dataLo != NULL);
879            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
880            dataSize = sizeofIRType(dataTy);
881            if (cas->dataHi != NULL)
882               dataSize *= 2; /* since it's a doubleword-CAS */
883            if (clo_trace_mem) {
884               addEvent_Dr( sbOut, cas->addr, dataSize );
885               addEvent_Dw( sbOut, cas->addr, dataSize );
886            }
887            if (clo_detailed_counts) {
888               instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
889               if (cas->dataHi != NULL) /* dcas */
890                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
891               instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
892               if (cas->dataHi != NULL) /* dcas */
893                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
894            }
895            addStmtToIRSB( sbOut, st );
896            break;
897         }
898
899         case Ist_LLSC: {
900            IRType dataTy;
901            if (st->Ist.LLSC.storedata == NULL) {
902               /* LL */
903               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
904               if (clo_trace_mem) {
905                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
906                                      sizeofIRType(dataTy) );
907                  /* flush events before LL, helps SC to succeed */
908                  flushEvents(sbOut);
909	       }
910               if (clo_detailed_counts)
911                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
912            } else {
913               /* SC */
914               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
915               if (clo_trace_mem)
916                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
917                                      sizeofIRType(dataTy) );
918               if (clo_detailed_counts)
919                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
920            }
921            addStmtToIRSB( sbOut, st );
922            break;
923         }
924
925         case Ist_Exit:
926            if (clo_basic_counts) {
927               // The condition of a branch was inverted by VEX if a taken
928               // branch is in fact a fall trough according to client address
929               tl_assert(iaddr != 0);
930               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
931                                           st->Ist.Exit.dst->Ico.U64;
932               condition_inverted = (dst == iaddr + ilen);
933
934               /* Count Jcc */
935               if (!condition_inverted)
936                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
937                                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
938                                          mkIRExprVec_0() );
939               else
940                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
941                                          VG_(fnptr_to_fnentry)(
942                                             &add_one_inverted_Jcc ),
943                                          mkIRExprVec_0() );
944
945               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
946            }
947            if (clo_trace_mem) {
948               flushEvents(sbOut);
949            }
950
951            addStmtToIRSB( sbOut, st );      // Original statement
952
953            if (clo_basic_counts) {
954               /* Count non-taken Jcc */
955               if (!condition_inverted)
956                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
957                                          VG_(fnptr_to_fnentry)(
958                                             &add_one_Jcc_untaken ),
959                                          mkIRExprVec_0() );
960               else
961                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
962                                          VG_(fnptr_to_fnentry)(
963                                             &add_one_inverted_Jcc_untaken ),
964                                          mkIRExprVec_0() );
965
966               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
967            }
968            break;
969
970         default:
971            ppIRStmt(st);
972            tl_assert(0);
973      }
974   }
975
976   if (clo_basic_counts) {
977      /* Count this basic block. */
978      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
979                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
980                                 mkIRExprVec_0() );
981      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
982   }
983
984   if (clo_trace_mem) {
985      /* At the end of the sbIn.  Flush outstandings. */
986      flushEvents(sbOut);
987   }
988
989   return sbOut;
990}
991
992static void lk_fini(Int exitcode)
993{
994   HChar percentify_buf[5]; /* Two digits, '%' and 0. */
995   const int percentify_size = sizeof(percentify_buf) - 1;
996   const int percentify_decs = 0;
997
998   tl_assert(clo_fnname);
999   tl_assert(clo_fnname[0]);
1000
1001   if (clo_basic_counts) {
1002      ULong total_Jccs = n_Jccs + n_IJccs;
1003      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;
1004
1005      VG_(umsg)("Counted %'llu call%s to %s()\n",
1006                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);
1007
1008      VG_(umsg)("\n");
1009      VG_(umsg)("Jccs:\n");
1010      VG_(umsg)("  total:         %'llu\n", total_Jccs);
1011      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
1012         percentify_decs, percentify_size, percentify_buf);
1013      VG_(umsg)("  taken:         %'llu (%s)\n",
1014         taken_Jccs, percentify_buf);
1015
1016      VG_(umsg)("\n");
1017      VG_(umsg)("Executed:\n");
1018      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
1019      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
1020      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
1021      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);
1022
1023      VG_(umsg)("\n");
1024      VG_(umsg)("Ratios:\n");
1025      tl_assert(n_SBs_entered); // Paranoia time.
1026      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
1027         10 * n_guest_instrs / n_SBs_entered);
1028      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
1029         10 * n_IRStmts / n_SBs_entered);
1030      tl_assert(n_guest_instrs); // Paranoia time.
1031      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
1032         10 * n_IRStmts / n_guest_instrs);
1033   }
1034
1035   if (clo_detailed_counts) {
1036      VG_(umsg)("\n");
1037      VG_(umsg)("IR-level counts by type:\n");
1038      print_details();
1039   }
1040
1041   if (clo_basic_counts) {
1042      VG_(umsg)("\n");
1043      VG_(umsg)("Exit code:       %d\n", exitcode);
1044   }
1045}
1046
1047static void lk_pre_clo_init(void)
1048{
1049   VG_(details_name)            ("Lackey");
1050   VG_(details_version)         (NULL);
1051   VG_(details_description)     ("an example Valgrind tool");
1052   VG_(details_copyright_author)(
1053      "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote.");
1054   VG_(details_bug_reports_to)  (VG_BUGS_TO);
1055   VG_(details_avg_translation_sizeB) ( 200 );
1056
1057   VG_(basic_tool_funcs)          (lk_post_clo_init,
1058                                   lk_instrument,
1059                                   lk_fini);
1060   VG_(needs_command_line_options)(lk_process_cmd_line_option,
1061                                   lk_print_usage,
1062                                   lk_print_debug_usage);
1063}
1064
1065VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
1066
1067/*--------------------------------------------------------------------*/
1068/*--- end                                                lk_main.c ---*/
1069/*--------------------------------------------------------------------*/
1070