lk_main.c revision 227a1ecca7f44cb07e74aa8f1bd24b29df629499
1
2/*--------------------------------------------------------------------*/
3/*--- An example Valgrind tool.                          lk_main.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Lackey, an example Valgrind tool that does
8   some simple program measurement and tracing.
9
10   Copyright (C) 2002-2013 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31// This tool shows how to do some basic instrumentation.
32//
33// There are four kinds of instrumentation it can do.  They can be turned
34// on/off independently with command line options:
35//
36// * --basic-counts   : do basic counts, eg. number of instructions
37//                      executed, jumps executed, etc.
38// * --detailed-counts: do more detailed counts:  number of loads, stores
39//                      and ALU operations of different sizes.
40// * --trace-mem=yes:   trace all (data) memory accesses.
41// * --trace-superblocks=yes:
42//                      trace all superblock entries.  Mostly of interest
43//                      to the Valgrind developers.
44//
45// The code for each kind of instrumentation is guarded by a clo_* variable:
46// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47//
48// If you want to modify any of the instrumentation code, look for the code
49// that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
50// If you're not interested in the other kinds of instrumentation you can
51// remove them.  If you want to do more complex modifications, please read
52// VEX/pub/libvex_ir.h to understand the intermediate representation.
53//
54//
55// Specific Details about --trace-mem=yes
56// --------------------------------------
57// Lackey's --trace-mem code is a good starting point for building Valgrind
58// tools that act on memory loads and stores.  It also could be used as is,
59// with its output used as input to a post-mortem processing step.  However,
60// because memory traces can be very large, online analysis is generally
61// better.
62//
63// It prints memory data access traces that look like this:
64//
65//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66//   I  0023C792,5
67//    S BE80199C,4  # data store at 0xBE80199C of size 4
68//   I  0025242B,3
69//    L BE801950,4  # data load at 0xBE801950 of size 4
70//   I  0023D476,7
71//    M 0025747C,1  # data modify at 0x0025747C of size 1
72//   I  0023DC20,2
73//    L 00254962,1
74//    L BE801FB3,1
75//   I  00252305,1
76//    L 00254AEB,1
77//    S 00257998,1
78//
79// Every instruction executed has an "instr" event representing it.
80// Instructions that do memory accesses are followed by one or more "load",
81// "store" or "modify" events.  Some instructions do more than one load or
82// store, as in the last two examples in the above trace.
83//
84// Here are some examples of x86 instructions that do different combinations
85// of loads, stores, and modifies.
86//
87//    Instruction          Memory accesses                  Event sequence
88//    -----------          ---------------                  --------------
89//    add %eax, %ebx       No loads or stores               instr
90//
91//    movl (%eax), %ebx    loads (%eax)                     instr, load
92//
93//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94//
95//    incl (%ecx)          modifies (%ecx)                  instr, modify
96//
97//    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98//
99//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101//    movsw                loads (%esi), stores (%edi)      instr, load, store
102//
103// Instructions using x86 "rep" prefixes are traced as if they are repeated
104// N times.
105//
106// Lackey with --trace-mem gives good traces, but they are not perfect, for
107// the following reasons:
108//
109// - It does not trace into the OS kernel, so system calls and other kernel
110//   operations (eg. some scheduling and signal handling code) are ignored.
111//
112// - It could model loads and stores done at the system call boundary using
113//   the pre_mem_read/post_mem_write events.  For example, if you call
114//   fstat() you know that the passed in buffer has been written.  But it
115//   currently does not do this.
116//
117// - Valgrind replaces some code (not much) with its own, notably parts of
118//   code for scheduling operations and signal handling.  This code is not
119//   traced.
120//
121// - There is no consideration of virtual-to-physical address mapping.
122//   This may not matter for many purposes.
123//
124// - Valgrind modifies the instruction stream in some very minor ways.  For
125//   example, on x86 the bts, btc, btr instructions are incorrectly
126//   considered to always touch memory (this is a consequence of these
127//   instructions being very difficult to simulate).
128//
129// - Valgrind tools lay out memory differently to normal programs, so the
130//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
131//   tools) is suitable for getting relative memory traces -- eg. if you
132//   want to analyse locality of memory accesses -- but is not good if
133//   absolute addresses are important.
134//
135// Despite all these warnings, Lackey's results should be good enough for a
136// wide range of purposes.  For example, Cachegrind shares all the above
137// shortcomings and it is still useful.
138//
139// For further inspiration, you should look at cachegrind/cg_main.c which
140// uses the same basic technique for tracing memory accesses, but also groups
141// events together for processing into twos and threes so that fewer C calls
142// are made and things run faster.
143//
144// Specific Details about --trace-superblocks=yes
145// ----------------------------------------------
146// Valgrind splits code up into single entry, multiple exit blocks
147// known as superblocks.  By itself, --trace-superblocks=yes just
148// prints a message as each superblock is run:
149//
150//  SB 04013170
151//  SB 04013177
152//  SB 04013173
153//  SB 04013177
154//
155// The hex number is the address of the first instruction in the
156// superblock.  You can see the relationship more obviously if you use
157// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
158// message at address X is immediately followed by an "instr:" message
159// for that address, as the first instruction in the block is
160// executed, for example:
161//
162//  SB 04014073
163//  I  04014073,3
164//   L 7FEFFF7F8,8
165//  I  04014076,4
166//  I  0401407A,3
167//  I  0401407D,3
168//  I  04014080,3
169//  I  04014083,6
170
171
172#include "pub_tool_basics.h"
173#include "pub_tool_tooliface.h"
174#include "pub_tool_libcassert.h"
175#include "pub_tool_libcprint.h"
176#include "pub_tool_debuginfo.h"
177#include "pub_tool_libcbase.h"
178#include "pub_tool_options.h"
179#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180
181/*------------------------------------------------------------*/
182/*--- Command line options                                 ---*/
183/*------------------------------------------------------------*/
184
185/* Command line options controlling instrumentation kinds, as described at
186 * the top of this file. */
187static Bool clo_basic_counts    = True;
188static Bool clo_detailed_counts = False;
189static Bool clo_trace_mem       = False;
190static Bool clo_trace_sbs       = False;
191
192/* The name of the function of which the number of calls (under
193 * --basic-counts=yes) is to be counted, with default. Override with command
194 * line option --fnname. */
195static const HChar* clo_fnname = "main";
196
197static Bool lk_process_cmd_line_option(const HChar* arg)
198{
199   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
200   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
201   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
202   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
203   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
204   else
205      return False;
206
207   tl_assert(clo_fnname);
208   tl_assert(clo_fnname[0]);
209   return True;
210}
211
212static void lk_print_usage(void)
213{
214   VG_(printf)(
215"    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
216"    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217"    --trace-mem=no|yes        trace all loads and stores [no]\n"
218"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219"    --fnname=<name>           count calls to <name> (only used if\n"
220"                              --basic-count=yes)  [main]\n"
221   );
222}
223
224static void lk_print_debug_usage(void)
225{
226   VG_(printf)(
227"    (none)\n"
228   );
229}
230
231/*------------------------------------------------------------*/
232/*--- Stuff for --basic-counts                             ---*/
233/*------------------------------------------------------------*/
234
/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;   // calls to the function named by clo_fnname
static ULong n_SBs_entered   = 0;   // superblocks entered
static ULong n_SBs_completed = 0;   // superblocks that ran to completion
static ULong n_IRStmts       = 0;   // VEX IR statements executed
static ULong n_guest_instrs  = 0;   // guest instructions executed
static ULong n_Jccs          = 0;   // conditional branches executed
static ULong n_Jccs_untaken  = 0;   // ... of which, not taken
static ULong n_IJccs         = 0;   // inverted conditional branches executed
static ULong n_IJccs_untaken = 0;   // ... of which, not taken
245
246static void add_one_func_call(void)
247{
248   n_func_calls++;
249}
250
251static void add_one_SB_entered(void)
252{
253   n_SBs_entered++;
254}
255
256static void add_one_SB_completed(void)
257{
258   n_SBs_completed++;
259}
260
261static void add_one_IRStmt(void)
262{
263   n_IRStmts++;
264}
265
266static void add_one_guest_instr(void)
267{
268   n_guest_instrs++;
269}
270
271static void add_one_Jcc(void)
272{
273   n_Jccs++;
274}
275
276static void add_one_Jcc_untaken(void)
277{
278   n_Jccs_untaken++;
279}
280
281static void add_one_inverted_Jcc(void)
282{
283   n_IJccs++;
284}
285
286static void add_one_inverted_Jcc_untaken(void)
287{
288   n_IJccs_untaken++;
289}
290
291/*------------------------------------------------------------*/
292/*--- Stuff for --detailed-counts                          ---*/
293/*------------------------------------------------------------*/
294
295typedef
296   IRExpr
297   IRAtom;
298
299/* --- Operations --- */
300
301typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;
302
303#define N_OPS 3
304
305
306/* --- Types --- */
307
308#define N_TYPES 14
309
310static Int type2index ( IRType ty )
311{
312   switch (ty) {
313      case Ity_I1:      return 0;
314      case Ity_I8:      return 1;
315      case Ity_I16:     return 2;
316      case Ity_I32:     return 3;
317      case Ity_I64:     return 4;
318      case Ity_I128:    return 5;
319      case Ity_F32:     return 6;
320      case Ity_F64:     return 7;
321      case Ity_F128:    return 8;
322      case Ity_V128:    return 9;
323      case Ity_V256:    return 10;
324      case Ity_D32:     return 11;
325      case Ity_D64:     return 12;
326      case Ity_D128:    return 13;
327      default: tl_assert(0);
328   }
329}
330
331static const HChar* nameOfTypeIndex ( Int i )
332{
333   switch (i) {
334      case 0: return "I1";   break;
335      case 1: return "I8";   break;
336      case 2: return "I16";  break;
337      case 3: return "I32";  break;
338      case 4: return "I64";  break;
339      case 5: return "I128"; break;
340      case 6: return "F32";  break;
341      case 7: return "F64";  break;
342      case 8: return "F128";  break;
343      case 9: return "V128";  break;
344      case 10: return "V256"; break;
345      case 11: return "D32";  break;
346      case 12: return "D64";  break;
347      case 13: return "D128"; break;
348      default: tl_assert(0);
349   }
350}
351
352
353/* --- Counts --- */
354
355static ULong detailCounts[N_OPS][N_TYPES];
356
357/* The helper that is called from the instrumented code. */
358static VG_REGPARM(1)
359void increment_detail(ULong* detail)
360{
361   (*detail)++;
362}
363
364/* A helper that adds the instrumentation for a detail.  guard ::
365   Ity_I1 is the guarding condition for the event.  If NULL it is
366   assumed to mean "always True". */
367static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
368{
369   IRDirty* di;
370   IRExpr** argv;
371   const UInt typeIx = type2index(type);
372
373   tl_assert(op < N_OPS);
374   tl_assert(typeIx < N_TYPES);
375
376   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
377   di = unsafeIRDirty_0_N( 1, "increment_detail",
378                              VG_(fnptr_to_fnentry)( &increment_detail ),
379                              argv);
380   if (guard) di->guard = guard;
381   addStmtToIRSB( sb, IRStmt_Dirty(di) );
382}
383
384/* Summarize and print the details. */
385static void print_details ( void )
386{
387   Int typeIx;
388   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
389   VG_(umsg)("   -------------------------------------------\n");
390   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
391      VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
392                nameOfTypeIndex( typeIx ),
393                detailCounts[OpLoad ][typeIx],
394                detailCounts[OpStore][typeIx],
395                detailCounts[OpAlu  ][typeIx]
396      );
397   }
398}
399
400
401/*------------------------------------------------------------*/
402/*--- Stuff for --trace-mem                                ---*/
403/*------------------------------------------------------------*/
404
405#define MAX_DSIZE    512
406
407typedef
408   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
409   EventKind;
410
411typedef
412   struct {
413      EventKind  ekind;
414      IRAtom*    addr;
415      Int        size;
416      IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
417   }
418   Event;
419
420/* Up to this many unnotified events are allowed.  Must be at least two,
421   so that reads and writes to the same address can be merged into a modify.
422   Beyond that, larger numbers just potentially induce more spilling due to
423   extending live ranges of address temporaries. */
424#define N_EVENTS 4
425
426/* Maintain an ordered list of memory events which are outstanding, in
427   the sense that no IR has yet been generated to do the relevant
428   helper calls.  The SB is scanned top to bottom and memory events
429   are added to the end of the list, merging with the most recent
430   notified event where possible (Dw immediately following Dr and
431   having the same size and EA can be merged).
432
433   This merging is done so that for architectures which have
434   load-op-store instructions (x86, amd64), the instr is treated as if
435   it makes just one memory reference (a modify), rather than two (a
436   read followed by a write at the same address).
437
438   At various points the list will need to be flushed, that is, IR
439   generated from it.  That must happen before any possible exit from
440   the block (the end, or an IRStmt_Exit).  Flushing also takes place
441   when there is no space to add a new event, and before entering a
442   RMW (read-modify-write) section on processors supporting LL/SC.
443
444   If we require the simulation statistics to be up to date with
445   respect to possible memory exceptions, then the list would have to
446   be flushed before each memory reference.  That's a pain so we don't
447   bother.
448
449   Flushing the list consists of walking it start to end and emitting
450   instrumentation IR for each event, in the order in which they
451   appear. */
452
453static Event events[N_EVENTS];
454static Int   events_used = 0;
455
456
457static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
458{
459   VG_(printf)("I  %08lx,%lu\n", addr, size);
460}
461
462static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
463{
464   VG_(printf)(" L %08lx,%lu\n", addr, size);
465}
466
467static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
468{
469   VG_(printf)(" S %08lx,%lu\n", addr, size);
470}
471
472static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
473{
474   VG_(printf)(" M %08lx,%lu\n", addr, size);
475}
476
477
478static void flushEvents(IRSB* sb)
479{
480   Int        i;
481   const HChar* helperName;
482   void*      helperAddr;
483   IRExpr**   argv;
484   IRDirty*   di;
485   Event*     ev;
486
487   for (i = 0; i < events_used; i++) {
488
489      ev = &events[i];
490
491      // Decide on helper fn to call and args to pass it.
492      switch (ev->ekind) {
493         case Event_Ir: helperName = "trace_instr";
494                        helperAddr =  trace_instr;  break;
495
496         case Event_Dr: helperName = "trace_load";
497                        helperAddr =  trace_load;   break;
498
499         case Event_Dw: helperName = "trace_store";
500                        helperAddr =  trace_store;  break;
501
502         case Event_Dm: helperName = "trace_modify";
503                        helperAddr =  trace_modify; break;
504         default:
505            tl_assert(0);
506      }
507
508      // Add the helper.
509      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
510      di   = unsafeIRDirty_0_N( /*regparms*/2,
511                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
512                                argv );
513      if (ev->guard) {
514         di->guard = ev->guard;
515      }
516      addStmtToIRSB( sb, IRStmt_Dirty(di) );
517   }
518
519   events_used = 0;
520}
521
522// WARNING:  If you aren't interested in instruction reads, you can omit the
523// code that adds calls to trace_instr() in flushEvents().  However, you
524// must still call this function, addEvent_Ir() -- it is necessary to add
525// the Ir events to the events list so that merging of paired load/store
526// events into modify events works correctly.
527static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
528{
529   Event* evt;
530   tl_assert(clo_trace_mem);
531   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
532            || VG_CLREQ_SZB == isize );
533   if (events_used == N_EVENTS)
534      flushEvents(sb);
535   tl_assert(events_used >= 0 && events_used < N_EVENTS);
536   evt = &events[events_used];
537   evt->ekind = Event_Ir;
538   evt->addr  = iaddr;
539   evt->size  = isize;
540   evt->guard = NULL;
541   events_used++;
542}
543
544/* Add a guarded read event. */
545static
546void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
547{
548   Event* evt;
549   tl_assert(clo_trace_mem);
550   tl_assert(isIRAtom(daddr));
551   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
552   if (events_used == N_EVENTS)
553      flushEvents(sb);
554   tl_assert(events_used >= 0 && events_used < N_EVENTS);
555   evt = &events[events_used];
556   evt->ekind = Event_Dr;
557   evt->addr  = daddr;
558   evt->size  = dsize;
559   evt->guard = guard;
560   events_used++;
561}
562
563/* Add an ordinary read event, by adding a guarded read event with an
564   always-true guard. */
565static
566void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
567{
568   addEvent_Dr_guarded(sb, daddr, dsize, NULL);
569}
570
571/* Add a guarded write event. */
572static
573void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
574{
575   Event* evt;
576   tl_assert(clo_trace_mem);
577   tl_assert(isIRAtom(daddr));
578   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
579   if (events_used == N_EVENTS)
580      flushEvents(sb);
581   tl_assert(events_used >= 0 && events_used < N_EVENTS);
582   evt = &events[events_used];
583   evt->ekind = Event_Dw;
584   evt->addr  = daddr;
585   evt->size  = dsize;
586   evt->guard = guard;
587   events_used++;
588}
589
590/* Add an ordinary write event.  Try to merge it with an immediately
591   preceding ordinary read event of the same size to the same
592   address. */
593static
594void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
595{
596   Event* lastEvt;
597   Event* evt;
598   tl_assert(clo_trace_mem);
599   tl_assert(isIRAtom(daddr));
600   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
601
602   // Is it possible to merge this write with the preceding read?
603   lastEvt = &events[events_used-1];
604   if (events_used > 0
605       && lastEvt->ekind == Event_Dr
606       && lastEvt->size  == dsize
607       && lastEvt->guard == NULL
608       && eqIRAtom(lastEvt->addr, daddr))
609   {
610      lastEvt->ekind = Event_Dm;
611      return;
612   }
613
614   // No.  Add as normal.
615   if (events_used == N_EVENTS)
616      flushEvents(sb);
617   tl_assert(events_used >= 0 && events_used < N_EVENTS);
618   evt = &events[events_used];
619   evt->ekind = Event_Dw;
620   evt->size  = dsize;
621   evt->addr  = daddr;
622   evt->guard = NULL;
623   events_used++;
624}
625
626
627/*------------------------------------------------------------*/
628/*--- Stuff for --trace-superblocks                        ---*/
629/*------------------------------------------------------------*/
630
631static void trace_superblock(Addr addr)
632{
633   VG_(printf)("SB %08lx\n", addr);
634}
635
636
637/*------------------------------------------------------------*/
638/*--- Basic tool functions                                 ---*/
639/*------------------------------------------------------------*/
640
641static void lk_post_clo_init(void)
642{
643   Int op, tyIx;
644
645   if (clo_detailed_counts) {
646      for (op = 0; op < N_OPS; op++)
647         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
648            detailCounts[op][tyIx] = 0;
649   }
650}
651
652static
653IRSB* lk_instrument ( VgCallbackClosure* closure,
654                      IRSB* sbIn,
655                      const VexGuestLayout* layout,
656                      const VexGuestExtents* vge,
657                      const VexArchInfo* archinfo_host,
658                      IRType gWordTy, IRType hWordTy )
659{
660   IRDirty*   di;
661   Int        i;
662   IRSB*      sbOut;
663   IRTypeEnv* tyenv = sbIn->tyenv;
664   Addr       iaddr = 0, dst;
665   UInt       ilen = 0;
666   Bool       condition_inverted = False;
667
668   if (gWordTy != hWordTy) {
669      /* We don't currently support this case. */
670      VG_(tool_panic)("host/guest word size mismatch");
671   }
672
673   /* Set up SB */
674   sbOut = deepCopyIRSBExceptStmts(sbIn);
675
676   // Copy verbatim any IR preamble preceding the first IMark
677   i = 0;
678   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
679      addStmtToIRSB( sbOut, sbIn->stmts[i] );
680      i++;
681   }
682
683   if (clo_basic_counts) {
684      /* Count this superblock. */
685      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
686                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
687                                 mkIRExprVec_0() );
688      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
689   }
690
691   if (clo_trace_sbs) {
692      /* Print this superblock's address. */
693      di = unsafeIRDirty_0_N(
694              0, "trace_superblock",
695              VG_(fnptr_to_fnentry)( &trace_superblock ),
696              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
697           );
698      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
699   }
700
701   if (clo_trace_mem) {
702      events_used = 0;
703   }
704
705   for (/*use current i*/; i < sbIn->stmts_used; i++) {
706      IRStmt* st = sbIn->stmts[i];
707      if (!st || st->tag == Ist_NoOp) continue;
708
709      if (clo_basic_counts) {
710         /* Count one VEX statement. */
711         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
712                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
713                                    mkIRExprVec_0() );
714         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
715      }
716
717      switch (st->tag) {
718         case Ist_NoOp:
719         case Ist_AbiHint:
720         case Ist_Put:
721         case Ist_PutI:
722         case Ist_MBE:
723            addStmtToIRSB( sbOut, st );
724            break;
725
726         case Ist_IMark:
727            if (clo_basic_counts) {
728               /* Needed to be able to check for inverted condition in Ist_Exit */
729               iaddr = st->Ist.IMark.addr;
730               ilen  = st->Ist.IMark.len;
731
732               /* Count guest instruction. */
733               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
734                                          VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
735                                          mkIRExprVec_0() );
736               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
737
738               /* An unconditional branch to a known destination in the
739                * guest's instructions can be represented, in the IRSB to
740                * instrument, by the VEX statements that are the
741                * translation of that known destination. This feature is
742                * called 'SB chasing' and can be influenced by command
743                * line option --vex-guest-chase-thresh.
744                *
745                * To get an accurate count of the calls to a specific
746                * function, taking SB chasing into account, we need to
747                * check for each guest instruction (Ist_IMark) if it is
748                * the entry point of a function.
749                */
750               tl_assert(clo_fnname);
751               tl_assert(clo_fnname[0]);
752               const HChar *fnname;
753               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
754                                            &fnname)
755                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
756                  di = unsafeIRDirty_0_N(
757                          0, "add_one_func_call",
758                             VG_(fnptr_to_fnentry)( &add_one_func_call ),
759                             mkIRExprVec_0() );
760                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
761               }
762            }
763            if (clo_trace_mem) {
764               // WARNING: do not remove this function call, even if you
765               // aren't interested in instruction reads.  See the comment
766               // above the function itself for more detail.
767               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
768                            st->Ist.IMark.len );
769            }
770            addStmtToIRSB( sbOut, st );
771            break;
772
773         case Ist_WrTmp:
774            // Add a call to trace_load() if --trace-mem=yes.
775            if (clo_trace_mem) {
776               IRExpr* data = st->Ist.WrTmp.data;
777               if (data->tag == Iex_Load) {
778                  addEvent_Dr( sbOut, data->Iex.Load.addr,
779                               sizeofIRType(data->Iex.Load.ty) );
780               }
781            }
782            if (clo_detailed_counts) {
783               IRExpr* expr = st->Ist.WrTmp.data;
784               IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
785               tl_assert(type != Ity_INVALID);
786               switch (expr->tag) {
787                  case Iex_Load:
788                    instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
789                     break;
790                  case Iex_Unop:
791                  case Iex_Binop:
792                  case Iex_Triop:
793                  case Iex_Qop:
794                  case Iex_ITE:
795                     instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
796                     break;
797                  default:
798                     break;
799               }
800            }
801            addStmtToIRSB( sbOut, st );
802            break;
803
804         case Ist_Store: {
805            IRExpr* data = st->Ist.Store.data;
806            IRType  type = typeOfIRExpr(tyenv, data);
807            tl_assert(type != Ity_INVALID);
808            if (clo_trace_mem) {
809               addEvent_Dw( sbOut, st->Ist.Store.addr,
810                            sizeofIRType(type) );
811            }
812            if (clo_detailed_counts) {
813               instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
814            }
815            addStmtToIRSB( sbOut, st );
816            break;
817         }
818
819         case Ist_StoreG: {
820            IRStoreG* sg   = st->Ist.StoreG.details;
821            IRExpr*   data = sg->data;
822            IRType    type = typeOfIRExpr(tyenv, data);
823            tl_assert(type != Ity_INVALID);
824            if (clo_trace_mem) {
825               addEvent_Dw_guarded( sbOut, sg->addr,
826                                    sizeofIRType(type), sg->guard );
827            }
828            if (clo_detailed_counts) {
829               instrument_detail( sbOut, OpStore, type, sg->guard );
830            }
831            addStmtToIRSB( sbOut, st );
832            break;
833         }
834
835         case Ist_LoadG: {
836            IRLoadG* lg       = st->Ist.LoadG.details;
837            IRType   type     = Ity_INVALID; /* loaded type */
838            IRType   typeWide = Ity_INVALID; /* after implicit widening */
839            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
840            tl_assert(type != Ity_INVALID);
841            if (clo_trace_mem) {
842               addEvent_Dr_guarded( sbOut, lg->addr,
843                                    sizeofIRType(type), lg->guard );
844            }
845            if (clo_detailed_counts) {
846               instrument_detail( sbOut, OpLoad, type, lg->guard );
847            }
848            addStmtToIRSB( sbOut, st );
849            break;
850         }
851
852         case Ist_Dirty: {
853            if (clo_trace_mem) {
854               Int      dsize;
855               IRDirty* d = st->Ist.Dirty.details;
856               if (d->mFx != Ifx_None) {
857                  // This dirty helper accesses memory.  Collect the details.
858                  tl_assert(d->mAddr != NULL);
859                  tl_assert(d->mSize != 0);
860                  dsize = d->mSize;
861                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
862                     addEvent_Dr( sbOut, d->mAddr, dsize );
863                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
864                     addEvent_Dw( sbOut, d->mAddr, dsize );
865               } else {
866                  tl_assert(d->mAddr == NULL);
867                  tl_assert(d->mSize == 0);
868               }
869            }
870            addStmtToIRSB( sbOut, st );
871            break;
872         }
873
874         case Ist_CAS: {
875            /* We treat it as a read and a write of the location.  I
876               think that is the same behaviour as it was before IRCAS
877               was introduced, since prior to that point, the Vex
878               front ends would translate a lock-prefixed instruction
879               into a (normal) read followed by a (normal) write. */
880            Int    dataSize;
881            IRType dataTy;
882            IRCAS* cas = st->Ist.CAS.details;
883            tl_assert(cas->addr != NULL);
884            tl_assert(cas->dataLo != NULL);
885            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
886            dataSize = sizeofIRType(dataTy);
887            if (cas->dataHi != NULL)
888               dataSize *= 2; /* since it's a doubleword-CAS */
889            if (clo_trace_mem) {
890               addEvent_Dr( sbOut, cas->addr, dataSize );
891               addEvent_Dw( sbOut, cas->addr, dataSize );
892            }
893            if (clo_detailed_counts) {
894               instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
895               if (cas->dataHi != NULL) /* dcas */
896                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
897               instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
898               if (cas->dataHi != NULL) /* dcas */
899                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
900            }
901            addStmtToIRSB( sbOut, st );
902            break;
903         }
904
905         case Ist_LLSC: {
906            IRType dataTy;
907            if (st->Ist.LLSC.storedata == NULL) {
908               /* LL */
909               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
910               if (clo_trace_mem) {
911                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
912                                      sizeofIRType(dataTy) );
913                  /* flush events before LL, helps SC to succeed */
914                  flushEvents(sbOut);
915	       }
916               if (clo_detailed_counts)
917                  instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
918            } else {
919               /* SC */
920               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
921               if (clo_trace_mem)
922                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
923                                      sizeofIRType(dataTy) );
924               if (clo_detailed_counts)
925                  instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
926            }
927            addStmtToIRSB( sbOut, st );
928            break;
929         }
930
931         case Ist_Exit:
932            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall-through according to the client address
935               tl_assert(iaddr != 0);
936               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
937                                           st->Ist.Exit.dst->Ico.U64;
938               condition_inverted = (dst == iaddr + ilen);
939
940               /* Count Jcc */
941               if (!condition_inverted)
942                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
943                                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
944                                          mkIRExprVec_0() );
945               else
946                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
947                                          VG_(fnptr_to_fnentry)(
948                                             &add_one_inverted_Jcc ),
949                                          mkIRExprVec_0() );
950
951               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
952            }
953            if (clo_trace_mem) {
954               flushEvents(sbOut);
955            }
956
957            addStmtToIRSB( sbOut, st );      // Original statement
958
959            if (clo_basic_counts) {
960               /* Count non-taken Jcc */
961               if (!condition_inverted)
962                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
963                                          VG_(fnptr_to_fnentry)(
964                                             &add_one_Jcc_untaken ),
965                                          mkIRExprVec_0() );
966               else
967                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
968                                          VG_(fnptr_to_fnentry)(
969                                             &add_one_inverted_Jcc_untaken ),
970                                          mkIRExprVec_0() );
971
972               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
973            }
974            break;
975
976         default:
977            ppIRStmt(st);
978            tl_assert(0);
979      }
980   }
981
982   if (clo_basic_counts) {
983      /* Count this basic block. */
984      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
985                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
986                                 mkIRExprVec_0() );
987      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
988   }
989
990   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush any outstanding events. */
992      flushEvents(sbOut);
993   }
994
995   return sbOut;
996}
997
998static void lk_fini(Int exitcode)
999{
1000   tl_assert(clo_fnname);
1001   tl_assert(clo_fnname[0]);
1002
1003   if (clo_basic_counts) {
1004      ULong total_Jccs = n_Jccs + n_IJccs;
1005      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;
1006
1007      VG_(umsg)("Counted %'llu call%s to %s()\n",
1008                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);
1009
1010      VG_(umsg)("\n");
1011      VG_(umsg)("Jccs:\n");
1012      VG_(umsg)("  total:         %'llu\n", total_Jccs);
1013      VG_(umsg)("  taken:         %'llu (%.0f%%)\n",
1014                taken_Jccs, taken_Jccs * 100.0 / total_Jccs ?: 1);
1015
1016      VG_(umsg)("\n");
1017      VG_(umsg)("Executed:\n");
1018      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
1019      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
1020      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
1021      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);
1022
1023      VG_(umsg)("\n");
1024      VG_(umsg)("Ratios:\n");
1025      tl_assert(n_SBs_entered); // Paranoia time.
1026      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
1027         10 * n_guest_instrs / n_SBs_entered);
1028      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
1029         10 * n_IRStmts / n_SBs_entered);
1030      tl_assert(n_guest_instrs); // Paranoia time.
1031      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
1032         10 * n_IRStmts / n_guest_instrs);
1033   }
1034
1035   if (clo_detailed_counts) {
1036      VG_(umsg)("\n");
1037      VG_(umsg)("IR-level counts by type:\n");
1038      print_details();
1039   }
1040
1041   if (clo_basic_counts) {
1042      VG_(umsg)("\n");
1043      VG_(umsg)("Exit code:       %d\n", exitcode);
1044   }
1045}
1046
1047static void lk_pre_clo_init(void)
1048{
1049   VG_(details_name)            ("Lackey");
1050   VG_(details_version)         (NULL);
1051   VG_(details_description)     ("an example Valgrind tool");
1052   VG_(details_copyright_author)(
1053      "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote.");
1054   VG_(details_bug_reports_to)  (VG_BUGS_TO);
1055   VG_(details_avg_translation_sizeB) ( 200 );
1056
1057   VG_(basic_tool_funcs)          (lk_post_clo_init,
1058                                   lk_instrument,
1059                                   lk_fini);
1060   VG_(needs_command_line_options)(lk_process_cmd_line_option,
1061                                   lk_print_usage,
1062                                   lk_print_debug_usage);
1063}
1064
1065VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
1066
1067/*--------------------------------------------------------------------*/
1068/*--- end                                                lk_main.c ---*/
1069/*--------------------------------------------------------------------*/
1070