1
2/*--------------------------------------------------------------------*/
3/*--- Cachegrind: everything but the simulation itself.            ---*/
4/*---                                                    cg_main.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Cachegrind, a Valgrind tool for cache
9   profiling programs.
10
11   Copyright (C) 2002-2013 Nicholas Nethercote
12      njn@valgrind.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_tool_basics.h"
33#include "pub_tool_debuginfo.h"
34#include "pub_tool_libcbase.h"
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcfile.h"
37#include "pub_tool_libcprint.h"
38#include "pub_tool_libcproc.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_options.h"
41#include "pub_tool_oset.h"
42#include "pub_tool_tooliface.h"
43#include "pub_tool_xarray.h"
44#include "pub_tool_clientstate.h"
45#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
46
47#include "cg_arch.h"
48#include "cg_sim.c"
49#include "cg_branchpred.c"
50
51/*------------------------------------------------------------*/
52/*--- Constants                                            ---*/
53/*------------------------------------------------------------*/
54
55/* Set to 1 for very verbose debugging */
56#define DEBUG_CG 0
57
58/*------------------------------------------------------------*/
59/*--- Options                                              ---*/
60/*------------------------------------------------------------*/
61
62static Bool  clo_cache_sim  = True;  /* do cache simulation? */
63static Bool  clo_branch_sim = False; /* do branch simulation? */
64static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
65
66/*------------------------------------------------------------*/
67/*--- Cachesim configuration                               ---*/
68/*------------------------------------------------------------*/
69
70static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
71
72/*------------------------------------------------------------*/
73/*--- Types and Data Structures                            ---*/
74/*------------------------------------------------------------*/
75
76typedef
77   struct {
78      ULong a;  /* total # memory accesses of this kind */
79      ULong m1; /* misses in the first level cache */
80      ULong mL; /* misses in the second level cache */
81   }
82   CacheCC;
83
84typedef
85   struct {
86      ULong b;  /* total # branches of this kind */
87      ULong mp; /* number of branches mispredicted */
88   }
89   BranchCC;
90
91//------------------------------------------------------------
92// Primary data structure #1: CC table
93// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
94// - an ordered set of CCs.  CC indexing done by file/function/line (as
95//   determined from the instrAddr).
96// - Traversed for dumping stats at end in file/func/line hierarchy.
97
98typedef struct {
99   HChar* file;
100   const HChar* fn;
101   Int    line;
102}
103CodeLoc;
104
105typedef struct {
106   CodeLoc  loc; /* Source location that these counts pertain to */
107   CacheCC  Ir;  /* Insn read counts */
108   CacheCC  Dr;  /* Data read counts */
109   CacheCC  Dw;  /* Data write/modify counts */
110   BranchCC Bc;  /* Conditional branch counts */
111   BranchCC Bi;  /* Indirect branch counts */
112} LineCC;
113
114// First compare file, then fn, then line.
115static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
116{
117   Word res;
118   const CodeLoc* a = (const CodeLoc*)vloc;
119   const CodeLoc* b = &(((const LineCC*)vcc)->loc);
120
121   res = VG_(strcmp)(a->file, b->file);
122   if (0 != res)
123      return res;
124
125   res = VG_(strcmp)(a->fn, b->fn);
126   if (0 != res)
127      return res;
128
129   return a->line - b->line;
130}
131
132static OSet* CC_table;
133
134//------------------------------------------------------------
135// Primary data structure #2: InstrInfo table
136// - Holds the cached info about each instr that is used for simulation.
137// - table(SB_start_addr, list(InstrInfo))
138// - For each SB, each InstrInfo in the list holds info about the
139//   instruction (instrLen, instrAddr, etc), plus a pointer to its line
140//   CC.  This node is what's passed to the simulation function.
141// - When SBs are discarded the relevant list(instr_details) is freed.
142
143typedef struct _InstrInfo InstrInfo;
144struct _InstrInfo {
145   Addr    instr_addr;
146   UChar   instr_len;
147   LineCC* parent;         // parent line-CC
148};
149
150typedef struct _SB_info SB_info;
151struct _SB_info {
152   Addr      SB_addr;      // key;  MUST BE FIRST
153   Int       n_instrs;
154   InstrInfo instrs[0];
155};
156
157static OSet* instrInfoTable;
158
159//------------------------------------------------------------
160// Secondary data structure: string table
161// - holds strings, avoiding dups
162// - used for filenames and function names, each of which will be
163//   pointed to by one or more CCs.
164// - it also allows equality checks just by pointer comparison, which
165//   is good when printing the output file at the end.
166
167static OSet* stringTable;
168
169//------------------------------------------------------------
170// Stats
171static Int  distinct_files      = 0;
172static Int  distinct_fns        = 0;
173static Int  distinct_lines      = 0;
174static Int  distinct_instrsGen  = 0;
175static Int  distinct_instrsNoX  = 0;
176
177static Int  full_debugs         = 0;
178static Int  file_line_debugs    = 0;
179static Int  fn_debugs           = 0;
180static Int  no_debugs           = 0;
181
182/*------------------------------------------------------------*/
183/*--- String table operations                              ---*/
184/*------------------------------------------------------------*/
185
186static Word stringCmp( const void* key, const void* elem )
187{
188   return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
189}
190
191// Get a permanent string;  either pull it out of the string table if it's
192// been encountered before, or dup it and put it into the string table.
193static HChar* get_perm_string(const HChar* s)
194{
195   HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
196   if (s_ptr) {
197      return *s_ptr;
198   } else {
199      HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
200      *s_node = VG_(strdup)("cg.main.gps.1", s);
201      VG_(OSetGen_Insert)(stringTable, s_node);
202      return *s_node;
203   }
204}
205
206/*------------------------------------------------------------*/
207/*--- CC table operations                                  ---*/
208/*------------------------------------------------------------*/
209
210static void get_debug_info(Addr instr_addr, const HChar **dir,
211                           const HChar **file, const HChar **fn, UInt* line)
212{
213   Bool found_file_line = VG_(get_filename_linenum)(
214                             instr_addr,
215                             file, dir,
216                             line
217                          );
218   Bool found_fn        = VG_(get_fnname)(instr_addr, fn);
219
220   if (!found_file_line) {
221      *file = "???";
222      *line = 0;
223   }
224   if (!found_fn) {
225      *fn = "???";
226   }
227
228   if (found_file_line) {
229      if (found_fn) full_debugs++;
230      else          file_line_debugs++;
231   } else {
232      if (found_fn) fn_debugs++;
233      else          no_debugs++;
234   }
235}
236
237// Do a three step traversal: by file, then fn, then line.
238// Returns a pointer to the line CC, creates a new one if necessary.
239static LineCC* get_lineCC(Addr origAddr)
240{
241   const HChar *fn, *file, *dir;
242   UInt    line;
243   CodeLoc loc;
244   LineCC* lineCC;
245
246   get_debug_info(origAddr, &dir, &file, &fn, &line);
247
248   // Form an absolute pathname if a directory is available
249   HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
250
251   if (dir[0]) {
252      VG_(sprintf)(absfile, "%s/%s", dir, file);
253   } else {
254      VG_(sprintf)(absfile, "%s", file);
255   }
256
257   loc.file = absfile;
258   loc.fn   = fn;
259   loc.line = line;
260
261   lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
262   if (!lineCC) {
263      // Allocate and zero a new node.
264      lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
265      lineCC->loc.file = get_perm_string(loc.file);
266      lineCC->loc.fn   = get_perm_string(loc.fn);
267      lineCC->loc.line = loc.line;
268      lineCC->Ir.a     = 0;
269      lineCC->Ir.m1    = 0;
270      lineCC->Ir.mL    = 0;
271      lineCC->Dr.a     = 0;
272      lineCC->Dr.m1    = 0;
273      lineCC->Dr.mL    = 0;
274      lineCC->Dw.a     = 0;
275      lineCC->Dw.m1    = 0;
276      lineCC->Dw.mL    = 0;
277      lineCC->Bc.b     = 0;
278      lineCC->Bc.mp    = 0;
279      lineCC->Bi.b     = 0;
280      lineCC->Bi.mp    = 0;
281      VG_(OSetGen_Insert)(CC_table, lineCC);
282   }
283
284   return lineCC;
285}
286
287/*------------------------------------------------------------*/
288/*--- Cache simulation functions                           ---*/
289/*------------------------------------------------------------*/
290
291/* A common case for an instruction read event is that the
292 * bytes read belong to the same cache line in both L1I and LL
293 * (if cache line sizes of L1 and LL are the same).
294 * As this can be detected at instrumentation time, and results
295 * in faster simulation, special-casing is benefical.
296 *
297 * Abbrevations used in var/function names:
298 *  IrNoX - instruction read does not cross cache lines
299 *  IrGen - generic instruction read; not detected as IrNoX
300 *  Ir    - not known / not important whether it is an IrNoX
301 */
302
303// Only used with --cache-sim=no.
304static VG_REGPARM(1)
305void log_1Ir(InstrInfo* n)
306{
307   n->parent->Ir.a++;
308}
309
310// Only used with --cache-sim=no.
311static VG_REGPARM(2)
312void log_2Ir(InstrInfo* n, InstrInfo* n2)
313{
314   n->parent->Ir.a++;
315   n2->parent->Ir.a++;
316}
317
318// Only used with --cache-sim=no.
319static VG_REGPARM(3)
320void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
321{
322   n->parent->Ir.a++;
323   n2->parent->Ir.a++;
324   n3->parent->Ir.a++;
325}
326
327// Generic case for instruction reads: may cross cache lines.
328// All other Ir handlers expect IrNoX instruction reads.
329static VG_REGPARM(1)
330void log_1IrGen_0D_cache_access(InstrInfo* n)
331{
332   //VG_(printf)("1IrGen_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
333   //             n, n->instr_addr, n->instr_len);
334   cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
335			 &n->parent->Ir.m1, &n->parent->Ir.mL);
336   n->parent->Ir.a++;
337}
338
339static VG_REGPARM(1)
340void log_1IrNoX_0D_cache_access(InstrInfo* n)
341{
342   //VG_(printf)("1IrNoX_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
343   //             n, n->instr_addr, n->instr_len);
344   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
345			 &n->parent->Ir.m1, &n->parent->Ir.mL);
346   n->parent->Ir.a++;
347}
348
349static VG_REGPARM(2)
350void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
351{
352   //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
353   //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
354   //            n,  n->instr_addr,  n->instr_len,
355   //            n2, n2->instr_addr, n2->instr_len);
356   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
357			 &n->parent->Ir.m1, &n->parent->Ir.mL);
358   n->parent->Ir.a++;
359   cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
360			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
361   n2->parent->Ir.a++;
362}
363
364static VG_REGPARM(3)
365void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
366{
367   //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
368   //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
369   //            "            CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
370   //            n,  n->instr_addr,  n->instr_len,
371   //            n2, n2->instr_addr, n2->instr_len,
372   //            n3, n3->instr_addr, n3->instr_len);
373   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
374			 &n->parent->Ir.m1, &n->parent->Ir.mL);
375   n->parent->Ir.a++;
376   cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
377			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
378   n2->parent->Ir.a++;
379   cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
380			 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
381   n3->parent->Ir.a++;
382}
383
384static VG_REGPARM(3)
385void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
386{
387   //VG_(printf)("1IrNoX_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
388   //            "                               daddr=0x%010lx,  dsize=%lu\n",
389   //            n, n->instr_addr, n->instr_len, data_addr, data_size);
390   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
391			 &n->parent->Ir.m1, &n->parent->Ir.mL);
392   n->parent->Ir.a++;
393
394   cachesim_D1_doref(data_addr, data_size,
395                     &n->parent->Dr.m1, &n->parent->Dr.mL);
396   n->parent->Dr.a++;
397}
398
399static VG_REGPARM(3)
400void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
401{
402   //VG_(printf)("1IrNoX_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
403   //            "                               daddr=0x%010lx,  dsize=%lu\n",
404   //            n, n->instr_addr, n->instr_len, data_addr, data_size);
405   cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
406			 &n->parent->Ir.m1, &n->parent->Ir.mL);
407   n->parent->Ir.a++;
408
409   cachesim_D1_doref(data_addr, data_size,
410                     &n->parent->Dw.m1, &n->parent->Dw.mL);
411   n->parent->Dw.a++;
412}
413
414/* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
415   and log_0Ir_1Dw_cache_access have exactly the same prototype.  If
416   you change them, you must change addEvent_D_guarded too. */
417static VG_REGPARM(3)
418void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
419{
420   //VG_(printf)("0Ir_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
421   //            n, data_addr, data_size);
422   cachesim_D1_doref(data_addr, data_size,
423                     &n->parent->Dr.m1, &n->parent->Dr.mL);
424   n->parent->Dr.a++;
425}
426
427/* See comment on log_0Ir_1Dr_cache_access. */
428static VG_REGPARM(3)
429void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
430{
431   //VG_(printf)("0Ir_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
432   //            n, data_addr, data_size);
433   cachesim_D1_doref(data_addr, data_size,
434                     &n->parent->Dw.m1, &n->parent->Dw.mL);
435   n->parent->Dw.a++;
436}
437
438/* For branches, we consult two different predictors, one which
439   predicts taken/untaken for conditional branches, and the other
440   which predicts the branch target address for indirect branches
441   (jump-to-register style ones). */
442
443static VG_REGPARM(2)
444void log_cond_branch(InstrInfo* n, Word taken)
445{
446   //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
447   //             n, taken);
448   n->parent->Bc.b++;
449   n->parent->Bc.mp
450      += (1 & do_cond_branch_predict(n->instr_addr, taken));
451}
452
453static VG_REGPARM(2)
454void log_ind_branch(InstrInfo* n, UWord actual_dst)
455{
456   //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
457   //             n, actual_dst);
458   n->parent->Bi.b++;
459   n->parent->Bi.mp
460      += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
461}
462
463
464/*------------------------------------------------------------*/
465/*--- Instrumentation types and structures                 ---*/
466/*------------------------------------------------------------*/
467
468/* Maintain an ordered list of memory events which are outstanding, in
469   the sense that no IR has yet been generated to do the relevant
470   helper calls.  The BB is scanned top to bottom and memory events
471   are added to the end of the list, merging with the most recent
472   notified event where possible (Dw immediately following Dr and
473   having the same size and EA can be merged).
474
475   This merging is done so that for architectures which have
476   load-op-store instructions (x86, amd64), the insn is treated as if
477   it makes just one memory reference (a modify), rather than two (a
478   read followed by a write at the same address).
479
480   At various points the list will need to be flushed, that is, IR
481   generated from it.  That must happen before any possible exit from
482   the block (the end, or an IRStmt_Exit).  Flushing also takes place
483   when there is no space to add a new event.
484
485   If we require the simulation statistics to be up to date with
486   respect to possible memory exceptions, then the list would have to
487   be flushed before each memory reference.  That would however lose
488   performance by inhibiting event-merging during flushing.
489
490   Flushing the list consists of walking it start to end and emitting
491   instrumentation IR for each event, in the order in which they
492   appear.  It may be possible to emit a single call for two adjacent
493   events in order to reduce the number of helper function calls made.
494   For example, it could well be profitable to handle two adjacent Ir
495   events with a single helper call.  */
496
497typedef
498   IRExpr
499   IRAtom;
500
501typedef
502   enum {
503      Ev_IrNoX,  // Instruction read not crossing cache lines
504      Ev_IrGen,  // Generic Ir, not being detected as IrNoX
505      Ev_Dr,     // Data read
506      Ev_Dw,     // Data write
507      Ev_Dm,     // Data modify (read then write)
508      Ev_Bc,     // branch conditional
509      Ev_Bi      // branch indirect (to unknown destination)
510   }
511   EventTag;
512
513typedef
514   struct {
515      EventTag   tag;
516      InstrInfo* inode;
517      union {
518         struct {
519         } IrGen;
520         struct {
521         } IrNoX;
522         struct {
523            IRAtom* ea;
524            Int     szB;
525         } Dr;
526         struct {
527            IRAtom* ea;
528            Int     szB;
529         } Dw;
530         struct {
531            IRAtom* ea;
532            Int     szB;
533         } Dm;
534         struct {
535            IRAtom* taken; /* :: Ity_I1 */
536         } Bc;
537         struct {
538            IRAtom* dst;
539         } Bi;
540      } Ev;
541   }
542   Event;
543
544static void init_Event ( Event* ev ) {
545   VG_(memset)(ev, 0, sizeof(Event));
546}
547
548static IRAtom* get_Event_dea ( Event* ev ) {
549   switch (ev->tag) {
550      case Ev_Dr: return ev->Ev.Dr.ea;
551      case Ev_Dw: return ev->Ev.Dw.ea;
552      case Ev_Dm: return ev->Ev.Dm.ea;
553      default:    tl_assert(0);
554   }
555}
556
557static Int get_Event_dszB ( Event* ev ) {
558   switch (ev->tag) {
559      case Ev_Dr: return ev->Ev.Dr.szB;
560      case Ev_Dw: return ev->Ev.Dw.szB;
561      case Ev_Dm: return ev->Ev.Dm.szB;
562      default:    tl_assert(0);
563   }
564}
565
566
567/* Up to this many unnotified events are allowed.  Number is
568   arbitrary.  Larger numbers allow more event merging to occur, but
569   potentially induce more spilling due to extending live ranges of
570   address temporaries. */
571#define N_EVENTS 16
572
573
574/* A struct which holds all the running state during instrumentation.
575   Mostly to avoid passing loads of parameters everywhere. */
576typedef
577   struct {
578      /* The current outstanding-memory-event list. */
579      Event events[N_EVENTS];
580      Int   events_used;
581
582      /* The array of InstrInfo bins for the BB. */
583      SB_info* sbInfo;
584
585      /* Number InstrInfo bins 'used' so far. */
586      Int sbInfo_i;
587
588      /* The output SB being constructed. */
589      IRSB* sbOut;
590   }
591   CgState;
592
593
594/*------------------------------------------------------------*/
595/*--- Instrumentation main                                 ---*/
596/*------------------------------------------------------------*/
597
598// Note that origAddr is the real origAddr, not the address of the first
599// instruction in the block (they can be different due to redirection).
600static
601SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
602{
603   Int      i, n_instrs;
604   IRStmt*  st;
605   SB_info* sbInfo;
606
607   // Count number of original instrs in SB
608   n_instrs = 0;
609   for (i = 0; i < sbIn->stmts_used; i++) {
610      st = sbIn->stmts[i];
611      if (Ist_IMark == st->tag) n_instrs++;
612   }
613
614   // Check that we don't have an entry for this BB in the instr-info table.
615   // If this assertion fails, there has been some screwup:  some
616   // translations must have been discarded but Cachegrind hasn't discarded
617   // the corresponding entries in the instr-info table.
618   sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
619   tl_assert(NULL == sbInfo);
620
621   // BB never translated before (at this address, at least;  could have
622   // been unloaded and then reloaded elsewhere in memory)
623   sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
624                                sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
625   sbInfo->SB_addr  = origAddr;
626   sbInfo->n_instrs = n_instrs;
627   VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
628
629   return sbInfo;
630}
631
632
633static void showEvent ( Event* ev )
634{
635   switch (ev->tag) {
636      case Ev_IrGen:
637         VG_(printf)("IrGen %p\n", ev->inode);
638         break;
639      case Ev_IrNoX:
640         VG_(printf)("IrNoX %p\n", ev->inode);
641         break;
642      case Ev_Dr:
643         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
644         ppIRExpr(ev->Ev.Dr.ea);
645         VG_(printf)("\n");
646         break;
647      case Ev_Dw:
648         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
649         ppIRExpr(ev->Ev.Dw.ea);
650         VG_(printf)("\n");
651         break;
652      case Ev_Dm:
653         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
654         ppIRExpr(ev->Ev.Dm.ea);
655         VG_(printf)("\n");
656         break;
657      case Ev_Bc:
658         VG_(printf)("Bc %p   GA=", ev->inode);
659         ppIRExpr(ev->Ev.Bc.taken);
660         VG_(printf)("\n");
661         break;
662      case Ev_Bi:
663         VG_(printf)("Bi %p  DST=", ev->inode);
664         ppIRExpr(ev->Ev.Bi.dst);
665         VG_(printf)("\n");
666         break;
667      default:
668         tl_assert(0);
669         break;
670   }
671}
672
673// Reserve and initialise an InstrInfo for the first mention of a new insn.
674static
675InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
676{
677   InstrInfo* i_node;
678   tl_assert(cgs->sbInfo_i >= 0);
679   tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
680   i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
681   i_node->instr_addr = instr_addr;
682   i_node->instr_len  = instr_len;
683   i_node->parent     = get_lineCC(instr_addr);
684   cgs->sbInfo_i++;
685   return i_node;
686}
687
688
689/* Generate code for all outstanding memory events, and mark the queue
690   empty.  Code is generated into cgs->bbOut, and this activity
691   'consumes' slots in cgs->sbInfo. */
692
693static void flushEvents ( CgState* cgs )
694{
695   Int        i, regparms;
696   const HChar* helperName;
697   void*      helperAddr;
698   IRExpr**   argv;
699   IRExpr*    i_node_expr;
700   IRDirty*   di;
701   Event*     ev;
702   Event*     ev2;
703   Event*     ev3;
704
705   i = 0;
706   while (i < cgs->events_used) {
707
708      helperName = NULL;
709      helperAddr = NULL;
710      argv       = NULL;
711      regparms   = 0;
712
713      /* generate IR to notify event i and possibly the ones
714         immediately following it. */
715      tl_assert(i >= 0 && i < cgs->events_used);
716
717      ev  = &cgs->events[i];
718      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
719      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
720
721      if (DEBUG_CG) {
722         VG_(printf)("   flush ");
723         showEvent( ev );
724      }
725
726      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
727
728      /* Decide on helper fn to call and args to pass it, and advance
729         i appropriately. */
730      switch (ev->tag) {
731         case Ev_IrNoX:
732            /* Merge an IrNoX with a following Dr/Dm. */
733            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
734               /* Why is this true?  It's because we're merging an Ir
735                  with a following Dr or Dm.  The Ir derives from the
736                  instruction's IMark and the Dr/Dm from data
737                  references which follow it.  In short it holds
738                  because each insn starts with an IMark, hence an
739                  Ev_Ir, and so these Dr/Dm must pertain to the
740                  immediately preceding Ir.  Same applies to analogous
741                  assertions in the subsequent cases. */
742               tl_assert(ev2->inode == ev->inode);
743               helperName = "log_1IrNoX_1Dr_cache_access";
744               helperAddr = &log_1IrNoX_1Dr_cache_access;
745               argv = mkIRExprVec_3( i_node_expr,
746                                     get_Event_dea(ev2),
747                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
748               regparms = 3;
749               i += 2;
750            }
751            /* Merge an IrNoX with a following Dw. */
752            else
753            if (ev2 && ev2->tag == Ev_Dw) {
754               tl_assert(ev2->inode == ev->inode);
755               helperName = "log_1IrNoX_1Dw_cache_access";
756               helperAddr = &log_1IrNoX_1Dw_cache_access;
757               argv = mkIRExprVec_3( i_node_expr,
758                                     get_Event_dea(ev2),
759                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
760               regparms = 3;
761               i += 2;
762            }
763            /* Merge an IrNoX with two following IrNoX's. */
764            else
765            if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
766            {
767               if (clo_cache_sim) {
768                  helperName = "log_3IrNoX_0D_cache_access";
769                  helperAddr = &log_3IrNoX_0D_cache_access;
770               } else {
771                  helperName = "log_3Ir";
772                  helperAddr = &log_3Ir;
773               }
774               argv = mkIRExprVec_3( i_node_expr,
775                                     mkIRExpr_HWord( (HWord)ev2->inode ),
776                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
777               regparms = 3;
778               i += 3;
779            }
780            /* Merge an IrNoX with one following IrNoX. */
781            else
782            if (ev2 && ev2->tag == Ev_IrNoX) {
783               if (clo_cache_sim) {
784                  helperName = "log_2IrNoX_0D_cache_access";
785                  helperAddr = &log_2IrNoX_0D_cache_access;
786               } else {
787                  helperName = "log_2Ir";
788                  helperAddr = &log_2Ir;
789               }
790               argv = mkIRExprVec_2( i_node_expr,
791                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
792               regparms = 2;
793               i += 2;
794            }
795            /* No merging possible; emit as-is. */
796            else {
797               if (clo_cache_sim) {
798                  helperName = "log_1IrNoX_0D_cache_access";
799                  helperAddr = &log_1IrNoX_0D_cache_access;
800               } else {
801                  helperName = "log_1Ir";
802                  helperAddr = &log_1Ir;
803               }
804               argv = mkIRExprVec_1( i_node_expr );
805               regparms = 1;
806               i++;
807            }
808            break;
809         case Ev_IrGen:
810            if (clo_cache_sim) {
811	       helperName = "log_1IrGen_0D_cache_access";
812	       helperAddr = &log_1IrGen_0D_cache_access;
813	    } else {
814	       helperName = "log_1Ir";
815	       helperAddr = &log_1Ir;
816	    }
817	    argv = mkIRExprVec_1( i_node_expr );
818	    regparms = 1;
819	    i++;
820            break;
821         case Ev_Dr:
822         case Ev_Dm:
823            /* Data read or modify */
824            helperName = "log_0Ir_1Dr_cache_access";
825            helperAddr = &log_0Ir_1Dr_cache_access;
826            argv = mkIRExprVec_3( i_node_expr,
827                                  get_Event_dea(ev),
828                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
829            regparms = 3;
830            i++;
831            break;
832         case Ev_Dw:
833            /* Data write */
834            helperName = "log_0Ir_1Dw_cache_access";
835            helperAddr = &log_0Ir_1Dw_cache_access;
836            argv = mkIRExprVec_3( i_node_expr,
837                                  get_Event_dea(ev),
838                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
839            regparms = 3;
840            i++;
841            break;
842         case Ev_Bc:
843            /* Conditional branch */
844            helperName = "log_cond_branch";
845            helperAddr = &log_cond_branch;
846            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
847            regparms = 2;
848            i++;
849            break;
850         case Ev_Bi:
851            /* Branch to an unknown destination */
852            helperName = "log_ind_branch";
853            helperAddr = &log_ind_branch;
854            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
855            regparms = 2;
856            i++;
857            break;
858         default:
859            tl_assert(0);
860      }
861
862      /* Add the helper. */
863      tl_assert(helperName);
864      tl_assert(helperAddr);
865      tl_assert(argv);
866      di = unsafeIRDirty_0_N( regparms,
867                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
868                              argv );
869      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
870   }
871
872   cgs->events_used = 0;
873}
874
875static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
876{
877   Event* evt;
878   if (cgs->events_used == N_EVENTS)
879      flushEvents(cgs);
880   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
881   evt = &cgs->events[cgs->events_used];
882   init_Event(evt);
883   evt->inode    = inode;
884   if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
885      evt->tag = Ev_IrNoX;
886      distinct_instrsNoX++;
887   } else {
888      evt->tag = Ev_IrGen;
889      distinct_instrsGen++;
890   }
891   cgs->events_used++;
892}
893
894static
895void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
896{
897   Event* evt;
898   tl_assert(isIRAtom(ea));
899   tl_assert(datasize >= 1 && datasize <= min_line_size);
900   if (!clo_cache_sim)
901      return;
902   if (cgs->events_used == N_EVENTS)
903      flushEvents(cgs);
904   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
905   evt = &cgs->events[cgs->events_used];
906   init_Event(evt);
907   evt->tag       = Ev_Dr;
908   evt->inode     = inode;
909   evt->Ev.Dr.szB = datasize;
910   evt->Ev.Dr.ea  = ea;
911   cgs->events_used++;
912}
913
914static
915void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
916{
917   Event* lastEvt;
918   Event* evt;
919
920   tl_assert(isIRAtom(ea));
921   tl_assert(datasize >= 1 && datasize <= min_line_size);
922
923   if (!clo_cache_sim)
924      return;
925
926   /* Is it possible to merge this write with the preceding read? */
927   lastEvt = &cgs->events[cgs->events_used-1];
928   if (cgs->events_used > 0
929       && lastEvt->tag       == Ev_Dr
930       && lastEvt->Ev.Dr.szB == datasize
931       && lastEvt->inode     == inode
932       && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
933   {
934      lastEvt->tag   = Ev_Dm;
935      return;
936   }
937
938   /* No.  Add as normal. */
939   if (cgs->events_used == N_EVENTS)
940      flushEvents(cgs);
941   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
942   evt = &cgs->events[cgs->events_used];
943   init_Event(evt);
944   evt->tag       = Ev_Dw;
945   evt->inode     = inode;
946   evt->Ev.Dw.szB = datasize;
947   evt->Ev.Dw.ea  = ea;
948   cgs->events_used++;
949}
950
951static
952void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
953                          Int datasize, IRAtom* ea, IRAtom* guard,
954                          Bool isWrite )
955{
956   tl_assert(isIRAtom(ea));
957   tl_assert(guard);
958   tl_assert(isIRAtom(guard));
959   tl_assert(datasize >= 1 && datasize <= min_line_size);
960
961   if (!clo_cache_sim)
962      return;
963
964   /* Adding guarded memory actions and merging them with the existing
965      queue is too complex.  Simply flush the queue and add this
966      action immediately.  Since guarded loads and stores are pretty
967      rare, this is not thought likely to cause any noticeable
968      performance loss as a result of the loss of event-merging
969      opportunities. */
970   tl_assert(cgs->events_used >= 0);
971   flushEvents(cgs);
972   tl_assert(cgs->events_used == 0);
973   /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
974   IRExpr*      i_node_expr;
975   const HChar* helperName;
976   void*        helperAddr;
977   IRExpr**     argv;
978   Int          regparms;
979   IRDirty*     di;
980   i_node_expr = mkIRExpr_HWord( (HWord)inode );
981   helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
982                         : "log_0Ir_1Dr_cache_access";
983   helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
984                         : &log_0Ir_1Dr_cache_access;
985   argv        = mkIRExprVec_3( i_node_expr,
986                                ea, mkIRExpr_HWord( datasize ) );
987   regparms    = 3;
988   di          = unsafeIRDirty_0_N(
989                    regparms,
990                    helperName, VG_(fnptr_to_fnentry)( helperAddr ),
991                    argv );
992   di->guard = guard;
993   addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
994}
995
996
997static
998void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
999{
1000   Event* evt;
1001   tl_assert(isIRAtom(guard));
1002   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1003             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1004   if (!clo_branch_sim)
1005      return;
1006   if (cgs->events_used == N_EVENTS)
1007      flushEvents(cgs);
1008   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1009   evt = &cgs->events[cgs->events_used];
1010   init_Event(evt);
1011   evt->tag         = Ev_Bc;
1012   evt->inode       = inode;
1013   evt->Ev.Bc.taken = guard;
1014   cgs->events_used++;
1015}
1016
1017static
1018void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1019{
1020   Event* evt;
1021   tl_assert(isIRAtom(whereTo));
1022   tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1023             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1024   if (!clo_branch_sim)
1025      return;
1026   if (cgs->events_used == N_EVENTS)
1027      flushEvents(cgs);
1028   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1029   evt = &cgs->events[cgs->events_used];
1030   init_Event(evt);
1031   evt->tag       = Ev_Bi;
1032   evt->inode     = inode;
1033   evt->Ev.Bi.dst = whereTo;
1034   cgs->events_used++;
1035}
1036
1037////////////////////////////////////////////////////////////
1038
1039
1040static
1041IRSB* cg_instrument ( VgCallbackClosure* closure,
1042                      IRSB* sbIn,
1043                      const VexGuestLayout* layout,
1044                      const VexGuestExtents* vge,
1045                      const VexArchInfo* archinfo_host,
1046                      IRType gWordTy, IRType hWordTy )
1047{
1048   Int        i;
1049   UInt       isize;
1050   IRStmt*    st;
1051   Addr       cia; /* address of current insn */
1052   CgState    cgs;
1053   IRTypeEnv* tyenv = sbIn->tyenv;
1054   InstrInfo* curr_inode = NULL;
1055
1056   if (gWordTy != hWordTy) {
1057      /* We don't currently support this case. */
1058      VG_(tool_panic)("host/guest word size mismatch");
1059   }
1060
1061   // Set up new SB
1062   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
1063
1064   // Copy verbatim any IR preamble preceding the first IMark
1065   i = 0;
1066   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1067      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
1068      i++;
1069   }
1070
1071   // Get the first statement, and initial cia from it
1072   tl_assert(sbIn->stmts_used > 0);
1073   tl_assert(i < sbIn->stmts_used);
1074   st = sbIn->stmts[i];
1075   tl_assert(Ist_IMark == st->tag);
1076
1077   cia   = st->Ist.IMark.addr;
1078   isize = st->Ist.IMark.len;
1079   // If Vex fails to decode an instruction, the size will be zero.
1080   // Pretend otherwise.
1081   if (isize == 0) isize = VG_MIN_INSTR_SZB;
1082
1083   // Set up running state and get block info
1084   tl_assert(closure->readdr == vge->base[0]);
1085   cgs.events_used = 0;
1086   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
1087   cgs.sbInfo_i    = 0;
1088
1089   if (DEBUG_CG)
1090      VG_(printf)("\n\n---------- cg_instrument ----------\n");
1091
1092   // Traverse the block, initialising inodes, adding events and flushing as
1093   // necessary.
1094   for (/*use current i*/; i < sbIn->stmts_used; i++) {
1095
1096      st = sbIn->stmts[i];
1097      tl_assert(isFlatIRStmt(st));
1098
1099      switch (st->tag) {
1100         case Ist_NoOp:
1101         case Ist_AbiHint:
1102         case Ist_Put:
1103         case Ist_PutI:
1104         case Ist_MBE:
1105            break;
1106
1107         case Ist_IMark:
1108            cia   = st->Ist.IMark.addr;
1109            isize = st->Ist.IMark.len;
1110
1111            // If Vex fails to decode an instruction, the size will be zero.
1112            // Pretend otherwise.
1113            if (isize == 0) isize = VG_MIN_INSTR_SZB;
1114
1115            // Sanity-check size.
1116            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1117                     || VG_CLREQ_SZB == isize );
1118
1119            // Get space for and init the inode, record it as the current one.
1120            // Subsequent Dr/Dw/Dm events from the same instruction will
1121            // also use it.
1122            curr_inode = setup_InstrInfo(&cgs, cia, isize);
1123
1124            addEvent_Ir( &cgs, curr_inode );
1125            break;
1126
1127         case Ist_WrTmp: {
1128            IRExpr* data = st->Ist.WrTmp.data;
1129            if (data->tag == Iex_Load) {
1130               IRExpr* aexpr = data->Iex.Load.addr;
1131               // Note also, endianness info is ignored.  I guess
1132               // that's not interesting.
1133               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1134                                  aexpr );
1135            }
1136            break;
1137         }
1138
1139         case Ist_Store: {
1140            IRExpr* data  = st->Ist.Store.data;
1141            IRExpr* aexpr = st->Ist.Store.addr;
1142            addEvent_Dw( &cgs, curr_inode,
1143                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
1144            break;
1145         }
1146
1147         case Ist_StoreG: {
1148            IRStoreG* sg   = st->Ist.StoreG.details;
1149            IRExpr*   data = sg->data;
1150            IRExpr*   addr = sg->addr;
1151            IRType    type = typeOfIRExpr(tyenv, data);
1152            tl_assert(type != Ity_INVALID);
1153            addEvent_D_guarded( &cgs, curr_inode,
1154                                sizeofIRType(type), addr, sg->guard,
1155                                True/*isWrite*/ );
1156            break;
1157         }
1158
1159         case Ist_LoadG: {
1160            IRLoadG* lg       = st->Ist.LoadG.details;
1161            IRType   type     = Ity_INVALID; /* loaded type */
1162            IRType   typeWide = Ity_INVALID; /* after implicit widening */
1163            IRExpr*  addr     = lg->addr;
1164            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1165            tl_assert(type != Ity_INVALID);
1166            addEvent_D_guarded( &cgs, curr_inode,
1167                                sizeofIRType(type), addr, lg->guard,
1168                                False/*!isWrite*/ );
1169            break;
1170         }
1171
1172         case Ist_Dirty: {
1173            Int      dataSize;
1174            IRDirty* d = st->Ist.Dirty.details;
1175            if (d->mFx != Ifx_None) {
1176               /* This dirty helper accesses memory.  Collect the details. */
1177               tl_assert(d->mAddr != NULL);
1178               tl_assert(d->mSize != 0);
1179               dataSize = d->mSize;
1180               // Large (eg. 28B, 108B, 512B on x86) data-sized
1181               // instructions will be done inaccurately, but they're
1182               // very rare and this avoids errors from hitting more
1183               // than two cache lines in the simulation.
1184               if (dataSize > min_line_size)
1185                  dataSize = min_line_size;
1186               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1187                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
1188               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1189                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
1190            } else {
1191               tl_assert(d->mAddr == NULL);
1192               tl_assert(d->mSize == 0);
1193            }
1194            break;
1195         }
1196
1197         case Ist_CAS: {
1198            /* We treat it as a read and a write of the location.  I
1199               think that is the same behaviour as it was before IRCAS
1200               was introduced, since prior to that point, the Vex
1201               front ends would translate a lock-prefixed instruction
1202               into a (normal) read followed by a (normal) write. */
1203            Int    dataSize;
1204            IRCAS* cas = st->Ist.CAS.details;
1205            tl_assert(cas->addr != NULL);
1206            tl_assert(cas->dataLo != NULL);
1207            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1208            if (cas->dataHi != NULL)
1209               dataSize *= 2; /* since it's a doubleword-CAS */
1210            /* I don't think this can ever happen, but play safe. */
1211            if (dataSize > min_line_size)
1212               dataSize = min_line_size;
1213            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1214            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1215            break;
1216         }
1217
1218         case Ist_LLSC: {
1219            IRType dataTy;
1220            if (st->Ist.LLSC.storedata == NULL) {
1221               /* LL */
1222               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1223               addEvent_Dr( &cgs, curr_inode,
1224                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
1225               /* flush events before LL, should help SC to succeed */
1226               flushEvents( &cgs );
1227            } else {
1228               /* SC */
1229               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1230               addEvent_Dw( &cgs, curr_inode,
1231                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
1232            }
1233            break;
1234         }
1235
1236         case Ist_Exit: {
1237            // call branch predictor only if this is a branch in guest code
1238            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1239                 (st->Ist.Exit.jk == Ijk_Call) ||
1240                 (st->Ist.Exit.jk == Ijk_Ret) )
1241            {
1242               /* Stuff to widen the guard expression to a host word, so
1243                  we can pass it to the branch predictor simulation
1244                  functions easily. */
1245               Bool     inverted;
1246               Addr     nia, sea;
1247               IRConst* dst;
1248               IRType   tyW    = hWordTy;
1249               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
1250               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
1251               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1252               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1253               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
1254               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1255                                              : IRExpr_Const(IRConst_U64(1));
1256
1257               /* First we need to figure out whether the side exit got
1258                  inverted by the ir optimiser.  To do that, figure out
1259                  the next (fallthrough) instruction's address and the
1260                  side exit address and see if they are the same. */
1261               nia = cia + isize;
1262
1263               /* Side exit address */
1264               dst = st->Ist.Exit.dst;
1265               if (tyW == Ity_I32) {
1266                  tl_assert(dst->tag == Ico_U32);
1267                  sea = dst->Ico.U32;
1268               } else {
1269                  tl_assert(tyW == Ity_I64);
1270                  tl_assert(dst->tag == Ico_U64);
1271                  sea = dst->Ico.U64;
1272               }
1273
1274               inverted = nia == sea;
1275
1276               /* Widen the guard expression. */
1277               addStmtToIRSB( cgs.sbOut,
1278                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1279               addStmtToIRSB( cgs.sbOut,
1280                              IRStmt_WrTmp( guardW,
1281                                            IRExpr_Unop(widen,
1282                                                        IRExpr_RdTmp(guard1))) );
1283               /* If the exit is inverted, invert the sense of the guard. */
1284               addStmtToIRSB(
1285                     cgs.sbOut,
1286                     IRStmt_WrTmp(
1287                           guard,
1288                           inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1289                                    : IRExpr_RdTmp(guardW)
1290                              ));
1291               /* And post the event. */
1292               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
1293            }
1294
1295            /* We may never reach the next statement, so need to flush
1296               all outstanding transactions now. */
1297            flushEvents( &cgs );
1298            break;
1299         }
1300
1301         default:
1302            ppIRStmt(st);
1303            tl_assert(0);
1304            break;
1305      }
1306
1307      /* Copy the original statement */
1308      addStmtToIRSB( cgs.sbOut, st );
1309
1310      if (DEBUG_CG) {
1311         ppIRStmt(st);
1312         VG_(printf)("\n");
1313      }
1314   }
1315
1316   /* Deal with branches to unknown destinations.  Except ignore ones
1317      which are function returns as we assume the return stack
1318      predictor never mispredicts. */
1319   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1320      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1321      switch (sbIn->next->tag) {
1322         case Iex_Const:
1323            break; /* boring - branch to known address */
1324         case Iex_RdTmp:
1325            /* looks like an indirect branch (branch to unknown) */
1326            addEvent_Bi( &cgs, curr_inode, sbIn->next );
1327            break;
1328         default:
1329            /* shouldn't happen - if the incoming IR is properly
1330               flattened, should only have tmp and const cases to
1331               consider. */
1332            tl_assert(0);
1333      }
1334   }
1335
1336   /* At the end of the bb.  Flush outstandings. */
1337   flushEvents( &cgs );
1338
1339   /* done.  stay sane ... */
1340   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
1341
1342   if (DEBUG_CG) {
1343      VG_(printf)( "goto {");
1344      ppIRJumpKind(sbIn->jumpkind);
1345      VG_(printf)( "} ");
1346      ppIRExpr( sbIn->next );
1347      VG_(printf)( "}\n");
1348   }
1349
1350   return cgs.sbOut;
1351}
1352
1353/*------------------------------------------------------------*/
1354/*--- Cache configuration                                  ---*/
1355/*------------------------------------------------------------*/
1356
1357static cache_t clo_I1_cache = UNDEFINED_CACHE;
1358static cache_t clo_D1_cache = UNDEFINED_CACHE;
1359static cache_t clo_LL_cache = UNDEFINED_CACHE;
1360
1361/*------------------------------------------------------------*/
1362/*--- cg_fini() and related function                       ---*/
1363/*------------------------------------------------------------*/
1364
1365// Total reads/writes/misses.  Calculated during CC traversal at the end.
1366// All auto-zeroed.
1367static CacheCC  Ir_total;
1368static CacheCC  Dr_total;
1369static CacheCC  Dw_total;
1370static BranchCC Bc_total;
1371static BranchCC Bi_total;
1372
1373static void fprint_CC_table_and_calc_totals(void)
1374{
1375   Int     i;
1376   VgFile  *fp;
1377   HChar   *currFile = NULL;
1378   const HChar *currFn = NULL;
1379   LineCC* lineCC;
1380
1381   // Setup output filename.  Nb: it's important to do this now, ie. as late
1382   // as possible.  If we do it at start-up and the program forks and the
1383   // output file format string contains a %p (pid) specifier, both the
1384   // parent and child will incorrectly write to the same file;  this
1385   // happened in 3.3.0.
1386   HChar* cachegrind_out_file =
1387      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1388
1389   fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1390                                        VKI_S_IRUSR|VKI_S_IWUSR);
1391   if (fp == NULL) {
1392      // If the file can't be opened for whatever reason (conflict
1393      // between multiple cachegrinded processes?), give up now.
1394      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1395                cachegrind_out_file );
1396      VG_(umsg)("       ... so simulation results will be missing.\n");
1397      VG_(free)(cachegrind_out_file);
1398      return;
1399   } else {
1400      VG_(free)(cachegrind_out_file);
1401   }
1402
1403   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
1404   // the 2nd colon makes cg_annotate's output look nicer.
1405   VG_(fprintf)(fp,  "desc: I1 cache:         %s\n"
1406                     "desc: D1 cache:         %s\n"
1407                     "desc: LL cache:         %s\n",
1408                     I1.desc_line, D1.desc_line, LL.desc_line);
1409
1410   // "cmd:" line
1411   VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
1412   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1413      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1414      VG_(fprintf)(fp, " %s", arg);
1415   }
1416   // "events:" line
1417   if (clo_cache_sim && clo_branch_sim) {
1418      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1419                                  "Bc Bcm Bi Bim\n");
1420   }
1421   else if (clo_cache_sim && !clo_branch_sim) {
1422      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1423                                  "\n");
1424   }
1425   else if (!clo_cache_sim && clo_branch_sim) {
1426      VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
1427   }
1428   else {
1429      VG_(fprintf)(fp, "\nevents: Ir\n");
1430   }
1431
1432   // Traverse every lineCC
1433   VG_(OSetGen_ResetIter)(CC_table);
1434   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
1435      Bool just_hit_a_new_file = False;
1436      // If we've hit a new file, print a "fl=" line.  Note that because
1437      // each string is stored exactly once in the string table, we can use
1438      // pointer comparison rather than strcmp() to test for equality, which
1439      // is good because most of the time the comparisons are equal and so
1440      // the whole strings would have to be checked.
1441      if ( lineCC->loc.file != currFile ) {
1442         currFile = lineCC->loc.file;
1443         VG_(fprintf)(fp, "fl=%s\n", currFile);
1444         distinct_files++;
1445         just_hit_a_new_file = True;
1446      }
1447      // If we've hit a new function, print a "fn=" line.  We know to do
1448      // this when the function name changes, and also every time we hit a
1449      // new file (in which case the new function name might be the same as
1450      // in the old file, hence the just_hit_a_new_file test).
1451      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
1452         currFn = lineCC->loc.fn;
1453         VG_(fprintf)(fp, "fn=%s\n", currFn);
1454         distinct_fns++;
1455      }
1456
1457      // Print the LineCC
1458      if (clo_cache_sim && clo_branch_sim) {
1459         VG_(fprintf)(fp,  "%u %llu %llu %llu"
1460                             " %llu %llu %llu"
1461                             " %llu %llu %llu"
1462                             " %llu %llu %llu %llu\n",
1463                            lineCC->loc.line,
1464                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1465                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1466                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
1467                            lineCC->Bc.b, lineCC->Bc.mp,
1468                            lineCC->Bi.b, lineCC->Bi.mp);
1469      }
1470      else if (clo_cache_sim && !clo_branch_sim) {
1471         VG_(fprintf)(fp,  "%u %llu %llu %llu"
1472                             " %llu %llu %llu"
1473                             " %llu %llu %llu\n",
1474                            lineCC->loc.line,
1475                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1476                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1477                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
1478      }
1479      else if (!clo_cache_sim && clo_branch_sim) {
1480         VG_(fprintf)(fp,  "%u %llu"
1481                             " %llu %llu %llu %llu\n",
1482                            lineCC->loc.line,
1483                            lineCC->Ir.a,
1484                            lineCC->Bc.b, lineCC->Bc.mp,
1485                            lineCC->Bi.b, lineCC->Bi.mp);
1486      }
1487      else {
1488         VG_(fprintf)(fp,  "%u %llu\n",
1489                            lineCC->loc.line,
1490                            lineCC->Ir.a);
1491      }
1492
1493      // Update summary stats
1494      Ir_total.a  += lineCC->Ir.a;
1495      Ir_total.m1 += lineCC->Ir.m1;
1496      Ir_total.mL += lineCC->Ir.mL;
1497      Dr_total.a  += lineCC->Dr.a;
1498      Dr_total.m1 += lineCC->Dr.m1;
1499      Dr_total.mL += lineCC->Dr.mL;
1500      Dw_total.a  += lineCC->Dw.a;
1501      Dw_total.m1 += lineCC->Dw.m1;
1502      Dw_total.mL += lineCC->Dw.mL;
1503      Bc_total.b  += lineCC->Bc.b;
1504      Bc_total.mp += lineCC->Bc.mp;
1505      Bi_total.b  += lineCC->Bi.b;
1506      Bi_total.mp += lineCC->Bi.mp;
1507
1508      distinct_lines++;
1509   }
1510
1511   // Summary stats must come after the rest of the table, since we
1512   // calculate them during the traversal.
1513   if (clo_cache_sim && clo_branch_sim) {
1514      VG_(fprintf)(fp,  "summary:"
1515                        " %llu %llu %llu"
1516                        " %llu %llu %llu"
1517                        " %llu %llu %llu"
1518                        " %llu %llu %llu %llu\n",
1519                        Ir_total.a, Ir_total.m1, Ir_total.mL,
1520                        Dr_total.a, Dr_total.m1, Dr_total.mL,
1521                        Dw_total.a, Dw_total.m1, Dw_total.mL,
1522                        Bc_total.b, Bc_total.mp,
1523                        Bi_total.b, Bi_total.mp);
1524   }
1525   else if (clo_cache_sim && !clo_branch_sim) {
1526      VG_(fprintf)(fp,  "summary:"
1527                        " %llu %llu %llu"
1528                        " %llu %llu %llu"
1529                        " %llu %llu %llu\n",
1530                        Ir_total.a, Ir_total.m1, Ir_total.mL,
1531                        Dr_total.a, Dr_total.m1, Dr_total.mL,
1532                        Dw_total.a, Dw_total.m1, Dw_total.mL);
1533   }
1534   else if (!clo_cache_sim && clo_branch_sim) {
1535      VG_(fprintf)(fp,  "summary:"
1536                        " %llu"
1537                        " %llu %llu %llu %llu\n",
1538                        Ir_total.a,
1539                        Bc_total.b, Bc_total.mp,
1540                        Bi_total.b, Bi_total.mp);
1541   }
1542   else {
1543      VG_(fprintf)(fp, "summary:"
1544                        " %llu\n",
1545                        Ir_total.a);
1546   }
1547
1548   VG_(fclose)(fp);
1549}
1550
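// Width of 'n' when printed with thousands separators, e.g. (illustrative)
// ULong_width(1234567) == 9: "1,234,567" is 7 digits plus 2 commas.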
1551static UInt ULong_width(ULong n)
1552{
1553   UInt w = 0;
1554   while (n > 0) {
1555      n = n / 10;
1556      w++;
1557   }
1558   if (w == 0) w = 1;
1559   return w + (w-1)/3;   // add space for commas
1560}
1561
1562static void cg_fini(Int exitcode)
1563{
1564   static HChar fmt[128];   // OK; large enough
1565
1566   CacheCC  D_total;
1567   BranchCC B_total;
1568   ULong LL_total_m, LL_total_mr, LL_total_mw,
1569         LL_total, LL_total_r, LL_total_w;
1570   Int l1, l2, l3;
1571
1572   fprint_CC_table_and_calc_totals();
1573
1574   if (VG_(clo_verbosity) == 0)
1575      return;
1576
1577   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1578   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))
1579
1580   /* Column widths: first sized for the I refs count, second and third
1581    * for the larger of the D read/write and branch counts. */
1582   l1 = ULong_width(Ir_total.a);
1583   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1584   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
1585
1586   /* Make format string, getting width right for numbers */
1587   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
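   // E.g. (illustrative) with l1 == 12 this produces "%s %,12llu\n"; the ','
   // flag is Valgrind's printf extension for thousands separators, which is
   // why ULong_width() reserves extra space for commas.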
1588
1589   /* Always print this */
1590   VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);
1591
1592   /* If cache profiling is enabled, show D access numbers and all
1593      miss numbers */
1594   if (clo_cache_sim) {
1595      VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
1596      VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);
1597
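      // Avoid division by zero in the miss-rate calculations below.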
1598      if (0 == Ir_total.a) Ir_total.a = 1;
1599      VG_(umsg)("I1  miss rate: %*.2f%%\n", l1,
1600                Ir_total.m1 * 100.0 / Ir_total.a);
1601      VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
1602                Ir_total.mL * 100.0 / Ir_total.a);
1603      VG_(umsg)("\n");
1604
1605      /* D cache results, printed using the column widths computed
1606       * above. */
1607      D_total.a  = Dr_total.a  + Dw_total.a;
1608      D_total.m1 = Dr_total.m1 + Dw_total.m1;
1609      D_total.mL = Dr_total.mL + Dw_total.mL;
1610
1611      /* Make format string, getting width right for numbers */
1612      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
1613                        l1, l2, l3);
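      // E.g. (illustrative) with l1/l2/l3 == 12/10/9 this produces
      // "%s %,12llu  (%,10llu rd   + %,9llu wr)\n".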
1614
1615      VG_(umsg)(fmt, "D   refs:     ",
1616                     D_total.a, Dr_total.a, Dw_total.a);
1617      VG_(umsg)(fmt, "D1  misses:   ",
1618                     D_total.m1, Dr_total.m1, Dw_total.m1);
1619      VG_(umsg)(fmt, "LLd misses:   ",
1620                     D_total.mL, Dr_total.mL, Dw_total.mL);
1621
1622      if (0 == D_total.a)  D_total.a = 1;
1623      if (0 == Dr_total.a) Dr_total.a = 1;
1624      if (0 == Dw_total.a) Dw_total.a = 1;
1625      VG_(umsg)("D1  miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
1626                l1, D_total.m1  * 100.0 / D_total.a,
1627                l2, Dr_total.m1 * 100.0 / Dr_total.a,
1628                l3, Dw_total.m1 * 100.0 / Dw_total.a);
1629      VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
1630                l1, D_total.mL  * 100.0 / D_total.a,
1631                l2, Dr_total.mL * 100.0 / Dr_total.a,
1632                l3, Dw_total.mL * 100.0 / Dw_total.a);
1633      VG_(umsg)("\n");
1634
1635      /* LL overall results */
1636
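      // Nb: references to the LL cache are, by construction, the first-level
      // misses (I1 + D1), which is why the .m1 counts are summed here.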
1637      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1638      LL_total_r = Dr_total.m1 + Ir_total.m1;
1639      LL_total_w = Dw_total.m1;
1640      VG_(umsg)(fmt, "LL refs:      ",
1641                     LL_total, LL_total_r, LL_total_w);
1642
1643      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
1644      LL_total_mr = Dr_total.mL + Ir_total.mL;
1645      LL_total_mw = Dw_total.mL;
1646      VG_(umsg)(fmt, "LL misses:    ",
1647                     LL_total_m, LL_total_mr, LL_total_mw);
1648
1649      VG_(umsg)("LL miss rate:  %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
1650                l1, LL_total_m  * 100.0 / (Ir_total.a + D_total.a),
1651                l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
1652                l3, LL_total_mw * 100.0 / Dw_total.a);
1653   }
1654
1655   /* If branch profiling is enabled, show branch overall results. */
1656   if (clo_branch_sim) {
1657      /* Make format string, getting width right for numbers */
1658      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
1659                        l1, l2, l3);
1660
1661      if (0 == Bc_total.b)  Bc_total.b = 1;
1662      if (0 == Bi_total.b)  Bi_total.b = 1;
1663      B_total.b  = Bc_total.b  + Bi_total.b;
1664      B_total.mp = Bc_total.mp + Bi_total.mp;
1665
1666      VG_(umsg)("\n");
1667      VG_(umsg)(fmt, "Branches:     ",
1668                     B_total.b, Bc_total.b, Bi_total.b);
1669
1670      VG_(umsg)(fmt, "Mispredicts:  ",
1671                     B_total.mp, Bc_total.mp, Bi_total.mp);
1672
1673      VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
1674                l1, B_total.mp  * 100.0 / B_total.b,
1675                l2, Bc_total.mp * 100.0 / Bc_total.b,
1676                l3, Bi_total.mp * 100.0 / Bi_total.b);
1677   }
1678
1679   // Various stats
1680   if (VG_(clo_stats)) {
1681      Int debug_lookups = full_debugs      + fn_debugs +
1682                          file_line_debugs + no_debugs;
1683
1684      VG_(dmsg)("\n");
1685      VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
1686      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
1687      VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
1688      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
1689      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
1690      VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);
1691
1692      VG_(dmsg)("cachegrind: with full      info:%6.1f%% (%d)\n",
1693                full_debugs * 100.0 / debug_lookups, full_debugs);
1694      VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
1695                file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
1696      VG_(dmsg)("cachegrind: with fn name   info:%6.1f%% (%d)\n",
1697                fn_debugs * 100.0 / debug_lookups, fn_debugs);
1698      VG_(dmsg)("cachegrind: with zero      info:%6.1f%% (%d)\n",
1699                no_debugs * 100.0 / debug_lookups, no_debugs);
1700
1701      VG_(dmsg)("cachegrind: string table size: %lu\n",
1702                VG_(OSetGen_Size)(stringTable));
1703      VG_(dmsg)("cachegrind: CC table size: %lu\n",
1704                VG_(OSetGen_Size)(CC_table));
1705      VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
1706                VG_(OSetGen_Size)(instrInfoTable));
1707   }
1708}
1709
1710/*--------------------------------------------------------------------*/
1711/*--- Discarding BB info                                           ---*/
1712/*--------------------------------------------------------------------*/
1713
1714// Called when a translation is removed from the translation cache for
1715// any reason at all, e.g. to free up space, or because the guest code
1716// it covered was unmapped or modified.
1717static
1718void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
1719{
1720   SB_info* sbInfo;
1721   Addr     orig_addr = vge.base[0];
1722
1723   tl_assert(vge.n_used > 0);
1724
1725   if (DEBUG_CG)
1726      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1727                   (void*)orig_addr,
1728                   (void*)vge.base[0], (ULong)vge.len[0]);
1729
1730   // Get the SB info, remove it from the table, and free it.  Note that
1731   // the lookup key is orig_addr, i.e. the superblock's base address.
1732   sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1733   tl_assert(NULL != sbInfo);
1734   VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1735}
1736
1737/*--------------------------------------------------------------------*/
1738/*--- Command line processing                                      ---*/
1739/*--------------------------------------------------------------------*/
1740
1741static Bool cg_process_cmd_line_option(const HChar* arg)
1742{
1743   if (VG_(str_clo_cache_opt)(arg,
1744                              &clo_I1_cache,
1745                              &clo_D1_cache,
1746                              &clo_LL_cache)) {}
1747
1748   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1749   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
1750   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
1751   else
1752      return False;
1753
1754   return True;
1755}
1756
1757static void cg_print_usage(void)
1758{
1759   VG_(print_cache_clo_opts)();
1760   VG_(printf)(
1761"    --cache-sim=yes|no  [yes]        collect cache stats?\n"
1762"    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
1763"    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
1764   );
1765}
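
// For reference, an illustrative invocation exercising these options (the
// program name is a placeholder):
//
//    valgrind --tool=cachegrind --cache-sim=yes --branch-sim=yes \
//             --cachegrind-out-file=cachegrind.out.%p ./myprog
//
// "%p" in the output file name is expanded to the client's process ID.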
1766
1767static void cg_print_debug_usage(void)
1768{
1769   VG_(printf)(
1770"    (none)\n"
1771   );
1772}
1773
1774/*--------------------------------------------------------------------*/
1775/*--- Setup                                                        ---*/
1776/*--------------------------------------------------------------------*/
1777
1778static void cg_post_clo_init(void); /* just below */
1779
1780static void cg_pre_clo_init(void)
1781{
1782   VG_(details_name)            ("Cachegrind");
1783   VG_(details_version)         (NULL);
1784   VG_(details_description)     ("a cache and branch-prediction profiler");
1785   VG_(details_copyright_author)(
1786      "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.");
1787   VG_(details_bug_reports_to)  (VG_BUGS_TO);
1788   VG_(details_avg_translation_sizeB) ( 500 );
1789
1790   VG_(clo_vex_control).iropt_register_updates_default
1791      = VG_(clo_px_file_backed)
1792      = VexRegUpdSpAtMemAccess; // overridable by the user.
1793
1794   VG_(basic_tool_funcs)          (cg_post_clo_init,
1795                                   cg_instrument,
1796                                   cg_fini);
1797
1798   VG_(needs_superblock_discards)(cg_discard_superblock_info);
1799   VG_(needs_command_line_options)(cg_process_cmd_line_option,
1800                                   cg_print_usage,
1801                                   cg_print_debug_usage);
1802}
1803
1804static void cg_post_clo_init(void)
1805{
1806   cache_t I1c, D1c, LLc;
1807
1808   CC_table =
1809      VG_(OSetGen_Create)(offsetof(LineCC, loc),
1810                          cmp_CodeLoc_LineCC,
1811                          VG_(malloc), "cg.main.cpci.1",
1812                          VG_(free));
1813   instrInfoTable =
1814      VG_(OSetGen_Create)(/*keyOff*/0,
1815                          NULL,
1816                          VG_(malloc), "cg.main.cpci.2",
1817                          VG_(free));
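   // Nb: a NULL cmp function means the OSet compares keys directly as
   // unsigned words; that suffices here because the key at offset 0 is the
   // superblock's base address.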
1818   stringTable =
1819      VG_(OSetGen_Create)(/*keyOff*/0,
1820                          stringCmp,
1821                          VG_(malloc), "cg.main.cpci.3",
1822                          VG_(free));
1823
1824   VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1825                                       &clo_I1_cache,
1826                                       &clo_D1_cache,
1827                                       &clo_LL_cache);
1828
1829   // min_line_size is used to make sure that we never feed
1830   // accesses to the simulator straddling more than two
1831   // cache lines at any cache level
1832   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1833   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
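   // An access of S bytes can straddle at most two cache lines provided
   // S <= line_size, so the check below (min_line_size must be at least the
   // size of the largest possible load or store) guarantees that property
   // at every cache level.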
1834
1835   Int largest_load_or_store_size
1836      = VG_(machine_get_size_of_largest_guest_register)();
1837   if (min_line_size < largest_load_or_store_size) {
1838      /* We can't continue, because the cache simulation might
1839         straddle more than 2 lines, and it will assert.  So let's
1840         just stop before we start. */
1841      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1842                (Int)min_line_size);
1843      VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
1844                largest_load_or_store_size );
1845      VG_(umsg)("  but it is not.  Exiting now.\n");
1846      VG_(exit)(1);
1847   }
1848
1849   cachesim_initcaches(I1c, D1c, LLc);
1850}
1851
1852VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1853
1854/*--------------------------------------------------------------------*/
1855/*--- end                                                          ---*/
1856/*--------------------------------------------------------------------*/
1857
1858