1 2/*--------------------------------------------------------------------*/ 3/*--- Cachegrind: everything but the simulation itself. ---*/ 4/*--- cg_main.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Cachegrind, a Valgrind tool for cache 9 profiling programs. 10 11 Copyright (C) 2002-2013 Nicholas Nethercote 12 njn@valgrind.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_tool_basics.h" 33#include "pub_tool_debuginfo.h" 34#include "pub_tool_libcbase.h" 35#include "pub_tool_libcassert.h" 36#include "pub_tool_libcfile.h" 37#include "pub_tool_libcprint.h" 38#include "pub_tool_libcproc.h" 39#include "pub_tool_mallocfree.h" 40#include "pub_tool_options.h" 41#include "pub_tool_oset.h" 42#include "pub_tool_tooliface.h" 43#include "pub_tool_xarray.h" 44#include "pub_tool_clientstate.h" 45#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry) 46 47#include "cg_arch.h" 48#include "cg_sim.c" 49#include "cg_branchpred.c" 50 51/*------------------------------------------------------------*/ 52/*--- Constants ---*/ 53/*------------------------------------------------------------*/ 54 55/* Set to 1 for very verbose debugging */ 56#define DEBUG_CG 0 57 58/*------------------------------------------------------------*/ 59/*--- Options ---*/ 60/*------------------------------------------------------------*/ 61 62static Bool clo_cache_sim = True; /* do cache simulation? */ 63static Bool clo_branch_sim = False; /* do branch simulation? */ 64static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p"; 65 66/*------------------------------------------------------------*/ 67/*--- Cachesim configuration ---*/ 68/*------------------------------------------------------------*/ 69 70static Int min_line_size = 0; /* min of L1 and LL cache line sizes */ 71 72/*------------------------------------------------------------*/ 73/*--- Types and Data Structures ---*/ 74/*------------------------------------------------------------*/ 75 76typedef 77 struct { 78 ULong a; /* total # memory accesses of this kind */ 79 ULong m1; /* misses in the first level cache */ 80 ULong mL; /* misses in the second level cache */ 81 } 82 CacheCC; 83 84typedef 85 struct { 86 ULong b; /* total # branches of this kind */ 87 ULong mp; /* number of branches mispredicted */ 88 } 89 BranchCC; 90 91//------------------------------------------------------------ 92// Primary data structure #1: CC table 93// - Holds the per-source-line hit/miss stats, grouped by file/function/line. 94// - an ordered set of CCs. CC indexing done by file/function/line (as 95// determined from the instrAddr). 96// - Traversed for dumping stats at end in file/func/line hierarchy. 97 98typedef struct { 99 HChar* file; 100 const HChar* fn; 101 Int line; 102} 103CodeLoc; 104 105typedef struct { 106 CodeLoc loc; /* Source location that these counts pertain to */ 107 CacheCC Ir; /* Insn read counts */ 108 CacheCC Dr; /* Data read counts */ 109 CacheCC Dw; /* Data write/modify counts */ 110 BranchCC Bc; /* Conditional branch counts */ 111 BranchCC Bi; /* Indirect branch counts */ 112} LineCC; 113 114// First compare file, then fn, then line. 115static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc) 116{ 117 Word res; 118 const CodeLoc* a = (const CodeLoc*)vloc; 119 const CodeLoc* b = &(((const LineCC*)vcc)->loc); 120 121 res = VG_(strcmp)(a->file, b->file); 122 if (0 != res) 123 return res; 124 125 res = VG_(strcmp)(a->fn, b->fn); 126 if (0 != res) 127 return res; 128 129 return a->line - b->line; 130} 131 132static OSet* CC_table; 133 134//------------------------------------------------------------ 135// Primary data structure #2: InstrInfo table 136// - Holds the cached info about each instr that is used for simulation. 137// - table(SB_start_addr, list(InstrInfo)) 138// - For each SB, each InstrInfo in the list holds info about the 139// instruction (instrLen, instrAddr, etc), plus a pointer to its line 140// CC. This node is what's passed to the simulation function. 141// - When SBs are discarded the relevant list(instr_details) is freed. 142 143typedef struct _InstrInfo InstrInfo; 144struct _InstrInfo { 145 Addr instr_addr; 146 UChar instr_len; 147 LineCC* parent; // parent line-CC 148}; 149 150typedef struct _SB_info SB_info; 151struct _SB_info { 152 Addr SB_addr; // key; MUST BE FIRST 153 Int n_instrs; 154 InstrInfo instrs[0]; 155}; 156 157static OSet* instrInfoTable; 158 159//------------------------------------------------------------ 160// Secondary data structure: string table 161// - holds strings, avoiding dups 162// - used for filenames and function names, each of which will be 163// pointed to by one or more CCs. 164// - it also allows equality checks just by pointer comparison, which 165// is good when printing the output file at the end. 166 167static OSet* stringTable; 168 169//------------------------------------------------------------ 170// Stats 171static Int distinct_files = 0; 172static Int distinct_fns = 0; 173static Int distinct_lines = 0; 174static Int distinct_instrsGen = 0; 175static Int distinct_instrsNoX = 0; 176 177static Int full_debugs = 0; 178static Int file_line_debugs = 0; 179static Int fn_debugs = 0; 180static Int no_debugs = 0; 181 182/*------------------------------------------------------------*/ 183/*--- String table operations ---*/ 184/*------------------------------------------------------------*/ 185 186static Word stringCmp( const void* key, const void* elem ) 187{ 188 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem); 189} 190 191// Get a permanent string; either pull it out of the string table if it's 192// been encountered before, or dup it and put it into the string table. 193static HChar* get_perm_string(const HChar* s) 194{ 195 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s); 196 if (s_ptr) { 197 return *s_ptr; 198 } else { 199 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*)); 200 *s_node = VG_(strdup)("cg.main.gps.1", s); 201 VG_(OSetGen_Insert)(stringTable, s_node); 202 return *s_node; 203 } 204} 205 206/*------------------------------------------------------------*/ 207/*--- CC table operations ---*/ 208/*------------------------------------------------------------*/ 209 210static void get_debug_info(Addr instr_addr, const HChar **dir, 211 const HChar **file, const HChar **fn, UInt* line) 212{ 213 Bool found_file_line = VG_(get_filename_linenum)( 214 instr_addr, 215 file, dir, 216 line 217 ); 218 Bool found_fn = VG_(get_fnname)(instr_addr, fn); 219 220 if (!found_file_line) { 221 *file = "???"; 222 *line = 0; 223 } 224 if (!found_fn) { 225 *fn = "???"; 226 } 227 228 if (found_file_line) { 229 if (found_fn) full_debugs++; 230 else file_line_debugs++; 231 } else { 232 if (found_fn) fn_debugs++; 233 else no_debugs++; 234 } 235} 236 237// Do a three step traversal: by file, then fn, then line. 238// Returns a pointer to the line CC, creates a new one if necessary. 239static LineCC* get_lineCC(Addr origAddr) 240{ 241 const HChar *fn, *file, *dir; 242 UInt line; 243 CodeLoc loc; 244 LineCC* lineCC; 245 246 get_debug_info(origAddr, &dir, &file, &fn, &line); 247 248 // Form an absolute pathname if a directory is available 249 HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1]; 250 251 if (dir[0]) { 252 VG_(sprintf)(absfile, "%s/%s", dir, file); 253 } else { 254 VG_(sprintf)(absfile, "%s", file); 255 } 256 257 loc.file = absfile; 258 loc.fn = fn; 259 loc.line = line; 260 261 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc); 262 if (!lineCC) { 263 // Allocate and zero a new node. 264 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC)); 265 lineCC->loc.file = get_perm_string(loc.file); 266 lineCC->loc.fn = get_perm_string(loc.fn); 267 lineCC->loc.line = loc.line; 268 lineCC->Ir.a = 0; 269 lineCC->Ir.m1 = 0; 270 lineCC->Ir.mL = 0; 271 lineCC->Dr.a = 0; 272 lineCC->Dr.m1 = 0; 273 lineCC->Dr.mL = 0; 274 lineCC->Dw.a = 0; 275 lineCC->Dw.m1 = 0; 276 lineCC->Dw.mL = 0; 277 lineCC->Bc.b = 0; 278 lineCC->Bc.mp = 0; 279 lineCC->Bi.b = 0; 280 lineCC->Bi.mp = 0; 281 VG_(OSetGen_Insert)(CC_table, lineCC); 282 } 283 284 return lineCC; 285} 286 287/*------------------------------------------------------------*/ 288/*--- Cache simulation functions ---*/ 289/*------------------------------------------------------------*/ 290 291/* A common case for an instruction read event is that the 292 * bytes read belong to the same cache line in both L1I and LL 293 * (if cache line sizes of L1 and LL are the same). 294 * As this can be detected at instrumentation time, and results 295 * in faster simulation, special-casing is benefical. 296 * 297 * Abbrevations used in var/function names: 298 * IrNoX - instruction read does not cross cache lines 299 * IrGen - generic instruction read; not detected as IrNoX 300 * Ir - not known / not important whether it is an IrNoX 301 */ 302 303// Only used with --cache-sim=no. 304static VG_REGPARM(1) 305void log_1Ir(InstrInfo* n) 306{ 307 n->parent->Ir.a++; 308} 309 310// Only used with --cache-sim=no. 311static VG_REGPARM(2) 312void log_2Ir(InstrInfo* n, InstrInfo* n2) 313{ 314 n->parent->Ir.a++; 315 n2->parent->Ir.a++; 316} 317 318// Only used with --cache-sim=no. 319static VG_REGPARM(3) 320void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3) 321{ 322 n->parent->Ir.a++; 323 n2->parent->Ir.a++; 324 n3->parent->Ir.a++; 325} 326 327// Generic case for instruction reads: may cross cache lines. 328// All other Ir handlers expect IrNoX instruction reads. 329static VG_REGPARM(1) 330void log_1IrGen_0D_cache_access(InstrInfo* n) 331{ 332 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n", 333 // n, n->instr_addr, n->instr_len); 334 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len, 335 &n->parent->Ir.m1, &n->parent->Ir.mL); 336 n->parent->Ir.a++; 337} 338 339static VG_REGPARM(1) 340void log_1IrNoX_0D_cache_access(InstrInfo* n) 341{ 342 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n", 343 // n, n->instr_addr, n->instr_len); 344 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len, 345 &n->parent->Ir.m1, &n->parent->Ir.mL); 346 n->parent->Ir.a++; 347} 348 349static VG_REGPARM(2) 350void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2) 351{ 352 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n" 353 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n", 354 // n, n->instr_addr, n->instr_len, 355 // n2, n2->instr_addr, n2->instr_len); 356 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len, 357 &n->parent->Ir.m1, &n->parent->Ir.mL); 358 n->parent->Ir.a++; 359 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len, 360 &n2->parent->Ir.m1, &n2->parent->Ir.mL); 361 n2->parent->Ir.a++; 362} 363 364static VG_REGPARM(3) 365void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3) 366{ 367 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n" 368 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n" 369 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n", 370 // n, n->instr_addr, n->instr_len, 371 // n2, n2->instr_addr, n2->instr_len, 372 // n3, n3->instr_addr, n3->instr_len); 373 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len, 374 &n->parent->Ir.m1, &n->parent->Ir.mL); 375 n->parent->Ir.a++; 376 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len, 377 &n2->parent->Ir.m1, &n2->parent->Ir.mL); 378 n2->parent->Ir.a++; 379 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len, 380 &n3->parent->Ir.m1, &n3->parent->Ir.mL); 381 n3->parent->Ir.a++; 382} 383 384static VG_REGPARM(3) 385void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 386{ 387 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n" 388 // " daddr=0x%010lx, dsize=%lu\n", 389 // n, n->instr_addr, n->instr_len, data_addr, data_size); 390 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len, 391 &n->parent->Ir.m1, &n->parent->Ir.mL); 392 n->parent->Ir.a++; 393 394 cachesim_D1_doref(data_addr, data_size, 395 &n->parent->Dr.m1, &n->parent->Dr.mL); 396 n->parent->Dr.a++; 397} 398 399static VG_REGPARM(3) 400void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 401{ 402 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n" 403 // " daddr=0x%010lx, dsize=%lu\n", 404 // n, n->instr_addr, n->instr_len, data_addr, data_size); 405 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len, 406 &n->parent->Ir.m1, &n->parent->Ir.mL); 407 n->parent->Ir.a++; 408 409 cachesim_D1_doref(data_addr, data_size, 410 &n->parent->Dw.m1, &n->parent->Dw.mL); 411 n->parent->Dw.a++; 412} 413 414/* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access 415 and log_0Ir_1Dw_cache_access have exactly the same prototype. If 416 you change them, you must change addEvent_D_guarded too. */ 417static VG_REGPARM(3) 418void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 419{ 420 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n", 421 // n, data_addr, data_size); 422 cachesim_D1_doref(data_addr, data_size, 423 &n->parent->Dr.m1, &n->parent->Dr.mL); 424 n->parent->Dr.a++; 425} 426 427/* See comment on log_0Ir_1Dr_cache_access. */ 428static VG_REGPARM(3) 429void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size) 430{ 431 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n", 432 // n, data_addr, data_size); 433 cachesim_D1_doref(data_addr, data_size, 434 &n->parent->Dw.m1, &n->parent->Dw.mL); 435 n->parent->Dw.a++; 436} 437 438/* For branches, we consult two different predictors, one which 439 predicts taken/untaken for conditional branches, and the other 440 which predicts the branch target address for indirect branches 441 (jump-to-register style ones). */ 442 443static VG_REGPARM(2) 444void log_cond_branch(InstrInfo* n, Word taken) 445{ 446 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n", 447 // n, taken); 448 n->parent->Bc.b++; 449 n->parent->Bc.mp 450 += (1 & do_cond_branch_predict(n->instr_addr, taken)); 451} 452 453static VG_REGPARM(2) 454void log_ind_branch(InstrInfo* n, UWord actual_dst) 455{ 456 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n", 457 // n, actual_dst); 458 n->parent->Bi.b++; 459 n->parent->Bi.mp 460 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst)); 461} 462 463 464/*------------------------------------------------------------*/ 465/*--- Instrumentation types and structures ---*/ 466/*------------------------------------------------------------*/ 467 468/* Maintain an ordered list of memory events which are outstanding, in 469 the sense that no IR has yet been generated to do the relevant 470 helper calls. The BB is scanned top to bottom and memory events 471 are added to the end of the list, merging with the most recent 472 notified event where possible (Dw immediately following Dr and 473 having the same size and EA can be merged). 474 475 This merging is done so that for architectures which have 476 load-op-store instructions (x86, amd64), the insn is treated as if 477 it makes just one memory reference (a modify), rather than two (a 478 read followed by a write at the same address). 479 480 At various points the list will need to be flushed, that is, IR 481 generated from it. That must happen before any possible exit from 482 the block (the end, or an IRStmt_Exit). Flushing also takes place 483 when there is no space to add a new event. 484 485 If we require the simulation statistics to be up to date with 486 respect to possible memory exceptions, then the list would have to 487 be flushed before each memory reference. That would however lose 488 performance by inhibiting event-merging during flushing. 489 490 Flushing the list consists of walking it start to end and emitting 491 instrumentation IR for each event, in the order in which they 492 appear. It may be possible to emit a single call for two adjacent 493 events in order to reduce the number of helper function calls made. 494 For example, it could well be profitable to handle two adjacent Ir 495 events with a single helper call. */ 496 497typedef 498 IRExpr 499 IRAtom; 500 501typedef 502 enum { 503 Ev_IrNoX, // Instruction read not crossing cache lines 504 Ev_IrGen, // Generic Ir, not being detected as IrNoX 505 Ev_Dr, // Data read 506 Ev_Dw, // Data write 507 Ev_Dm, // Data modify (read then write) 508 Ev_Bc, // branch conditional 509 Ev_Bi // branch indirect (to unknown destination) 510 } 511 EventTag; 512 513typedef 514 struct { 515 EventTag tag; 516 InstrInfo* inode; 517 union { 518 struct { 519 } IrGen; 520 struct { 521 } IrNoX; 522 struct { 523 IRAtom* ea; 524 Int szB; 525 } Dr; 526 struct { 527 IRAtom* ea; 528 Int szB; 529 } Dw; 530 struct { 531 IRAtom* ea; 532 Int szB; 533 } Dm; 534 struct { 535 IRAtom* taken; /* :: Ity_I1 */ 536 } Bc; 537 struct { 538 IRAtom* dst; 539 } Bi; 540 } Ev; 541 } 542 Event; 543 544static void init_Event ( Event* ev ) { 545 VG_(memset)(ev, 0, sizeof(Event)); 546} 547 548static IRAtom* get_Event_dea ( Event* ev ) { 549 switch (ev->tag) { 550 case Ev_Dr: return ev->Ev.Dr.ea; 551 case Ev_Dw: return ev->Ev.Dw.ea; 552 case Ev_Dm: return ev->Ev.Dm.ea; 553 default: tl_assert(0); 554 } 555} 556 557static Int get_Event_dszB ( Event* ev ) { 558 switch (ev->tag) { 559 case Ev_Dr: return ev->Ev.Dr.szB; 560 case Ev_Dw: return ev->Ev.Dw.szB; 561 case Ev_Dm: return ev->Ev.Dm.szB; 562 default: tl_assert(0); 563 } 564} 565 566 567/* Up to this many unnotified events are allowed. Number is 568 arbitrary. Larger numbers allow more event merging to occur, but 569 potentially induce more spilling due to extending live ranges of 570 address temporaries. */ 571#define N_EVENTS 16 572 573 574/* A struct which holds all the running state during instrumentation. 575 Mostly to avoid passing loads of parameters everywhere. */ 576typedef 577 struct { 578 /* The current outstanding-memory-event list. */ 579 Event events[N_EVENTS]; 580 Int events_used; 581 582 /* The array of InstrInfo bins for the BB. */ 583 SB_info* sbInfo; 584 585 /* Number InstrInfo bins 'used' so far. */ 586 Int sbInfo_i; 587 588 /* The output SB being constructed. */ 589 IRSB* sbOut; 590 } 591 CgState; 592 593 594/*------------------------------------------------------------*/ 595/*--- Instrumentation main ---*/ 596/*------------------------------------------------------------*/ 597 598// Note that origAddr is the real origAddr, not the address of the first 599// instruction in the block (they can be different due to redirection). 600static 601SB_info* get_SB_info(IRSB* sbIn, Addr origAddr) 602{ 603 Int i, n_instrs; 604 IRStmt* st; 605 SB_info* sbInfo; 606 607 // Count number of original instrs in SB 608 n_instrs = 0; 609 for (i = 0; i < sbIn->stmts_used; i++) { 610 st = sbIn->stmts[i]; 611 if (Ist_IMark == st->tag) n_instrs++; 612 } 613 614 // Check that we don't have an entry for this BB in the instr-info table. 615 // If this assertion fails, there has been some screwup: some 616 // translations must have been discarded but Cachegrind hasn't discarded 617 // the corresponding entries in the instr-info table. 618 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr); 619 tl_assert(NULL == sbInfo); 620 621 // BB never translated before (at this address, at least; could have 622 // been unloaded and then reloaded elsewhere in memory) 623 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable, 624 sizeof(SB_info) + n_instrs*sizeof(InstrInfo)); 625 sbInfo->SB_addr = origAddr; 626 sbInfo->n_instrs = n_instrs; 627 VG_(OSetGen_Insert)( instrInfoTable, sbInfo ); 628 629 return sbInfo; 630} 631 632 633static void showEvent ( Event* ev ) 634{ 635 switch (ev->tag) { 636 case Ev_IrGen: 637 VG_(printf)("IrGen %p\n", ev->inode); 638 break; 639 case Ev_IrNoX: 640 VG_(printf)("IrNoX %p\n", ev->inode); 641 break; 642 case Ev_Dr: 643 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB); 644 ppIRExpr(ev->Ev.Dr.ea); 645 VG_(printf)("\n"); 646 break; 647 case Ev_Dw: 648 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB); 649 ppIRExpr(ev->Ev.Dw.ea); 650 VG_(printf)("\n"); 651 break; 652 case Ev_Dm: 653 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB); 654 ppIRExpr(ev->Ev.Dm.ea); 655 VG_(printf)("\n"); 656 break; 657 case Ev_Bc: 658 VG_(printf)("Bc %p GA=", ev->inode); 659 ppIRExpr(ev->Ev.Bc.taken); 660 VG_(printf)("\n"); 661 break; 662 case Ev_Bi: 663 VG_(printf)("Bi %p DST=", ev->inode); 664 ppIRExpr(ev->Ev.Bi.dst); 665 VG_(printf)("\n"); 666 break; 667 default: 668 tl_assert(0); 669 break; 670 } 671} 672 673// Reserve and initialise an InstrInfo for the first mention of a new insn. 674static 675InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len ) 676{ 677 InstrInfo* i_node; 678 tl_assert(cgs->sbInfo_i >= 0); 679 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs); 680 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ]; 681 i_node->instr_addr = instr_addr; 682 i_node->instr_len = instr_len; 683 i_node->parent = get_lineCC(instr_addr); 684 cgs->sbInfo_i++; 685 return i_node; 686} 687 688 689/* Generate code for all outstanding memory events, and mark the queue 690 empty. Code is generated into cgs->bbOut, and this activity 691 'consumes' slots in cgs->sbInfo. */ 692 693static void flushEvents ( CgState* cgs ) 694{ 695 Int i, regparms; 696 const HChar* helperName; 697 void* helperAddr; 698 IRExpr** argv; 699 IRExpr* i_node_expr; 700 IRDirty* di; 701 Event* ev; 702 Event* ev2; 703 Event* ev3; 704 705 i = 0; 706 while (i < cgs->events_used) { 707 708 helperName = NULL; 709 helperAddr = NULL; 710 argv = NULL; 711 regparms = 0; 712 713 /* generate IR to notify event i and possibly the ones 714 immediately following it. */ 715 tl_assert(i >= 0 && i < cgs->events_used); 716 717 ev = &cgs->events[i]; 718 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL ); 719 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL ); 720 721 if (DEBUG_CG) { 722 VG_(printf)(" flush "); 723 showEvent( ev ); 724 } 725 726 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode ); 727 728 /* Decide on helper fn to call and args to pass it, and advance 729 i appropriately. */ 730 switch (ev->tag) { 731 case Ev_IrNoX: 732 /* Merge an IrNoX with a following Dr/Dm. */ 733 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) { 734 /* Why is this true? It's because we're merging an Ir 735 with a following Dr or Dm. The Ir derives from the 736 instruction's IMark and the Dr/Dm from data 737 references which follow it. In short it holds 738 because each insn starts with an IMark, hence an 739 Ev_Ir, and so these Dr/Dm must pertain to the 740 immediately preceding Ir. Same applies to analogous 741 assertions in the subsequent cases. */ 742 tl_assert(ev2->inode == ev->inode); 743 helperName = "log_1IrNoX_1Dr_cache_access"; 744 helperAddr = &log_1IrNoX_1Dr_cache_access; 745 argv = mkIRExprVec_3( i_node_expr, 746 get_Event_dea(ev2), 747 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 748 regparms = 3; 749 i += 2; 750 } 751 /* Merge an IrNoX with a following Dw. */ 752 else 753 if (ev2 && ev2->tag == Ev_Dw) { 754 tl_assert(ev2->inode == ev->inode); 755 helperName = "log_1IrNoX_1Dw_cache_access"; 756 helperAddr = &log_1IrNoX_1Dw_cache_access; 757 argv = mkIRExprVec_3( i_node_expr, 758 get_Event_dea(ev2), 759 mkIRExpr_HWord( get_Event_dszB(ev2) ) ); 760 regparms = 3; 761 i += 2; 762 } 763 /* Merge an IrNoX with two following IrNoX's. */ 764 else 765 if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX) 766 { 767 if (clo_cache_sim) { 768 helperName = "log_3IrNoX_0D_cache_access"; 769 helperAddr = &log_3IrNoX_0D_cache_access; 770 } else { 771 helperName = "log_3Ir"; 772 helperAddr = &log_3Ir; 773 } 774 argv = mkIRExprVec_3( i_node_expr, 775 mkIRExpr_HWord( (HWord)ev2->inode ), 776 mkIRExpr_HWord( (HWord)ev3->inode ) ); 777 regparms = 3; 778 i += 3; 779 } 780 /* Merge an IrNoX with one following IrNoX. */ 781 else 782 if (ev2 && ev2->tag == Ev_IrNoX) { 783 if (clo_cache_sim) { 784 helperName = "log_2IrNoX_0D_cache_access"; 785 helperAddr = &log_2IrNoX_0D_cache_access; 786 } else { 787 helperName = "log_2Ir"; 788 helperAddr = &log_2Ir; 789 } 790 argv = mkIRExprVec_2( i_node_expr, 791 mkIRExpr_HWord( (HWord)ev2->inode ) ); 792 regparms = 2; 793 i += 2; 794 } 795 /* No merging possible; emit as-is. */ 796 else { 797 if (clo_cache_sim) { 798 helperName = "log_1IrNoX_0D_cache_access"; 799 helperAddr = &log_1IrNoX_0D_cache_access; 800 } else { 801 helperName = "log_1Ir"; 802 helperAddr = &log_1Ir; 803 } 804 argv = mkIRExprVec_1( i_node_expr ); 805 regparms = 1; 806 i++; 807 } 808 break; 809 case Ev_IrGen: 810 if (clo_cache_sim) { 811 helperName = "log_1IrGen_0D_cache_access"; 812 helperAddr = &log_1IrGen_0D_cache_access; 813 } else { 814 helperName = "log_1Ir"; 815 helperAddr = &log_1Ir; 816 } 817 argv = mkIRExprVec_1( i_node_expr ); 818 regparms = 1; 819 i++; 820 break; 821 case Ev_Dr: 822 case Ev_Dm: 823 /* Data read or modify */ 824 helperName = "log_0Ir_1Dr_cache_access"; 825 helperAddr = &log_0Ir_1Dr_cache_access; 826 argv = mkIRExprVec_3( i_node_expr, 827 get_Event_dea(ev), 828 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 829 regparms = 3; 830 i++; 831 break; 832 case Ev_Dw: 833 /* Data write */ 834 helperName = "log_0Ir_1Dw_cache_access"; 835 helperAddr = &log_0Ir_1Dw_cache_access; 836 argv = mkIRExprVec_3( i_node_expr, 837 get_Event_dea(ev), 838 mkIRExpr_HWord( get_Event_dszB(ev) ) ); 839 regparms = 3; 840 i++; 841 break; 842 case Ev_Bc: 843 /* Conditional branch */ 844 helperName = "log_cond_branch"; 845 helperAddr = &log_cond_branch; 846 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken ); 847 regparms = 2; 848 i++; 849 break; 850 case Ev_Bi: 851 /* Branch to an unknown destination */ 852 helperName = "log_ind_branch"; 853 helperAddr = &log_ind_branch; 854 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst ); 855 regparms = 2; 856 i++; 857 break; 858 default: 859 tl_assert(0); 860 } 861 862 /* Add the helper. */ 863 tl_assert(helperName); 864 tl_assert(helperAddr); 865 tl_assert(argv); 866 di = unsafeIRDirty_0_N( regparms, 867 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 868 argv ); 869 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) ); 870 } 871 872 cgs->events_used = 0; 873} 874 875static void addEvent_Ir ( CgState* cgs, InstrInfo* inode ) 876{ 877 Event* evt; 878 if (cgs->events_used == N_EVENTS) 879 flushEvents(cgs); 880 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 881 evt = &cgs->events[cgs->events_used]; 882 init_Event(evt); 883 evt->inode = inode; 884 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) { 885 evt->tag = Ev_IrNoX; 886 distinct_instrsNoX++; 887 } else { 888 evt->tag = Ev_IrGen; 889 distinct_instrsGen++; 890 } 891 cgs->events_used++; 892} 893 894static 895void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 896{ 897 Event* evt; 898 tl_assert(isIRAtom(ea)); 899 tl_assert(datasize >= 1 && datasize <= min_line_size); 900 if (!clo_cache_sim) 901 return; 902 if (cgs->events_used == N_EVENTS) 903 flushEvents(cgs); 904 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 905 evt = &cgs->events[cgs->events_used]; 906 init_Event(evt); 907 evt->tag = Ev_Dr; 908 evt->inode = inode; 909 evt->Ev.Dr.szB = datasize; 910 evt->Ev.Dr.ea = ea; 911 cgs->events_used++; 912} 913 914static 915void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea ) 916{ 917 Event* lastEvt; 918 Event* evt; 919 920 tl_assert(isIRAtom(ea)); 921 tl_assert(datasize >= 1 && datasize <= min_line_size); 922 923 if (!clo_cache_sim) 924 return; 925 926 /* Is it possible to merge this write with the preceding read? */ 927 lastEvt = &cgs->events[cgs->events_used-1]; 928 if (cgs->events_used > 0 929 && lastEvt->tag == Ev_Dr 930 && lastEvt->Ev.Dr.szB == datasize 931 && lastEvt->inode == inode 932 && eqIRAtom(lastEvt->Ev.Dr.ea, ea)) 933 { 934 lastEvt->tag = Ev_Dm; 935 return; 936 } 937 938 /* No. Add as normal. */ 939 if (cgs->events_used == N_EVENTS) 940 flushEvents(cgs); 941 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 942 evt = &cgs->events[cgs->events_used]; 943 init_Event(evt); 944 evt->tag = Ev_Dw; 945 evt->inode = inode; 946 evt->Ev.Dw.szB = datasize; 947 evt->Ev.Dw.ea = ea; 948 cgs->events_used++; 949} 950 951static 952void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode, 953 Int datasize, IRAtom* ea, IRAtom* guard, 954 Bool isWrite ) 955{ 956 tl_assert(isIRAtom(ea)); 957 tl_assert(guard); 958 tl_assert(isIRAtom(guard)); 959 tl_assert(datasize >= 1 && datasize <= min_line_size); 960 961 if (!clo_cache_sim) 962 return; 963 964 /* Adding guarded memory actions and merging them with the existing 965 queue is too complex. Simply flush the queue and add this 966 action immediately. Since guarded loads and stores are pretty 967 rare, this is not thought likely to cause any noticeable 968 performance loss as a result of the loss of event-merging 969 opportunities. */ 970 tl_assert(cgs->events_used >= 0); 971 flushEvents(cgs); 972 tl_assert(cgs->events_used == 0); 973 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */ 974 IRExpr* i_node_expr; 975 const HChar* helperName; 976 void* helperAddr; 977 IRExpr** argv; 978 Int regparms; 979 IRDirty* di; 980 i_node_expr = mkIRExpr_HWord( (HWord)inode ); 981 helperName = isWrite ? "log_0Ir_1Dw_cache_access" 982 : "log_0Ir_1Dr_cache_access"; 983 helperAddr = isWrite ? &log_0Ir_1Dw_cache_access 984 : &log_0Ir_1Dr_cache_access; 985 argv = mkIRExprVec_3( i_node_expr, 986 ea, mkIRExpr_HWord( datasize ) ); 987 regparms = 3; 988 di = unsafeIRDirty_0_N( 989 regparms, 990 helperName, VG_(fnptr_to_fnentry)( helperAddr ), 991 argv ); 992 di->guard = guard; 993 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) ); 994} 995 996 997static 998void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard ) 999{ 1000 Event* evt; 1001 tl_assert(isIRAtom(guard)); 1002 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard) 1003 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 1004 if (!clo_branch_sim) 1005 return; 1006 if (cgs->events_used == N_EVENTS) 1007 flushEvents(cgs); 1008 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 1009 evt = &cgs->events[cgs->events_used]; 1010 init_Event(evt); 1011 evt->tag = Ev_Bc; 1012 evt->inode = inode; 1013 evt->Ev.Bc.taken = guard; 1014 cgs->events_used++; 1015} 1016 1017static 1018void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo ) 1019{ 1020 Event* evt; 1021 tl_assert(isIRAtom(whereTo)); 1022 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo) 1023 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64)); 1024 if (!clo_branch_sim) 1025 return; 1026 if (cgs->events_used == N_EVENTS) 1027 flushEvents(cgs); 1028 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS); 1029 evt = &cgs->events[cgs->events_used]; 1030 init_Event(evt); 1031 evt->tag = Ev_Bi; 1032 evt->inode = inode; 1033 evt->Ev.Bi.dst = whereTo; 1034 cgs->events_used++; 1035} 1036 1037//////////////////////////////////////////////////////////// 1038 1039 1040static 1041IRSB* cg_instrument ( VgCallbackClosure* closure, 1042 IRSB* sbIn, 1043 const VexGuestLayout* layout, 1044 const VexGuestExtents* vge, 1045 const VexArchInfo* archinfo_host, 1046 IRType gWordTy, IRType hWordTy ) 1047{ 1048 Int i; 1049 UInt isize; 1050 IRStmt* st; 1051 Addr cia; /* address of current insn */ 1052 CgState cgs; 1053 IRTypeEnv* tyenv = sbIn->tyenv; 1054 InstrInfo* curr_inode = NULL; 1055 1056 if (gWordTy != hWordTy) { 1057 /* We don't currently support this case. */ 1058 VG_(tool_panic)("host/guest word size mismatch"); 1059 } 1060 1061 // Set up new SB 1062 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn); 1063 1064 // Copy verbatim any IR preamble preceding the first IMark 1065 i = 0; 1066 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) { 1067 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] ); 1068 i++; 1069 } 1070 1071 // Get the first statement, and initial cia from it 1072 tl_assert(sbIn->stmts_used > 0); 1073 tl_assert(i < sbIn->stmts_used); 1074 st = sbIn->stmts[i]; 1075 tl_assert(Ist_IMark == st->tag); 1076 1077 cia = st->Ist.IMark.addr; 1078 isize = st->Ist.IMark.len; 1079 // If Vex fails to decode an instruction, the size will be zero. 1080 // Pretend otherwise. 1081 if (isize == 0) isize = VG_MIN_INSTR_SZB; 1082 1083 // Set up running state and get block info 1084 tl_assert(closure->readdr == vge->base[0]); 1085 cgs.events_used = 0; 1086 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr); 1087 cgs.sbInfo_i = 0; 1088 1089 if (DEBUG_CG) 1090 VG_(printf)("\n\n---------- cg_instrument ----------\n"); 1091 1092 // Traverse the block, initialising inodes, adding events and flushing as 1093 // necessary. 1094 for (/*use current i*/; i < sbIn->stmts_used; i++) { 1095 1096 st = sbIn->stmts[i]; 1097 tl_assert(isFlatIRStmt(st)); 1098 1099 switch (st->tag) { 1100 case Ist_NoOp: 1101 case Ist_AbiHint: 1102 case Ist_Put: 1103 case Ist_PutI: 1104 case Ist_MBE: 1105 break; 1106 1107 case Ist_IMark: 1108 cia = st->Ist.IMark.addr; 1109 isize = st->Ist.IMark.len; 1110 1111 // If Vex fails to decode an instruction, the size will be zero. 1112 // Pretend otherwise. 1113 if (isize == 0) isize = VG_MIN_INSTR_SZB; 1114 1115 // Sanity-check size. 1116 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) 1117 || VG_CLREQ_SZB == isize ); 1118 1119 // Get space for and init the inode, record it as the current one. 1120 // Subsequent Dr/Dw/Dm events from the same instruction will 1121 // also use it. 1122 curr_inode = setup_InstrInfo(&cgs, cia, isize); 1123 1124 addEvent_Ir( &cgs, curr_inode ); 1125 break; 1126 1127 case Ist_WrTmp: { 1128 IRExpr* data = st->Ist.WrTmp.data; 1129 if (data->tag == Iex_Load) { 1130 IRExpr* aexpr = data->Iex.Load.addr; 1131 // Note also, endianness info is ignored. I guess 1132 // that's not interesting. 1133 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty), 1134 aexpr ); 1135 } 1136 break; 1137 } 1138 1139 case Ist_Store: { 1140 IRExpr* data = st->Ist.Store.data; 1141 IRExpr* aexpr = st->Ist.Store.addr; 1142 addEvent_Dw( &cgs, curr_inode, 1143 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr ); 1144 break; 1145 } 1146 1147 case Ist_StoreG: { 1148 IRStoreG* sg = st->Ist.StoreG.details; 1149 IRExpr* data = sg->data; 1150 IRExpr* addr = sg->addr; 1151 IRType type = typeOfIRExpr(tyenv, data); 1152 tl_assert(type != Ity_INVALID); 1153 addEvent_D_guarded( &cgs, curr_inode, 1154 sizeofIRType(type), addr, sg->guard, 1155 True/*isWrite*/ ); 1156 break; 1157 } 1158 1159 case Ist_LoadG: { 1160 IRLoadG* lg = st->Ist.LoadG.details; 1161 IRType type = Ity_INVALID; /* loaded type */ 1162 IRType typeWide = Ity_INVALID; /* after implicit widening */ 1163 IRExpr* addr = lg->addr; 1164 typeOfIRLoadGOp(lg->cvt, &typeWide, &type); 1165 tl_assert(type != Ity_INVALID); 1166 addEvent_D_guarded( &cgs, curr_inode, 1167 sizeofIRType(type), addr, lg->guard, 1168 False/*!isWrite*/ ); 1169 break; 1170 } 1171 1172 case Ist_Dirty: { 1173 Int dataSize; 1174 IRDirty* d = st->Ist.Dirty.details; 1175 if (d->mFx != Ifx_None) { 1176 /* This dirty helper accesses memory. Collect the details. */ 1177 tl_assert(d->mAddr != NULL); 1178 tl_assert(d->mSize != 0); 1179 dataSize = d->mSize; 1180 // Large (eg. 28B, 108B, 512B on x86) data-sized 1181 // instructions will be done inaccurately, but they're 1182 // very rare and this avoids errors from hitting more 1183 // than two cache lines in the simulation. 1184 if (dataSize > min_line_size) 1185 dataSize = min_line_size; 1186 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) 1187 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr ); 1188 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) 1189 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr ); 1190 } else { 1191 tl_assert(d->mAddr == NULL); 1192 tl_assert(d->mSize == 0); 1193 } 1194 break; 1195 } 1196 1197 case Ist_CAS: { 1198 /* We treat it as a read and a write of the location. I 1199 think that is the same behaviour as it was before IRCAS 1200 was introduced, since prior to that point, the Vex 1201 front ends would translate a lock-prefixed instruction 1202 into a (normal) read followed by a (normal) write. */ 1203 Int dataSize; 1204 IRCAS* cas = st->Ist.CAS.details; 1205 tl_assert(cas->addr != NULL); 1206 tl_assert(cas->dataLo != NULL); 1207 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo)); 1208 if (cas->dataHi != NULL) 1209 dataSize *= 2; /* since it's a doubleword-CAS */ 1210 /* I don't think this can ever happen, but play safe. */ 1211 if (dataSize > min_line_size) 1212 dataSize = min_line_size; 1213 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr ); 1214 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr ); 1215 break; 1216 } 1217 1218 case Ist_LLSC: { 1219 IRType dataTy; 1220 if (st->Ist.LLSC.storedata == NULL) { 1221 /* LL */ 1222 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result); 1223 addEvent_Dr( &cgs, curr_inode, 1224 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1225 /* flush events before LL, should help SC to succeed */ 1226 flushEvents( &cgs ); 1227 } else { 1228 /* SC */ 1229 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata); 1230 addEvent_Dw( &cgs, curr_inode, 1231 sizeofIRType(dataTy), st->Ist.LLSC.addr ); 1232 } 1233 break; 1234 } 1235 1236 case Ist_Exit: { 1237 // call branch predictor only if this is a branch in guest code 1238 if ( (st->Ist.Exit.jk == Ijk_Boring) || 1239 (st->Ist.Exit.jk == Ijk_Call) || 1240 (st->Ist.Exit.jk == Ijk_Ret) ) 1241 { 1242 /* Stuff to widen the guard expression to a host word, so 1243 we can pass it to the branch predictor simulation 1244 functions easily. */ 1245 Bool inverted; 1246 Addr nia, sea; 1247 IRConst* dst; 1248 IRType tyW = hWordTy; 1249 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64; 1250 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64; 1251 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1); 1252 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW); 1253 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW); 1254 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1)) 1255 : IRExpr_Const(IRConst_U64(1)); 1256 1257 /* First we need to figure out whether the side exit got 1258 inverted by the ir optimiser. To do that, figure out 1259 the next (fallthrough) instruction's address and the 1260 side exit address and see if they are the same. */ 1261 nia = cia + isize; 1262 1263 /* Side exit address */ 1264 dst = st->Ist.Exit.dst; 1265 if (tyW == Ity_I32) { 1266 tl_assert(dst->tag == Ico_U32); 1267 sea = dst->Ico.U32; 1268 } else { 1269 tl_assert(tyW == Ity_I64); 1270 tl_assert(dst->tag == Ico_U64); 1271 sea = dst->Ico.U64; 1272 } 1273 1274 inverted = nia == sea; 1275 1276 /* Widen the guard expression. */ 1277 addStmtToIRSB( cgs.sbOut, 1278 IRStmt_WrTmp( guard1, st->Ist.Exit.guard )); 1279 addStmtToIRSB( cgs.sbOut, 1280 IRStmt_WrTmp( guardW, 1281 IRExpr_Unop(widen, 1282 IRExpr_RdTmp(guard1))) ); 1283 /* If the exit is inverted, invert the sense of the guard. */ 1284 addStmtToIRSB( 1285 cgs.sbOut, 1286 IRStmt_WrTmp( 1287 guard, 1288 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one) 1289 : IRExpr_RdTmp(guardW) 1290 )); 1291 /* And post the event. */ 1292 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) ); 1293 } 1294 1295 /* We may never reach the next statement, so need to flush 1296 all outstanding transactions now. */ 1297 flushEvents( &cgs ); 1298 break; 1299 } 1300 1301 default: 1302 ppIRStmt(st); 1303 tl_assert(0); 1304 break; 1305 } 1306 1307 /* Copy the original statement */ 1308 addStmtToIRSB( cgs.sbOut, st ); 1309 1310 if (DEBUG_CG) { 1311 ppIRStmt(st); 1312 VG_(printf)("\n"); 1313 } 1314 } 1315 1316 /* Deal with branches to unknown destinations. Except ignore ones 1317 which are function returns as we assume the return stack 1318 predictor never mispredicts. */ 1319 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) { 1320 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); } 1321 switch (sbIn->next->tag) { 1322 case Iex_Const: 1323 break; /* boring - branch to known address */ 1324 case Iex_RdTmp: 1325 /* looks like an indirect branch (branch to unknown) */ 1326 addEvent_Bi( &cgs, curr_inode, sbIn->next ); 1327 break; 1328 default: 1329 /* shouldn't happen - if the incoming IR is properly 1330 flattened, should only have tmp and const cases to 1331 consider. */ 1332 tl_assert(0); 1333 } 1334 } 1335 1336 /* At the end of the bb. Flush outstandings. */ 1337 flushEvents( &cgs ); 1338 1339 /* done. stay sane ... */ 1340 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs); 1341 1342 if (DEBUG_CG) { 1343 VG_(printf)( "goto {"); 1344 ppIRJumpKind(sbIn->jumpkind); 1345 VG_(printf)( "} "); 1346 ppIRExpr( sbIn->next ); 1347 VG_(printf)( "}\n"); 1348 } 1349 1350 return cgs.sbOut; 1351} 1352 1353/*------------------------------------------------------------*/ 1354/*--- Cache configuration ---*/ 1355/*------------------------------------------------------------*/ 1356 1357static cache_t clo_I1_cache = UNDEFINED_CACHE; 1358static cache_t clo_D1_cache = UNDEFINED_CACHE; 1359static cache_t clo_LL_cache = UNDEFINED_CACHE; 1360 1361/*------------------------------------------------------------*/ 1362/*--- cg_fini() and related function ---*/ 1363/*------------------------------------------------------------*/ 1364 1365// Total reads/writes/misses. Calculated during CC traversal at the end. 1366// All auto-zeroed. 1367static CacheCC Ir_total; 1368static CacheCC Dr_total; 1369static CacheCC Dw_total; 1370static BranchCC Bc_total; 1371static BranchCC Bi_total; 1372 1373static void fprint_CC_table_and_calc_totals(void) 1374{ 1375 Int i; 1376 VgFile *fp; 1377 HChar *currFile = NULL; 1378 const HChar *currFn = NULL; 1379 LineCC* lineCC; 1380 1381 // Setup output filename. Nb: it's important to do this now, ie. as late 1382 // as possible. If we do it at start-up and the program forks and the 1383 // output file format string contains a %p (pid) specifier, both the 1384 // parent and child will incorrectly write to the same file; this 1385 // happened in 3.3.0. 1386 HChar* cachegrind_out_file = 1387 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file); 1388 1389 fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY, 1390 VKI_S_IRUSR|VKI_S_IWUSR); 1391 if (fp == NULL) { 1392 // If the file can't be opened for whatever reason (conflict 1393 // between multiple cachegrinded processes?), give up now. 1394 VG_(umsg)("error: can't open cache simulation output file '%s'\n", 1395 cachegrind_out_file ); 1396 VG_(umsg)(" ... so simulation results will be missing.\n"); 1397 VG_(free)(cachegrind_out_file); 1398 return; 1399 } else { 1400 VG_(free)(cachegrind_out_file); 1401 } 1402 1403 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after 1404 // the 2nd colon makes cg_annotate's output look nicer. 1405 VG_(fprintf)(fp, "desc: I1 cache: %s\n" 1406 "desc: D1 cache: %s\n" 1407 "desc: LL cache: %s\n", 1408 I1.desc_line, D1.desc_line, LL.desc_line); 1409 1410 // "cmd:" line 1411 VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename)); 1412 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) { 1413 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i ); 1414 VG_(fprintf)(fp, " %s", arg); 1415 } 1416 // "events:" line 1417 if (clo_cache_sim && clo_branch_sim) { 1418 VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw " 1419 "Bc Bcm Bi Bim\n"); 1420 } 1421 else if (clo_cache_sim && !clo_branch_sim) { 1422 VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw " 1423 "\n"); 1424 } 1425 else if (!clo_cache_sim && clo_branch_sim) { 1426 VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n"); 1427 } 1428 else { 1429 VG_(fprintf)(fp, "\nevents: Ir\n"); 1430 } 1431 1432 // Traverse every lineCC 1433 VG_(OSetGen_ResetIter)(CC_table); 1434 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) { 1435 Bool just_hit_a_new_file = False; 1436 // If we've hit a new file, print a "fl=" line. Note that because 1437 // each string is stored exactly once in the string table, we can use 1438 // pointer comparison rather than strcmp() to test for equality, which 1439 // is good because most of the time the comparisons are equal and so 1440 // the whole strings would have to be checked. 1441 if ( lineCC->loc.file != currFile ) { 1442 currFile = lineCC->loc.file; 1443 VG_(fprintf)(fp, "fl=%s\n", currFile); 1444 distinct_files++; 1445 just_hit_a_new_file = True; 1446 } 1447 // If we've hit a new function, print a "fn=" line. We know to do 1448 // this when the function name changes, and also every time we hit a 1449 // new file (in which case the new function name might be the same as 1450 // in the old file, hence the just_hit_a_new_file test). 1451 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) { 1452 currFn = lineCC->loc.fn; 1453 VG_(fprintf)(fp, "fn=%s\n", currFn); 1454 distinct_fns++; 1455 } 1456 1457 // Print the LineCC 1458 if (clo_cache_sim && clo_branch_sim) { 1459 VG_(fprintf)(fp, "%u %llu %llu %llu" 1460 " %llu %llu %llu" 1461 " %llu %llu %llu" 1462 " %llu %llu %llu %llu\n", 1463 lineCC->loc.line, 1464 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL, 1465 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL, 1466 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL, 1467 lineCC->Bc.b, lineCC->Bc.mp, 1468 lineCC->Bi.b, lineCC->Bi.mp); 1469 } 1470 else if (clo_cache_sim && !clo_branch_sim) { 1471 VG_(fprintf)(fp, "%u %llu %llu %llu" 1472 " %llu %llu %llu" 1473 " %llu %llu %llu\n", 1474 lineCC->loc.line, 1475 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL, 1476 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL, 1477 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL); 1478 } 1479 else if (!clo_cache_sim && clo_branch_sim) { 1480 VG_(fprintf)(fp, "%u %llu" 1481 " %llu %llu %llu %llu\n", 1482 lineCC->loc.line, 1483 lineCC->Ir.a, 1484 lineCC->Bc.b, lineCC->Bc.mp, 1485 lineCC->Bi.b, lineCC->Bi.mp); 1486 } 1487 else { 1488 VG_(fprintf)(fp, "%u %llu\n", 1489 lineCC->loc.line, 1490 lineCC->Ir.a); 1491 } 1492 1493 // Update summary stats 1494 Ir_total.a += lineCC->Ir.a; 1495 Ir_total.m1 += lineCC->Ir.m1; 1496 Ir_total.mL += lineCC->Ir.mL; 1497 Dr_total.a += lineCC->Dr.a; 1498 Dr_total.m1 += lineCC->Dr.m1; 1499 Dr_total.mL += lineCC->Dr.mL; 1500 Dw_total.a += lineCC->Dw.a; 1501 Dw_total.m1 += lineCC->Dw.m1; 1502 Dw_total.mL += lineCC->Dw.mL; 1503 Bc_total.b += lineCC->Bc.b; 1504 Bc_total.mp += lineCC->Bc.mp; 1505 Bi_total.b += lineCC->Bi.b; 1506 Bi_total.mp += lineCC->Bi.mp; 1507 1508 distinct_lines++; 1509 } 1510 1511 // Summary stats must come after rest of table, since we calculate them 1512 // during traversal. */ 1513 if (clo_cache_sim && clo_branch_sim) { 1514 VG_(fprintf)(fp, "summary:" 1515 " %llu %llu %llu" 1516 " %llu %llu %llu" 1517 " %llu %llu %llu" 1518 " %llu %llu %llu %llu\n", 1519 Ir_total.a, Ir_total.m1, Ir_total.mL, 1520 Dr_total.a, Dr_total.m1, Dr_total.mL, 1521 Dw_total.a, Dw_total.m1, Dw_total.mL, 1522 Bc_total.b, Bc_total.mp, 1523 Bi_total.b, Bi_total.mp); 1524 } 1525 else if (clo_cache_sim && !clo_branch_sim) { 1526 VG_(fprintf)(fp, "summary:" 1527 " %llu %llu %llu" 1528 " %llu %llu %llu" 1529 " %llu %llu %llu\n", 1530 Ir_total.a, Ir_total.m1, Ir_total.mL, 1531 Dr_total.a, Dr_total.m1, Dr_total.mL, 1532 Dw_total.a, Dw_total.m1, Dw_total.mL); 1533 } 1534 else if (!clo_cache_sim && clo_branch_sim) { 1535 VG_(fprintf)(fp, "summary:" 1536 " %llu" 1537 " %llu %llu %llu %llu\n", 1538 Ir_total.a, 1539 Bc_total.b, Bc_total.mp, 1540 Bi_total.b, Bi_total.mp); 1541 } 1542 else { 1543 VG_(fprintf)(fp, "summary:" 1544 " %llu\n", 1545 Ir_total.a); 1546 } 1547 1548 VG_(fclose)(fp); 1549} 1550 1551static UInt ULong_width(ULong n) 1552{ 1553 UInt w = 0; 1554 while (n > 0) { 1555 n = n / 10; 1556 w++; 1557 } 1558 if (w == 0) w = 1; 1559 return w + (w-1)/3; // add space for commas 1560} 1561 1562static void cg_fini(Int exitcode) 1563{ 1564 static HChar fmt[128]; // OK; large enough 1565 1566 CacheCC D_total; 1567 BranchCC B_total; 1568 ULong LL_total_m, LL_total_mr, LL_total_mw, 1569 LL_total, LL_total_r, LL_total_w; 1570 Int l1, l2, l3; 1571 1572 fprint_CC_table_and_calc_totals(); 1573 1574 if (VG_(clo_verbosity) == 0) 1575 return; 1576 1577 // Nb: this isn't called "MAX" because that overshadows a global on Darwin. 1578 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b)) 1579 1580 /* I cache results. Use the I_refs value to determine the first column 1581 * width. */ 1582 l1 = ULong_width(Ir_total.a); 1583 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b)); 1584 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b)); 1585 1586 /* Make format string, getting width right for numbers */ 1587 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1); 1588 1589 /* Always print this */ 1590 VG_(umsg)(fmt, "I refs: ", Ir_total.a); 1591 1592 /* If cache profiling is enabled, show D access numbers and all 1593 miss numbers */ 1594 if (clo_cache_sim) { 1595 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1); 1596 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL); 1597 1598 if (0 == Ir_total.a) Ir_total.a = 1; 1599 VG_(umsg)("I1 miss rate: %*.2f%%\n", l1, 1600 Ir_total.m1 * 100.0 / Ir_total.a); 1601 VG_(umsg)("LLi miss rate: %*.2f%%\n", l1, 1602 Ir_total.mL * 100.0 / Ir_total.a); 1603 VG_(umsg)("\n"); 1604 1605 /* D cache results. Use the D_refs.rd and D_refs.wr values to 1606 * determine the width of columns 2 & 3. */ 1607 D_total.a = Dr_total.a + Dw_total.a; 1608 D_total.m1 = Dr_total.m1 + Dw_total.m1; 1609 D_total.mL = Dr_total.mL + Dw_total.mL; 1610 1611 /* Make format string, getting width right for numbers */ 1612 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n", 1613 l1, l2, l3); 1614 1615 VG_(umsg)(fmt, "D refs: ", 1616 D_total.a, Dr_total.a, Dw_total.a); 1617 VG_(umsg)(fmt, "D1 misses: ", 1618 D_total.m1, Dr_total.m1, Dw_total.m1); 1619 VG_(umsg)(fmt, "LLd misses: ", 1620 D_total.mL, Dr_total.mL, Dw_total.mL); 1621 1622 if (0 == D_total.a) D_total.a = 1; 1623 if (0 == Dr_total.a) Dr_total.a = 1; 1624 if (0 == Dw_total.a) Dw_total.a = 1; 1625 VG_(umsg)("D1 miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n", 1626 l1, D_total.m1 * 100.0 / D_total.a, 1627 l2, Dr_total.m1 * 100.0 / Dr_total.a, 1628 l3, Dw_total.m1 * 100.0 / Dw_total.a); 1629 VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n", 1630 l1, D_total.mL * 100.0 / D_total.a, 1631 l2, Dr_total.mL * 100.0 / Dr_total.a, 1632 l3, Dw_total.mL * 100.0 / Dw_total.a); 1633 VG_(umsg)("\n"); 1634 1635 /* LL overall results */ 1636 1637 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1; 1638 LL_total_r = Dr_total.m1 + Ir_total.m1; 1639 LL_total_w = Dw_total.m1; 1640 VG_(umsg)(fmt, "LL refs: ", 1641 LL_total, LL_total_r, LL_total_w); 1642 1643 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL; 1644 LL_total_mr = Dr_total.mL + Ir_total.mL; 1645 LL_total_mw = Dw_total.mL; 1646 VG_(umsg)(fmt, "LL misses: ", 1647 LL_total_m, LL_total_mr, LL_total_mw); 1648 1649 VG_(umsg)("LL miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n", 1650 l1, LL_total_m * 100.0 / (Ir_total.a + D_total.a), 1651 l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a), 1652 l3, LL_total_mw * 100.0 / Dw_total.a); 1653 } 1654 1655 /* If branch profiling is enabled, show branch overall results. */ 1656 if (clo_branch_sim) { 1657 /* Make format string, getting width right for numbers */ 1658 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n", 1659 l1, l2, l3); 1660 1661 if (0 == Bc_total.b) Bc_total.b = 1; 1662 if (0 == Bi_total.b) Bi_total.b = 1; 1663 B_total.b = Bc_total.b + Bi_total.b; 1664 B_total.mp = Bc_total.mp + Bi_total.mp; 1665 1666 VG_(umsg)("\n"); 1667 VG_(umsg)(fmt, "Branches: ", 1668 B_total.b, Bc_total.b, Bi_total.b); 1669 1670 VG_(umsg)(fmt, "Mispredicts: ", 1671 B_total.mp, Bc_total.mp, Bi_total.mp); 1672 1673 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n", 1674 l1, B_total.mp * 100.0 / B_total.b, 1675 l2, Bc_total.mp * 100.0 / Bc_total.b, 1676 l3, Bi_total.mp * 100.0 / Bi_total.b); 1677 } 1678 1679 // Various stats 1680 if (VG_(clo_stats)) { 1681 Int debug_lookups = full_debugs + fn_debugs + 1682 file_line_debugs + no_debugs; 1683 1684 VG_(dmsg)("\n"); 1685 VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files); 1686 VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns); 1687 VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines); 1688 VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX); 1689 VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen); 1690 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups); 1691 1692 VG_(dmsg)("cachegrind: with full info:%6.1f%% (%d)\n", 1693 full_debugs * 100.0 / debug_lookups, full_debugs); 1694 VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n", 1695 file_line_debugs * 100.0 / debug_lookups, file_line_debugs); 1696 VG_(dmsg)("cachegrind: with fn name info:%6.1f%% (%d)\n", 1697 fn_debugs * 100.0 / debug_lookups, fn_debugs); 1698 VG_(dmsg)("cachegrind: with zero info:%6.1f%% (%d)\n", 1699 no_debugs * 100.0 / debug_lookups, no_debugs); 1700 1701 VG_(dmsg)("cachegrind: string table size: %lu\n", 1702 VG_(OSetGen_Size)(stringTable)); 1703 VG_(dmsg)("cachegrind: CC table size: %lu\n", 1704 VG_(OSetGen_Size)(CC_table)); 1705 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n", 1706 VG_(OSetGen_Size)(instrInfoTable)); 1707 } 1708} 1709 1710/*--------------------------------------------------------------------*/ 1711/*--- Discarding BB info ---*/ 1712/*--------------------------------------------------------------------*/ 1713 1714// Called when a translation is removed from the translation cache for 1715// any reason at all: to free up space, because the guest code was 1716// unmapped or modified, or for any arbitrary reason. 1717static 1718void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge ) 1719{ 1720 SB_info* sbInfo; 1721 Addr orig_addr = vge.base[0]; 1722 1723 tl_assert(vge.n_used > 0); 1724 1725 if (DEBUG_CG) 1726 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n", 1727 (void*)orig_addr, 1728 (void*)vge.base[0], (ULong)vge.len[0]); 1729 1730 // Get BB info, remove from table, free BB info. Simple! Note that we 1731 // use orig_addr, not the first instruction address in vge. 1732 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr); 1733 tl_assert(NULL != sbInfo); 1734 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo); 1735} 1736 1737/*--------------------------------------------------------------------*/ 1738/*--- Command line processing ---*/ 1739/*--------------------------------------------------------------------*/ 1740 1741static Bool cg_process_cmd_line_option(const HChar* arg) 1742{ 1743 if (VG_(str_clo_cache_opt)(arg, 1744 &clo_I1_cache, 1745 &clo_D1_cache, 1746 &clo_LL_cache)) {} 1747 1748 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {} 1749 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {} 1750 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {} 1751 else 1752 return False; 1753 1754 return True; 1755} 1756 1757static void cg_print_usage(void) 1758{ 1759 VG_(print_cache_clo_opts)(); 1760 VG_(printf)( 1761" --cache-sim=yes|no [yes] collect cache stats?\n" 1762" --branch-sim=yes|no [no] collect branch prediction stats?\n" 1763" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n" 1764 ); 1765} 1766 1767static void cg_print_debug_usage(void) 1768{ 1769 VG_(printf)( 1770" (none)\n" 1771 ); 1772} 1773 1774/*--------------------------------------------------------------------*/ 1775/*--- Setup ---*/ 1776/*--------------------------------------------------------------------*/ 1777 1778static void cg_post_clo_init(void); /* just below */ 1779 1780static void cg_pre_clo_init(void) 1781{ 1782 VG_(details_name) ("Cachegrind"); 1783 VG_(details_version) (NULL); 1784 VG_(details_description) ("a cache and branch-prediction profiler"); 1785 VG_(details_copyright_author)( 1786 "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al."); 1787 VG_(details_bug_reports_to) (VG_BUGS_TO); 1788 VG_(details_avg_translation_sizeB) ( 500 ); 1789 1790 VG_(clo_vex_control).iropt_register_updates_default 1791 = VG_(clo_px_file_backed) 1792 = VexRegUpdSpAtMemAccess; // overridable by the user. 1793 1794 VG_(basic_tool_funcs) (cg_post_clo_init, 1795 cg_instrument, 1796 cg_fini); 1797 1798 VG_(needs_superblock_discards)(cg_discard_superblock_info); 1799 VG_(needs_command_line_options)(cg_process_cmd_line_option, 1800 cg_print_usage, 1801 cg_print_debug_usage); 1802} 1803 1804static void cg_post_clo_init(void) 1805{ 1806 cache_t I1c, D1c, LLc; 1807 1808 CC_table = 1809 VG_(OSetGen_Create)(offsetof(LineCC, loc), 1810 cmp_CodeLoc_LineCC, 1811 VG_(malloc), "cg.main.cpci.1", 1812 VG_(free)); 1813 instrInfoTable = 1814 VG_(OSetGen_Create)(/*keyOff*/0, 1815 NULL, 1816 VG_(malloc), "cg.main.cpci.2", 1817 VG_(free)); 1818 stringTable = 1819 VG_(OSetGen_Create)(/*keyOff*/0, 1820 stringCmp, 1821 VG_(malloc), "cg.main.cpci.3", 1822 VG_(free)); 1823 1824 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc, 1825 &clo_I1_cache, 1826 &clo_D1_cache, 1827 &clo_LL_cache); 1828 1829 // min_line_size is used to make sure that we never feed 1830 // accesses to the simulator straddling more than two 1831 // cache lines at any cache level 1832 min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size; 1833 min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size; 1834 1835 Int largest_load_or_store_size 1836 = VG_(machine_get_size_of_largest_guest_register)(); 1837 if (min_line_size < largest_load_or_store_size) { 1838 /* We can't continue, because the cache simulation might 1839 straddle more than 2 lines, and it will assert. So let's 1840 just stop before we start. */ 1841 VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n", 1842 (Int)min_line_size); 1843 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n", 1844 largest_load_or_store_size ); 1845 VG_(umsg)(" but it is not. Exiting now.\n"); 1846 VG_(exit)(1); 1847 } 1848 1849 cachesim_initcaches(I1c, D1c, LLc); 1850} 1851 1852VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init) 1853 1854/*--------------------------------------------------------------------*/ 1855/*--- end ---*/ 1856/*--------------------------------------------------------------------*/ 1857 1858