1/*--------------------------------------------------------------------*/ 2/*--- Cache simulation. ---*/ 3/*--- sim.c ---*/ 4/*--------------------------------------------------------------------*/ 5 6/* 7 This file is part of Callgrind, a Valgrind tool for call graph 8 profiling programs. 9 10 Copyright (C) 2003-2013, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) 11 12 This tool is derived from and contains code from Cachegrind 13 Copyright (C) 2002-2013 Nicholas Nethercote (njn@valgrind.org) 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31*/ 32 33#include "global.h" 34 35 36/* Notes: 37 - simulates a write-allocate cache 38 - (block --> set) hash function uses simple bit selection 39 - handling of references straddling two cache blocks: 40 - counts as only one cache access (not two) 41 - both blocks hit --> one hit 42 - one block hits, the other misses --> one miss 43 - both blocks miss --> one miss (not two) 44*/ 45 46/* Cache configuration */ 47#include "cg_arch.c" 48 49/* additional structures for cache use info, separated 50 * according usage frequency: 51 * - line_loaded : pointer to cost center of instruction 52 * which loaded the line into cache. 53 * Needed to increment counters when line is evicted. 
 *  - line_use    : updated on every access
 */

/* Usage record of one cache line (cache use simulation only). */
typedef struct {
  UInt count;  /* accesses seen since the line was installed */
  UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */
} line_use;

/* Bookkeeping about who loaded a cache line (cache use simulation only). */
typedef struct {
  Addr memline, iaddr;  /* line address, and address of the loading instruction */
  line_use* dep_use; /* point to higher-level cacheblock for this memline */
  ULong* use_base;      /* cost array credited when the line is evicted */
} line_loaded;

/* Cache state */
typedef struct {
   const HChar* name;
   int          size;                   /* bytes */
   int          assoc;
   int          line_size;              /* bytes */
   Bool         sectored;  /* prefetch nearside cacheline on read */
   int          sets;                   /* (size / line_size) / assoc */
   int          sets_min_1;             /* sets - 1, used as set index mask */
   int          line_size_bits;         /* log2(line_size) */
   int          tag_shift;              /* line_size_bits + log2(sets) */
   UWord        tag_mask;               /* clears the low tag_shift bits */
   HChar        desc_line[128];         /* human readable configuration */
   UWord*       tags;                   /* sets * assoc entries, MRU first per set */

   /* for cache use */
   int          line_size_mask;         /* line_size - 1 */
   int*         line_start_mask;        /* per byte offset: bits at/after offset */
   int*         line_end_mask;          /* per byte offset: bits at/before offset */
   line_loaded* loaded;                 /* sets * assoc entries */
   line_use*    use;                    /* sets * assoc entries; 0 if unused */
} cache_t2;

/*
 * States of flat caches in our model.
 * We use a 2-level hierarchy: split first level (I1/D1), unified LL.
 */
static cache_t2 I1, D1, LL;

/* Lower bits of cache tags are used as flags for a cache line */
#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
#define CACHELINE_DIRTY    1


/* Cache simulator Options */
static Bool clo_simulate_writeback = False;
static Bool clo_simulate_hwpref = False;
static Bool clo_simulate_sectors = False;
static Bool clo_collect_cacheuse = False;

/* The following global variables are set up beforehand by setup_bbcc():
 *
 * - Addr   CLG_(bb_base)     (instruction start address of original BB)
 * - ULong* CLG_(cost_base)   (start of cost array for BB)
 */

Addr   CLG_(bb_base);
ULong* CLG_(cost_base);

/* Instruction currently being simulated; read by the cache use code. */
static InstrInfo* current_ii;

/* Cache use offsets */
/* The offsets are only correct because all per-instruction event sets get
 * the "Use" set added first !
121 */ 122static Int off_I1_AcCost = 0; 123static Int off_I1_SpLoss = 1; 124static Int off_D1_AcCost = 0; 125static Int off_D1_SpLoss = 1; 126static Int off_LL_AcCost = 2; 127static Int off_LL_SpLoss = 3; 128 129/* Cache access types */ 130typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType; 131 132/* Result of a reference into a flat cache */ 133typedef enum { Hit = 0, Miss, MissDirty } CacheResult; 134 135/* Result of a reference into a hierarchical cache model */ 136typedef enum { 137 L1_Hit, 138 LL_Hit, 139 MemAccess, 140 WriteBackMemAccess } CacheModelResult; 141 142typedef CacheModelResult (*simcall_type)(Addr, UChar); 143 144static struct { 145 simcall_type I1_Read; 146 simcall_type D1_Read; 147 simcall_type D1_Write; 148} simulator; 149 150/*------------------------------------------------------------*/ 151/*--- Cache Simulator Initialization ---*/ 152/*------------------------------------------------------------*/ 153 154static void cachesim_clearcache(cache_t2* c) 155{ 156 Int i; 157 158 for (i = 0; i < c->sets * c->assoc; i++) 159 c->tags[i] = 0; 160 if (c->use) { 161 for (i = 0; i < c->sets * c->assoc; i++) { 162 c->loaded[i].memline = 0; 163 c->loaded[i].use_base = 0; 164 c->loaded[i].dep_use = 0; 165 c->loaded[i].iaddr = 0; 166 c->use[i].mask = 0; 167 c->use[i].count = 0; 168 c->tags[i] = i % c->assoc; /* init lower bits as pointer */ 169 } 170 } 171} 172 173static void cacheuse_initcache(cache_t2* c); 174 175/* By this point, the size/assoc/line_size has been checked. 
*/ 176static void cachesim_initcache(cache_t config, cache_t2* c) 177{ 178 c->size = config.size; 179 c->assoc = config.assoc; 180 c->line_size = config.line_size; 181 c->sectored = False; // FIXME 182 183 c->sets = (c->size / c->line_size) / c->assoc; 184 c->sets_min_1 = c->sets - 1; 185 c->line_size_bits = VG_(log2)(c->line_size); 186 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); 187 c->tag_mask = ~((1<<c->tag_shift)-1); 188 189 /* Can bits in tag entries be used for flags? 190 * Should be always true as MIN_LINE_SIZE >= 16 */ 191 CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0); 192 193 if (c->assoc == 1) { 194 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s", 195 c->size, c->line_size, 196 c->sectored ? ", sectored":""); 197 } else { 198 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s", 199 c->size, c->line_size, c->assoc, 200 c->sectored ? ", sectored":""); 201 } 202 203 c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1", 204 sizeof(UWord) * c->sets * c->assoc); 205 if (clo_collect_cacheuse) 206 cacheuse_initcache(c); 207 else 208 c->use = 0; 209 cachesim_clearcache(c); 210} 211 212 213#if 0 214static void print_cache(cache_t2* c) 215{ 216 UInt set, way, i; 217 218 /* Note initialisation and update of 'i'. */ 219 for (i = 0, set = 0; set < c->sets; set++) { 220 for (way = 0; way < c->assoc; way++, i++) { 221 VG_(printf)("%8x ", c->tags[i]); 222 } 223 VG_(printf)("\n"); 224 } 225} 226#endif 227 228 229/*------------------------------------------------------------*/ 230/*--- Simple Cache Simulation ---*/ 231/*------------------------------------------------------------*/ 232 233/* 234 * Model: single inclusive, 2-level cache hierarchy (L1/LL) 235 * with write-allocate 236 * 237 * For simple cache hit/miss counts, we do not have to 238 * maintain the dirty state of lines (no need to distinguish 239 * read/write references), and the resulting counts are the 240 * same for write-through and write-back caches. 
241 * 242 * Simulator functions: 243 * CacheModelResult cachesim_I1_ref(Addr a, UChar size) 244 * CacheModelResult cachesim_D1_ref(Addr a, UChar size) 245 */ 246__attribute__((always_inline)) 247static __inline__ 248CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag) 249{ 250 int i, j; 251 UWord *set; 252 253 set = &(c->tags[set_no * c->assoc]); 254 255 /* This loop is unrolled for just the first case, which is the most */ 256 /* common. We can't unroll any further because it would screw up */ 257 /* if we have a direct-mapped (1-way) cache. */ 258 if (tag == set[0]) 259 return Hit; 260 261 /* If the tag is one other than the MRU, move it into the MRU spot */ 262 /* and shuffle the rest down. */ 263 for (i = 1; i < c->assoc; i++) { 264 if (tag == set[i]) { 265 for (j = i; j > 0; j--) { 266 set[j] = set[j - 1]; 267 } 268 set[0] = tag; 269 return Hit; 270 } 271 } 272 273 /* A miss; install this tag as MRU, shuffle rest down. */ 274 for (j = c->assoc - 1; j > 0; j--) { 275 set[j] = set[j - 1]; 276 } 277 set[0] = tag; 278 279 return Miss; 280} 281 282__attribute__((always_inline)) 283static __inline__ 284CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size) 285{ 286 UWord block1 = a >> c->line_size_bits; 287 UWord block2 = (a+size-1) >> c->line_size_bits; 288 UInt set1 = block1 & c->sets_min_1; 289 /* the tag does not need to include bits specifying the set, 290 * but it can, and this saves instructions */ 291 UWord tag1 = block1; 292 293 /* Access entirely within line. */ 294 if (block1 == block2) 295 return cachesim_setref(c, set1, tag1); 296 297 /* Access straddles two lines. */ 298 else if (block1 + 1 == block2) { 299 UInt set2 = block2 & c->sets_min_1; 300 UWord tag2 = block2; 301 302 /* the call updates cache structures as side effect */ 303 CacheResult res1 = cachesim_setref(c, set1, tag1); 304 CacheResult res2 = cachesim_setref(c, set2, tag2); 305 return ((res1 == Miss) || (res2 == Miss)) ? 
Miss : Hit; 306 307 } else { 308 VG_(printf)("addr: %lx size: %u blocks: %ld %ld", 309 a, size, block1, block2); 310 VG_(tool_panic)("item straddles more than two cache sets"); 311 } 312 return Hit; 313} 314 315static 316CacheModelResult cachesim_I1_ref(Addr a, UChar size) 317{ 318 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit; 319 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit; 320 return MemAccess; 321} 322 323static 324CacheModelResult cachesim_D1_ref(Addr a, UChar size) 325{ 326 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit; 327 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit; 328 return MemAccess; 329} 330 331 332/*------------------------------------------------------------*/ 333/*--- Write Back Cache Simulation ---*/ 334/*------------------------------------------------------------*/ 335 336/* 337 * More complex model: L1 Write-through, LL Write-back 338 * This needs to distinguish among read and write references. 339 * 340 * Simulator functions: 341 * CacheModelResult cachesim_I1_Read(Addr a, UChar size) 342 * CacheModelResult cachesim_D1_Read(Addr a, UChar size) 343 * CacheModelResult cachesim_D1_Write(Addr a, UChar size) 344 */ 345 346/* 347 * With write-back, result can be a miss evicting a dirty line 348 * The dirty state of a cache line is stored in Bit0 of the tag for 349 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference 350 * type (Read/Write), the line gets dirty on a write. 351 */ 352__attribute__((always_inline)) 353static __inline__ 354CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag) 355{ 356 int i, j; 357 UWord *set, tmp_tag; 358 359 set = &(c->tags[set_no * c->assoc]); 360 361 /* This loop is unrolled for just the first case, which is the most */ 362 /* common. We can't unroll any further because it would screw up */ 363 /* if we have a direct-mapped (1-way) cache. 
*/ 364 if (tag == (set[0] & ~CACHELINE_DIRTY)) { 365 set[0] |= ref; 366 return Hit; 367 } 368 /* If the tag is one other than the MRU, move it into the MRU spot */ 369 /* and shuffle the rest down. */ 370 for (i = 1; i < c->assoc; i++) { 371 if (tag == (set[i] & ~CACHELINE_DIRTY)) { 372 tmp_tag = set[i] | ref; // update dirty flag 373 for (j = i; j > 0; j--) { 374 set[j] = set[j - 1]; 375 } 376 set[0] = tmp_tag; 377 return Hit; 378 } 379 } 380 381 /* A miss; install this tag as MRU, shuffle rest down. */ 382 tmp_tag = set[c->assoc - 1]; 383 for (j = c->assoc - 1; j > 0; j--) { 384 set[j] = set[j - 1]; 385 } 386 set[0] = tag | ref; 387 388 return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss; 389} 390 391__attribute__((always_inline)) 392static __inline__ 393CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size) 394{ 395 UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1); 396 UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1); 397 UWord tag = a & c->tag_mask; 398 399 /* Access entirely within line. */ 400 if (set1 == set2) 401 return cachesim_setref_wb(c, ref, set1, tag); 402 403 /* Access straddles two lines. */ 404 /* Nb: this is a fast way of doing ((set1+1) % c->sets) */ 405 else if (((set1 + 1) & (c->sets_min_1)) == set2) { 406 UWord tag2 = (a+size-1) & c->tag_mask; 407 408 /* the call updates cache structures as side effect */ 409 CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag); 410 CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2); 411 412 if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty; 413 return ((res1 == Miss) || (res2 == Miss)) ? 
Miss : Hit; 414 415 } else { 416 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2); 417 VG_(tool_panic)("item straddles more than two cache sets"); 418 } 419 return Hit; 420} 421 422 423static 424CacheModelResult cachesim_I1_Read(Addr a, UChar size) 425{ 426 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit; 427 switch( cachesim_ref_wb( &LL, Read, a, size) ) { 428 case Hit: return LL_Hit; 429 case Miss: return MemAccess; 430 default: break; 431 } 432 return WriteBackMemAccess; 433} 434 435static 436CacheModelResult cachesim_D1_Read(Addr a, UChar size) 437{ 438 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit; 439 switch( cachesim_ref_wb( &LL, Read, a, size) ) { 440 case Hit: return LL_Hit; 441 case Miss: return MemAccess; 442 default: break; 443 } 444 return WriteBackMemAccess; 445} 446 447static 448CacheModelResult cachesim_D1_Write(Addr a, UChar size) 449{ 450 if ( cachesim_ref( &D1, a, size) == Hit ) { 451 /* Even for a L1 hit, the write-trough L1 passes 452 * the write to the LL to make the LL line dirty. 453 * But this causes no latency, so return the hit. 
454 */ 455 cachesim_ref_wb( &LL, Write, a, size); 456 return L1_Hit; 457 } 458 switch( cachesim_ref_wb( &LL, Write, a, size) ) { 459 case Hit: return LL_Hit; 460 case Miss: return MemAccess; 461 default: break; 462 } 463 return WriteBackMemAccess; 464} 465 466 467/*------------------------------------------------------------*/ 468/*--- Hardware Prefetch Simulation ---*/ 469/*------------------------------------------------------------*/ 470 471static ULong prefetch_up = 0; 472static ULong prefetch_down = 0; 473 474#define PF_STREAMS 8 475#define PF_PAGEBITS 12 476 477static UInt pf_lastblock[PF_STREAMS]; 478static Int pf_seqblocks[PF_STREAMS]; 479 480static 481void prefetch_clear(void) 482{ 483 int i; 484 for(i=0;i<PF_STREAMS;i++) 485 pf_lastblock[i] = pf_seqblocks[i] = 0; 486} 487 488/* 489 * HW Prefetch emulation 490 * Start prefetching when detecting sequential access to 3 memory blocks. 491 * One stream can be detected per 4k page. 492 */ 493static __inline__ 494void prefetch_LL_doref(Addr a) 495{ 496 UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS; 497 UInt block = ( a >> LL.line_size_bits); 498 499 if (block != pf_lastblock[stream]) { 500 if (pf_seqblocks[stream] == 0) { 501 if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++; 502 else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--; 503 } 504 else if (pf_seqblocks[stream] >0) { 505 if (pf_lastblock[stream] +1 == block) { 506 pf_seqblocks[stream]++; 507 if (pf_seqblocks[stream] >= 2) { 508 prefetch_up++; 509 cachesim_ref(&LL, a + 5 * LL.line_size,1); 510 } 511 } 512 else pf_seqblocks[stream] = 0; 513 } 514 else if (pf_seqblocks[stream] <0) { 515 if (pf_lastblock[stream] -1 == block) { 516 pf_seqblocks[stream]--; 517 if (pf_seqblocks[stream] <= -2) { 518 prefetch_down++; 519 cachesim_ref(&LL, a - 5 * LL.line_size,1); 520 } 521 } 522 else pf_seqblocks[stream] = 0; 523 } 524 pf_lastblock[stream] = block; 525 } 526} 527 528/* simple model with hardware prefetch */ 529 530static 
531CacheModelResult prefetch_I1_ref(Addr a, UChar size) 532{ 533 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit; 534 prefetch_LL_doref(a); 535 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit; 536 return MemAccess; 537} 538 539static 540CacheModelResult prefetch_D1_ref(Addr a, UChar size) 541{ 542 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit; 543 prefetch_LL_doref(a); 544 if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit; 545 return MemAccess; 546} 547 548 549/* complex model with hardware prefetch */ 550 551static 552CacheModelResult prefetch_I1_Read(Addr a, UChar size) 553{ 554 if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit; 555 prefetch_LL_doref(a); 556 switch( cachesim_ref_wb( &LL, Read, a, size) ) { 557 case Hit: return LL_Hit; 558 case Miss: return MemAccess; 559 default: break; 560 } 561 return WriteBackMemAccess; 562} 563 564static 565CacheModelResult prefetch_D1_Read(Addr a, UChar size) 566{ 567 if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit; 568 prefetch_LL_doref(a); 569 switch( cachesim_ref_wb( &LL, Read, a, size) ) { 570 case Hit: return LL_Hit; 571 case Miss: return MemAccess; 572 default: break; 573 } 574 return WriteBackMemAccess; 575} 576 577static 578CacheModelResult prefetch_D1_Write(Addr a, UChar size) 579{ 580 prefetch_LL_doref(a); 581 if ( cachesim_ref( &D1, a, size) == Hit ) { 582 /* Even for a L1 hit, the write-trough L1 passes 583 * the write to the LL to make the LL line dirty. 584 * But this causes no latency, so return the hit. 
585 */ 586 cachesim_ref_wb( &LL, Write, a, size); 587 return L1_Hit; 588 } 589 switch( cachesim_ref_wb( &LL, Write, a, size) ) { 590 case Hit: return LL_Hit; 591 case Miss: return MemAccess; 592 default: break; 593 } 594 return WriteBackMemAccess; 595} 596 597 598/*------------------------------------------------------------*/ 599/*--- Cache Simulation with use metric collection ---*/ 600/*------------------------------------------------------------*/ 601 602/* can not be combined with write-back or prefetch */ 603 604static 605void cacheuse_initcache(cache_t2* c) 606{ 607 int i; 608 unsigned int start_mask, start_val; 609 unsigned int end_mask, end_val; 610 611 c->use = CLG_MALLOC("cl.sim.cu_ic.1", 612 sizeof(line_use) * c->sets * c->assoc); 613 c->loaded = CLG_MALLOC("cl.sim.cu_ic.2", 614 sizeof(line_loaded) * c->sets * c->assoc); 615 c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3", 616 sizeof(int) * c->line_size); 617 c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4", 618 sizeof(int) * c->line_size); 619 620 c->line_size_mask = c->line_size-1; 621 622 /* Meaning of line_start_mask/line_end_mask 623 * Example: for a given cache line, you get an access starting at 624 * byte offset 5, length 4, byte 5 - 8 was touched. 
For a cache 625 * line size of 32, you have 1 bit per byte in the mask: 626 * 627 * bit31 bit8 bit5 bit 0 628 * | | | | 629 * 11..111111100000 line_start_mask[5] 630 * 00..000111111111 line_end_mask[(5+4)-1] 631 * 632 * use_mask |= line_start_mask[5] && line_end_mask[8] 633 * 634 */ 635 start_val = end_val = ~0; 636 if (c->line_size < 32) { 637 int bits_per_byte = 32/c->line_size; 638 start_mask = (1<<bits_per_byte)-1; 639 end_mask = start_mask << (32-bits_per_byte); 640 for(i=0;i<c->line_size;i++) { 641 c->line_start_mask[i] = start_val; 642 start_val = start_val & ~start_mask; 643 start_mask = start_mask << bits_per_byte; 644 645 c->line_end_mask[c->line_size-i-1] = end_val; 646 end_val = end_val & ~end_mask; 647 end_mask = end_mask >> bits_per_byte; 648 } 649 } 650 else { 651 int bytes_per_bit = c->line_size/32; 652 start_mask = 1; 653 end_mask = 1 << 31; 654 for(i=0;i<c->line_size;i++) { 655 c->line_start_mask[i] = start_val; 656 c->line_end_mask[c->line_size-i-1] = end_val; 657 if ( ((i+1)%bytes_per_bit) == 0) { 658 start_val &= ~start_mask; 659 end_val &= ~end_mask; 660 start_mask <<= 1; 661 end_mask >>= 1; 662 } 663 } 664 } 665 666 CLG_DEBUG(6, "Config %s:\n", c->desc_line); 667 for(i=0;i<c->line_size;i++) { 668 CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n", 669 i, c->line_start_mask[i], c->line_end_mask[i]); 670 } 671 672 /* We use lower tag bits as offset pointers to cache use info. 673 * I.e. some cache parameters don't work. 
674 */ 675 if ( (1<<c->tag_shift) < c->assoc) { 676 VG_(message)(Vg_DebugMsg, 677 "error: Use associativity < %d for cache use statistics!\n", 678 (1<<c->tag_shift) ); 679 VG_(tool_panic)("Unsupported cache configuration"); 680 } 681} 682 683 684/* for I1/D1 caches */ 685#define CACHEUSE(L) \ 686 \ 687static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \ 688{ \ 689 UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ 690 UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ 691 UWord tag = a & L.tag_mask; \ 692 UWord tag2; \ 693 int i, j, idx; \ 694 UWord *set, tmp_tag; \ 695 UInt use_mask; \ 696 \ 697 CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%d/%d]\n", \ 698 L.name, a, size, set1, set2); \ 699 \ 700 /* First case: word entirely within line. */ \ 701 if (set1 == set2) { \ 702 \ 703 set = &(L.tags[set1 * L.assoc]); \ 704 use_mask = L.line_start_mask[a & L.line_size_mask] & \ 705 L.line_end_mask[(a+size-1) & L.line_size_mask]; \ 706 \ 707 /* This loop is unrolled for just the first case, which is the most */\ 708 /* common. We can't unroll any further because it would screw up */\ 709 /* if we have a direct-mapped (1-way) cache. */\ 710 if (tag == (set[0] & L.tag_mask)) { \ 711 idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \ 712 L.use[idx].count ++; \ 713 L.use[idx].mask |= use_mask; \ 714 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\ 715 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ 716 use_mask, L.use[idx].mask, L.use[idx].count); \ 717 return L1_Hit; \ 718 } \ 719 /* If the tag is one other than the MRU, move it into the MRU spot */\ 720 /* and shuffle the rest down. 
*/\ 721 for (i = 1; i < L.assoc; i++) { \ 722 if (tag == (set[i] & L.tag_mask)) { \ 723 tmp_tag = set[i]; \ 724 for (j = i; j > 0; j--) { \ 725 set[j] = set[j - 1]; \ 726 } \ 727 set[0] = tmp_tag; \ 728 idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \ 729 L.use[idx].count ++; \ 730 L.use[idx].mask |= use_mask; \ 731 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\ 732 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ 733 use_mask, L.use[idx].mask, L.use[idx].count); \ 734 return L1_Hit; \ 735 } \ 736 } \ 737 \ 738 /* A miss; install this tag as MRU, shuffle rest down. */ \ 739 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \ 740 for (j = L.assoc - 1; j > 0; j--) { \ 741 set[j] = set[j - 1]; \ 742 } \ 743 set[0] = tag | tmp_tag; \ 744 idx = (set1 * L.assoc) + tmp_tag; \ 745 return update_##L##_use(&L, idx, \ 746 use_mask, a &~ L.line_size_mask); \ 747 \ 748 /* Second case: word straddles two lines. */ \ 749 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ 750 } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \ 751 Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:LL miss */ \ 752 set = &(L.tags[set1 * L.assoc]); \ 753 use_mask = L.line_start_mask[a & L.line_size_mask]; \ 754 if (tag == (set[0] & L.tag_mask)) { \ 755 idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \ 756 L.use[idx].count ++; \ 757 L.use[idx].mask |= use_mask; \ 758 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\ 759 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ 760 use_mask, L.use[idx].mask, L.use[idx].count); \ 761 goto block2; \ 762 } \ 763 for (i = 1; i < L.assoc; i++) { \ 764 if (tag == (set[i] & L.tag_mask)) { \ 765 tmp_tag = set[i]; \ 766 for (j = i; j > 0; j--) { \ 767 set[j] = set[j - 1]; \ 768 } \ 769 set[0] = tmp_tag; \ 770 idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \ 771 L.use[idx].count ++; \ 772 L.use[idx].mask |= use_mask; \ 773 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count 
%d\n",\ 774 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ 775 use_mask, L.use[idx].mask, L.use[idx].count); \ 776 goto block2; \ 777 } \ 778 } \ 779 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \ 780 for (j = L.assoc - 1; j > 0; j--) { \ 781 set[j] = set[j - 1]; \ 782 } \ 783 set[0] = tag | tmp_tag; \ 784 idx = (set1 * L.assoc) + tmp_tag; \ 785 miss1 = update_##L##_use(&L, idx, \ 786 use_mask, a &~ L.line_size_mask); \ 787block2: \ 788 set = &(L.tags[set2 * L.assoc]); \ 789 use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \ 790 tag2 = (a+size-1) & L.tag_mask; \ 791 if (tag2 == (set[0] & L.tag_mask)) { \ 792 idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \ 793 L.use[idx].count ++; \ 794 L.use[idx].mask |= use_mask; \ 795 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\ 796 idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ 797 use_mask, L.use[idx].mask, L.use[idx].count); \ 798 return miss1; \ 799 } \ 800 for (i = 1; i < L.assoc; i++) { \ 801 if (tag2 == (set[i] & L.tag_mask)) { \ 802 tmp_tag = set[i]; \ 803 for (j = i; j > 0; j--) { \ 804 set[j] = set[j - 1]; \ 805 } \ 806 set[0] = tmp_tag; \ 807 idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \ 808 L.use[idx].count ++; \ 809 L.use[idx].mask |= use_mask; \ 810 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\ 811 i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \ 812 use_mask, L.use[idx].mask, L.use[idx].count); \ 813 return miss1; \ 814 } \ 815 } \ 816 tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \ 817 for (j = L.assoc - 1; j > 0; j--) { \ 818 set[j] = set[j - 1]; \ 819 } \ 820 set[0] = tag2 | tmp_tag; \ 821 idx = (set2 * L.assoc) + tmp_tag; \ 822 miss2 = update_##L##_use(&L, idx, \ 823 use_mask, (a+size-1) &~ L.line_size_mask); \ 824 return (miss1==MemAccess || miss2==MemAccess) ? 
MemAccess:LL_Hit; \ 825 \ 826 } else { \ 827 VG_(printf)("addr: %#lx size: %u sets: %d %d", a, size, set1, set2); \ 828 VG_(tool_panic)("item straddles more than two cache sets"); \ 829 } \ 830 return 0; \ 831} 832 833 834/* logarithmic bitcounting algorithm, see 835 * http://graphics.stanford.edu/~seander/bithacks.html 836 */ 837static __inline__ unsigned int countBits(unsigned int bits) 838{ 839 unsigned int c; // store the total here 840 const int S[] = {1, 2, 4, 8, 16}; // Magic Binary Numbers 841 const int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF}; 842 843 c = bits; 844 c = ((c >> S[0]) & B[0]) + (c & B[0]); 845 c = ((c >> S[1]) & B[1]) + (c & B[1]); 846 c = ((c >> S[2]) & B[2]) + (c & B[2]); 847 c = ((c >> S[3]) & B[3]) + (c & B[3]); 848 c = ((c >> S[4]) & B[4]) + (c & B[4]); 849 return c; 850} 851 852static void update_LL_use(int idx, Addr memline) 853{ 854 line_loaded* loaded = &(LL.loaded[idx]); 855 line_use* use = &(LL.use[idx]); 856 int i = ((32 - countBits(use->mask)) * LL.line_size)>>5; 857 858 CLG_DEBUG(2, " LL.miss [%d]: at %#lx accessing memline %#lx\n", 859 idx, CLG_(bb_base) + current_ii->instr_offset, memline); 860 if (use->count>0) { 861 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n", 862 use->count, i, use->mask, loaded->memline, loaded->iaddr); 863 CLG_DEBUG(2, " collect: %d, use_base %p\n", 864 CLG_(current_state).collect, loaded->use_base); 865 866 if (CLG_(current_state).collect && loaded->use_base) { 867 (loaded->use_base)[off_LL_AcCost] += 1000 / use->count; 868 (loaded->use_base)[off_LL_SpLoss] += i; 869 } 870 } 871 872 use->count = 0; 873 use->mask = 0; 874 875 loaded->memline = memline; 876 loaded->iaddr = CLG_(bb_base) + current_ii->instr_offset; 877 loaded->use_base = (CLG_(current_state).nonskipped) ? 
878 CLG_(current_state).nonskipped->skipped : 879 CLG_(cost_base) + current_ii->cost_offset; 880} 881 882static 883CacheModelResult cacheuse_LL_access(Addr memline, line_loaded* l1_loaded) 884{ 885 UInt setNo = (memline >> LL.line_size_bits) & (LL.sets_min_1); 886 UWord* set = &(LL.tags[setNo * LL.assoc]); 887 UWord tag = memline & LL.tag_mask; 888 889 int i, j, idx; 890 UWord tmp_tag; 891 892 CLG_DEBUG(6,"LL.Acc(Memline %#lx): Set %d\n", memline, setNo); 893 894 if (tag == (set[0] & LL.tag_mask)) { 895 idx = (setNo * LL.assoc) + (set[0] & ~LL.tag_mask); 896 l1_loaded->dep_use = &(LL.use[idx]); 897 898 CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n", 899 idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr, 900 LL.use[idx].mask, LL.use[idx].count); 901 return LL_Hit; 902 } 903 for (i = 1; i < LL.assoc; i++) { 904 if (tag == (set[i] & LL.tag_mask)) { 905 tmp_tag = set[i]; 906 for (j = i; j > 0; j--) { 907 set[j] = set[j - 1]; 908 } 909 set[0] = tmp_tag; 910 idx = (setNo * LL.assoc) + (tmp_tag & ~LL.tag_mask); 911 l1_loaded->dep_use = &(LL.use[idx]); 912 913 CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n", 914 i, idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr, 915 LL.use[idx].mask, LL.use[idx].count); 916 return LL_Hit; 917 } 918 } 919 920 /* A miss; install this tag as MRU, shuffle rest down. 
*/ 921 tmp_tag = set[LL.assoc - 1] & ~LL.tag_mask; 922 for (j = LL.assoc - 1; j > 0; j--) { 923 set[j] = set[j - 1]; 924 } 925 set[0] = tag | tmp_tag; 926 idx = (setNo * LL.assoc) + tmp_tag; 927 l1_loaded->dep_use = &(LL.use[idx]); 928 929 update_LL_use(idx, memline); 930 931 return MemAccess; 932} 933 934 935 936 937#define UPDATE_USE(L) \ 938 \ 939static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \ 940 UInt mask, Addr memline) \ 941{ \ 942 line_loaded* loaded = &(cache->loaded[idx]); \ 943 line_use* use = &(cache->use[idx]); \ 944 int c = ((32 - countBits(use->mask)) * cache->line_size)>>5; \ 945 \ 946 CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \ 947 cache->name, idx, CLG_(bb_base) + current_ii->instr_offset, memline, mask); \ 948 if (use->count>0) { \ 949 CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",\ 950 use->count, c, use->mask, loaded->memline, loaded->iaddr); \ 951 CLG_DEBUG(2, " collect: %d, use_base %p\n", \ 952 CLG_(current_state).collect, loaded->use_base); \ 953 \ 954 if (CLG_(current_state).collect && loaded->use_base) { \ 955 (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count; \ 956 (loaded->use_base)[off_##L##_SpLoss] += c; \ 957 \ 958 /* FIXME (?): L1/LL line sizes must be equal ! */ \ 959 loaded->dep_use->mask |= use->mask; \ 960 loaded->dep_use->count += use->count; \ 961 } \ 962 } \ 963 \ 964 use->count = 1; \ 965 use->mask = mask; \ 966 loaded->memline = memline; \ 967 loaded->iaddr = CLG_(bb_base) + current_ii->instr_offset; \ 968 loaded->use_base = (CLG_(current_state).nonskipped) ? 
\ 969 CLG_(current_state).nonskipped->skipped : \ 970 CLG_(cost_base) + current_ii->cost_offset; \ 971 \ 972 if (memline == 0) return LL_Hit; \ 973 return cacheuse_LL_access(memline, loaded); \ 974} 975 976UPDATE_USE(I1); 977UPDATE_USE(D1); 978 979CACHEUSE(I1); 980CACHEUSE(D1); 981 982 983static 984void cacheuse_finish(void) 985{ 986 int i; 987 InstrInfo ii = { 0,0,0,0 }; 988 989 if (!CLG_(current_state).collect) return; 990 991 CLG_(bb_base) = 0; 992 current_ii = ⅈ /* needs to be set for update_XX_use */ 993 CLG_(cost_base) = 0; 994 995 /* update usage counters */ 996 if (I1.use) 997 for (i = 0; i < I1.sets * I1.assoc; i++) 998 if (I1.loaded[i].use_base) 999 update_I1_use( &I1, i, 0,0); 1000 1001 if (D1.use) 1002 for (i = 0; i < D1.sets * D1.assoc; i++) 1003 if (D1.loaded[i].use_base) 1004 update_D1_use( &D1, i, 0,0); 1005 1006 if (LL.use) 1007 for (i = 0; i < LL.sets * LL.assoc; i++) 1008 if (LL.loaded[i].use_base) 1009 update_LL_use(i, 0); 1010 1011 current_ii = 0; 1012} 1013 1014 1015 1016/*------------------------------------------------------------*/ 1017/*--- Helper functions called by instrumented code ---*/ 1018/*------------------------------------------------------------*/ 1019 1020 1021static __inline__ 1022void inc_costs(CacheModelResult r, ULong* c1, ULong* c2) 1023{ 1024 switch(r) { 1025 case WriteBackMemAccess: 1026 if (clo_simulate_writeback) { 1027 c1[3]++; 1028 c2[3]++; 1029 } 1030 // fall through 1031 1032 case MemAccess: 1033 c1[2]++; 1034 c2[2]++; 1035 // fall through 1036 1037 case LL_Hit: 1038 c1[1]++; 1039 c2[1]++; 1040 // fall through 1041 1042 default: 1043 c1[0]++; 1044 c2[0]++; 1045 } 1046} 1047 1048static 1049const HChar* cacheRes(CacheModelResult r) 1050{ 1051 switch(r) { 1052 case L1_Hit: return "L1 Hit "; 1053 case LL_Hit: return "LL Hit "; 1054 case MemAccess: return "LL Miss"; 1055 case WriteBackMemAccess: return "LL Miss (dirty)"; 1056 default: 1057 tl_assert(0); 1058 } 1059 return "??"; 1060} 1061 1062VG_REGPARM(1) 1063static 
void log_1I0D(InstrInfo* ii) 1064{ 1065 CacheModelResult IrRes; 1066 1067 current_ii = ii; 1068 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size); 1069 1070 CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n", 1071 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes)); 1072 1073 if (CLG_(current_state).collect) { 1074 ULong* cost_Ir; 1075 1076 if (CLG_(current_state).nonskipped) 1077 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR); 1078 else 1079 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR]; 1080 1081 inc_costs(IrRes, cost_Ir, 1082 CLG_(current_state).cost + fullOffset(EG_IR) ); 1083 } 1084} 1085 1086VG_REGPARM(2) 1087static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2) 1088{ 1089 CacheModelResult Ir1Res, Ir2Res; 1090 ULong *global_cost_Ir; 1091 1092 current_ii = ii1; 1093 Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size); 1094 current_ii = ii2; 1095 Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size); 1096 1097 CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n", 1098 CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res), 1099 CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) ); 1100 1101 if (!CLG_(current_state).collect) return; 1102 1103 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR); 1104 if (CLG_(current_state).nonskipped) { 1105 ULong* skipped_cost_Ir = 1106 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR); 1107 1108 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir); 1109 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir); 1110 return; 1111 } 1112 1113 inc_costs(Ir1Res, global_cost_Ir, 1114 CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]); 1115 inc_costs(Ir2Res, global_cost_Ir, 1116 CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]); 1117} 1118 1119VG_REGPARM(3) 1120static void log_3I0D(InstrInfo* ii1, 
InstrInfo* ii2, InstrInfo* ii3) 1121{ 1122 CacheModelResult Ir1Res, Ir2Res, Ir3Res; 1123 ULong *global_cost_Ir; 1124 1125 current_ii = ii1; 1126 Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size); 1127 current_ii = ii2; 1128 Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size); 1129 current_ii = ii3; 1130 Ir3Res = (*simulator.I1_Read)(CLG_(bb_base) + ii3->instr_offset, ii3->instr_size); 1131 1132 CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n", 1133 CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res), 1134 CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res), 1135 CLG_(bb_base) + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) ); 1136 1137 if (!CLG_(current_state).collect) return; 1138 1139 global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR); 1140 if (CLG_(current_state).nonskipped) { 1141 ULong* skipped_cost_Ir = 1142 CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR); 1143 inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir); 1144 inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir); 1145 inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir); 1146 return; 1147 } 1148 1149 inc_costs(Ir1Res, global_cost_Ir, 1150 CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]); 1151 inc_costs(Ir2Res, global_cost_Ir, 1152 CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]); 1153 inc_costs(Ir3Res, global_cost_Ir, 1154 CLG_(cost_base) + ii3->cost_offset + ii3->eventset->offset[EG_IR]); 1155} 1156 1157/* Instruction doing a read access */ 1158 1159VG_REGPARM(3) 1160static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size) 1161{ 1162 CacheModelResult IrRes, DrRes; 1163 1164 current_ii = ii; 1165 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size); 1166 DrRes = (*simulator.D1_Read)(data_addr, data_size); 1167 1168 CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => 
%s\n", 1169 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes), 1170 data_addr, data_size, cacheRes(DrRes)); 1171 1172 if (CLG_(current_state).collect) { 1173 ULong *cost_Ir, *cost_Dr; 1174 1175 if (CLG_(current_state).nonskipped) { 1176 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR); 1177 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR); 1178 } 1179 else { 1180 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR]; 1181 cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR]; 1182 } 1183 1184 inc_costs(IrRes, cost_Ir, 1185 CLG_(current_state).cost + fullOffset(EG_IR) ); 1186 inc_costs(DrRes, cost_Dr, 1187 CLG_(current_state).cost + fullOffset(EG_DR) ); 1188 } 1189} 1190 1191 1192/* Note that addEvent_D_guarded assumes that log_0I1Dr and log_0I1Dw 1193 have exactly the same prototype. If you change them, you must 1194 change addEvent_D_guarded too. */ 1195VG_REGPARM(3) 1196static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size) 1197{ 1198 CacheModelResult DrRes; 1199 1200 current_ii = ii; 1201 DrRes = (*simulator.D1_Read)(data_addr, data_size); 1202 1203 CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n", 1204 data_addr, data_size, cacheRes(DrRes)); 1205 1206 if (CLG_(current_state).collect) { 1207 ULong *cost_Dr; 1208 1209 if (CLG_(current_state).nonskipped) 1210 cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR); 1211 else 1212 cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR]; 1213 1214 inc_costs(DrRes, cost_Dr, 1215 CLG_(current_state).cost + fullOffset(EG_DR) ); 1216 } 1217} 1218 1219 1220/* Instruction doing a write access */ 1221 1222VG_REGPARM(3) 1223static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size) 1224{ 1225 CacheModelResult IrRes, DwRes; 1226 1227 current_ii = ii; 1228 IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size); 1229 DwRes = 
(*simulator.D1_Write)(data_addr, data_size); 1230 1231 CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n", 1232 CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes), 1233 data_addr, data_size, cacheRes(DwRes)); 1234 1235 if (CLG_(current_state).collect) { 1236 ULong *cost_Ir, *cost_Dw; 1237 1238 if (CLG_(current_state).nonskipped) { 1239 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR); 1240 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW); 1241 } 1242 else { 1243 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR]; 1244 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW]; 1245 } 1246 1247 inc_costs(IrRes, cost_Ir, 1248 CLG_(current_state).cost + fullOffset(EG_IR) ); 1249 inc_costs(DwRes, cost_Dw, 1250 CLG_(current_state).cost + fullOffset(EG_DW) ); 1251 } 1252} 1253 1254/* See comment on log_0I1Dr. */ 1255VG_REGPARM(3) 1256static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size) 1257{ 1258 CacheModelResult DwRes; 1259 1260 current_ii = ii; 1261 DwRes = (*simulator.D1_Write)(data_addr, data_size); 1262 1263 CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n", 1264 data_addr, data_size, cacheRes(DwRes)); 1265 1266 if (CLG_(current_state).collect) { 1267 ULong *cost_Dw; 1268 1269 if (CLG_(current_state).nonskipped) 1270 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW); 1271 else 1272 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW]; 1273 1274 inc_costs(DwRes, cost_Dw, 1275 CLG_(current_state).cost + fullOffset(EG_DW) ); 1276 } 1277} 1278 1279 1280 1281/*------------------------------------------------------------*/ 1282/*--- Cache configuration ---*/ 1283/*------------------------------------------------------------*/ 1284 1285static cache_t clo_I1_cache = UNDEFINED_CACHE; 1286static cache_t clo_D1_cache = UNDEFINED_CACHE; 1287static cache_t clo_LL_cache = UNDEFINED_CACHE; 1288 1289/* Initialize and clear 
simulator state */ 1290static void cachesim_post_clo_init(void) 1291{ 1292 /* Cache configurations. */ 1293 cache_t I1c, D1c, LLc; 1294 1295 /* Initialize access handlers */ 1296 if (!CLG_(clo).simulate_cache) { 1297 CLG_(cachesim).log_1I0D = 0; 1298 CLG_(cachesim).log_1I0D_name = "(no function)"; 1299 CLG_(cachesim).log_2I0D = 0; 1300 CLG_(cachesim).log_2I0D_name = "(no function)"; 1301 CLG_(cachesim).log_3I0D = 0; 1302 CLG_(cachesim).log_3I0D_name = "(no function)"; 1303 1304 CLG_(cachesim).log_1I1Dr = 0; 1305 CLG_(cachesim).log_1I1Dr_name = "(no function)"; 1306 CLG_(cachesim).log_1I1Dw = 0; 1307 CLG_(cachesim).log_1I1Dw_name = "(no function)"; 1308 1309 CLG_(cachesim).log_0I1Dr = 0; 1310 CLG_(cachesim).log_0I1Dr_name = "(no function)"; 1311 CLG_(cachesim).log_0I1Dw = 0; 1312 CLG_(cachesim).log_0I1Dw_name = "(no function)"; 1313 return; 1314 } 1315 1316 /* Configuration of caches only needed with real cache simulation */ 1317 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc, 1318 &clo_I1_cache, 1319 &clo_D1_cache, 1320 &clo_LL_cache); 1321 1322 I1.name = "I1"; 1323 D1.name = "D1"; 1324 LL.name = "LL"; 1325 1326 // min_line_size is used to make sure that we never feed 1327 // accesses to the simulator straddling more than two 1328 // cache lines at any cache level 1329 CLG_(min_line_size) = (I1c.line_size < D1c.line_size) 1330 ? I1c.line_size : D1c.line_size; 1331 CLG_(min_line_size) = (LLc.line_size < CLG_(min_line_size)) 1332 ? LLc.line_size : CLG_(min_line_size); 1333 1334 Int largest_load_or_store_size 1335 = VG_(machine_get_size_of_largest_guest_register)(); 1336 if (CLG_(min_line_size) < largest_load_or_store_size) { 1337 /* We can't continue, because the cache simulation might 1338 straddle more than 2 lines, and it will assert. So let's 1339 just stop before we start. 
*/ 1340 VG_(umsg)("Callgrind: cannot continue: the minimum line size (%d)\n", 1341 (Int)CLG_(min_line_size)); 1342 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n", 1343 largest_load_or_store_size ); 1344 VG_(umsg)(" but it is not. Exiting now.\n"); 1345 VG_(exit)(1); 1346 } 1347 1348 cachesim_initcache(I1c, &I1); 1349 cachesim_initcache(D1c, &D1); 1350 cachesim_initcache(LLc, &LL); 1351 1352 /* the other cache simulators use the standard helpers 1353 * with dispatching via simulator struct */ 1354 1355 CLG_(cachesim).log_1I0D = log_1I0D; 1356 CLG_(cachesim).log_1I0D_name = "log_1I0D"; 1357 CLG_(cachesim).log_2I0D = log_2I0D; 1358 CLG_(cachesim).log_2I0D_name = "log_2I0D"; 1359 CLG_(cachesim).log_3I0D = log_3I0D; 1360 CLG_(cachesim).log_3I0D_name = "log_3I0D"; 1361 1362 CLG_(cachesim).log_1I1Dr = log_1I1Dr; 1363 CLG_(cachesim).log_1I1Dw = log_1I1Dw; 1364 CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr"; 1365 CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw"; 1366 1367 CLG_(cachesim).log_0I1Dr = log_0I1Dr; 1368 CLG_(cachesim).log_0I1Dw = log_0I1Dw; 1369 CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr"; 1370 CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw"; 1371 1372 if (clo_collect_cacheuse) { 1373 1374 /* Output warning for not supported option combinations */ 1375 if (clo_simulate_hwpref) { 1376 VG_(message)(Vg_DebugMsg, 1377 "warning: prefetch simulation can not be " 1378 "used with cache usage\n"); 1379 clo_simulate_hwpref = False; 1380 } 1381 1382 if (clo_simulate_writeback) { 1383 VG_(message)(Vg_DebugMsg, 1384 "warning: write-back simulation can not be " 1385 "used with cache usage\n"); 1386 clo_simulate_writeback = False; 1387 } 1388 1389 simulator.I1_Read = cacheuse_I1_doRead; 1390 simulator.D1_Read = cacheuse_D1_doRead; 1391 simulator.D1_Write = cacheuse_D1_doRead; 1392 return; 1393 } 1394 1395 if (clo_simulate_hwpref) { 1396 prefetch_clear(); 1397 1398 if (clo_simulate_writeback) { 1399 simulator.I1_Read = prefetch_I1_Read; 1400 simulator.D1_Read 
= prefetch_D1_Read; 1401 simulator.D1_Write = prefetch_D1_Write; 1402 } 1403 else { 1404 simulator.I1_Read = prefetch_I1_ref; 1405 simulator.D1_Read = prefetch_D1_ref; 1406 simulator.D1_Write = prefetch_D1_ref; 1407 } 1408 1409 return; 1410 } 1411 1412 if (clo_simulate_writeback) { 1413 simulator.I1_Read = cachesim_I1_Read; 1414 simulator.D1_Read = cachesim_D1_Read; 1415 simulator.D1_Write = cachesim_D1_Write; 1416 } 1417 else { 1418 simulator.I1_Read = cachesim_I1_ref; 1419 simulator.D1_Read = cachesim_D1_ref; 1420 simulator.D1_Write = cachesim_D1_ref; 1421 } 1422} 1423 1424 1425/* Clear simulator state. Has to be initialized before */ 1426static 1427void cachesim_clear(void) 1428{ 1429 cachesim_clearcache(&I1); 1430 cachesim_clearcache(&D1); 1431 cachesim_clearcache(&LL); 1432 1433 prefetch_clear(); 1434} 1435 1436 1437static void cachesim_getdesc(HChar* buf) 1438{ 1439 Int p; 1440 p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line); 1441 p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line); 1442 VG_(sprintf)(buf+p, "desc: LL cache: %s\n", LL.desc_line); 1443} 1444 1445static 1446void cachesim_print_opts(void) 1447{ 1448 VG_(printf)( 1449"\n cache simulator options (does cache simulation if used):\n" 1450" --simulate-wb=no|yes Count write-back events [no]\n" 1451" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n" 1452#if CLG_EXPERIMENTAL 1453" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n" 1454#endif 1455" --cacheuse=no|yes Collect cache block use [no]\n"); 1456 VG_(print_cache_clo_opts)(); 1457} 1458 1459/* Check for command line option for cache configuration. 1460 * Return False if unknown and not handled. 
1461 * 1462 * Called from CLG_(process_cmd_line_option)() in clo.c 1463 */ 1464static Bool cachesim_parse_opt(const HChar* arg) 1465{ 1466 if VG_BOOL_CLO(arg, "--simulate-wb", clo_simulate_writeback) {} 1467 else if VG_BOOL_CLO(arg, "--simulate-hwpref", clo_simulate_hwpref) {} 1468 else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors) {} 1469 1470 else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) { 1471 if (clo_collect_cacheuse) { 1472 /* Use counters only make sense with fine dumping */ 1473 CLG_(clo).dump_instr = True; 1474 } 1475 } 1476 1477 else if (VG_(str_clo_cache_opt)(arg, 1478 &clo_I1_cache, 1479 &clo_D1_cache, 1480 &clo_LL_cache)) {} 1481 1482 else 1483 return False; 1484 1485 return True; 1486} 1487 1488/* Adds commas to ULong, right justifying in a field field_width wide, returns 1489 * the string in buf. */ 1490static 1491Int commify(ULong n, int field_width, HChar* buf) 1492{ 1493 int len, n_commas, i, j, new_len, space; 1494 1495 VG_(sprintf)(buf, "%llu", n); 1496 len = VG_(strlen)(buf); 1497 n_commas = (len - 1) / 3; 1498 new_len = len + n_commas; 1499 space = field_width - new_len; 1500 1501 /* Allow for printing a number in a field_width smaller than it's size */ 1502 if (space < 0) space = 0; 1503 1504 /* Make j = -1 because we copy the '\0' before doing the numbers in groups 1505 * of three. */ 1506 for (j = -1, i = len ; i >= 0; i--) { 1507 buf[i + n_commas + space] = buf[i]; 1508 1509 if ((i>0) && (3 == ++j)) { 1510 j = 0; 1511 n_commas--; 1512 buf[i + n_commas + space] = ','; 1513 } 1514 } 1515 /* Right justify in field. */ 1516 for (i = 0; i < space; i++) buf[i] = ' '; 1517 return new_len; 1518} 1519 1520static 1521void percentify(Int n, Int ex, Int field_width, HChar buf[]) 1522{ 1523 int i, len, space; 1524 1525 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex); 1526 len = VG_(strlen)(buf); 1527 space = field_width - len; 1528 if (space < 0) space = 0; /* Allow for v. 
small field_width */ 1529 i = len; 1530 1531 /* Right justify in field */ 1532 for ( ; i >= 0; i--) buf[i + space] = buf[i]; 1533 for (i = 0; i < space; i++) buf[i] = ' '; 1534} 1535 1536static 1537void cachesim_printstat(Int l1, Int l2, Int l3) 1538{ 1539 FullCost total = CLG_(total_cost), D_total = 0; 1540 ULong LL_total_m, LL_total_mr, LL_total_mw, 1541 LL_total, LL_total_r, LL_total_w; 1542 HChar buf1[RESULTS_BUF_LEN], 1543 buf2[RESULTS_BUF_LEN], 1544 buf3[RESULTS_BUF_LEN]; 1545 Int p; 1546 1547 if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) { 1548 VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu\n", 1549 prefetch_up); 1550 VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu\n", 1551 prefetch_down); 1552 VG_(message)(Vg_DebugMsg, "\n"); 1553 } 1554 1555 commify(total[fullOffset(EG_IR) +1], l1, buf1); 1556 VG_(message)(Vg_UserMsg, "I1 misses: %s\n", buf1); 1557 1558 commify(total[fullOffset(EG_IR) +2], l1, buf1); 1559 VG_(message)(Vg_UserMsg, "LLi misses: %s\n", buf1); 1560 1561 p = 100; 1562 1563 if (0 == total[fullOffset(EG_IR)]) 1564 total[fullOffset(EG_IR)] = 1; 1565 1566 percentify(total[fullOffset(EG_IR)+1] * 100 * p / 1567 total[fullOffset(EG_IR)], p, l1+1, buf1); 1568 VG_(message)(Vg_UserMsg, "I1 miss rate: %s\n", buf1); 1569 1570 percentify(total[fullOffset(EG_IR)+2] * 100 * p / 1571 total[fullOffset(EG_IR)], p, l1+1, buf1); 1572 VG_(message)(Vg_UserMsg, "LLi miss rate: %s\n", buf1); 1573 VG_(message)(Vg_UserMsg, "\n"); 1574 1575 /* D cache results. 1576 Use the D_refs.rd and D_refs.wr values to determine the 1577 * width of columns 2 & 3. 
*/ 1578 1579 D_total = CLG_(get_eventset_cost)( CLG_(sets).full ); 1580 CLG_(init_cost)( CLG_(sets).full, D_total); 1581 // we only use the first 3 values of D_total, adding up Dr and Dw costs 1582 CLG_(copy_cost)( CLG_(get_event_set)(EG_DR), D_total, total + fullOffset(EG_DR) ); 1583 CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) ); 1584 1585 commify( D_total[0], l1, buf1); 1586 commify(total[fullOffset(EG_DR)], l2, buf2); 1587 commify(total[fullOffset(EG_DW)], l3, buf3); 1588 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)\n", 1589 buf1, buf2, buf3); 1590 1591 commify( D_total[1], l1, buf1); 1592 commify(total[fullOffset(EG_DR)+1], l2, buf2); 1593 commify(total[fullOffset(EG_DW)+1], l3, buf3); 1594 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)\n", 1595 buf1, buf2, buf3); 1596 1597 commify( D_total[2], l1, buf1); 1598 commify(total[fullOffset(EG_DR)+2], l2, buf2); 1599 commify(total[fullOffset(EG_DW)+2], l3, buf3); 1600 VG_(message)(Vg_UserMsg, "LLd misses: %s (%s rd + %s wr)\n", 1601 buf1, buf2, buf3); 1602 1603 p = 10; 1604 1605 if (0 == D_total[0]) D_total[0] = 1; 1606 if (0 == total[fullOffset(EG_DR)]) total[fullOffset(EG_DR)] = 1; 1607 if (0 == total[fullOffset(EG_DW)]) total[fullOffset(EG_DW)] = 1; 1608 1609 percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1); 1610 percentify(total[fullOffset(EG_DR)+1] * 100 * p / 1611 total[fullOffset(EG_DR)], p, l2+1, buf2); 1612 percentify(total[fullOffset(EG_DW)+1] * 100 * p / 1613 total[fullOffset(EG_DW)], p, l3+1, buf3); 1614 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )\n", 1615 buf1, buf2,buf3); 1616 1617 percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1); 1618 percentify(total[fullOffset(EG_DR)+2] * 100 * p / 1619 total[fullOffset(EG_DR)], p, l2+1, buf2); 1620 percentify(total[fullOffset(EG_DW)+2] * 100 * p / 1621 total[fullOffset(EG_DW)], p, l3+1, buf3); 1622 VG_(message)(Vg_UserMsg, "LLd miss rate: %s (%s + %s )\n", 1623 buf1, buf2,buf3); 
1624 VG_(message)(Vg_UserMsg, "\n"); 1625 1626 1627 1628 /* LL overall results */ 1629 1630 LL_total = 1631 total[fullOffset(EG_DR) +1] + 1632 total[fullOffset(EG_DW) +1] + 1633 total[fullOffset(EG_IR) +1]; 1634 LL_total_r = 1635 total[fullOffset(EG_DR) +1] + 1636 total[fullOffset(EG_IR) +1]; 1637 LL_total_w = total[fullOffset(EG_DW) +1]; 1638 commify(LL_total, l1, buf1); 1639 commify(LL_total_r, l2, buf2); 1640 commify(LL_total_w, l3, buf3); 1641 VG_(message)(Vg_UserMsg, "LL refs: %s (%s rd + %s wr)\n", 1642 buf1, buf2, buf3); 1643 1644 LL_total_m = 1645 total[fullOffset(EG_DR) +2] + 1646 total[fullOffset(EG_DW) +2] + 1647 total[fullOffset(EG_IR) +2]; 1648 LL_total_mr = 1649 total[fullOffset(EG_DR) +2] + 1650 total[fullOffset(EG_IR) +2]; 1651 LL_total_mw = total[fullOffset(EG_DW) +2]; 1652 commify(LL_total_m, l1, buf1); 1653 commify(LL_total_mr, l2, buf2); 1654 commify(LL_total_mw, l3, buf3); 1655 VG_(message)(Vg_UserMsg, "LL misses: %s (%s rd + %s wr)\n", 1656 buf1, buf2, buf3); 1657 1658 percentify(LL_total_m * 100 * p / 1659 (total[fullOffset(EG_IR)] + D_total[0]), p, l1+1, buf1); 1660 percentify(LL_total_mr * 100 * p / 1661 (total[fullOffset(EG_IR)] + total[fullOffset(EG_DR)]), 1662 p, l2+1, buf2); 1663 percentify(LL_total_mw * 100 * p / 1664 total[fullOffset(EG_DW)], p, l3+1, buf3); 1665 VG_(message)(Vg_UserMsg, "LL miss rate: %s (%s + %s )\n", 1666 buf1, buf2,buf3); 1667} 1668 1669 1670/*------------------------------------------------------------*/ 1671/*--- Setup for Event set. 
---*/ 1672/*------------------------------------------------------------*/ 1673 1674struct event_sets CLG_(sets); 1675 1676void CLG_(init_eventsets)() 1677{ 1678 // Event groups from which the event sets are composed 1679 // the "Use" group only is used with "cacheuse" simulation 1680 if (clo_collect_cacheuse) 1681 CLG_(register_event_group4)(EG_USE, 1682 "AcCost1", "SpLoss1", "AcCost2", "SpLoss2"); 1683 1684 if (!CLG_(clo).simulate_cache) 1685 CLG_(register_event_group)(EG_IR, "Ir"); 1686 else if (!clo_simulate_writeback) { 1687 CLG_(register_event_group3)(EG_IR, "Ir", "I1mr", "ILmr"); 1688 CLG_(register_event_group3)(EG_DR, "Dr", "D1mr", "DLmr"); 1689 CLG_(register_event_group3)(EG_DW, "Dw", "D1mw", "DLmw"); 1690 } 1691 else { // clo_simulate_writeback 1692 CLG_(register_event_group4)(EG_IR, "Ir", "I1mr", "ILmr", "ILdmr"); 1693 CLG_(register_event_group4)(EG_DR, "Dr", "D1mr", "DLmr", "DLdmr"); 1694 CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "DLmw", "DLdmw"); 1695 } 1696 1697 if (CLG_(clo).simulate_branch) { 1698 CLG_(register_event_group2)(EG_BC, "Bc", "Bcm"); 1699 CLG_(register_event_group2)(EG_BI, "Bi", "Bim"); 1700 } 1701 1702 if (CLG_(clo).collect_bus) 1703 CLG_(register_event_group)(EG_BUS, "Ge"); 1704 1705 if (CLG_(clo).collect_alloc) 1706 CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize"); 1707 1708 if (CLG_(clo).collect_systime) 1709 CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime"); 1710 1711 // event set used as base for instruction self cost 1712 CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR); 1713 1714 // event set comprising all event groups, used for inclusive cost 1715 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW); 1716 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_BC, EG_BI); 1717 CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS); 1718 CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS); 1719 1720 CLG_DEBUGIF(1) { 1721 CLG_DEBUG(1, 
"EventSets:\n"); 1722 CLG_(print_eventset)(-2, CLG_(sets).base); 1723 CLG_(print_eventset)(-2, CLG_(sets).full); 1724 } 1725 1726 /* Not-existing events are silently ignored */ 1727 CLG_(dumpmap) = CLG_(get_eventmapping)(CLG_(sets).full); 1728 CLG_(append_event)(CLG_(dumpmap), "Ir"); 1729 CLG_(append_event)(CLG_(dumpmap), "Dr"); 1730 CLG_(append_event)(CLG_(dumpmap), "Dw"); 1731 CLG_(append_event)(CLG_(dumpmap), "I1mr"); 1732 CLG_(append_event)(CLG_(dumpmap), "D1mr"); 1733 CLG_(append_event)(CLG_(dumpmap), "D1mw"); 1734 CLG_(append_event)(CLG_(dumpmap), "ILmr"); 1735 CLG_(append_event)(CLG_(dumpmap), "DLmr"); 1736 CLG_(append_event)(CLG_(dumpmap), "DLmw"); 1737 CLG_(append_event)(CLG_(dumpmap), "ILdmr"); 1738 CLG_(append_event)(CLG_(dumpmap), "DLdmr"); 1739 CLG_(append_event)(CLG_(dumpmap), "DLdmw"); 1740 CLG_(append_event)(CLG_(dumpmap), "Bc"); 1741 CLG_(append_event)(CLG_(dumpmap), "Bcm"); 1742 CLG_(append_event)(CLG_(dumpmap), "Bi"); 1743 CLG_(append_event)(CLG_(dumpmap), "Bim"); 1744 CLG_(append_event)(CLG_(dumpmap), "AcCost1"); 1745 CLG_(append_event)(CLG_(dumpmap), "SpLoss1"); 1746 CLG_(append_event)(CLG_(dumpmap), "AcCost2"); 1747 CLG_(append_event)(CLG_(dumpmap), "SpLoss2"); 1748 CLG_(append_event)(CLG_(dumpmap), "Ge"); 1749 CLG_(append_event)(CLG_(dumpmap), "allocCount"); 1750 CLG_(append_event)(CLG_(dumpmap), "allocSize"); 1751 CLG_(append_event)(CLG_(dumpmap), "sysCount"); 1752 CLG_(append_event)(CLG_(dumpmap), "sysTime"); 1753} 1754 1755 1756/* this is called at dump time for every instruction executed */ 1757static void cachesim_add_icost(SimCost cost, BBCC* bbcc, 1758 InstrInfo* ii, ULong exe_count) 1759{ 1760 if (!CLG_(clo).simulate_cache) 1761 cost[ fullOffset(EG_IR) ] += exe_count; 1762 1763 if (ii->eventset) 1764 CLG_(add_and_zero_cost2)( CLG_(sets).full, cost, 1765 ii->eventset, bbcc->cost + ii->cost_offset); 1766} 1767 1768static 1769void cachesim_finish(void) 1770{ 1771 if (clo_collect_cacheuse) 1772 cacheuse_finish(); 1773} 1774 
1775/*------------------------------------------------------------*/ 1776/*--- The simulator defined in this file ---*/ 1777/*------------------------------------------------------------*/ 1778 1779struct cachesim_if CLG_(cachesim) = { 1780 .print_opts = cachesim_print_opts, 1781 .parse_opt = cachesim_parse_opt, 1782 .post_clo_init = cachesim_post_clo_init, 1783 .clear = cachesim_clear, 1784 .getdesc = cachesim_getdesc, 1785 .printstat = cachesim_printstat, 1786 .add_icost = cachesim_add_icost, 1787 .finish = cachesim_finish, 1788 1789 /* these will be set by cachesim_post_clo_init */ 1790 .log_1I0D = 0, 1791 .log_2I0D = 0, 1792 .log_3I0D = 0, 1793 1794 .log_1I1Dr = 0, 1795 .log_1I1Dw = 0, 1796 1797 .log_0I1Dr = 0, 1798 .log_0I1Dw = 0, 1799 1800 .log_1I0D_name = "(no function)", 1801 .log_2I0D_name = "(no function)", 1802 .log_3I0D_name = "(no function)", 1803 1804 .log_1I1Dr_name = "(no function)", 1805 .log_1I1Dw_name = "(no function)", 1806 1807 .log_0I1Dr_name = "(no function)", 1808 .log_0I1Dw_name = "(no function)", 1809}; 1810 1811 1812/*--------------------------------------------------------------------*/ 1813/*--- end ct_sim.c ---*/ 1814/*--------------------------------------------------------------------*/ 1815