/*--------------------------------------------------------------------*/
/*--- Cache simulation.                                            ---*/
/*---                                                        sim.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call graph
   profiling programs.

   Copyright (C) 2003-2011, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2011 Nicholas Nethercote (njn@valgrind.org)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"


/* Notes:
   - simulates a write-allocate cache
   - (block --> set) hash function uses simple bit selection
   - handling of references straddling two cache blocks:
      - counts as only one cache access (not two)
      - both blocks hit --> one hit
      - one block hits, the other misses --> one miss
      - both blocks miss --> one miss (not two)
*/

/* Cache configuration */
#include "cg_arch.h"

/* additional structures for cache use info, separated
 * according usage frequency:
 * - line_loaded : pointer to cost center of instruction
 *    which loaded the line into cache.
 *    Needed to increment counters when line is evicted.
 * - line_use : updated on every access
 */
/* Per cache-line usage tracking: how often the line was accessed since it
 * was loaded, and which bytes of it were actually touched. */
typedef struct {
   UInt count;
   UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */
} line_use;

/* Bookkeeping recorded when a line is loaded: which memory line it holds,
 * the instruction that caused the load, and where to account costs when the
 * line is eventually evicted. */
typedef struct {
   Addr memline, iaddr;
   line_use* dep_use; /* point to higher-level cacheblock for this memline */
   ULong* use_base;
} line_loaded;

/* Cache state */
typedef struct {
   char*        name;
   int          size;                   /* bytes */
   int          assoc;
   int          line_size;              /* bytes */
   Bool         sectored;  /* prefetch nearside cacheline on read */
   int          sets;
   int          sets_min_1;             /* sets - 1, used as index mask */
   int          line_size_bits;         /* log2(line_size) */
   int          tag_shift;              /* address bits above set index */
   UWord        tag_mask;
   char         desc_line[128];
   UWord*       tags;                   /* sets x assoc tag entries */

   /* for cache use */
   int          line_size_mask;         /* line_size - 1 */
   int*         line_start_mask;
   int*         line_end_mask;
   line_loaded* loaded;
   line_use*    use;
} cache_t2;

/*
 * States of flat caches in our model.
 * We use a 2-level hierarchy,
 */
static cache_t2 I1, D1, LL;

/* Lower bits of cache tags are used as flags for a cache line */
#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)
#define CACHELINE_DIRTY    1


/* Cache simulator Options */
static Bool clo_simulate_writeback = False;
static Bool clo_simulate_hwpref = False;
static Bool clo_simulate_sectors = False;
static Bool clo_collect_cacheuse = False;

/* Following global vars are setup before by setup_bbcc():
 *
 * - Addr   CLG_(bb_base)     (instruction start address of original BB)
 * - ULong* CLG_(cost_base)   (start of cost array for BB)
 */

Addr   CLG_(bb_base);
ULong* CLG_(cost_base);

/* Instruction currently being simulated; set by the log_* handlers so the
 * use-tracking code can attribute evictions to the triggering instruction. */
static InstrInfo* current_ii;

/* Cache use offsets */
/* The offsets are only correct because all per-instruction event sets get
 * the "Use" set added first !
 */
static Int off_I1_AcCost  = 0;
static Int off_I1_SpLoss  = 1;
static Int off_D1_AcCost  = 0;
static Int off_D1_SpLoss  = 1;
static Int off_LL_AcCost  = 2;
static Int off_LL_SpLoss  = 3;

/* Cache access types. Write equals CACHELINE_DIRTY so OR'ing the reference
 * type into a tag sets the dirty bit on a write. */
typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;

/* Result of a reference into a flat cache */
typedef enum { Hit  = 0, Miss, MissDirty } CacheResult;

/* Result of a reference into a hierarchical cache model */
typedef enum {
   L1_Hit,
   LL_Hit,
   MemAccess,
   WriteBackMemAccess } CacheModelResult;

typedef CacheModelResult (*simcall_type)(Addr, UChar);

/* Dispatch table: the active cache model's handlers (set elsewhere). */
static struct {
   simcall_type I1_Read;
   simcall_type D1_Read;
   simcall_type D1_Write;
} simulator;

/*------------------------------------------------------------*/
/*--- Cache Simulator Initialization                       ---*/
/*------------------------------------------------------------*/

/* Reset all tags (and, if cache-use collection is active, all per-line
 * usage bookkeeping) of cache 'c' to the empty state. */
static void cachesim_clearcache(cache_t2* c)
{
   Int i;

   for (i = 0; i < c->sets * c->assoc; i++)
      c->tags[i] = 0;
   if (c->use) {
      for (i = 0; i < c->sets * c->assoc; i++) {
         c->loaded[i].memline  = 0;
         c->loaded[i].use_base = 0;
         c->loaded[i].dep_use = 0;
         c->loaded[i].iaddr = 0;
         c->use[i].mask    = 0;
         c->use[i].count   = 0;
         c->tags[i] = i % c->assoc; /* init lower bits as pointer */
      }
   }
}

static void cacheuse_initcache(cache_t2* c);

/* By this point, the size/assoc/line_size has been checked.
 */
/* Derive all precomputed fields of 'c' from the user-supplied geometry
 * 'config' (size/assoc/line_size), allocate the tag array, and clear the
 * cache. Also allocates use-tracking arrays if --cacheuse is active. */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
   c->size      = config.size;
   c->assoc     = config.assoc;
   c->line_size = config.line_size;
   c->sectored  = False; // FIXME

   c->sets           = (c->size / c->line_size) / c->assoc;
   c->sets_min_1     = c->sets - 1;
   c->line_size_bits = VG_(log2)(c->line_size);
   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
   c->tag_mask       = ~((1<<c->tag_shift)-1);

   /* Can bits in tag entries be used for flags?
    * Should be always true as MIN_LINE_SIZE >= 16 */
   CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);

   if (c->assoc == 1) {
      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
                   c->size, c->line_size,
                   c->sectored ? ", sectored":"");
   } else {
      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
                   c->size, c->line_size, c->assoc,
                   c->sectored ? ", sectored":"");
   }

   c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
                                 sizeof(UWord) * c->sets * c->assoc);
   if (clo_collect_cacheuse)
      cacheuse_initcache(c);
   else
      c->use = 0;
   cachesim_clearcache(c);
}


#if 0
/* Debug helper: dump the full tag array of a cache, one set per line. */
static void print_cache(cache_t2* c)
{
   UInt set, way, i;

   /* Note initialisation and update of 'i'.
    */
   for (i = 0, set = 0; set < c->sets; set++) {
      for (way = 0; way < c->assoc; way++, i++) {
         VG_(printf)("%8x ", c->tags[i]);
      }
      VG_(printf)("\n");
   }
}
#endif


/*------------------------------------------------------------*/
/*--- Write Through Cache Simulation                       ---*/
/*------------------------------------------------------------*/

/*
 * Simple model: L1 & LL Write Through
 * Does not distinguish among read and write references
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_ref(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_ref(Addr a, UChar size)
 */

/* Look up 'tag' in set 'set_no' of cache 'c', maintaining LRU order
 * (tags are kept sorted most-recently-used first). */
static __inline__
CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)
{
   int i, j;
   UWord *set;

   set = &(c->tags[set_no * c->assoc]);

   /* This loop is unrolled for just the first case, which is the most */
   /* common.  We can't unroll any further because it would screw up   */
   /* if we have a direct-mapped (1-way) cache.                        */
   if (tag == set[0])
      return Hit;

   /* If the tag is one other than the MRU, move it into the MRU spot  */
   /* and shuffle the rest down.                                       */
   for (i = 1; i < c->assoc; i++) {
      if (tag == set[i]) {
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tag;
         return Hit;
      }
   }

   /* A miss;  install this tag as MRU, shuffle rest down. */
   for (j = c->assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag;

   return Miss;
}

/* Simulate one reference of 'size' bytes at 'a' in cache 'c'.
 * A reference straddling two lines counts as a single access:
 * it hits only if both lines hit. */
static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
{
   UInt set1 = ( a         >> c->line_size_bits) & (c->sets_min_1);
   UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
   UWord tag = a >> c->tag_shift;

   /* Access entirely within line. */
   if (set1 == set2)
      return cachesim_setref(c, set1, tag);

   /* Access straddles two lines.
    */
   /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
   else if (((set1 + 1) & (c->sets_min_1)) == set2) {
      UWord tag2 = (a+size-1) >> c->tag_shift;

      /* the call updates cache structures as side effect */
      CacheResult res1 = cachesim_setref(c, set1, tag);
      CacheResult res2 = cachesim_setref(c, set2, tag2);
      return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;

   } else {
      VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);
      VG_(tool_panic)("item straddles more than two cache sets");
   }
   return Hit;
}

/* Instruction fetch through the write-through hierarchy: I1, then LL. */
static
CacheModelResult cachesim_I1_ref(Addr a, UChar size)
{
   if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
   if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
   return MemAccess;
}

/* Data access through the write-through hierarchy: D1, then LL. */
static
CacheModelResult cachesim_D1_ref(Addr a, UChar size)
{
   if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
   if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
   return MemAccess;
}


/*------------------------------------------------------------*/
/*--- Write Back Cache Simulation                          ---*/
/*------------------------------------------------------------*/

/*
 * More complex model: L1 Write-through, LL Write-back
 * This needs to distinguish among read and write references.
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Write(Addr a, UChar size)
 */

/*
 * With write-back, result can be a miss evicting a dirty line
 * The dirty state of a cache line is stored in Bit0 of the tag for
 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
 * type (Read/Write), the line gets dirty on a write.
 */
/* Like cachesim_setref, but tracks the dirty bit (Bit0 of the stored tag):
 * an evicted dirty line yields MissDirty so the caller can account a
 * write-back memory access. */
static __inline__
CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)
{
   int i, j;
   UWord *set, tmp_tag;

   set = &(c->tags[set_no * c->assoc]);

   /* This loop is unrolled for just the first case, which is the most */
   /* common.  We can't unroll any further because it would screw up   */
   /* if we have a direct-mapped (1-way) cache.                        */
   if (tag == (set[0] & ~CACHELINE_DIRTY)) {
      set[0] |= ref;
      return Hit;
   }
   /* If the tag is one other than the MRU, move it into the MRU spot  */
   /* and shuffle the rest down.                                       */
   for (i = 1; i < c->assoc; i++) {
      if (tag == (set[i] & ~CACHELINE_DIRTY)) {
         tmp_tag = set[i] | ref; // update dirty flag
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tmp_tag;
         return Hit;
      }
   }

   /* A miss;  install this tag as MRU, shuffle rest down. */
   tmp_tag = set[c->assoc - 1];
   for (j = c->assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag | ref;

   return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;
}

/* Write-back variant of cachesim_ref. Note: here the tag is the address
 * masked with tag_mask (not shifted down), leaving the low bits free for
 * the dirty flag. */
static __inline__
CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)
{
   UInt set1 = ( a         >> c->line_size_bits) & (c->sets_min_1);
   UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
   UWord tag = a & c->tag_mask;

   /* Access entirely within line. */
   if (set1 == set2)
      return cachesim_setref_wb(c, ref, set1, tag);

   /* Access straddles two lines.
    */
   /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
   else if (((set1 + 1) & (c->sets_min_1)) == set2) {
      UWord tag2 = (a+size-1) & c->tag_mask;

      /* the call updates cache structures as side effect */
      CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
      CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag2);

      if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;
      return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;

   } else {
      VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);
      VG_(tool_panic)("item straddles more than two cache sets");
   }
   return Hit;
}


/* Instruction fetch: I1 write-through, LL write-back. */
static
CacheModelResult cachesim_I1_Read(Addr a, UChar size)
{
   if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
   switch( cachesim_ref_wb( &LL, Read, a, size) ) {
      case Hit:  return LL_Hit;
      case Miss: return MemAccess;
      default: break;
   }
   return WriteBackMemAccess;
}

/* Data read: D1 write-through, LL write-back. */
static
CacheModelResult cachesim_D1_Read(Addr a, UChar size)
{
   if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
   switch( cachesim_ref_wb( &LL, Read, a, size) ) {
      case Hit:  return LL_Hit;
      case Miss: return MemAccess;
      default: break;
   }
   return WriteBackMemAccess;
}

/* Data write: D1 write-through, LL write-back. */
static
CacheModelResult cachesim_D1_Write(Addr a, UChar size)
{
   if ( cachesim_ref( &D1, a, size) == Hit ) {
      /* Even for a L1 hit, the write-through L1 passes
       * the write to the LL to make the LL line dirty.
       * But this causes no latency, so return the hit.
       */
      cachesim_ref_wb( &LL, Write, a, size);
      return L1_Hit;
   }
   switch( cachesim_ref_wb( &LL, Write, a, size) ) {
      case Hit:  return LL_Hit;
      case Miss: return MemAccess;
      default: break;
   }
   return WriteBackMemAccess;
}


/*------------------------------------------------------------*/
/*--- Hardware Prefetch Simulation                         ---*/
/*------------------------------------------------------------*/

static ULong prefetch_up = 0;
static ULong prefetch_down = 0;

#define PF_STREAMS  8
#define PF_PAGEBITS 12

static UInt pf_lastblock[PF_STREAMS];
static Int  pf_seqblocks[PF_STREAMS];

/* Reset all prefetch stream detectors. */
static
void prefetch_clear(void)
{
   int i;
   for(i=0;i<PF_STREAMS;i++)
      pf_lastblock[i] = pf_seqblocks[i] = 0;
}

/*
 * HW Prefetch emulation
 * Start prefetching when detecting sequential access to 3 memory blocks.
 * One stream can be detected per 4k page.
 */
static __inline__
void prefetch_LL_doref(Addr a)
{
   UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;
   UInt block = ( a >> LL.line_size_bits);

   if (block != pf_lastblock[stream]) {
      if (pf_seqblocks[stream] == 0) {
         /* no direction yet: remember whether we moved up or down */
         if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;
         else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;
      }
      else if (pf_seqblocks[stream] >0) {
         if (pf_lastblock[stream] +1 == block) {
            pf_seqblocks[stream]++;
            if (pf_seqblocks[stream] >= 2) {
               /* sequential upward stream detected: touch a line ahead */
               prefetch_up++;
               cachesim_ref(&LL, a + 5 * LL.line_size,1);
            }
         }
         else pf_seqblocks[stream] = 0;
      }
      else if (pf_seqblocks[stream] <0) {
         if (pf_lastblock[stream] -1 == block) {
            pf_seqblocks[stream]--;
            if (pf_seqblocks[stream] <= -2) {
               /* sequential downward stream detected: touch a line behind */
               prefetch_down++;
               cachesim_ref(&LL, a - 5 * LL.line_size,1);
            }
         }
         else pf_seqblocks[stream] = 0;
      }
      pf_lastblock[stream] = block;
   }
}

/* simple model with hardware prefetch */

static
CacheModelResult prefetch_I1_ref(Addr a, UChar size)
{
   if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
   prefetch_LL_doref(a);
   if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
   return MemAccess;
}

static
CacheModelResult prefetch_D1_ref(Addr a, UChar size)
{
   if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
   prefetch_LL_doref(a);
   if ( cachesim_ref( &LL, a, size) == Hit ) return LL_Hit;
   return MemAccess;
}


/* complex model with hardware prefetch */

static
CacheModelResult prefetch_I1_Read(Addr a, UChar size)
{
   if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;
   prefetch_LL_doref(a);
   switch( cachesim_ref_wb( &LL, Read, a, size) ) {
      case Hit:  return LL_Hit;
      case Miss: return MemAccess;
      default: break;
   }
   return WriteBackMemAccess;
}

static
CacheModelResult prefetch_D1_Read(Addr a, UChar size)
{
   if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;
   prefetch_LL_doref(a);
   switch( cachesim_ref_wb( &LL, Read, a, size) ) {
      case Hit:  return LL_Hit;
      case Miss: return MemAccess;
      default: break;
   }
   return WriteBackMemAccess;
}

static
CacheModelResult prefetch_D1_Write(Addr a, UChar size)
{
   prefetch_LL_doref(a);
   if ( cachesim_ref( &D1, a, size) == Hit ) {
      /* Even for a L1 hit, the write-through L1 passes
       * the write to the LL to make the LL line dirty.
       * But this causes no latency, so return the hit.
       */
      cachesim_ref_wb( &LL, Write, a, size);
      return L1_Hit;
   }
   switch( cachesim_ref_wb( &LL, Write, a, size) ) {
      case Hit:  return LL_Hit;
      case Miss: return MemAccess;
      default: break;
   }
   return WriteBackMemAccess;
}


/*------------------------------------------------------------*/
/*--- Cache Simulation with use metric collection          ---*/
/*------------------------------------------------------------*/

/* can not be combined with write-back or prefetch */

/* Allocate and precompute the per-byte touch masks used for cache-use
 * tracking in cache 'c'. Each line gets a 32-bit mask; depending on
 * line_size this is finer or coarser than one bit per byte. */
static
void cacheuse_initcache(cache_t2* c)
{
   int i;
   unsigned int start_mask, start_val;
   unsigned int end_mask, end_val;

   c->use    = CLG_MALLOC("cl.sim.cu_ic.1",
                          sizeof(line_use) * c->sets * c->assoc);
   c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
                          sizeof(line_loaded) * c->sets * c->assoc);
   c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
                                   sizeof(int) * c->line_size);
   c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
                                 sizeof(int) * c->line_size);

   c->line_size_mask = c->line_size-1;

   /* Meaning of line_start_mask/line_end_mask
    * Example: for a given cache line, you get an access starting at
    * byte offset 5, length 4, byte 5 - 8 was touched. For a cache
    * line size of 32, you have 1 bit per byte in the mask:
    *
    *   bit31   bit8 bit5  bit 0
    *       |      |  |    |
    *       11..111111100000   line_start_mask[5]
    *       00..000111111111   line_end_mask[(5+4)-1]
    *
    *  use_mask |= line_start_mask[5] && line_end_mask[8]
    *
    */
   start_val = end_val = ~0;
   if (c->line_size < 32) {
      /* multiple mask bits per byte */
      int bits_per_byte = 32/c->line_size;
      start_mask = (1<<bits_per_byte)-1;
      end_mask   = start_mask << (32-bits_per_byte);
      for(i=0;i<c->line_size;i++) {
         c->line_start_mask[i] = start_val;
         start_val  = start_val & ~start_mask;
         start_mask = start_mask << bits_per_byte;

         c->line_end_mask[c->line_size-i-1] = end_val;
         end_val  = end_val & ~end_mask;
         end_mask = end_mask >> bits_per_byte;
      }
   }
   else {
      /* one mask bit covers multiple bytes */
      int bytes_per_bit = c->line_size/32;
      start_mask = 1;
      end_mask   = 1 << 31;
      for(i=0;i<c->line_size;i++) {
         c->line_start_mask[i] = start_val;
         c->line_end_mask[c->line_size-i-1] = end_val;
         if ( ((i+1)%bytes_per_bit) == 0) {
            start_val   &= ~start_mask;
            end_val     &= ~end_mask;
            start_mask <<= 1;
            end_mask   >>= 1;
         }
      }
   }

   CLG_DEBUG(6, "Config %s:\n", c->desc_line);
   for(i=0;i<c->line_size;i++) {
      CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",
                i, c->line_start_mask[i], c->line_end_mask[i]);
   }

   /* We use lower tag bits as offset pointers to cache use info.
    * I.e. some cache parameters don't work.
    */
   if ( (1<<c->tag_shift) < c->assoc) {
      VG_(message)(Vg_DebugMsg,
                   "error: Use associativity < %d for cache use statistics!\n",
                   (1<<c->tag_shift) );
      VG_(tool_panic)("Unsupported cache configuration");
   }
}


/* for I1/D1 caches */
/* Generates cacheuse_<L>_doRead: an L1 lookup with per-line usage
 * accounting. Lower tag bits of each entry hold the way index of its
 * use/loaded slot, so the slot stays attached to the entry as it is
 * shuffled for LRU. */
#define CACHEUSE(L)                                                         \
                                                                            \
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size)         \
{                                                                           \
   UInt set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);           \
   UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);           \
   UWord tag  = a & L.tag_mask;                                             \
   UWord tag2;                                                              \
   int i, j, idx;                                                           \
   UWord *set, tmp_tag;                                                     \
   UInt use_mask;                                                           \
                                                                            \
   CLG_DEBUG(6,"%s.Acc(Addr %#lx, size %d): Sets [%d/%d]\n",                \
             L.name, a, size, set1, set2);                                  \
                                                                            \
   /* First case: word entirely within line. */                             \
   if (set1 == set2) {                                                      \
                                                                            \
      set = &(L.tags[set1 * L.assoc]);                                      \
      use_mask = L.line_start_mask[a & L.line_size_mask] &                  \
                 L.line_end_mask[(a+size-1) & L.line_size_mask];            \
                                                                            \
      /* This loop is unrolled for just the first case, which is the most */\
      /* common.  We can't unroll any further because it would screw up   */\
      /* if we have a direct-mapped (1-way) cache.                        */\
      if (tag == (set[0] & L.tag_mask)) {                                   \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask);                   \
         L.use[idx].count ++;                                               \
         L.use[idx].mask |= use_mask;                                       \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);            \
         return L1_Hit;                                                     \
      }                                                                     \
      /* If the tag is one other than the MRU, move it into the MRU spot */ \
      /* and shuffle the rest down.                                      */ \
      for (i = 1; i < L.assoc; i++) {                                       \
         if (tag == (set[i] & L.tag_mask)) {                                \
            tmp_tag = set[i];                                               \
            for (j = i; j > 0; j--) {                                       \
               set[j] = set[j - 1];                                         \
            }                                                               \
            set[0] = tmp_tag;                                               \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask);               \
            L.use[idx].count ++;                                            \
            L.use[idx].mask |= use_mask;                                    \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,  \
                      use_mask, L.use[idx].mask, L.use[idx].count);         \
            return L1_Hit;                                                  \
         }                                                                  \
      }                                                                     \
                                                                            \
      /* A miss;  install this tag as MRU, shuffle rest down. */            \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                             \
      for (j = L.assoc - 1; j > 0; j--) {                                   \
         set[j] = set[j - 1];                                               \
      }                                                                     \
      set[0] = tag | tmp_tag;                                               \
      idx = (set1 * L.assoc) + tmp_tag;                                     \
      return update_##L##_use(&L, idx,                                      \
                              use_mask, a &~ L.line_size_mask);             \
                                                                            \
   /* Second case: word straddles two lines. */                             \
   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
   } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                      \
      Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:LL miss */           \
      set = &(L.tags[set1 * L.assoc]);                                      \
      use_mask = L.line_start_mask[a & L.line_size_mask];                   \
      if (tag == (set[0] & L.tag_mask)) {                                   \
         idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask);                   \
         L.use[idx].count ++;                                               \
         L.use[idx].mask |= use_mask;                                       \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);            \
         goto block2;                                                       \
      }                                                                     \
      for (i = 1; i < L.assoc; i++) {                                       \
         if (tag == (set[i] & L.tag_mask)) {                                \
            tmp_tag = set[i];                                               \
            for (j = i; j > 0; j--) {                                       \
               set[j] = set[j - 1];                                         \
            }                                                               \
            set[0] = tmp_tag;                                               \
            idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask);               \
            L.use[idx].count ++;                                            \
            L.use[idx].mask |= use_mask;                                    \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,  \
                      use_mask, L.use[idx].mask, L.use[idx].count);         \
            goto block2;                                                    \
         }                                                                  \
      }                                                                     \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                             \
      for (j = L.assoc - 1; j > 0; j--) {                                   \
         set[j] = set[j - 1];                                               \
      }                                                                     \
      set[0] = tag | tmp_tag;                                               \
      idx = (set1 * L.assoc) + tmp_tag;                                     \
      miss1 = update_##L##_use(&L, idx,                                     \
                               use_mask, a &~ L.line_size_mask);            \
block2:                                                                     \
      set = &(L.tags[set2 * L.assoc]);                                      \
      use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask];            \
      tag2 = (a+size-1) & L.tag_mask;                                       \
      if (tag2 == (set[0] & L.tag_mask)) {                                  \
         idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask);                   \
         L.use[idx].count ++;                                               \
         L.use[idx].mask |= use_mask;                                       \
         CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                   idx, L.loaded[idx].memline,  L.loaded[idx].iaddr,        \
                   use_mask, L.use[idx].mask, L.use[idx].count);            \
         return miss1;                                                      \
      }                                                                     \
      for (i = 1; i < L.assoc; i++) {                                       \
         if (tag2 == (set[i] & L.tag_mask)) {                               \
            tmp_tag = set[i];                                               \
            for (j = i; j > 0; j--) {                                       \
               set[j] = set[j - 1];                                         \
            }                                                               \
            set[0] = tmp_tag;                                               \
            idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask);               \
            L.use[idx].count ++;                                            \
            L.use[idx].mask |= use_mask;                                    \
            CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr,   \
                      use_mask, L.use[idx].mask, L.use[idx].count);         \
            return miss1;                                                   \
         }                                                                  \
      }                                                                     \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask;                             \
      for (j = L.assoc - 1; j > 0; j--) {                                   \
         set[j] = set[j - 1];                                               \
      }                                                                     \
      set[0] = tag2 | tmp_tag;                                              \
      idx = (set2 * L.assoc) + tmp_tag;                                     \
      miss2 = update_##L##_use(&L, idx,                                     \
                               use_mask, (a+size-1) &~ L.line_size_mask);   \
      return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:LL_Hit;     \
                                                                            \
   } else {                                                                 \
      VG_(printf)("addr: %#lx  size: %u  sets: %d %d", a, size, set1, set2); \
      VG_(tool_panic)("item straddles more than two cache sets");           \
   }                                                                        \
   return 0;                                                                \
}


/* logarithmic bitcounting algorithm, see
 * http://graphics.stanford.edu/~seander/bithacks.html
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
   unsigned int c; // store the total here
   const int S[] = {1, 2, 4, 8, 16}; // Magic Binary Numbers
   const int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF};

   c = bits;
   c = ((c >> S[0]) & B[0]) + (c & B[0]);
   c = ((c >> S[1]) & B[1]) + (c & B[1]);
   c = ((c >> S[2]) & B[2]) + (c & B[2]);
   c = ((c >> S[3]) & B[3]) + (c & B[3]);
   c = ((c >> S[4]) & B[4]) + (c & B[4]);
   return c;
}

/* On eviction of LL slot 'idx': account its access cost and spatial-loss
 * bytes (untouched part of the line) to the instruction that loaded it,
 * then re-initialize the slot for the newly loaded 'memline'. */
static void update_LL_use(int idx, Addr memline)
{
   line_loaded* loaded = &(LL.loaded[idx]);
   line_use* use = &(LL.use[idx]);
   /* number of line bytes never touched while the line was resident */
   int i = ((32 - countBits(use->mask)) * LL.line_size)>>5;

   CLG_DEBUG(2, " LL.miss [%d]: at %#lx accessing memline %#lx\n",
             idx, CLG_(bb_base) + current_ii->instr_offset, memline);
   if (use->count>0) {
      CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",
                use->count, i, use->mask, loaded->memline, loaded->iaddr);
      CLG_DEBUG(2, "   collect: %d, use_base %p\n",
                CLG_(current_state).collect, loaded->use_base);

      if (CLG_(current_state).collect && loaded->use_base) {
         (loaded->use_base)[off_LL_AcCost] += 1000 / use->count;
         (loaded->use_base)[off_LL_SpLoss] += i;
      }
   }

   use->count = 0;
   use->mask  = 0;

   loaded->memline = memline;
   loaded->iaddr   = CLG_(bb_base) + current_ii->instr_offset;
   loaded->use_base = (CLG_(current_state).nonskipped) ?
      CLG_(current_state).nonskipped->skipped :
      CLG_(cost_base) + current_ii->cost_offset;
}

/* LL lookup for a memory line loaded into an L1; links the L1 slot's
 * dep_use to the corresponding LL use slot so L1 usage can be merged
 * into the LL line on L1 eviction. */
static
CacheModelResult cacheuse_LL_access(Addr memline, line_loaded* l1_loaded)
{
   UInt setNo = (memline >> LL.line_size_bits) & (LL.sets_min_1);
   UWord* set = &(LL.tags[setNo * LL.assoc]);
   UWord tag  = memline & LL.tag_mask;

   int i, j, idx;
   UWord tmp_tag;

   CLG_DEBUG(6,"LL.Acc(Memline %#lx): Set %d\n", memline, setNo);

   if (tag == (set[0] & LL.tag_mask)) {
      idx = (setNo * LL.assoc) + (set[0] & ~LL.tag_mask);
      l1_loaded->dep_use = &(LL.use[idx]);

      CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
                idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
                LL.use[idx].mask, LL.use[idx].count);
      return LL_Hit;
   }
   for (i = 1; i < LL.assoc; i++) {
      if (tag == (set[i] & LL.tag_mask)) {
         tmp_tag = set[i];
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tmp_tag;
         idx = (setNo * LL.assoc) + (tmp_tag & ~LL.tag_mask);
         l1_loaded->dep_use = &(LL.use[idx]);

         CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
                   i, idx, LL.loaded[idx].memline, LL.loaded[idx].iaddr,
                   LL.use[idx].mask, LL.use[idx].count);
         return LL_Hit;
      }
   }

   /* A miss;  install this tag as MRU, shuffle rest down.
    */
   tmp_tag = set[LL.assoc - 1] & ~LL.tag_mask;
   for (j = LL.assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag | tmp_tag;
   idx = (setNo * LL.assoc) + tmp_tag;
   l1_loaded->dep_use = &(LL.use[idx]);

   update_LL_use(idx, memline);

   return MemAccess;
}




/* Generates update_<L>_use: on eviction of L1 slot 'idx', account its
 * access cost and spatial loss, merge its usage into the dependent LL
 * line, re-initialize the slot, and do the LL lookup for 'memline'. */
#define UPDATE_USE(L)                                               \
                                                                    \
static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
                                           UInt mask, Addr memline) \
{                                                                   \
   line_loaded* loaded = &(cache->loaded[idx]);                     \
   line_use* use = &(cache->use[idx]);                              \
   int c = ((32 - countBits(use->mask)) * cache->line_size)>>5;     \
                                                                    \
   CLG_DEBUG(2, " %s.miss [%d]: at %#lx accessing memline %#lx (mask %08x)\n", \
             cache->name, idx, CLG_(bb_base) + current_ii->instr_offset, memline, mask); \
   if (use->count>0) {                                              \
      CLG_DEBUG(2, "   old: used %d, loss bits %d (%08x) [line %#lx from %#lx]\n",\
                use->count, c, use->mask, loaded->memline, loaded->iaddr); \
      CLG_DEBUG(2, "   collect: %d, use_base %p\n",                 \
                CLG_(current_state).collect, loaded->use_base);     \
                                                                    \
      if (CLG_(current_state).collect && loaded->use_base) {        \
         (loaded->use_base)[off_##L##_AcCost] += 1000 / use->count; \
         (loaded->use_base)[off_##L##_SpLoss] += c;                 \
                                                                    \
         /* FIXME (?): L1/LL line sizes must be equal ! */          \
         loaded->dep_use->mask |= use->mask;                        \
         loaded->dep_use->count += use->count;                      \
      }                                                             \
   }                                                                \
                                                                    \
   use->count = 1;                                                  \
   use->mask  = mask;                                               \
   loaded->memline = memline;                                       \
   loaded->iaddr   = CLG_(bb_base) + current_ii->instr_offset;      \
   loaded->use_base = (CLG_(current_state).nonskipped) ?            \
      CLG_(current_state).nonskipped->skipped :                     \
      CLG_(cost_base) + current_ii->cost_offset;                    \
                                                                    \
   if (memline == 0) return LL_Hit;                                 \
   return cacheuse_LL_access(memline, loaded);                      \
}

UPDATE_USE(I1);
UPDATE_USE(D1);

CACHEUSE(I1);
CACHEUSE(D1);


/* Flush all pending per-line usage counters at end of run: every still-
 * resident line is treated as evicted now, so its costs get attributed. */
static
void cacheuse_finish(void)
{
   int i;
   InstrInfo ii = { 0,0,0,0 };

   if (!CLG_(current_state).collect) return;

   CLG_(bb_base) = 0;
   current_ii = &ii; /* needs to be set for update_XX_use */
   CLG_(cost_base) = 0;

   /* update usage counters */
   if (I1.use)
      for (i = 0; i < I1.sets * I1.assoc; i++)
         if (I1.loaded[i].use_base)
            update_I1_use( &I1, i, 0,0);

   if (D1.use)
      for (i = 0; i < D1.sets * D1.assoc; i++)
         if (D1.loaded[i].use_base)
            update_D1_use( &D1, i, 0,0);

   if (LL.use)
      for (i = 0; i < LL.sets * LL.assoc; i++)
         if (LL.loaded[i].use_base)
            update_LL_use(i, 0);

   current_ii = 0;
}



/*------------------------------------------------------------*/
/*--- Helper functions called by instrumented code         ---*/
/*------------------------------------------------------------*/


/* Increment the event counters in both cost arrays c1/c2 according to
 * result 'r'. The cases deliberately fall through: e.g. a memory access
 * also counts as an LL access and an L1 access. */
static __inline__
void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
{
   switch(r) {
      case WriteBackMemAccess:
         if (clo_simulate_writeback) {
            c1[3]++;
            c2[3]++;
         }
         // fall through

      case MemAccess:
         c1[2]++;
         c2[2]++;
         // fall through

      case LL_Hit:
         c1[1]++;
         c2[1]++;
         // fall through

      default:
         c1[0]++;
         c2[0]++;
   }
}

/* Human-readable name of a cache model result, for debug output. */
static
Char* cacheRes(CacheModelResult r)
{
   switch(r) {
   case L1_Hit:    return "L1 Hit ";
   case LL_Hit:    return "LL Hit ";
   case MemAccess: return "LL Miss";
   case WriteBackMemAccess: return "LL Miss (dirty)";
   default:
      tl_assert(0);
   }
   return "??";
}

VG_REGPARM(1)
static void
log_1I0D(InstrInfo* ii)
{
   CacheModelResult IrRes;

   current_ii = ii;
   IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);

   CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes));

   if (CLG_(current_state).collect) {
      ULong* cost_Ir;

      /* costs go to the skipped function's cost center if we are inside
       * a skipped call, else to this instruction's own cost slot */
      if (CLG_(current_state).nonskipped)
         cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
      else
         cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];

      inc_costs(IrRes, cost_Ir,
                CLG_(current_state).cost + fullOffset(EG_IR) );
   }
}

/* Two instruction fetches, no data access. */
VG_REGPARM(2)
static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
{
   CacheModelResult Ir1Res, Ir2Res;
   ULong *global_cost_Ir;

   current_ii = ii1;
   Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
   current_ii = ii2;
   Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);

   CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
             CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
             CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );

   if (!CLG_(current_state).collect) return;

   global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
   if (CLG_(current_state).nonskipped) {
      ULong* skipped_cost_Ir =
         CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);

      inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
      inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
      return;
   }

   inc_costs(Ir1Res, global_cost_Ir,
             CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
   inc_costs(Ir2Res, global_cost_Ir,
             CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
}

/* Three instruction fetches, no data access. */
VG_REGPARM(3)
static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
{
   CacheModelResult Ir1Res, Ir2Res, Ir3Res;
   ULong *global_cost_Ir;

   current_ii = ii1;
   Ir1Res = (*simulator.I1_Read)(CLG_(bb_base) + ii1->instr_offset, ii1->instr_size);
   current_ii = ii2;
   Ir2Res = (*simulator.I1_Read)(CLG_(bb_base) + ii2->instr_offset, ii2->instr_size);
   current_ii = ii3;
   Ir3Res = (*simulator.I1_Read)(CLG_(bb_base) + ii3->instr_offset, ii3->instr_size);

   CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
             CLG_(bb_base) + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
             CLG_(bb_base) + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
             CLG_(bb_base) + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );

   if (!CLG_(current_state).collect) return;

   global_cost_Ir = CLG_(current_state).cost + fullOffset(EG_IR);
   if (CLG_(current_state).nonskipped) {
      ULong* skipped_cost_Ir =
         CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
      inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
      inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
      inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
      return;
   }

   inc_costs(Ir1Res, global_cost_Ir,
             CLG_(cost_base) + ii1->cost_offset + ii1->eventset->offset[EG_IR]);
   inc_costs(Ir2Res, global_cost_Ir,
             CLG_(cost_base) + ii2->cost_offset + ii2->eventset->offset[EG_IR]);
   inc_costs(Ir3Res, global_cost_Ir,
             CLG_(cost_base) + ii3->cost_offset + ii3->eventset->offset[EG_IR]);
}

/* Instruction doing a read access */

VG_REGPARM(3)
static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
{
   CacheModelResult IrRes, DrRes;

   current_ii = ii;
   IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
   DrRes = (*simulator.D1_Read)(data_addr, data_size);

   CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
             data_addr, data_size, cacheRes(DrRes));

   if (CLG_(current_state).collect) {
      ULong *cost_Ir, *cost_Dr;

      if (CLG_(current_state).nonskipped) {
         cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR);
         cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
      }
      else {
         cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR];
         cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];
      }

      inc_costs(IrRes, cost_Ir,
                CLG_(current_state).cost + fullOffset(EG_IR) );
      inc_costs(DrRes, cost_Dr,
                CLG_(current_state).cost + fullOffset(EG_DR) );
   }
}


/* Data read without an instruction fetch (instruction already logged). */
VG_REGPARM(3)
static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
{
   CacheModelResult DrRes;

   current_ii = ii;
   DrRes = (*simulator.D1_Read)(data_addr, data_size);

   CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n",
             data_addr, data_size, cacheRes(DrRes));

   if (CLG_(current_state).collect) {
      ULong *cost_Dr;

      if (CLG_(current_state).nonskipped)
         cost_Dr = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DR);
      else
         cost_Dr = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DR];

      inc_costs(DrRes, cost_Dr,
                CLG_(current_state).cost + fullOffset(EG_DR) );
   }
}


/* Instruction doing a write access */

VG_REGPARM(3)
static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
{
   CacheModelResult IrRes, DwRes;

   current_ii = ii;
   IrRes = (*simulator.I1_Read)(CLG_(bb_base) + ii->instr_offset, ii->instr_size);
   DwRes = (*simulator.D1_Write)(data_addr, data_size);

   CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
             data_addr, data_size,
cacheRes(DwRes)); 1219 1220 if (CLG_(current_state).collect) { 1221 ULong *cost_Ir, *cost_Dw; 1222 1223 if (CLG_(current_state).nonskipped) { 1224 cost_Ir = CLG_(current_state).nonskipped->skipped + fullOffset(EG_IR); 1225 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW); 1226 } 1227 else { 1228 cost_Ir = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_IR]; 1229 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW]; 1230 } 1231 1232 inc_costs(IrRes, cost_Ir, 1233 CLG_(current_state).cost + fullOffset(EG_IR) ); 1234 inc_costs(DwRes, cost_Dw, 1235 CLG_(current_state).cost + fullOffset(EG_DW) ); 1236 } 1237} 1238 1239VG_REGPARM(3) 1240static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size) 1241{ 1242 CacheModelResult DwRes; 1243 1244 current_ii = ii; 1245 DwRes = (*simulator.D1_Write)(data_addr, data_size); 1246 1247 CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n", 1248 data_addr, data_size, cacheRes(DwRes)); 1249 1250 if (CLG_(current_state).collect) { 1251 ULong *cost_Dw; 1252 1253 if (CLG_(current_state).nonskipped) 1254 cost_Dw = CLG_(current_state).nonskipped->skipped + fullOffset(EG_DW); 1255 else 1256 cost_Dw = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_DW]; 1257 1258 inc_costs(DwRes, cost_Dw, 1259 CLG_(current_state).cost + fullOffset(EG_DW) ); 1260 } 1261} 1262 1263 1264 1265/*------------------------------------------------------------*/ 1266/*--- Cache configuration ---*/ 1267/*------------------------------------------------------------*/ 1268 1269static cache_t clo_I1_cache = UNDEFINED_CACHE; 1270static cache_t clo_D1_cache = UNDEFINED_CACHE; 1271static cache_t clo_LL_cache = UNDEFINED_CACHE; 1272 1273/* Initialize and clear simulator state */ 1274static void cachesim_post_clo_init(void) 1275{ 1276 /* Cache configurations. 
*/ 1277 cache_t I1c, D1c, LLc; 1278 1279 /* Initialize access handlers */ 1280 if (!CLG_(clo).simulate_cache) { 1281 CLG_(cachesim).log_1I0D = 0; 1282 CLG_(cachesim).log_1I0D_name = "(no function)"; 1283 CLG_(cachesim).log_2I0D = 0; 1284 CLG_(cachesim).log_2I0D_name = "(no function)"; 1285 CLG_(cachesim).log_3I0D = 0; 1286 CLG_(cachesim).log_3I0D_name = "(no function)"; 1287 1288 CLG_(cachesim).log_1I1Dr = 0; 1289 CLG_(cachesim).log_1I1Dr_name = "(no function)"; 1290 CLG_(cachesim).log_1I1Dw = 0; 1291 CLG_(cachesim).log_1I1Dw_name = "(no function)"; 1292 1293 CLG_(cachesim).log_0I1Dr = 0; 1294 CLG_(cachesim).log_0I1Dr_name = "(no function)"; 1295 CLG_(cachesim).log_0I1Dw = 0; 1296 CLG_(cachesim).log_0I1Dw_name = "(no function)"; 1297 return; 1298 } 1299 1300 /* Configuration of caches only needed with real cache simulation */ 1301 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc, 1302 &clo_I1_cache, 1303 &clo_D1_cache, 1304 &clo_LL_cache); 1305 1306 I1.name = "I1"; 1307 D1.name = "D1"; 1308 LL.name = "LL"; 1309 1310 cachesim_initcache(I1c, &I1); 1311 cachesim_initcache(D1c, &D1); 1312 cachesim_initcache(LLc, &LL); 1313 1314 /* the other cache simulators use the standard helpers 1315 * with dispatching via simulator struct */ 1316 1317 CLG_(cachesim).log_1I0D = log_1I0D; 1318 CLG_(cachesim).log_1I0D_name = "log_1I0D"; 1319 CLG_(cachesim).log_2I0D = log_2I0D; 1320 CLG_(cachesim).log_2I0D_name = "log_2I0D"; 1321 CLG_(cachesim).log_3I0D = log_3I0D; 1322 CLG_(cachesim).log_3I0D_name = "log_3I0D"; 1323 1324 CLG_(cachesim).log_1I1Dr = log_1I1Dr; 1325 CLG_(cachesim).log_1I1Dw = log_1I1Dw; 1326 CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr"; 1327 CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw"; 1328 1329 CLG_(cachesim).log_0I1Dr = log_0I1Dr; 1330 CLG_(cachesim).log_0I1Dw = log_0I1Dw; 1331 CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr"; 1332 CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw"; 1333 1334 if (clo_collect_cacheuse) { 1335 1336 /* Output warning for not supported option 
combinations */ 1337 if (clo_simulate_hwpref) { 1338 VG_(message)(Vg_DebugMsg, 1339 "warning: prefetch simulation can not be " 1340 "used with cache usage\n"); 1341 clo_simulate_hwpref = False; 1342 } 1343 1344 if (clo_simulate_writeback) { 1345 VG_(message)(Vg_DebugMsg, 1346 "warning: write-back simulation can not be " 1347 "used with cache usage\n"); 1348 clo_simulate_writeback = False; 1349 } 1350 1351 simulator.I1_Read = cacheuse_I1_doRead; 1352 simulator.D1_Read = cacheuse_D1_doRead; 1353 simulator.D1_Write = cacheuse_D1_doRead; 1354 return; 1355 } 1356 1357 if (clo_simulate_hwpref) { 1358 prefetch_clear(); 1359 1360 if (clo_simulate_writeback) { 1361 simulator.I1_Read = prefetch_I1_Read; 1362 simulator.D1_Read = prefetch_D1_Read; 1363 simulator.D1_Write = prefetch_D1_Write; 1364 } 1365 else { 1366 simulator.I1_Read = prefetch_I1_ref; 1367 simulator.D1_Read = prefetch_D1_ref; 1368 simulator.D1_Write = prefetch_D1_ref; 1369 } 1370 1371 return; 1372 } 1373 1374 if (clo_simulate_writeback) { 1375 simulator.I1_Read = cachesim_I1_Read; 1376 simulator.D1_Read = cachesim_D1_Read; 1377 simulator.D1_Write = cachesim_D1_Write; 1378 } 1379 else { 1380 simulator.I1_Read = cachesim_I1_ref; 1381 simulator.D1_Read = cachesim_D1_ref; 1382 simulator.D1_Write = cachesim_D1_ref; 1383 } 1384} 1385 1386 1387/* Clear simulator state. 
Has to be initialized before */ 1388static 1389void cachesim_clear(void) 1390{ 1391 cachesim_clearcache(&I1); 1392 cachesim_clearcache(&D1); 1393 cachesim_clearcache(&LL); 1394 1395 prefetch_clear(); 1396} 1397 1398 1399static void cachesim_getdesc(Char* buf) 1400{ 1401 Int p; 1402 p = VG_(sprintf)(buf, "\ndesc: I1 cache: %s\n", I1.desc_line); 1403 p += VG_(sprintf)(buf+p, "desc: D1 cache: %s\n", D1.desc_line); 1404 VG_(sprintf)(buf+p, "desc: LL cache: %s\n", LL.desc_line); 1405} 1406 1407static 1408void cachesim_print_opts(void) 1409{ 1410 VG_(printf)( 1411"\n cache simulator options (does cache simulation if used):\n" 1412" --simulate-wb=no|yes Count write-back events [no]\n" 1413" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n" 1414#if CLG_EXPERIMENTAL 1415" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n" 1416#endif 1417" --cacheuse=no|yes Collect cache block use [no]\n"); 1418 VG_(print_cache_clo_opts)(); 1419} 1420 1421/* Check for command line option for cache configuration. 1422 * Return False if unknown and not handled. 1423 * 1424 * Called from CLG_(process_cmd_line_option)() in clo.c 1425 */ 1426static Bool cachesim_parse_opt(Char* arg) 1427{ 1428 if VG_BOOL_CLO(arg, "--simulate-wb", clo_simulate_writeback) {} 1429 else if VG_BOOL_CLO(arg, "--simulate-hwpref", clo_simulate_hwpref) {} 1430 else if VG_BOOL_CLO(arg, "--simulate-sectors", clo_simulate_sectors) {} 1431 1432 else if VG_BOOL_CLO(arg, "--cacheuse", clo_collect_cacheuse) { 1433 if (clo_collect_cacheuse) { 1434 /* Use counters only make sense with fine dumping */ 1435 CLG_(clo).dump_instr = True; 1436 } 1437 } 1438 1439 else if (VG_(str_clo_cache_opt)(arg, 1440 &clo_I1_cache, 1441 &clo_D1_cache, 1442 &clo_LL_cache)) {} 1443 1444 else 1445 return False; 1446 1447 return True; 1448} 1449 1450/* Adds commas to ULong, right justifying in a field field_width wide, returns 1451 * the string in buf. 
*/ 1452static 1453Int commify(ULong n, int field_width, char* buf) 1454{ 1455 int len, n_commas, i, j, new_len, space; 1456 1457 VG_(sprintf)(buf, "%llu", n); 1458 len = VG_(strlen)(buf); 1459 n_commas = (len - 1) / 3; 1460 new_len = len + n_commas; 1461 space = field_width - new_len; 1462 1463 /* Allow for printing a number in a field_width smaller than it's size */ 1464 if (space < 0) space = 0; 1465 1466 /* Make j = -1 because we copy the '\0' before doing the numbers in groups 1467 * of three. */ 1468 for (j = -1, i = len ; i >= 0; i--) { 1469 buf[i + n_commas + space] = buf[i]; 1470 1471 if ((i>0) && (3 == ++j)) { 1472 j = 0; 1473 n_commas--; 1474 buf[i + n_commas + space] = ','; 1475 } 1476 } 1477 /* Right justify in field. */ 1478 for (i = 0; i < space; i++) buf[i] = ' '; 1479 return new_len; 1480} 1481 1482static 1483void percentify(Int n, Int ex, Int field_width, char buf[]) 1484{ 1485 int i, len, space; 1486 1487 VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex); 1488 len = VG_(strlen)(buf); 1489 space = field_width - len; 1490 if (space < 0) space = 0; /* Allow for v. 
small field_width */ 1491 i = len; 1492 1493 /* Right justify in field */ 1494 for ( ; i >= 0; i--) buf[i + space] = buf[i]; 1495 for (i = 0; i < space; i++) buf[i] = ' '; 1496} 1497 1498static 1499void cachesim_printstat(Int l1, Int l2, Int l3) 1500{ 1501 FullCost total = CLG_(total_cost), D_total = 0; 1502 ULong LL_total_m, LL_total_mr, LL_total_mw, 1503 LL_total, LL_total_r, LL_total_w; 1504 char buf1[RESULTS_BUF_LEN], 1505 buf2[RESULTS_BUF_LEN], 1506 buf3[RESULTS_BUF_LEN]; 1507 Int p; 1508 1509 if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) { 1510 VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu\n", 1511 prefetch_up); 1512 VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu\n", 1513 prefetch_down); 1514 VG_(message)(Vg_DebugMsg, "\n"); 1515 } 1516 1517 commify(total[fullOffset(EG_IR) +1], l1, buf1); 1518 VG_(message)(Vg_UserMsg, "I1 misses: %s\n", buf1); 1519 1520 commify(total[fullOffset(EG_IR) +2], l1, buf1); 1521 VG_(message)(Vg_UserMsg, "LLi misses: %s\n", buf1); 1522 1523 p = 100; 1524 1525 if (0 == total[fullOffset(EG_IR)]) 1526 total[fullOffset(EG_IR)] = 1; 1527 1528 percentify(total[fullOffset(EG_IR)+1] * 100 * p / 1529 total[fullOffset(EG_IR)], p, l1+1, buf1); 1530 VG_(message)(Vg_UserMsg, "I1 miss rate: %s\n", buf1); 1531 1532 percentify(total[fullOffset(EG_IR)+2] * 100 * p / 1533 total[fullOffset(EG_IR)], p, l1+1, buf1); 1534 VG_(message)(Vg_UserMsg, "LLi miss rate: %s\n", buf1); 1535 VG_(message)(Vg_UserMsg, "\n"); 1536 1537 /* D cache results. 1538 Use the D_refs.rd and D_refs.wr values to determine the 1539 * width of columns 2 & 3. 
*/ 1540 1541 D_total = CLG_(get_eventset_cost)( CLG_(sets).full ); 1542 CLG_(init_cost)( CLG_(sets).full, D_total); 1543 // we only use the first 3 values of D_total, adding up Dr and Dw costs 1544 CLG_(copy_cost)( CLG_(get_event_set)(EG_DR), D_total, total + fullOffset(EG_DR) ); 1545 CLG_(add_cost) ( CLG_(get_event_set)(EG_DW), D_total, total + fullOffset(EG_DW) ); 1546 1547 commify( D_total[0], l1, buf1); 1548 commify(total[fullOffset(EG_DR)], l2, buf2); 1549 commify(total[fullOffset(EG_DW)], l3, buf3); 1550 VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)\n", 1551 buf1, buf2, buf3); 1552 1553 commify( D_total[1], l1, buf1); 1554 commify(total[fullOffset(EG_DR)+1], l2, buf2); 1555 commify(total[fullOffset(EG_DW)+1], l3, buf3); 1556 VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)\n", 1557 buf1, buf2, buf3); 1558 1559 commify( D_total[2], l1, buf1); 1560 commify(total[fullOffset(EG_DR)+2], l2, buf2); 1561 commify(total[fullOffset(EG_DW)+2], l3, buf3); 1562 VG_(message)(Vg_UserMsg, "LLd misses: %s (%s rd + %s wr)\n", 1563 buf1, buf2, buf3); 1564 1565 p = 10; 1566 1567 if (0 == D_total[0]) D_total[0] = 1; 1568 if (0 == total[fullOffset(EG_DR)]) total[fullOffset(EG_DR)] = 1; 1569 if (0 == total[fullOffset(EG_DW)]) total[fullOffset(EG_DW)] = 1; 1570 1571 percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1); 1572 percentify(total[fullOffset(EG_DR)+1] * 100 * p / 1573 total[fullOffset(EG_DR)], p, l2+1, buf2); 1574 percentify(total[fullOffset(EG_DW)+1] * 100 * p / 1575 total[fullOffset(EG_DW)], p, l3+1, buf3); 1576 VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )\n", 1577 buf1, buf2,buf3); 1578 1579 percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1); 1580 percentify(total[fullOffset(EG_DR)+2] * 100 * p / 1581 total[fullOffset(EG_DR)], p, l2+1, buf2); 1582 percentify(total[fullOffset(EG_DW)+2] * 100 * p / 1583 total[fullOffset(EG_DW)], p, l3+1, buf3); 1584 VG_(message)(Vg_UserMsg, "LLd miss rate: %s (%s + %s )\n", 1585 buf1, buf2,buf3); 
1586 VG_(message)(Vg_UserMsg, "\n"); 1587 1588 1589 1590 /* LL overall results */ 1591 1592 LL_total = 1593 total[fullOffset(EG_DR) +1] + 1594 total[fullOffset(EG_DW) +1] + 1595 total[fullOffset(EG_IR) +1]; 1596 LL_total_r = 1597 total[fullOffset(EG_DR) +1] + 1598 total[fullOffset(EG_IR) +1]; 1599 LL_total_w = total[fullOffset(EG_DW) +1]; 1600 commify(LL_total, l1, buf1); 1601 commify(LL_total_r, l2, buf2); 1602 commify(LL_total_w, l3, buf3); 1603 VG_(message)(Vg_UserMsg, "LL refs: %s (%s rd + %s wr)\n", 1604 buf1, buf2, buf3); 1605 1606 LL_total_m = 1607 total[fullOffset(EG_DR) +2] + 1608 total[fullOffset(EG_DW) +2] + 1609 total[fullOffset(EG_IR) +2]; 1610 LL_total_mr = 1611 total[fullOffset(EG_DR) +2] + 1612 total[fullOffset(EG_IR) +2]; 1613 LL_total_mw = total[fullOffset(EG_DW) +2]; 1614 commify(LL_total_m, l1, buf1); 1615 commify(LL_total_mr, l2, buf2); 1616 commify(LL_total_mw, l3, buf3); 1617 VG_(message)(Vg_UserMsg, "LL misses: %s (%s rd + %s wr)\n", 1618 buf1, buf2, buf3); 1619 1620 percentify(LL_total_m * 100 * p / 1621 (total[fullOffset(EG_IR)] + D_total[0]), p, l1+1, buf1); 1622 percentify(LL_total_mr * 100 * p / 1623 (total[fullOffset(EG_IR)] + total[fullOffset(EG_DR)]), 1624 p, l2+1, buf2); 1625 percentify(LL_total_mw * 100 * p / 1626 total[fullOffset(EG_DW)], p, l3+1, buf3); 1627 VG_(message)(Vg_UserMsg, "LL miss rate: %s (%s + %s )\n", 1628 buf1, buf2,buf3); 1629} 1630 1631 1632/*------------------------------------------------------------*/ 1633/*--- Setup for Event set. 
                                                           ---*/
/*------------------------------------------------------------*/

struct event_sets CLG_(sets);

/* Register the event groups used by this simulator and build the base
 * and full event sets, plus the mapping of events written to dumps.
 * Must run after option parsing (depends on clo_* flags). */
void CLG_(init_eventsets)()
{
   // Event groups from which the event sets are composed
   // the "Use" group only is used with "cacheuse" simulation
   if (clo_collect_cacheuse)
      CLG_(register_event_group4)(EG_USE,
                                  "AcCost1", "SpLoss1", "AcCost2", "SpLoss2");

   if (!CLG_(clo).simulate_cache)
      CLG_(register_event_group)(EG_IR, "Ir");
   else if (!clo_simulate_writeback) {
      CLG_(register_event_group3)(EG_IR, "Ir", "I1mr", "ILmr");
      CLG_(register_event_group3)(EG_DR, "Dr", "D1mr", "DLmr");
      CLG_(register_event_group3)(EG_DW, "Dw", "D1mw", "DLmw");
   }
   else { // clo_simulate_writeback
      CLG_(register_event_group4)(EG_IR, "Ir", "I1mr", "ILmr", "ILdmr");
      CLG_(register_event_group4)(EG_DR, "Dr", "D1mr", "DLmr", "DLdmr");
      CLG_(register_event_group4)(EG_DW, "Dw", "D1mw", "DLmw", "DLdmw");
   }

   if (CLG_(clo).simulate_branch) {
      CLG_(register_event_group2)(EG_BC, "Bc", "Bcm");
      CLG_(register_event_group2)(EG_BI, "Bi", "Bim");
   }

   if (CLG_(clo).collect_bus)
      CLG_(register_event_group)(EG_BUS, "Ge");

   if (CLG_(clo).collect_alloc)
      CLG_(register_event_group2)(EG_ALLOC, "allocCount", "allocSize");

   if (CLG_(clo).collect_systime)
      CLG_(register_event_group2)(EG_SYS, "sysCount", "sysTime");

   // event set used as base for instruction self cost
   CLG_(sets).base = CLG_(get_event_set2)(EG_USE, EG_IR);

   // event set comprising all event groups, used for inclusive cost
   CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).base, EG_DR, EG_DW);
   CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_BC, EG_BI);
   CLG_(sets).full = CLG_(add_event_group) (CLG_(sets).full, EG_BUS);
   CLG_(sets).full = CLG_(add_event_group2)(CLG_(sets).full, EG_ALLOC, EG_SYS);

   CLG_DEBUGIF(1) {
      CLG_DEBUG(1, "EventSets:\n");
      CLG_(print_eventset)(-2, CLG_(sets).base);
      CLG_(print_eventset)(-2, CLG_(sets).full);
   }

   /* Not-existing events are silently ignored */
   CLG_(dumpmap) = CLG_(get_eventmapping)(CLG_(sets).full);
   CLG_(append_event)(CLG_(dumpmap), "Ir");
   CLG_(append_event)(CLG_(dumpmap), "Dr");
   CLG_(append_event)(CLG_(dumpmap), "Dw");
   CLG_(append_event)(CLG_(dumpmap), "I1mr");
   CLG_(append_event)(CLG_(dumpmap), "D1mr");
   CLG_(append_event)(CLG_(dumpmap), "D1mw");
   CLG_(append_event)(CLG_(dumpmap), "ILmr");
   CLG_(append_event)(CLG_(dumpmap), "DLmr");
   CLG_(append_event)(CLG_(dumpmap), "DLmw");
   CLG_(append_event)(CLG_(dumpmap), "ILdmr");
   CLG_(append_event)(CLG_(dumpmap), "DLdmr");
   CLG_(append_event)(CLG_(dumpmap), "DLdmw");
   CLG_(append_event)(CLG_(dumpmap), "Bc");
   CLG_(append_event)(CLG_(dumpmap), "Bcm");
   CLG_(append_event)(CLG_(dumpmap), "Bi");
   CLG_(append_event)(CLG_(dumpmap), "Bim");
   CLG_(append_event)(CLG_(dumpmap), "AcCost1");
   CLG_(append_event)(CLG_(dumpmap), "SpLoss1");
   CLG_(append_event)(CLG_(dumpmap), "AcCost2");
   CLG_(append_event)(CLG_(dumpmap), "SpLoss2");
   CLG_(append_event)(CLG_(dumpmap), "Ge");
   CLG_(append_event)(CLG_(dumpmap), "allocCount");
   CLG_(append_event)(CLG_(dumpmap), "allocSize");
   CLG_(append_event)(CLG_(dumpmap), "sysCount");
   CLG_(append_event)(CLG_(dumpmap), "sysTime");
}


/* this is called at dump time for every instruction executed */
static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
                               InstrInfo* ii, ULong exe_count)
{
   /* without cache simulation, Ir is simply the execution count */
   if (!CLG_(clo).simulate_cache)
      cost[ fullOffset(EG_IR) ] += exe_count;

   if (ii->eventset)
      CLG_(add_and_zero_cost2)( CLG_(sets).full, cost,
                                ii->eventset, bbcc->cost + ii->cost_offset);
}

/* Called once at exit; flushes pending cache-use counters. */
static
void cachesim_finish(void)
{
   if (clo_collect_cacheuse)
      cacheuse_finish();
}

1737/*------------------------------------------------------------*/ 1738/*--- The simulator defined in this file ---*/ 1739/*------------------------------------------------------------*/ 1740 1741struct cachesim_if CLG_(cachesim) = { 1742 .print_opts = cachesim_print_opts, 1743 .parse_opt = cachesim_parse_opt, 1744 .post_clo_init = cachesim_post_clo_init, 1745 .clear = cachesim_clear, 1746 .getdesc = cachesim_getdesc, 1747 .printstat = cachesim_printstat, 1748 .add_icost = cachesim_add_icost, 1749 .finish = cachesim_finish, 1750 1751 /* these will be set by cachesim_post_clo_init */ 1752 .log_1I0D = 0, 1753 .log_2I0D = 0, 1754 .log_3I0D = 0, 1755 1756 .log_1I1Dr = 0, 1757 .log_1I1Dw = 0, 1758 1759 .log_0I1Dr = 0, 1760 .log_0I1Dw = 0, 1761 1762 .log_1I0D_name = "(no function)", 1763 .log_2I0D_name = "(no function)", 1764 .log_3I0D_name = "(no function)", 1765 1766 .log_1I1Dr_name = "(no function)", 1767 .log_1I1Dw_name = "(no function)", 1768 1769 .log_0I1Dr_name = "(no function)", 1770 .log_0I1Dw_name = "(no function)", 1771}; 1772 1773 1774/*--------------------------------------------------------------------*/ 1775/*--- end ct_sim.c ---*/ 1776/*--------------------------------------------------------------------*/ 1777 1778