1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- Cache simulation cg_sim.c ---*/ 4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This file is part of Cachegrind, a Valgrind tool for cache 8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown profiling programs. 9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 10663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng Copyright (C) 2002-2012 Nicholas Nethercote 11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown njn@valgrind.org 12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is free software; you can redistribute it and/or 14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown modify it under the terms of the GNU General Public License as 15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown published by the Free Software Foundation; either version 2 of the 16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown License, or (at your option) any later version. 17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is distributed in the hope that it will be useful, but 19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown WITHOUT ANY WARRANTY; without even the implied warranty of 20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown General Public License for more details. 22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown You should have received a copy of the GNU General Public License 24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown along with this program; if not, write to the Free Software 25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 02111-1307, USA. 27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The GNU General Public License is contained in the file COPYING. 29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Notes: 32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - simulates a write-allocate cache 33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - (block --> set) hash function uses simple bit selection 34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - handling of references straddling two cache blocks: 35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - counts as only one cache access (not two) 36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - both blocks hit --> one hit 37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - one block hits, the other misses --> one miss 38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - both blocks miss --> one miss (not two) 39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Browntypedef struct { 42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int size; /* bytes */ 43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int assoc; 44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int line_size; /* bytes */ 45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int sets; 46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int sets_min_1; 47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int line_size_bits; 48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int tag_shift; 49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Char desc_line[128]; 50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord* tags; 51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} cache_t2; 52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* By this point, the size/assoc/line_size has been checked. */ 54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void cachesim_initcache(cache_t config, cache_t2* c) 55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int i; 57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->size = config.size; 59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->assoc = config.assoc; 60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->line_size = config.line_size; 61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->sets = (c->size / c->line_size) / c->assoc; 63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->sets_min_1 = c->sets - 1; 64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->line_size_bits = VG_(log2)(c->line_size); 65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); 66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (c->assoc == 1) { 68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", 69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->size, c->line_size); 70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", 72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->size, c->line_size, c->assoc); 73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->tags = VG_(malloc)("cg.sim.ci.1", 76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown sizeof(UWord) * c->sets * c->assoc); 77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < c->sets * c->assoc; i++) 79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c->tags[i] = 0; 80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This is done as a macro rather than by passing in the cache_t2 as an 83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * arg because it slows things down by a small amount (3-5%) due to all 84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * that extra indirection. */ 85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define CACHESIM(L, MISS_TREATMENT) \ 87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* The cache and associated bits and pieces. */ \ 88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic cache_t2 L; \ 89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void cachesim_##L##_initcache(cache_t config) \ 91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ \ 92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown cachesim_initcache(config, &L); \ 93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} \ 94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This attribute forces GCC to inline this function, even though it's */ \ 96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \ 97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown__attribute__((always_inline)) \ 98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic __inline__ \ 99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL) \ 100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ \ 101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ 102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ 103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord tag = a >> L.tag_shift; \ 104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord tag2; \ 105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int i, j; \ 106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool is_miss = False; \ 107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord* set; \ 108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* First case: word entirely within line. */ \ 110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (set1 == set2) { \ 111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set = &(L.tags[set1 * L.assoc]); \ 113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* This loop is unrolled for just the first case, which is the most */\ 115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* common. We can't unroll any further because it would screw up */\ 116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* if we have a direct-mapped (1-way) cache. */\ 117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (tag == set[0]) { \ 118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; \ 119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If the tag is one other than the MRU, move it into the MRU spot */\ 121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* and shuffle the rest down. */\ 122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 1; i < L.assoc; i++) { \ 123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (tag == set[i]) { \ 124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = i; j > 0; j--) { \ 125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[j] = set[j - 1]; \ 126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[0] = tag; \ 128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; \ 129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* A miss; install this tag as MRU, shuffle rest down. */ \ 133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = L.assoc - 1; j > 0; j--) { \ 134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[j] = set[j - 1]; \ 135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[0] = tag; \ 137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown MISS_TREATMENT; \ 138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; \ 139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Second case: word straddles two lines. */ \ 141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ 142b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \ 143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set = &(L.tags[set1 * L.assoc]); \ 144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (tag == set[0]) { \ 145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown goto block2; \ 146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 1; i < L.assoc; i++) { \ 148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (tag == set[i]) { \ 149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = i; j > 0; j--) { \ 150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[j] = set[j - 1]; \ 151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[0] = tag; \ 153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown goto block2; \ 154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = L.assoc - 1; j > 0; j--) { \ 157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[j] = set[j - 1]; \ 158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[0] = tag; \ 160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown is_miss = True; \ 161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownblock2: \ 162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set = &(L.tags[set2 * L.assoc]); \ 163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tag2 = (a+size-1) >> L.tag_shift; \ 164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (tag2 == set[0]) { \ 165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown goto miss_treatment; \ 166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 1; i < L.assoc; i++) { \ 168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (tag2 == set[i]) { \ 169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = i; j > 0; j--) { \ 170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[j] = set[j - 1]; \ 171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[0] = tag2; \ 173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown goto miss_treatment; \ 174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = L.assoc - 1; j > 0; j--) { \ 177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[j] = set[j - 1]; \ 178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown set[0] = tag2; \ 180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown is_miss = True; \ 181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownmiss_treatment: \ 182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (is_miss) { MISS_TREATMENT; } \ 183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown \ 184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { \ 185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);\ 186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(tool_panic)("item straddles more than two cache sets"); \ 187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } \ 188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; \ 189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownCACHESIM(LL, (*mL)++ ); 192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownCACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } ); 193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownCACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } ); 194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 196ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end cg_sim.c ---*/ 197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 199