/*--------------------------------------------------------------------*/
/*--- Cache simulation                                    cg_sim.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2002-2013 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Notes:
   - simulates a write-allocate cache
   - (block --> set) hash function uses simple bit selection
   - handling of references straddling two cache blocks:
       - counts as only one cache access (not two)
       - both blocks hit                  --> one hit
       - one block hits, the other misses --> one miss
       - both blocks miss                 --> one miss (not two)
*/

typedef struct {
   Int    size;             /* bytes */
   Int    assoc;
   Int    line_size;        /* bytes */
   Int    sets;
   Int    sets_min_1;
   Int    line_size_bits;
   Int    tag_shift;
   HChar  desc_line[128];   /* large enough */
   UWord* tags;
} cache_t2;

/* By this point, the size/assoc/line_size has been checked. */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
   Int i;

   c->size      = config.size;
   c->assoc     = config.assoc;
   c->line_size = config.line_size;

   c->sets           = (c->size / c->line_size) / c->assoc;
   c->sets_min_1     = c->sets - 1;
   c->line_size_bits = VG_(log2)(c->line_size);
   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);

   if (c->assoc == 1) {
      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
                   c->size, c->line_size);
   } else {
      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
                   c->size, c->line_size, c->assoc);
   }

   c->tags = VG_(malloc)("cg.sim.ci.1",
                         sizeof(UWord) * c->sets * c->assoc);

   for (i = 0; i < c->sets * c->assoc; i++)
      c->tags[i] = 0;
}
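/* Worked example (editorial illustration, not part of the original
 * file): for a hypothetical 32768 B, 8-way cache with 64 B lines,
 * cachesim_initcache() above derives
 *
 *    sets           = (32768 / 64) / 8 = 64
 *    sets_min_1     = 63            (0x3f, usable as a set-index mask)
 *    line_size_bits = log2(64)      = 6
 *    tag_shift      = 6 + log2(64)  = 12
 *
 * so for an address a, the containing memory block is a >> 6 and its
 * set is (a >> 6) & 0x3f, exactly as computed in cachesim_ref_is_miss()
 * below.  Note that tag_shift is derived here, but the lookup code in
 * this file uses the full block number as the tag instead.
 */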
/* This attribute forces GCC to inline the function, getting rid of a
 * lot of indirection around the cache_t2 pointer, if it is known to be
 * constant in the caller (the caller is inlined itself).
 * Without inlining of simulator functions, Cachegrind can get 40% slower.
 */
__attribute__((always_inline))
static __inline__
Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
{
   int i, j;
   UWord *set;

   set = &(c->tags[set_no * c->assoc]);

   /* This loop is unrolled for just the first case, which is the most */
   /* common.  We can't unroll any further because it would screw up   */
   /* if we have a direct-mapped (1-way) cache.                        */
   if (tag == set[0])
      return False;

   /* If the tag is one other than the MRU, move it into the MRU spot  */
   /* and shuffle the rest down.                                       */
   for (i = 1; i < c->assoc; i++) {
      if (tag == set[i]) {
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tag;

         return False;
      }
   }

   /* A miss; install this tag as MRU, shuffle rest down. */
   for (j = c->assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag;

   return True;
}

__attribute__((always_inline))
static __inline__
Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
{
   /* A memory block has the size of a cache line. */
   UWord block1 = a >> c->line_size_bits;
   UWord block2 = (a+size-1) >> c->line_size_bits;
   UInt  set1   = block1 & c->sets_min_1;

   /* Tags in real caches are kept minimal to save space: since the low
    * bits of the block numbers of all addresses mapping to one cache
    * set are identical, real caches store only
    *    tag = block >> log2(#sets)
    * But using the full memory block number as a (more specific) tag
    * is equally correct, and saves instructions.
    */
   UWord tag1 = block1;

   /* Access entirely within one line. */
   if (block1 == block2)
      return cachesim_setref_is_miss(c, set1, tag1);

   /* Access straddles two lines. */
   else if (block1 + 1 == block2) {
      UInt  set2 = block2 & c->sets_min_1;
      UWord tag2 = block2;

      /* Always do both lookups, as each one updates the LRU state of
         its set as a side effect. */
      if (cachesim_setref_is_miss(c, set1, tag1)) {
         cachesim_setref_is_miss(c, set2, tag2);
         return True;
      }
      return cachesim_setref_is_miss(c, set2, tag2);
   }
   VG_(printf)("addr: %lx  size: %u  blocks: %lu %lu\n",
               a, size, block1, block2);
   VG_(tool_panic)("item straddles more than two cache sets");
   /* not reached */
   return True;
}


static cache_t2 LL;
static cache_t2 I1;
static cache_t2 D1;

static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
{
   cachesim_initcache(I1c, &I1);
   cachesim_initcache(D1c, &D1);
   cachesim_initcache(LLc, &LL);
}

__attribute__((always_inline))
static __inline__
void cachesim_I1_doref_Gen(Addr a, UChar size, ULong* m1, ULong *mL)
{
   if (cachesim_ref_is_miss(&I1, a, size)) {
      (*m1)++;
      if (cachesim_ref_is_miss(&LL, a, size))
         (*mL)++;
   }
}

// Common special case IrNoX: the fetch stays within one cache line and
// the I1/LL line sizes match (established by cachesim_is_IrNoX below).
__attribute__((always_inline))
static __inline__
void cachesim_I1_doref_NoX(Addr a, UChar size, ULong* m1, ULong *mL)
{
   UWord block  = a >> I1.line_size_bits;
   UInt  I1_set = block & I1.sets_min_1;

   // use block as tag
   if (cachesim_setref_is_miss(&I1, I1_set, block)) {
      UInt LL_set = block & LL.sets_min_1;
      (*m1)++;
      // can use block as tag, as L1I and LL cache line sizes are equal
      if (cachesim_setref_is_miss(&LL, LL_set, block))
         (*mL)++;
   }
}

__attribute__((always_inline))
static __inline__
void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
{
   if (cachesim_ref_is_miss(&D1, a, size)) {
      (*m1)++;
      if (cachesim_ref_is_miss(&LL, a, size))
         (*mL)++;
   }
}
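/* The following sketch is an editorial illustration, not part of the
 * original file: it reproduces the move-to-front LRU policy of
 * cachesim_setref_is_miss() on plain C types, so it can be compiled
 * and experimented with outside Valgrind.  The unrolled MRU check is
 * folded into the search loop here; all names (toy_setref_is_miss,
 * etc.) are hypothetical.  Disabled with #if 0 so it cannot affect
 * the build. */
#if 0
#include <stdio.h>

static int toy_setref_is_miss(unsigned long* set, int assoc,
                              unsigned long tag)
{
   int i, j;
   for (i = 0; i < assoc; i++) {
      if (set[i] == tag) {             /* hit: move tag to MRU slot */
         for (j = i; j > 0; j--)
            set[j] = set[j - 1];
         set[0] = tag;
         return 0;
      }
   }
   for (j = assoc - 1; j > 0; j--)     /* miss: shift everything down, */
      set[j] = set[j - 1];             /* dropping the LRU entry       */
   set[0] = tag;
   return 1;
}

int main(void)
{
   unsigned long set[4] = { 1, 2, 3, 4 };          /* MRU first */
   printf("%d\n", toy_setref_is_miss(set, 4, 3));  /* 0: hit,  set = 3 1 2 4 */
   printf("%d\n", toy_setref_is_miss(set, 4, 9));  /* 1: miss, 4 evicted     */
   printf("%lu %lu %lu %lu\n",
          set[0], set[1], set[2], set[3]);         /* 9 3 1 2 */
   return 0;
}
#endif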
/* Check for the special case IrNoX. Called at instrumentation time.
 *
 * Does this instruction fetch touch only one cache line, and are the
 * L1I/LL cache line sizes the same?  If so, the runtime straddle check
 * can be omitted.
 *
 * Returning False is always safe: it merely makes the caller fall back
 * to the generic case, cachesim_I1_doref_Gen().
 */
static Bool cachesim_is_IrNoX(Addr a, UChar size)
{
   UWord block1, block2;

   if (I1.line_size_bits != LL.line_size_bits) return False;
   block1 = a >> I1.line_size_bits;
   block2 = (a+size-1) >> I1.line_size_bits;
   if (block1 != block2) return False;

   return True;
}

/*--------------------------------------------------------------------*/
/*--- end                                                 cg_sim.c ---*/
/*--------------------------------------------------------------------*/
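/* Editorial usage sketch (not part of the original file): how the
 * simulator entry points above fit together.  In the real tool the
 * call sites are generated instrumentation in cg_main.c and the cache
 * configurations come from the command line; the driver function and
 * all values below are hypothetical, and the initializers assume the
 * cache_t field order size/assoc/line_size from cg_arch.h.  Disabled
 * with #if 0 so it cannot affect the build. */
#if 0
static void example_driver(void)
{
   cache_t I1c = { 32768, 8, 64 };     /* size B, assoc, line size B */
   cache_t D1c = { 32768, 8, 64 };
   cache_t LLc = { 8388608, 16, 64 };  /* shared last-level cache */
   ULong I1m = 0, D1m = 0, LLm = 0;
   Addr  pc  = 0x400000;

   cachesim_initcaches(I1c, D1c, LLc);

   /* Instruction fetch: pick the cheap path when the fetch cannot
      straddle a line and the I1/LL line sizes match. */
   if (cachesim_is_IrNoX(pc, 4))
      cachesim_I1_doref_NoX(pc, 4, &I1m, &LLm);
   else
      cachesim_I1_doref_Gen(pc, 4, &I1m, &LLm);

   /* Data reference: an 8-byte access, counted against D1 and LL. */
   cachesim_D1_doref(0x601000, 8, &D1m, &LLm);
}
#endif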