cg_sim.c revision fcd04886dbda12c647f9e2d8dac85b2c834f0a6c
1 2/*--------------------------------------------------------------------*/ 3/*--- Cache simulation cg_sim.c ---*/ 4/*--------------------------------------------------------------------*/ 5 6/* 7 This file is part of Cachegrind, a Valgrind tool for cache 8 profiling programs. 9 10 Copyright (C) 2002-2005 Nicholas Nethercote 11 njn@valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31/* Notes: 32 - simulates a write-allocate cache 33 - (block --> set) hash function uses simple bit selection 34 - handling of references straddling two cache blocks: 35 - counts as only one cache access (not two) 36 - both blocks hit --> one hit 37 - one block hits, the other misses --> one miss 38 - both blocks miss --> one miss (not two) 39*/ 40 41typedef struct { 42 Int size; /* bytes */ 43 Int assoc; 44 Int line_size; /* bytes */ 45 Int sets; 46 Int sets_min_1; 47 Int assoc_bits; 48 Int line_size_bits; 49 Int tag_shift; 50 Char desc_line[128]; 51 UWord* tags; 52} cache_t2; 53 54/* By this point, the size/assoc/line_size has been checked. */ 55static void cachesim_initcache(cache_t config, cache_t2* c) 56{ 57 Int i; 58 59 c->size = config.size; 60 c->assoc = config.assoc; 61 c->line_size = config.line_size; 62 63 c->sets = (c->size / c->line_size) / c->assoc; 64 c->sets_min_1 = c->sets - 1; 65 c->assoc_bits = VG_(log2)(c->assoc); 66 c->line_size_bits = VG_(log2)(c->line_size); 67 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); 68 69 if (c->assoc == 1) { 70 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", 71 c->size, c->line_size); 72 } else { 73 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", 74 c->size, c->line_size, c->assoc); 75 } 76 77 c->tags = VG_(malloc)(sizeof(UWord) * c->sets * c->assoc); 78 79 for (i = 0; i < c->sets * c->assoc; i++) 80 c->tags[i] = 0; 81} 82 83#if 0 84static void print_cache(cache_t2* c) 85{ 86 UInt set, way, i; 87 88 /* Note initialisation and update of 'i'. */ 89 for (i = 0, set = 0; set < c->sets; set++) { 90 for (way = 0; way < c->assoc; way++, i++) { 91 VG_(printf)("%16lx ", c->tags[i]); 92 } 93 VG_(printf)("\n"); 94 } 95} 96#endif 97 98/* This is done as a macro rather than by passing in the cache_t2 as an 99 * arg because it slows things down by a small amount (3-5%) due to all 100 * that extra indirection. */ 101 102#define CACHESIM(L, MISS_TREATMENT) \ 103/* The cache and associated bits and pieces. */ \ 104static cache_t2 L; \ 105 \ 106static void cachesim_##L##_initcache(cache_t config) \ 107{ \ 108 cachesim_initcache(config, &L); \ 109} \ 110 \ 111/* This attribute forces GCC to inline this function, even though it's */ \ 112/* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \ 113__attribute__((always_inline)) \ 114static __inline__ \ 115void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) \ 116{ \ 117 register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ 118 register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ 119 register UWord tag = a >> L.tag_shift; \ 120 Int i, j; \ 121 Bool is_miss = False; \ 122 UWord* set; \ 123 \ 124 /* First case: word entirely within line. */ \ 125 if (set1 == set2) { \ 126 \ 127 /* Shifting is a bit faster than multiplying */ \ 128 set = &(L.tags[set1 << L.assoc_bits]); \ 129 \ 130 /* This loop is unrolled for just the first case, which is the most */\ 131 /* common. We can't unroll any further because it would screw up */\ 132 /* if we have a direct-mapped (1-way) cache. */\ 133 if (tag == set[0]) { \ 134 return; \ 135 } \ 136 /* If the tag is one other than the MRU, move it into the MRU spot */\ 137 /* and shuffle the rest down. */\ 138 for (i = 1; i < L.assoc; i++) { \ 139 if (tag == set[i]) { \ 140 for (j = i; j > 0; j--) { \ 141 set[j] = set[j - 1]; \ 142 } \ 143 set[0] = tag; \ 144 return; \ 145 } \ 146 } \ 147 \ 148 /* A miss; install this tag as MRU, shuffle rest down. */ \ 149 for (j = L.assoc - 1; j > 0; j--) { \ 150 set[j] = set[j - 1]; \ 151 } \ 152 set[0] = tag; \ 153 MISS_TREATMENT; \ 154 return; \ 155 \ 156 /* Second case: word straddles two lines. */ \ 157 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ 158 } else if (((set1 + 1) & (L.sets-1)) == set2) { \ 159 set = &(L.tags[set1 << L.assoc_bits]); \ 160 if (tag == set[0]) { \ 161 goto block2; \ 162 } \ 163 for (i = 1; i < L.assoc; i++) { \ 164 if (tag == set[i]) { \ 165 for (j = i; j > 0; j--) { \ 166 set[j] = set[j - 1]; \ 167 } \ 168 set[0] = tag; \ 169 goto block2; \ 170 } \ 171 } \ 172 for (j = L.assoc - 1; j > 0; j--) { \ 173 set[j] = set[j - 1]; \ 174 } \ 175 set[0] = tag; \ 176 is_miss = True; \ 177block2: \ 178 set = &(L.tags[set2 << L.assoc_bits]); \ 179 if (tag == set[0]) { \ 180 goto miss_treatment; \ 181 } \ 182 for (i = 1; i < L.assoc; i++) { \ 183 if (tag == set[i]) { \ 184 for (j = i; j > 0; j--) { \ 185 set[j] = set[j - 1]; \ 186 } \ 187 set[0] = tag; \ 188 goto miss_treatment; \ 189 } \ 190 } \ 191 for (j = L.assoc - 1; j > 0; j--) { \ 192 set[j] = set[j - 1]; \ 193 } \ 194 set[0] = tag; \ 195 is_miss = True; \ 196miss_treatment: \ 197 if (is_miss) { MISS_TREATMENT; } \ 198 \ 199 } else { \ 200 VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); \ 201 VG_(tool_panic)("item straddles more than two cache sets"); \ 202 } \ 203 return; \ 204} 205 206CACHESIM(L2, (*m2)++ ); 207CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); 208CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); 209 210/*--------------------------------------------------------------------*/ 211/*--- end cg_sim.c ---*/ 212/*--------------------------------------------------------------------*/ 213 214