cg_sim.c revision 0103de5f5d39add9080fe72884af2a5520c1f661
1 2/*--------------------------------------------------------------------*/ 3/*--- Cache simulation cg_sim.c ---*/ 4/*--------------------------------------------------------------------*/ 5 6/* 7 This file is part of Cachegrind, a Valgrind tool for cache 8 profiling programs. 9 10 Copyright (C) 2002-2005 Nicholas Nethercote 11 njn@valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31/* Notes: 32 - simulates a write-allocate cache 33 - (block --> set) hash function uses simple bit selection 34 - handling of references straddling two cache blocks: 35 - counts as only one cache access (not two) 36 - both blocks hit --> one hit 37 - one block hits, the other misses --> one miss 38 - both blocks miss --> one miss (not two) 39*/ 40 41typedef struct { 42 Int size; /* bytes */ 43 Int assoc; 44 Int line_size; /* bytes */ 45 Int sets; 46 Int sets_min_1; 47 Int assoc_bits; 48 Int line_size_bits; 49 Int tag_shift; 50 Char desc_line[128]; 51 UWord* tags; 52} cache_t2; 53 54/* By this point, the size/assoc/line_size has been checked. */ 55static void cachesim_initcache(cache_t config, cache_t2* c) 56{ 57 Int i; 58 59 c->size = config.size; 60 c->assoc = config.assoc; 61 c->line_size = config.line_size; 62 63 c->sets = (c->size / c->line_size) / c->assoc; 64 c->sets_min_1 = c->sets - 1; 65 c->assoc_bits = VG_(log2)(c->assoc); 66 c->line_size_bits = VG_(log2)(c->line_size); 67 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); 68 69 if (c->assoc == 1) { 70 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", 71 c->size, c->line_size); 72 } else { 73 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", 74 c->size, c->line_size, c->assoc); 75 } 76 77 c->tags = VG_(malloc)(sizeof(UWord) * c->sets * c->assoc); 78 79 for (i = 0; i < c->sets * c->assoc; i++) 80 c->tags[i] = 0; 81} 82 83#if 0 84static void print_cache(cache_t2* c) 85{ 86 UInt set, way, i; 87 88 /* Note initialisation and update of 'i'. */ 89 for (i = 0, set = 0; set < c->sets; set++) { 90 for (way = 0; way < c->assoc; way++, i++) { 91 VG_(printf)("%16lx ", c->tags[i]); 92 } 93 VG_(printf)("\n"); 94 } 95} 96#endif 97 98/* This is done as a macro rather than by passing in the cache_t2 as an 99 * arg because it slows things down by a small amount (3-5%) due to all 100 * that extra indirection. */ 101 102#define CACHESIM(L, MISS_TREATMENT) \ 103/* The cache and associated bits and pieces. */ \ 104static cache_t2 L; \ 105 \ 106static void cachesim_##L##_initcache(cache_t config) \ 107{ \ 108 cachesim_initcache(config, &L); \ 109} \ 110 \ 111static /* __inline__ */ \ 112void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) \ 113{ \ 114 register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ 115 register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ 116 register UWord tag = a >> L.tag_shift; \ 117 Int i, j; \ 118 Bool is_miss = False; \ 119 UWord* set; \ 120 \ 121 /* First case: word entirely within line. */ \ 122 if (set1 == set2) { \ 123 \ 124 /* Shifting is a bit faster than multiplying */ \ 125 set = &(L.tags[set1 << L.assoc_bits]); \ 126 \ 127 /* This loop is unrolled for just the first case, which is the most */\ 128 /* common. We can't unroll any further because it would screw up */\ 129 /* if we have a direct-mapped (1-way) cache. */\ 130 if (tag == set[0]) { \ 131 return; \ 132 } \ 133 /* If the tag is one other than the MRU, move it into the MRU spot */\ 134 /* and shuffle the rest down. */\ 135 for (i = 1; i < L.assoc; i++) { \ 136 if (tag == set[i]) { \ 137 for (j = i; j > 0; j--) { \ 138 set[j] = set[j - 1]; \ 139 } \ 140 set[0] = tag; \ 141 return; \ 142 } \ 143 } \ 144 \ 145 /* A miss; install this tag as MRU, shuffle rest down. */ \ 146 for (j = L.assoc - 1; j > 0; j--) { \ 147 set[j] = set[j - 1]; \ 148 } \ 149 set[0] = tag; \ 150 MISS_TREATMENT; \ 151 return; \ 152 \ 153 /* Second case: word straddles two lines. */ \ 154 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ 155 } else if (((set1 + 1) & (L.sets-1)) == set2) { \ 156 set = &(L.tags[set1 << L.assoc_bits]); \ 157 if (tag == set[0]) { \ 158 goto block2; \ 159 } \ 160 for (i = 1; i < L.assoc; i++) { \ 161 if (tag == set[i]) { \ 162 for (j = i; j > 0; j--) { \ 163 set[j] = set[j - 1]; \ 164 } \ 165 set[0] = tag; \ 166 goto block2; \ 167 } \ 168 } \ 169 for (j = L.assoc - 1; j > 0; j--) { \ 170 set[j] = set[j - 1]; \ 171 } \ 172 set[0] = tag; \ 173 is_miss = True; \ 174block2: \ 175 set = &(L.tags[set2 << L.assoc_bits]); \ 176 if (tag == set[0]) { \ 177 goto miss_treatment; \ 178 } \ 179 for (i = 1; i < L.assoc; i++) { \ 180 if (tag == set[i]) { \ 181 for (j = i; j > 0; j--) { \ 182 set[j] = set[j - 1]; \ 183 } \ 184 set[0] = tag; \ 185 goto miss_treatment; \ 186 } \ 187 } \ 188 for (j = L.assoc - 1; j > 0; j--) { \ 189 set[j] = set[j - 1]; \ 190 } \ 191 set[0] = tag; \ 192 is_miss = True; \ 193miss_treatment: \ 194 if (is_miss) { MISS_TREATMENT; } \ 195 \ 196 } else { \ 197 VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2); \ 198 VG_(tool_panic)("item straddles more than two cache sets"); \ 199 } \ 200 return; \ 201} 202 203CACHESIM(L2, (*m2)++ ); 204CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); 205CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); 206 207/*--------------------------------------------------------------------*/ 208/*--- end cg_sim.c ---*/ 209/*--------------------------------------------------------------------*/ 210 211