cg_sim.c revision 8f943afc22a6a683b78271836c8ddc462b4824a9
1 2/*--------------------------------------------------------------------*/ 3/*--- Cache simulation cg_sim.c ---*/ 4/*--------------------------------------------------------------------*/ 5 6/* 7 This file is part of Cachegrind, a Valgrind tool for cache 8 profiling programs. 9 10 Copyright (C) 2002-2011 Nicholas Nethercote 11 njn@valgrind.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29*/ 30 31/* Notes: 32 - simulates a write-allocate cache 33 - (block --> set) hash function uses simple bit selection 34 - handling of references straddling two cache blocks: 35 - counts as only one cache access (not two) 36 - both blocks hit --> one hit 37 - one block hits, the other misses --> one miss 38 - both blocks miss --> one miss (not two) 39*/ 40 41typedef struct { 42 Int size; /* bytes */ 43 Int assoc; 44 Int line_size; /* bytes */ 45 Int sets; 46 Int sets_min_1; 47 Int line_size_bits; 48 Int tag_shift; 49 Char desc_line[128]; 50 UWord* tags; 51} cache_t2; 52 53/* By this point, the size/assoc/line_size has been checked. */ 54static void cachesim_initcache(cache_t config, cache_t2* c) 55{ 56 Int i; 57 58 c->size = config.size; 59 c->assoc = config.assoc; 60 c->line_size = config.line_size; 61 62 c->sets = (c->size / c->line_size) / c->assoc; 63 c->sets_min_1 = c->sets - 1; 64 c->line_size_bits = VG_(log2)(c->line_size); 65 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); 66 67 if (c->assoc == 1) { 68 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", 69 c->size, c->line_size); 70 } else { 71 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", 72 c->size, c->line_size, c->assoc); 73 } 74 75 c->tags = VG_(malloc)("cg.sim.ci.1", 76 sizeof(UWord) * c->sets * c->assoc); 77 78 for (i = 0; i < c->sets * c->assoc; i++) 79 c->tags[i] = 0; 80} 81 82/* This is done as a macro rather than by passing in the cache_t2 as an 83 * arg because it slows things down by a small amount (3-5%) due to all 84 * that extra indirection. */ 85 86#define CACHESIM(L, MISS_TREATMENT) \ 87/* The cache and associated bits and pieces. */ \ 88static cache_t2 L; \ 89 \ 90static void cachesim_##L##_initcache(cache_t config) \ 91{ \ 92 cachesim_initcache(config, &L); \ 93} \ 94 \ 95/* This attribute forces GCC to inline this function, even though it's */ \ 96/* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \ 97__attribute__((always_inline)) \ 98static __inline__ \ 99void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL) \ 100{ \ 101 UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ 102 UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ 103 UWord tag = a >> L.tag_shift; \ 104 UWord tag2; \ 105 Int i, j; \ 106 Bool is_miss = False; \ 107 UWord* set; \ 108 \ 109 /* First case: word entirely within line. */ \ 110 if (set1 == set2) { \ 111 \ 112 set = &(L.tags[set1 * L.assoc]); \ 113 \ 114 /* This loop is unrolled for just the first case, which is the most */\ 115 /* common. We can't unroll any further because it would screw up */\ 116 /* if we have a direct-mapped (1-way) cache. */\ 117 if (tag == set[0]) { \ 118 return; \ 119 } \ 120 /* If the tag is one other than the MRU, move it into the MRU spot */\ 121 /* and shuffle the rest down. */\ 122 for (i = 1; i < L.assoc; i++) { \ 123 if (tag == set[i]) { \ 124 for (j = i; j > 0; j--) { \ 125 set[j] = set[j - 1]; \ 126 } \ 127 set[0] = tag; \ 128 return; \ 129 } \ 130 } \ 131 \ 132 /* A miss; install this tag as MRU, shuffle rest down. */ \ 133 for (j = L.assoc - 1; j > 0; j--) { \ 134 set[j] = set[j - 1]; \ 135 } \ 136 set[0] = tag; \ 137 MISS_TREATMENT; \ 138 return; \ 139 \ 140 /* Second case: word straddles two lines. */ \ 141 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ 142 } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \ 143 set = &(L.tags[set1 * L.assoc]); \ 144 if (tag == set[0]) { \ 145 goto block2; \ 146 } \ 147 for (i = 1; i < L.assoc; i++) { \ 148 if (tag == set[i]) { \ 149 for (j = i; j > 0; j--) { \ 150 set[j] = set[j - 1]; \ 151 } \ 152 set[0] = tag; \ 153 goto block2; \ 154 } \ 155 } \ 156 for (j = L.assoc - 1; j > 0; j--) { \ 157 set[j] = set[j - 1]; \ 158 } \ 159 set[0] = tag; \ 160 is_miss = True; \ 161block2: \ 162 set = &(L.tags[set2 * L.assoc]); \ 163 tag2 = (a+size-1) >> L.tag_shift; \ 164 if (tag2 == set[0]) { \ 165 goto miss_treatment; \ 166 } \ 167 for (i = 1; i < L.assoc; i++) { \ 168 if (tag2 == set[i]) { \ 169 for (j = i; j > 0; j--) { \ 170 set[j] = set[j - 1]; \ 171 } \ 172 set[0] = tag2; \ 173 goto miss_treatment; \ 174 } \ 175 } \ 176 for (j = L.assoc - 1; j > 0; j--) { \ 177 set[j] = set[j - 1]; \ 178 } \ 179 set[0] = tag2; \ 180 is_miss = True; \ 181miss_treatment: \ 182 if (is_miss) { MISS_TREATMENT; } \ 183 \ 184 } else { \ 185 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);\ 186 VG_(tool_panic)("item straddles more than two cache sets"); \ 187 } \ 188 return; \ 189} 190 191CACHESIM(LL, (*mL)++ ); 192CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } ); 193CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } ); 194 195/*--------------------------------------------------------------------*/ 196/*--- end cg_sim.c ---*/ 197/*--------------------------------------------------------------------*/ 198 199