cg_sim.c revision fcd04886dbda12c647f9e2d8dac85b2c834f0a6c
1
2/*--------------------------------------------------------------------*/
3/*--- Cache simulation                                    cg_sim.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Cachegrind, a Valgrind tool for cache
8   profiling programs.
9
10   Copyright (C) 2002-2005 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31/* Notes:
32  - simulates a write-allocate cache
33  - (block --> set) hash function uses simple bit selection
34  - handling of references straddling two cache blocks:
35      - counts as only one cache access (not two)
36      - both blocks hit                  --> one hit
37      - one block hits, the other misses --> one miss
38      - both blocks miss                 --> one miss (not two)
39*/
40
41typedef struct {
42   Int          size;                   /* bytes */
43   Int          assoc;
44   Int          line_size;              /* bytes */
45   Int          sets;
46   Int          sets_min_1;
47   Int          assoc_bits;
48   Int          line_size_bits;
49   Int          tag_shift;
50   Char         desc_line[128];
51   UWord*       tags;
52} cache_t2;
53
54/* By this point, the size/assoc/line_size has been checked. */
55static void cachesim_initcache(cache_t config, cache_t2* c)
56{
57   Int i;
58
59   c->size      = config.size;
60   c->assoc     = config.assoc;
61   c->line_size = config.line_size;
62
63   c->sets           = (c->size / c->line_size) / c->assoc;
64   c->sets_min_1     = c->sets - 1;
65   c->assoc_bits     = VG_(log2)(c->assoc);
66   c->line_size_bits = VG_(log2)(c->line_size);
67   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
68
69   if (c->assoc == 1) {
70      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
71                                 c->size, c->line_size);
72   } else {
73      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
74                                 c->size, c->line_size, c->assoc);
75   }
76
77   c->tags = VG_(malloc)(sizeof(UWord) * c->sets * c->assoc);
78
79   for (i = 0; i < c->sets * c->assoc; i++)
80      c->tags[i] = 0;
81}
82
83#if 0
84static void print_cache(cache_t2* c)
85{
86   UInt set, way, i;
87
88   /* Note initialisation and update of 'i'. */
89   for (i = 0, set = 0; set < c->sets; set++) {
90      for (way = 0; way < c->assoc; way++, i++) {
91         VG_(printf)("%16lx ", c->tags[i]);
92      }
93      VG_(printf)("\n");
94   }
95}
96#endif
97
98/* This is done as a macro rather than by passing in the cache_t2 as an
99 * arg because it slows things down by a small amount (3-5%) due to all
100 * that extra indirection. */
101
102#define CACHESIM(L, MISS_TREATMENT)                                         \
103/* The cache and associated bits and pieces. */                             \
104static cache_t2 L;                                                          \
105                                                                            \
106static void cachesim_##L##_initcache(cache_t config)                        \
107{                                                                           \
108    cachesim_initcache(config, &L);                                         \
109}                                                                           \
110                                                                            \
111/* This attribute forces GCC to inline this function, even though it's */   \
112/* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
113__attribute__((always_inline))                                              \
114static __inline__                                                           \
115void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2)         \
116{                                                                           \
117   register UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1); \
118   register UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
119   register UWord tag  = a >> L.tag_shift;                                  \
120   Int i, j;                                                                \
121   Bool is_miss = False;                                                    \
122   UWord* set;                                                              \
123                                                                            \
124   /* First case: word entirely within line. */                             \
125   if (set1 == set2) {                                                      \
126                                                                            \
127      /* Shifting is a bit faster than multiplying */                       \
128      set = &(L.tags[set1 << L.assoc_bits]);                                \
129                                                                            \
130      /* This loop is unrolled for just the first case, which is the most */\
131      /* common.  We can't unroll any further because it would screw up   */\
132      /* if we have a direct-mapped (1-way) cache.                        */\
133      if (tag == set[0]) {                                                  \
134         return;                                                            \
135      }                                                                     \
136      /* If the tag is one other than the MRU, move it into the MRU spot  */\
137      /* and shuffle the rest down.                                       */\
138      for (i = 1; i < L.assoc; i++) {                                       \
139         if (tag == set[i]) {                                               \
140            for (j = i; j > 0; j--) {                                       \
141               set[j] = set[j - 1];                                         \
142            }                                                               \
143            set[0] = tag;                                                   \
144            return;                                                         \
145         }                                                                  \
146      }                                                                     \
147                                                                            \
148      /* A miss;  install this tag as MRU, shuffle rest down. */            \
149      for (j = L.assoc - 1; j > 0; j--) {                                   \
150         set[j] = set[j - 1];                                               \
151      }                                                                     \
152      set[0] = tag;                                                         \
153      MISS_TREATMENT;                                                       \
154      return;                                                               \
155                                                                            \
156   /* Second case: word straddles two lines. */                             \
157   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
158   } else if (((set1 + 1) & (L.sets-1)) == set2) {                          \
159      set = &(L.tags[set1 << L.assoc_bits]);                                \
160      if (tag == set[0]) {                                                  \
161         goto block2;                                                       \
162      }                                                                     \
163      for (i = 1; i < L.assoc; i++) {                                       \
164         if (tag == set[i]) {                                               \
165            for (j = i; j > 0; j--) {                                       \
166               set[j] = set[j - 1];                                         \
167            }                                                               \
168            set[0] = tag;                                                   \
169            goto block2;                                                    \
170         }                                                                  \
171      }                                                                     \
172      for (j = L.assoc - 1; j > 0; j--) {                                   \
173         set[j] = set[j - 1];                                               \
174      }                                                                     \
175      set[0] = tag;                                                         \
176      is_miss = True;                                                       \
177block2:                                                                     \
178      set = &(L.tags[set2 << L.assoc_bits]);                                \
179      if (tag == set[0]) {                                                  \
180         goto miss_treatment;                                               \
181      }                                                                     \
182      for (i = 1; i < L.assoc; i++) {                                       \
183         if (tag == set[i]) {                                               \
184            for (j = i; j > 0; j--) {                                       \
185               set[j] = set[j - 1];                                         \
186            }                                                               \
187            set[0] = tag;                                                   \
188            goto miss_treatment;                                            \
189         }                                                                  \
190      }                                                                     \
191      for (j = L.assoc - 1; j > 0; j--) {                                   \
192         set[j] = set[j - 1];                                               \
193      }                                                                     \
194      set[0] = tag;                                                         \
195      is_miss = True;                                                       \
196miss_treatment:                                                             \
197      if (is_miss) { MISS_TREATMENT; }                                      \
198                                                                            \
199   } else {                                                                 \
200       VG_(printf)("addr: %x  size: %u  sets: %d %d", a, size, set1, set2); \
201       VG_(tool_panic)("item straddles more than two cache sets");          \
202   }                                                                        \
203   return;                                                                  \
204}
205
206CACHESIM(L2, (*m2)++ );
207CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
208CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } );
209
210/*--------------------------------------------------------------------*/
211/*--- end                                                 cg_sim.c ---*/
212/*--------------------------------------------------------------------*/
213
214