cg_sim.c revision b0909b40589ec17f370a9c716e47db87b1bb90a4
1
2/*--------------------------------------------------------------------*/
3/*--- Cache simulation                                    cg_sim.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Cachegrind, a Valgrind tool for cache
8   profiling programs.
9
10   Copyright (C) 2002-2012 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31/* Notes:
32  - simulates a write-allocate cache
33  - (block --> set) hash function uses simple bit selection
34  - handling of references straddling two cache blocks:
35      - counts as only one cache access (not two)
36      - both blocks hit                  --> one hit
37      - one block hits, the other misses --> one miss
38      - both blocks miss                 --> one miss (not two)
39*/
40
41typedef struct {
42   Int          size;                   /* bytes */
43   Int          assoc;
44   Int          line_size;              /* bytes */
45   Int          sets;
46   Int          sets_min_1;
47   Int          line_size_bits;
48   Int          tag_shift;
49   HChar        desc_line[128];
50   UWord*       tags;
51} cache_t2;
52
53/* By this point, the size/assoc/line_size has been checked. */
54static void cachesim_initcache(cache_t config, cache_t2* c)
55{
56   Int i;
57
58   c->size      = config.size;
59   c->assoc     = config.assoc;
60   c->line_size = config.line_size;
61
62   c->sets           = (c->size / c->line_size) / c->assoc;
63   c->sets_min_1     = c->sets - 1;
64   c->line_size_bits = VG_(log2)(c->line_size);
65   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
66
67   if (c->assoc == 1) {
68      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
69                                 c->size, c->line_size);
70   } else {
71      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
72                                 c->size, c->line_size, c->assoc);
73   }
74
75   c->tags = VG_(malloc)("cg.sim.ci.1",
76                         sizeof(UWord) * c->sets * c->assoc);
77
78   for (i = 0; i < c->sets * c->assoc; i++)
79      c->tags[i] = 0;
80}
81
82/* This attribute forces GCC to inline the function, getting rid of a
83 * lot of indirection around the cache_t2 pointer, if it is known to be
84 * constant in the caller (the caller is inlined itself).
85 * Without inlining of simulator functions, cachegrind can get 40% slower.
86 */
87__attribute__((always_inline))
88static __inline__
89Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
90{
91   int i, j;
92   UWord *set;
93
94   set = &(c->tags[set_no * c->assoc]);
95
96   /* This loop is unrolled for just the first case, which is the most */
97   /* common.  We can't unroll any further because it would screw up   */
98   /* if we have a direct-mapped (1-way) cache.                        */
99   if (tag == set[0])
100      return False;
101
102   /* If the tag is one other than the MRU, move it into the MRU spot  */
103   /* and shuffle the rest down.                                       */
104   for (i = 1; i < c->assoc; i++) {
105      if (tag == set[i]) {
106         for (j = i; j > 0; j--) {
107            set[j] = set[j - 1];
108         }
109         set[0] = tag;
110
111         return False;
112      }
113   }
114
115   /* A miss;  install this tag as MRU, shuffle rest down. */
116   for (j = c->assoc - 1; j > 0; j--) {
117      set[j] = set[j - 1];
118   }
119   set[0] = tag;
120
121   return True;
122}
123
124__attribute__((always_inline))
125static __inline__
126Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
127{
128   /* A memory block has the size of a cache line */
129   UWord block1 =  a         >> c->line_size_bits;
130   UWord block2 = (a+size-1) >> c->line_size_bits;
131   UInt  set1   = block1 & c->sets_min_1;
132
133   /* Tags used in real caches are minimal to save space.
134    * As the last bits of the block number of addresses mapping
135    * into one cache set are the same, real caches use as tag
136    *   tag = block >> log2(#sets)
137    * But using the memory block as more specific tag is fine,
138    * and saves instructions.
139    */
140   UWord tag1   = block1;
141
142   /* Access entirely within line. */
143   if (block1 == block2)
144      return cachesim_setref_is_miss(c, set1, tag1);
145
146   /* Access straddles two lines. */
147   else if (block1 + 1 == block2) {
148      UInt  set2 = block2 & c->sets_min_1;
149      UWord tag2 = block2;
150
151      /* always do both, as state is updated as side effect */
152      if (cachesim_setref_is_miss(c, set1, tag1)) {
153         cachesim_setref_is_miss(c, set2, tag2);
154         return True;
155      }
156      return cachesim_setref_is_miss(c, set2, tag2);
157   }
158   VG_(printf)("addr: %lx  size: %u  blocks: %ld %ld",
159               a, size, block1, block2);
160   VG_(tool_panic)("item straddles more than two cache sets");
161   /* not reached */
162   return True;
163}
164
165
166static cache_t2 LL;
167static cache_t2 I1;
168static cache_t2 D1;
169
170static void cachesim_initcaches(cache_t I1c, cache_t D1c, cache_t LLc)
171{
172   cachesim_initcache(I1c, &I1);
173   cachesim_initcache(D1c, &D1);
174   cachesim_initcache(LLc, &LL);
175}
176
177__attribute__((always_inline))
178static __inline__
179void cachesim_I1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
180{
181   if (cachesim_ref_is_miss(&I1, a, size)) {
182      (*m1)++;
183      if (cachesim_ref_is_miss(&LL, a, size))
184         (*mL)++;
185   }
186}
187
188__attribute__((always_inline))
189static __inline__
190void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
191{
192   if (cachesim_ref_is_miss(&D1, a, size)) {
193      (*m1)++;
194      if (cachesim_ref_is_miss(&LL, a, size))
195         (*mL)++;
196   }
197}
198
199/*--------------------------------------------------------------------*/
200/*--- end                                                 cg_sim.c ---*/
201/*--------------------------------------------------------------------*/
202
203