1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- Cache simulation                                    cg_sim.c ---*/
4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*
7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This file is part of Cachegrind, a Valgrind tool for cache
8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   profiling programs.
9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   Copyright (C) 2002-2011 Nicholas Nethercote
11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      njn@valgrind.org
12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is free software; you can redistribute it and/or
14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   modify it under the terms of the GNU General Public License as
15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   published by the Free Software Foundation; either version 2 of the
16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   License, or (at your option) any later version.
17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is distributed in the hope that it will be useful, but
19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   WITHOUT ANY WARRANTY; without even the implied warranty of
20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   General Public License for more details.
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   You should have received a copy of the GNU General Public License
24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   along with this program; if not, write to the Free Software
25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   02111-1307, USA.
27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   The GNU General Public License is contained in the file COPYING.
29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Notes:
32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  - simulates a write-allocate cache
33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  - (block --> set) hash function uses simple bit selection
34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  - handling of references straddling two cache blocks:
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      - counts as only one cache access (not two)
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      - both blocks hit                  --> one hit
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      - one block hits, the other misses --> one miss
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      - both blocks miss                 --> one miss (not two)
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Browntypedef struct {
42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          size;                   /* bytes */
43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          assoc;
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          line_size;              /* bytes */
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          sets;
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          sets_min_1;
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          line_size_bits;
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int          tag_shift;
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Char         desc_line[128];
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UWord*       tags;
51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} cache_t2;
52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* By this point, the size/assoc/line_size has been checked. */
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void cachesim_initcache(cache_t config, cache_t2* c)
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int i;
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->size      = config.size;
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->assoc     = config.assoc;
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->line_size = config.line_size;
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->sets           = (c->size / c->line_size) / c->assoc;
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->sets_min_1     = c->sets - 1;
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->line_size_bits = VG_(log2)(c->line_size);
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (c->assoc == 1) {
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 c->size, c->line_size);
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else {
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 c->size, c->line_size, c->assoc);
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c->tags = VG_(malloc)("cg.sim.ci.1",
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                         sizeof(UWord) * c->sets * c->assoc);
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   for (i = 0; i < c->sets * c->assoc; i++)
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      c->tags[i] = 0;
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This is done as a macro rather than by passing in the cache_t2 as an
83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * arg because it slows things down by a small amount (3-5%) due to all
84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * that extra indirection. */
85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define CACHESIM(L, MISS_TREATMENT)                                         \
87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* The cache and associated bits and pieces. */                             \
88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic cache_t2 L;                                                          \
89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void cachesim_##L##_initcache(cache_t config)                        \
91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{                                                                           \
92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    cachesim_initcache(config, &L);                                         \
93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}                                                                           \
94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This attribute forces GCC to inline this function, even though it's */   \
96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown__attribute__((always_inline))                                              \
98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic __inline__                                                           \
99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL)         \
100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{                                                                           \
101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UWord tag  = a >> L.tag_shift;                                           \
104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UWord tag2;                                                              \
105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int i, j;                                                                \
106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool is_miss = False;                                                    \
107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UWord* set;                                                              \
108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* First case: word entirely within line. */                             \
110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (set1 == set2) {                                                      \
111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      set = &(L.tags[set1 * L.assoc]);                                      \
113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* This loop is unrolled for just the first case, which is the most */\
115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* common.  We can't unroll any further because it would screw up   */\
116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* if we have a direct-mapped (1-way) cache.                        */\
117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (tag == set[0]) {                                                  \
118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return;                                                            \
119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* If the tag is one other than the MRU, move it into the MRU spot  */\
121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* and shuffle the rest down.                                       */\
122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 1; i < L.assoc; i++) {                                       \
123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if (tag == set[i]) {                                               \
124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            for (j = i; j > 0; j--) {                                       \
125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown               set[j] = set[j - 1];                                         \
126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            }                                                               \
127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            set[0] = tag;                                                   \
128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            return;                                                         \
129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }                                                                  \
130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* A miss;  install this tag as MRU, shuffle rest down. */            \
133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (j = L.assoc - 1; j > 0; j--) {                                   \
134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         set[j] = set[j - 1];                                               \
135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      set[0] = tag;                                                         \
137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      MISS_TREATMENT;                                                       \
138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;                                                               \
139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Second case: word straddles two lines. */                             \
141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
142b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                      \
143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      set = &(L.tags[set1 * L.assoc]);                                      \
144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (tag == set[0]) {                                                  \
145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         goto block2;                                                       \
146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 1; i < L.assoc; i++) {                                       \
148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if (tag == set[i]) {                                               \
149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            for (j = i; j > 0; j--) {                                       \
150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown               set[j] = set[j - 1];                                         \
151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            }                                                               \
152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            set[0] = tag;                                                   \
153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            goto block2;                                                    \
154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }                                                                  \
155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (j = L.assoc - 1; j > 0; j--) {                                   \
157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         set[j] = set[j - 1];                                               \
158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      set[0] = tag;                                                         \
160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      is_miss = True;                                                       \
161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownblock2:                                                                     \
162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      set = &(L.tags[set2 * L.assoc]);                                      \
163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      tag2 = (a+size-1) >> L.tag_shift;                                     \
164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (tag2 == set[0]) {                                                 \
165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         goto miss_treatment;                                               \
166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 1; i < L.assoc; i++) {                                       \
168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if (tag2 == set[i]) {                                              \
169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            for (j = i; j > 0; j--) {                                       \
170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown               set[j] = set[j - 1];                                         \
171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            }                                                               \
172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            set[0] = tag2;                                                  \
173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            goto miss_treatment;                                            \
174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }                                                                  \
175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (j = L.assoc - 1; j > 0; j--) {                                   \
177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         set[j] = set[j - 1];                                               \
178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }                                                                     \
179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      set[0] = tag2;                                                        \
180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      is_miss = True;                                                       \
181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownmiss_treatment:                                                             \
182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (is_miss) { MISS_TREATMENT; }                                      \
183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                                                            \
184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else {                                                                 \
185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);\
186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       VG_(tool_panic)("item straddles more than two cache sets");          \
187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }                                                                        \
188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return;                                                                  \
189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownCACHESIM(LL, (*mL)++ );
192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownCACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownCACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
196ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end                                                 cg_sim.c ---*/
197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
199