cg_branchpred.c revision 8f943afc22a6a683b78271836c8ddc462b4824a9
1
2/*--------------------------------------------------------------------*/
3/*--- Branch predictor simulation                  cg_branchpred.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Cachegrind, a Valgrind tool for cache
8   profiling programs.
9
10   Copyright (C) 2002-2011 Nicholas Nethercote
11      njn@valgrind.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31
32/* This file contains the actual branch predictor simulator and its
33   associated state.  As with cg_sim.c it is #included directly into
34   cg_main.c.  It provides:
35
36   - a taken/not-taken predictor for conditional branches
37   - a branch target address predictor for indirect branches
38
39   Function return-address prediction is not modelled, on the basis
40   that return stack predictors almost always predict correctly, and
41   also that it is difficult for Valgrind to robustly identify
42   function calls and returns.
43*/
44
45/* How many bits at the bottom of an instruction address are
46   guaranteed to be zero? */
47#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
48#  define N_IADDR_LO_ZERO_BITS 2
49#elif defined(VGA_x86) || defined(VGA_amd64)
50#  define N_IADDR_LO_ZERO_BITS 0
51#elif defined(VGA_s390x)
52#  define N_IADDR_LO_ZERO_BITS 1
53#else
54#  error "Unsupported architecture"
55#endif
56
57
58/* Get a taken/not-taken prediction for the instruction (presumably a
59   conditional branch) at instr_addr.  Once that's done, update the
60   predictor state based on whether or not it was actually taken, as
61   indicated by 'taken'.  Finally, return 1 for a mispredict and 0 for
62   a successful predict.
63
64   The predictor is an array of 16k (== 2^14) 2-bit saturating
65   counters.  Given the address of the branch instruction, the array
66   index to use is computed both from the low order bits of the branch
67   instruction's address, and the global history - that is, from the
68   taken/not-taken behaviour of the most recent few branches.  This
69   makes the predictor able to correlate this branch's behaviour with
70   that of other branches.
71
72   TODO: use predictor written by someone who understands this stuff.
73   Perhaps it would be better to move to a standard GShare predictor
74   and/or tournament predictor.
75*/
76/* The index is composed of N_HIST bits at the top and N_IADD bits at
77   the bottom.  These numbers chosen somewhat arbitrarily, but note
78   that making N_IADD_BITS too small (eg 4) can cause large amounts of
79   aliasing, and hence misprediction, particularly if the history bits
80   are mostly unchanging. */
81#define N_HIST_BITS 7
82#define N_IADD_BITS 7
83
84#define N_BITS     (N_HIST_BITS + N_IADD_BITS)
85#define N_COUNTERS (1 << N_BITS)
86
87static UWord shift_register = 0;   /* Contains global history */
88static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
89
90
91static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
92{
93   UWord indx;
94   Bool  predicted_taken, actually_taken, mispredict;
95
96   const UWord hist_mask = (1 << N_HIST_BITS) - 1;
97   const UWord iadd_mask = (1 << N_IADD_BITS) - 1;
98         UWord hist_bits = shift_register & hist_mask;
99         UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS)
100                           & iadd_mask;
101
102   tl_assert(hist_bits <= hist_mask);
103   tl_assert(iadd_bits <= iadd_mask);
104   indx = (hist_bits << N_IADD_BITS) | iadd_bits;
105   tl_assert(indx < N_COUNTERS);
106   if (0) VG_(printf)("index = %d\n", (Int)indx);
107
108   tl_assert(takenW <= 1);
109   predicted_taken = counters[ indx ] >= 2;
110   actually_taken  = takenW > 0;
111
112   mispredict = (actually_taken && (!predicted_taken))
113                || ((!actually_taken) && predicted_taken);
114
115   shift_register <<= 1;
116   shift_register |= (actually_taken ? 1 : 0);
117
118   if (actually_taken) {
119      if (counters[indx] < 3)
120         counters[indx]++;
121   } else {
122      if (counters[indx] > 0)
123         counters[indx]--;
124   }
125
126   tl_assert(counters[indx] <= 3);
127
128   return mispredict ? 1 : 0;
129}
130
131
132/* A very simple indirect branch predictor.  Use the branch's address
133   to index a table which records the previous target address for this
134   branch (or whatever aliased with it) and use that as the
135   prediction. */
136#define N_BTAC_BITS 9
137#define N_BTAC      (1 << N_BTAC_BITS)
138static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
139
140static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
141{
142   Bool mispredict;
143   const UWord mask = (1 << N_BTAC_BITS) - 1;
144         UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS)
145                      & mask;
146   tl_assert(indx < N_BTAC);
147   mispredict = btac[indx] != actual;
148   btac[indx] = actual;
149   return mispredict ? 1 : 0;
150}
151
152
153/*--------------------------------------------------------------------*/
154/*--- end                                          cg_branchpred.c ---*/
155/*--------------------------------------------------------------------*/
156
157