1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- Branch predictor simulation                  cg_branchpred.c ---*/
4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*
7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This file is part of Cachegrind, a Valgrind tool for cache
8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   profiling programs.
9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   Copyright (C) 2002-2011 Nicholas Nethercote
11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      njn@valgrind.org
12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is free software; you can redistribute it and/or
14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   modify it under the terms of the GNU General Public License as
15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   published by the Free Software Foundation; either version 2 of the
16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   License, or (at your option) any later version.
17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is distributed in the hope that it will be useful, but
19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   WITHOUT ANY WARRANTY; without even the implied warranty of
20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   General Public License for more details.
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   You should have received a copy of the GNU General Public License
24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   along with this program; if not, write to the Free Software
25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   02111-1307, USA.
27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   The GNU General Public License is contained in the file COPYING.
29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This file contains the actual branch predictor simulator and its
33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   associated state.  As with cg_sim.c it is #included directly into
34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   cg_main.c.  It provides:
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   - a taken/not-taken predictor for conditional branches
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   - a branch target address predictor for indirect branches
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Function return-address prediction is not modelled, on the basis
40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   that return stack predictors almost always predict correctly, and
41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   also that it is difficult for Valgrind to robustly identify
42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   function calls and returns.
43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* How many bits at the bottom of an instruction address are
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   guaranteed to be zero? */
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#  define N_IADDR_LO_ZERO_BITS 2
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_x86) || defined(VGA_amd64)
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#  define N_IADDR_LO_ZERO_BITS 0
51b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov#elif defined(VGA_s390x)
52b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov#  define N_IADDR_LO_ZERO_BITS 1
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#else
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#  error "Unsupported architecture"
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#endif
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Get a taken/not-taken prediction for the instruction (presumably a
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   conditional branch) at instr_addr.  Once that's done, update the
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   predictor state based on whether or not it was actually taken, as
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   indicated by 'taken'.  Finally, return 1 for a mispredict and 0 for
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   a successful predict.
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   The predictor is an array of 16k (== 2^14) 2-bit saturating
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   counters.  Given the address of the branch instruction, the array
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   index to use is computed both from the low order bits of the branch
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   instruction's address, and the global history - that is, from the
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   taken/not-taken behaviour of the most recent few branches.  This
69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   makes the predictor able to correlate this branch's behaviour with
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   that of other branches.
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   TODO: use predictor written by someone who understands this stuff.
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Perhaps it would be better to move to a standard GShare predictor
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   and/or tournament predictor.
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* The index is composed of N_HIST bits at the top and N_IADD bits at
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   the bottom.  These numbers chosen somewhat arbitrarily, but note
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   that making N_IADD_BITS too small (eg 4) can cause large amounts of
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   aliasing, and hence misprediction, particularly if the history bits
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   are mostly unchanging. */
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_HIST_BITS 7
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_IADD_BITS 7
83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_BITS     (N_HIST_BITS + N_IADD_BITS)
85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_COUNTERS (1 << N_BITS)
86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UWord shift_register = 0;   /* Contains global history */
88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */
89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic ULong do_cond_branch_predict ( Addr instr_addr, Word takenW )
92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UWord indx;
94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool  predicted_taken, actually_taken, mispredict;
95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   const UWord hist_mask = (1 << N_HIST_BITS) - 1;
97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   const UWord iadd_mask = (1 << N_IADD_BITS) - 1;
98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UWord hist_bits = shift_register & hist_mask;
99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS)
100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                           & iadd_mask;
101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   tl_assert(hist_bits <= hist_mask);
103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   tl_assert(iadd_bits <= iadd_mask);
104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   indx = (hist_bits << N_IADD_BITS) | iadd_bits;
105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   tl_assert(indx < N_COUNTERS);
106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (0) VG_(printf)("index = %d\n", (Int)indx);
107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   tl_assert(takenW <= 1);
109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   predicted_taken = counters[ indx ] >= 2;
110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   actually_taken  = takenW > 0;
111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   mispredict = (actually_taken && (!predicted_taken))
113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                || ((!actually_taken) && predicted_taken);
114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   shift_register <<= 1;
116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   shift_register |= (actually_taken ? 1 : 0);
117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (actually_taken) {
119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (counters[indx] < 3)
120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         counters[indx]++;
121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else {
122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (counters[indx] > 0)
123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         counters[indx]--;
124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   tl_assert(counters[indx] <= 3);
127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return mispredict ? 1 : 0;
129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* A very simple indirect branch predictor.  Use the branch's address
133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   to index a table which records the previous target address for this
134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   branch (or whatever aliased with it) and use that as the
135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   prediction. */
136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_BTAC_BITS 9
137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_BTAC      (1 << N_BTAC_BITS)
138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */
139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic ULong do_ind_branch_predict ( Addr instr_addr, Addr actual )
141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool mispredict;
143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   const UWord mask = (1 << N_BTAC_BITS) - 1;
144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS)
145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                      & mask;
146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   tl_assert(indx < N_BTAC);
147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   mispredict = btac[indx] != actual;
148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   btac[indx] = actual;
149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return mispredict ? 1 : 0;
150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end                                          cg_branchpred.c ---*/
155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
157