1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- Branch predictor simulation cg_branchpred.c ---*/ 4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This file is part of Cachegrind, a Valgrind tool for cache 8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown profiling programs. 9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Copyright (C) 2002-2011 Nicholas Nethercote 11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown njn@valgrind.org 12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is free software; you can redistribute it and/or 14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown modify it under the terms of the GNU General Public License as 15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown published by the Free Software Foundation; either version 2 of the 16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown License, or (at your option) any later version. 17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is distributed in the hope that it will be useful, but 19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown WITHOUT ANY WARRANTY; without even the implied warranty of 20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown General Public License for more details. 22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown You should have received a copy of the GNU General Public License 24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown along with this program; if not, write to the Free Software 25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 02111-1307, USA. 27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The GNU General Public License is contained in the file COPYING. 29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This file contains the actual branch predictor simulator and its 33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown associated state. As with cg_sim.c it is #included directly into 34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown cg_main.c. It provides: 35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - a taken/not-taken predictor for conditional branches 37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown - a branch target address predictor for indirect branches 38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Function return-address prediction is not modelled, on the basis 40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown that return stack predictors almost always predict correctly, and 41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown also that it is difficult for Valgrind to robustly identify 42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown function calls and returns. 43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* How many bits at the bottom of an instruction address are 46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown guaranteed to be zero? */ 47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm) 48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# define N_IADDR_LO_ZERO_BITS 2 49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_x86) || defined(VGA_amd64) 50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# define N_IADDR_LO_ZERO_BITS 0 51b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov#elif defined(VGA_s390x) 52b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov# define N_IADDR_LO_ZERO_BITS 1 53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#else 54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# error "Unsupported architecture" 55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#endif 56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Get a taken/not-taken prediction for the instruction (presumably a 59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown conditional branch) at instr_addr. Once that's done, update the 60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown predictor state based on whether or not it was actually taken, as 61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown indicated by 'taken'. Finally, return 1 for a mispredict and 0 for 62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a successful predict. 63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The predictor is an array of 16k (== 2^14) 2-bit saturating 65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown counters. Given the address of the branch instruction, the array 66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown index to use is computed both from the low order bits of the branch 67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown instruction's address, and the global history - that is, from the 68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown taken/not-taken behaviour of the most recent few branches. This 69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown makes the predictor able to correlate this branch's behaviour with 70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown that of other branches. 71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown TODO: use predictor written by someone who understands this stuff. 73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Perhaps it would be better to move to a standard GShare predictor 74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown and/or tournament predictor. 75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* The index is composed of N_HIST bits at the top and N_IADD bits at 77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown the bottom. These numbers chosen somewhat arbitrarily, but note 78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown that making N_IADD_BITS too small (eg 4) can cause large amounts of 79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown aliasing, and hence misprediction, particularly if the history bits 80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown are mostly unchanging. */ 81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_HIST_BITS 7 82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_IADD_BITS 7 83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_BITS (N_HIST_BITS + N_IADD_BITS) 85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_COUNTERS (1 << N_BITS) 86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UWord shift_register = 0; /* Contains global history */ 88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */ 89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic ULong do_cond_branch_predict ( Addr instr_addr, Word takenW ) 92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord indx; 94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool predicted_taken, actually_taken, mispredict; 95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const UWord hist_mask = (1 << N_HIST_BITS) - 1; 97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const UWord iadd_mask = (1 << N_IADD_BITS) - 1; 98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord hist_bits = shift_register & hist_mask; 99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS) 100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown & iadd_mask; 101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tl_assert(hist_bits <= hist_mask); 103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tl_assert(iadd_bits <= iadd_mask); 104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown indx = (hist_bits << N_IADD_BITS) | iadd_bits; 105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tl_assert(indx < N_COUNTERS); 106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0) VG_(printf)("index = %d\n", (Int)indx); 107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tl_assert(takenW <= 1); 109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown predicted_taken = counters[ indx ] >= 2; 110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown actually_taken = takenW > 0; 111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown mispredict = (actually_taken && (!predicted_taken)) 113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown || ((!actually_taken) && predicted_taken); 114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown shift_register <<= 1; 116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown shift_register |= (actually_taken ? 1 : 0); 117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (actually_taken) { 119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (counters[indx] < 3) 120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown counters[indx]++; 121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (counters[indx] > 0) 123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown counters[indx]--; 124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tl_assert(counters[indx] <= 3); 127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return mispredict ? 1 : 0; 129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* A very simple indirect branch predictor. Use the branch's address 133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown to index a table which records the previous target address for this 134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown branch (or whatever aliased with it) and use that as the 135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown prediction. */ 136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_BTAC_BITS 9 137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define N_BTAC (1 << N_BTAC_BITS) 138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */ 139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic ULong do_ind_branch_predict ( Addr instr_addr, Addr actual ) 141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool mispredict; 143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const UWord mask = (1 << N_BTAC_BITS) - 1; 144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS) 145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown & mask; 146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tl_assert(indx < N_BTAC); 147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown mispredict = btac[indx] != actual; 148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown btac[indx] = actual; 149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return mispredict ? 1 : 0; 150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end cg_branchpred.c ---*/ 155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 157