1432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 2432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/ 3752f90673ebbb6b2f55fc5e46606dea371313713sewardj/*--- begin host_reg_alloc2.c ---*/ 4432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/ 5432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 6f8ed9d874a7b8651654591c68c6d431c758d787csewardj/* 7752f90673ebbb6b2f55fc5e46606dea371313713sewardj This file is part of Valgrind, a dynamic binary instrumentation 8752f90673ebbb6b2f55fc5e46606dea371313713sewardj framework. 9f8ed9d874a7b8651654591c68c6d431c758d787csewardj 10ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes Copyright (C) 2004-2017 OpenWorks LLP 11752f90673ebbb6b2f55fc5e46606dea371313713sewardj info@open-works.net 127bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 13752f90673ebbb6b2f55fc5e46606dea371313713sewardj This program is free software; you can redistribute it and/or 14752f90673ebbb6b2f55fc5e46606dea371313713sewardj modify it under the terms of the GNU General Public License as 15752f90673ebbb6b2f55fc5e46606dea371313713sewardj published by the Free Software Foundation; either version 2 of the 16752f90673ebbb6b2f55fc5e46606dea371313713sewardj License, or (at your option) any later version. 177bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 18752f90673ebbb6b2f55fc5e46606dea371313713sewardj This program is distributed in the hope that it will be useful, but 19752f90673ebbb6b2f55fc5e46606dea371313713sewardj WITHOUT ANY WARRANTY; without even the implied warranty of 20752f90673ebbb6b2f55fc5e46606dea371313713sewardj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21752f90673ebbb6b2f55fc5e46606dea371313713sewardj General Public License for more details. 22752f90673ebbb6b2f55fc5e46606dea371313713sewardj 23752f90673ebbb6b2f55fc5e46606dea371313713sewardj You should have received a copy of the GNU General Public License 24752f90673ebbb6b2f55fc5e46606dea371313713sewardj along with this program; if not, write to the Free Software 25752f90673ebbb6b2f55fc5e46606dea371313713sewardj Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 267bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 02110-1301, USA. 277bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 28752f90673ebbb6b2f55fc5e46606dea371313713sewardj The GNU General Public License is contained in the file COPYING. 29f8ed9d874a7b8651654591c68c6d431c758d787csewardj 30f8ed9d874a7b8651654591c68c6d431c758d787csewardj Neither the names of the U.S. Department of Energy nor the 31f8ed9d874a7b8651654591c68c6d431c758d787csewardj University of California nor the names of its contributors may be 32f8ed9d874a7b8651654591c68c6d431c758d787csewardj used to endorse or promote products derived from this software 33f8ed9d874a7b8651654591c68c6d431c758d787csewardj without prior written permission. 34f8ed9d874a7b8651654591c68c6d431c758d787csewardj*/ 35f8ed9d874a7b8651654591c68c6d431c758d787csewardj 36432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#include "libvex_basictypes.h" 37432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#include "libvex.h" 38432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 39cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "main_util.h" 40cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "host_generic_regs.h" 41432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 42432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Set to 1 for lots of debugging output. */ 43432b1c9af7e4421b67644811b94489b57f3cfae1sewardj#define DEBUG_REGALLOC 0 44432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 45432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 46432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* TODO 27 Oct 04: 47432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 48432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Better consistency checking from what isMove tells us. 49432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 50432b1c9af7e4421b67644811b94489b57f3cfae1sewardj We can possibly do V-V coalescing even when the src is spilled, 51432b1c9af7e4421b67644811b94489b57f3cfae1sewardj providing we can arrange for the dst to have the same spill slot. 52432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 53432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Note that state[].hreg is the same as the available real regs. 54432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 55432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Generally rationalise data structures. */ 56432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 57432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 58432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Records information on virtual register live ranges. Computed once 59432b1c9af7e4421b67644811b94489b57f3cfae1sewardj and remains unchanged after that. */ 60432b1c9af7e4421b67644811b94489b57f3cfae1sewardjtypedef 61432b1c9af7e4421b67644811b94489b57f3cfae1sewardj struct { 62432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Becomes live for the first time after this insn ... */ 63432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Short live_after; 64432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Becomes dead for the last time before this insn ... */ 65432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Short dead_before; 66432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* The "home" spill slot, if needed. Never changes. */ 67432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Short spill_offset; 68432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Short spill_size; 69432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* What kind of register this is. */ 70432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HRegClass reg_class; 71432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 72ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj VRegLR; 73432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 74432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 75432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Records information on real-register live ranges. Computed once 76432b1c9af7e4421b67644811b94489b57f3cfae1sewardj and remains unchanged after that. */ 77432b1c9af7e4421b67644811b94489b57f3cfae1sewardjtypedef 78432b1c9af7e4421b67644811b94489b57f3cfae1sewardj struct { 79432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HReg rreg; 80432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Becomes live after this insn ... */ 81432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Short live_after; 82432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Becomes dead before this insn ... */ 83432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Short dead_before; 84432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 85ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj RRegLR; 86432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 87432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 88ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj/* An array of the following structs (rreg_state) comprises the 89ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj running state of the allocator. It indicates what the current 90ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj disposition of each allocatable real register is. The array gets 91a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj updated as the allocator processes instructions. The identity of 92a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj the register is not recorded here, because the index of this 93a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj structure in doRegisterAllocation()'s |rreg_state| is the index 94a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj number of the register, and the register itself can be extracted 95a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj from the RRegUniverse supplied to doRegisterAllocation(). */ 96432b1c9af7e4421b67644811b94489b57f3cfae1sewardjtypedef 97432b1c9af7e4421b67644811b94489b57f3cfae1sewardj struct { 98607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* ------ FIELDS WHICH DO NOT CHANGE ------ */ 99432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Is this involved in any HLRs? (only an optimisation hint) */ 100432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Bool has_hlrs; 101607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* ------ FIELDS WHICH DO CHANGE ------ */ 102607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* 6 May 07: rearranged fields below so the whole struct fits 103607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj into 16 bytes on both x86 and amd64. */ 104607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* Used when .disp == Bound and we are looking for vregs to 105607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj spill. */ 106607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj Bool is_spill_cand; 107607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* Optimisation: used when .disp == Bound. Indicates when the 108607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg has the same value as the spill slot for the associated 109607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj vreg. Is safely left at False, and becomes True after a 110607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj spill store or reload for this rreg. */ 111607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj Bool eq_spill_slot; 112432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* What's it's current disposition? */ 113432b1c9af7e4421b67644811b94489b57f3cfae1sewardj enum { Free, /* available for use */ 114432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Unavail, /* in a real-reg live range */ 115432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Bound /* in use (holding value of some vreg) */ 116432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 117432b1c9af7e4421b67644811b94489b57f3cfae1sewardj disp; 118607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* If .disp == Bound, what vreg is it bound to? */ 119432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HReg vreg; 120432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 121432b1c9af7e4421b67644811b94489b57f3cfae1sewardj RRegState; 122432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 123c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 124ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj/* The allocator also maintains a redundant array of indexes 125ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj (vreg_state) from vreg numbers back to entries in rreg_state. It 126ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj is redundant because iff vreg_state[i] == j then 127ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj hregNumber(rreg_state[j].vreg) == i -- that is, the two entries 128ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj point at each other. The purpose of this is to speed up activities 129ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj which involve looking for a particular vreg: there is no need to 130ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj scan the rreg_state looking for it, just index directly into 131ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vreg_state. The FAQ "does this vreg already have an associated 132ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg" is the main beneficiary. 133ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 134ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj To indicate, in vreg_state[i], that a given vreg is not currently 135ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj associated with any rreg, that entry can be set to INVALID_RREG_NO. 136ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 137ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj Because the vreg_state entries are signed Shorts, the max number 138ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj of vregs that can be handed by regalloc is 32767. 139ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj*/ 140ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 141ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#define INVALID_RREG_NO ((Short)(-1)) 142ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 143ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) 144ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) 145ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 146432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 147432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/* Search forward from some given point in the incoming instruction 148432b1c9af7e4421b67644811b94489b57f3cfae1sewardj sequence. Point is to select a virtual register to spill, by 149432b1c9af7e4421b67644811b94489b57f3cfae1sewardj finding the vreg which is mentioned as far ahead as possible, in 150432b1c9af7e4421b67644811b94489b57f3cfae1sewardj the hope that this will minimise the number of consequent reloads. 151432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 152432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Only do the search for vregs which are Bound in the running state, 153432b1c9af7e4421b67644811b94489b57f3cfae1sewardj and for which the .is_spill_cand field is set. This allows the 154432b1c9af7e4421b67644811b94489b57f3cfae1sewardj caller to arbitrarily restrict the set of spill candidates to be 155432b1c9af7e4421b67644811b94489b57f3cfae1sewardj considered. 156432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 157a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj To do this we don't actually need to see the incoming instruction 158a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj stream. Rather, what we need us the HRegUsage records for the 159a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj incoming instruction stream. Hence that is passed in. 160a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 161432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Returns an index into the state array indicating the (v,r) pair to 162432b1c9af7e4421b67644811b94489b57f3cfae1sewardj spill, or -1 if none was found. */ 163432b1c9af7e4421b67644811b94489b57f3cfae1sewardjstatic 164432b1c9af7e4421b67644811b94489b57f3cfae1sewardjInt findMostDistantlyMentionedVReg ( 165a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HRegUsage* reg_usages_in, 166432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int search_from_instr, 167a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int num_instrs, 168432b1c9af7e4421b67644811b94489b57f3cfae1sewardj RRegState* state, 169a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int n_state 170432b1c9af7e4421b67644811b94489b57f3cfae1sewardj) 171432b1c9af7e4421b67644811b94489b57f3cfae1sewardj{ 172432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int k, m; 173432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int furthest_k = -1; 174432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int furthest = -1; 175432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(search_from_instr >= 0); 176432b1c9af7e4421b67644811b94489b57f3cfae1sewardj for (k = 0; k < n_state; k++) { 177432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (!state[k].is_spill_cand) 178432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 179432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(state[k].disp == Bound); 180a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (m = search_from_instr; m < num_instrs; m++) { 181a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (HRegUsage__contains(®_usages_in[m], state[k].vreg)) 182432b1c9af7e4421b67644811b94489b57f3cfae1sewardj break; 183432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 184432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (m > furthest) { 185432b1c9af7e4421b67644811b94489b57f3cfae1sewardj furthest = m; 186432b1c9af7e4421b67644811b94489b57f3cfae1sewardj furthest_k = k; 187432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 188432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 189432b1c9af7e4421b67644811b94489b57f3cfae1sewardj return furthest_k; 190432b1c9af7e4421b67644811b94489b57f3cfae1sewardj} 191432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 192432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 193478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj/* Check that this vreg has been assigned a sane spill offset. */ 194a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjinline 195a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void sanity_check_spill_offset ( VRegLR* vreg ) 196478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj{ 197c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj switch (vreg->reg_class) { 198c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj case HRcVec128: case HRcFlt64: 199c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj vassert(0 == ((UShort)vreg->spill_offset % 16)); break; 200c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj default: 201c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj vassert(0 == ((UShort)vreg->spill_offset % 8)); break; 202478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj } 203478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj} 204478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj 205478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj 206ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj/* Double the size of the real-reg live-range array, if needed. */ 207a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj__attribute__((noinline)) 208a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used ) 209432b1c9af7e4421b67644811b94489b57f3cfae1sewardj{ 210ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj Int k; 211ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj RRegLR* arr2; 212432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (0) 213432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); 214432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(used == *size); 215d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR)); 216432b1c9af7e4421b67644811b94489b57f3cfae1sewardj for (k = 0; k < *size; k++) 217432b1c9af7e4421b67644811b94489b57f3cfae1sewardj arr2[k] = (*info)[k]; 218432b1c9af7e4421b67644811b94489b57f3cfae1sewardj *size *= 2; 219432b1c9af7e4421b67644811b94489b57f3cfae1sewardj *info = arr2; 220432b1c9af7e4421b67644811b94489b57f3cfae1sewardj} 221a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjinline 222a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) 223a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj{ 224a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (LIKELY(used < *size)) return; 225a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ensureRRLRspace_SLOW(info, size, used); 226a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj} 227432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 228432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 229c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj/* Sort an array of RRegLR entries by either the .live_after or 230c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj .dead_before fields. This is performance-critical. */ 231c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardjstatic void sortRRLRarray ( RRegLR* arr, 232c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int size, Bool by_live_after ) 233c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj{ 234c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 235c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 9841, 29524, 88573, 265720, 236c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 797161, 2391484 }; 237c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int lo = 0; 238c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int hi = size-1; 239c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int i, j, h, bigN, hp; 240c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj RRegLR v; 241c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 242c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(size >= 0); 243c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (size == 0) 244c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj return; 245c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 246c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj bigN = hi - lo + 1; if (bigN < 2) return; 247c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; 248c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 249c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (by_live_after) { 250c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 251c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj for ( ; hp >= 0; hp--) { 252c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj h = incs[hp]; 253c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj for (i = lo + h; i <= hi; i++) { 254c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj v = arr[i]; 255c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj j = i; 256c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj while (arr[j-h].live_after > v.live_after) { 257c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj arr[j] = arr[j-h]; 258c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj j = j - h; 259c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (j <= (lo + h - 1)) break; 260c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 261c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj arr[j] = v; 262c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 263c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 264c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 265c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } else { 266c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 267c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj for ( ; hp >= 0; hp--) { 268c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj h = incs[hp]; 269c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj for (i = lo + h; i <= hi; i++) { 270c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj v = arr[i]; 271c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj j = i; 272c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj while (arr[j-h].dead_before > v.dead_before) { 273c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj arr[j] = arr[j-h]; 274c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj j = j - h; 275c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (j <= (lo + h - 1)) break; 276c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 277c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj arr[j] = v; 278c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 279c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 280c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 281c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 282c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj} 283c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 284c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 285a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj/* Compute the index of the highest and lowest 1 in a ULong, 286a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj respectively. Results are undefined if the argument is zero. 287a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Don't pass it zero :) */ 288a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic inline UInt ULong__maxIndex ( ULong w64 ) { 289a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj return 63 - __builtin_clzll(w64); 290a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj} 291a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 292a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic inline UInt ULong__minIndex ( ULong w64 ) { 293a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj return __builtin_ctzll(w64); 294a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj} 295a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 296a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 297a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ 298a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardjstatic void* local_memset ( void *destV, Int c, SizeT sz ) 299a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj{ 300a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) 301a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 302a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UInt c4; 303a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UChar* d = destV; 304a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UChar uc = c; 305a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 306a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj while ((!IS_4_ALIGNED(d)) && sz >= 1) { 307a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj d[0] = uc; 308a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj d++; 309a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj sz--; 310a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 311a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (sz == 0) 312a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj return destV; 313a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj c4 = uc; 314a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj c4 |= (c4 << 8); 315a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj c4 |= (c4 << 16); 316a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj while (sz >= 16) { 317a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ((UInt*)d)[0] = c4; 318a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ((UInt*)d)[1] = c4; 319a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ((UInt*)d)[2] = c4; 320a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ((UInt*)d)[3] = c4; 321a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj d += 16; 322a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj sz -= 16; 323a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 324a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj while (sz >= 4) { 325a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ((UInt*)d)[0] = c4; 326a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj d += 4; 327a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj sz -= 4; 328a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 329a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj while (sz >= 1) { 330a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj d[0] = c; 331a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj d++; 332a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj sz--; 333a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 334a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj return destV; 335a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 336a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj# undef IS_4_ALIGNED 337a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj} 338a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 339a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 34017bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj/* A target-independent register allocator. Requires various 34117bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj functions which it uses to deal abstractly with instructions and 34217bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj registers, since it cannot have any target-specific knowledge. 343432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 344432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Returns a new list of instructions, which, as a result of the 345432b1c9af7e4421b67644811b94489b57f3cfae1sewardj behaviour of mapRegs, will be in-place modifications of the 346432b1c9af7e4421b67644811b94489b57f3cfae1sewardj original instructions. 347432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 348432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Requires that the incoming code has been generated using 349432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside 350432b1c9af7e4421b67644811b94489b57f3cfae1sewardj that range is a checked run-time error. 351432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 352432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Takes an expandable array of pointers to unallocated insns. 353432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Returns an expandable array of pointers to allocated insns. 354432b1c9af7e4421b67644811b94489b57f3cfae1sewardj*/ 355432b1c9af7e4421b67644811b94489b57f3cfae1sewardjHInstrArray* doRegisterAllocation ( 356432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 357432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Incoming virtual-registerised code. */ 358432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HInstrArray* instrs_in, 359432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 360a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* The real-register universe to use. This contains facts about 361a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj real registers, one of which is the set of registers available 362a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for allocation. */ 363a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const RRegUniverse* univ, 364432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 365432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Return True iff the given insn is a reg-reg move, in which 366432b1c9af7e4421b67644811b94489b57f3cfae1sewardj case also return the src and dst regs. */ 367d8c64e082224b2e688abdef9219cc76fd82b373bflorian Bool (*isMove) ( const HInstr*, HReg*, HReg* ), 368432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 369432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Get info about register usage in this insn. */ 370d8c64e082224b2e688abdef9219cc76fd82b373bflorian void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), 371432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 372432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Apply a reg-reg mapping to an insn. */ 37392b643609c5fa432b11fc726c2706ae3f3296eb4cerion void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), 374432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 3756c299f3acab617581ea504e45fbb6cab24c2b29fsewardj /* Return one, or, if we're unlucky, two insn(s) to spill/restore a 3766c299f3acab617581ea504e45fbb6cab24c2b29fsewardj real reg to a spill slot byte offset. The two leading HInstr** 3776c299f3acab617581ea504e45fbb6cab24c2b29fsewardj args are out parameters, through which the generated insns are 3786c299f3acab617581ea504e45fbb6cab24c2b29fsewardj returned. Also (optionally) a 'directReload' function, which 379fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj attempts to replace a given instruction by one which reads 380fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj directly from a specified spill slot. May be NULL, in which 381fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj case the optimisation is not attempted. */ 3826c299f3acab617581ea504e45fbb6cab24c2b29fsewardj void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), 3836c299f3acab617581ea504e45fbb6cab24c2b29fsewardj void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), 384fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj HInstr* (*directReload) ( HInstr*, HReg, Short ), 385432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int guest_sizeB, 386432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 387432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* For debug printing only. */ 388d8c64e082224b2e688abdef9219cc76fd82b373bflorian void (*ppInstr) ( const HInstr*, Bool ), 38992b643609c5fa432b11fc726c2706ae3f3296eb4cerion void (*ppReg) ( HReg ), 39092b643609c5fa432b11fc726c2706ae3f3296eb4cerion 39192b643609c5fa432b11fc726c2706ae3f3296eb4cerion /* 32/64bit mode */ 39292b643609c5fa432b11fc726c2706ae3f3296eb4cerion Bool mode64 393432b1c9af7e4421b67644811b94489b57f3cfae1sewardj) 394432b1c9af7e4421b67644811b94489b57f3cfae1sewardj{ 395432b1c9af7e4421b67644811b94489b57f3cfae1sewardj# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) 396432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 397607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj const Bool eq_spill_opt = True; 398607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj 399432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Info on vregs and rregs. Computed once and remains 400432b1c9af7e4421b67644811b94489b57f3cfae1sewardj unchanged. */ 401ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj Int n_vregs; 402ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ 403432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 404c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* We keep two copies of the real-reg live range info, one sorted 405c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj by .live_after and the other by .dead_before. First the 406c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj unsorted info is created in the _la variant is copied into the 407c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj _db variant. Once that's done both of them are sorted. 408c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj We also need two integer cursors which record the next 409c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj location in the two arrays to consider. */ 410c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj RRegLR* rreg_lrs_la; 411c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj RRegLR* rreg_lrs_db; 412ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj Int rreg_lrs_size; 413ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj Int rreg_lrs_used; 414c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int rreg_lrs_la_next; 415c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Int rreg_lrs_db_next; 416ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj 417a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* Info on register usage in the incoming instruction array. 418a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Computed once and remains unchanged, more or less; updated 419a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj sometimes by the direct-reload optimisation. */ 420a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */ 421a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 422ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj /* Used when constructing vreg_lrs (for allocating stack 423432b1c9af7e4421b67644811b94489b57f3cfae1sewardj slots). */ 424a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Short ss_busy_until_before[N_SPILL64S]; 425432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 426ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj /* Used when constructing rreg_lrs. */ 427432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int* rreg_live_after; 428432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Int* rreg_dead_before; 429432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 430432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Running state of the core allocation algorithm. */ 431ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj RRegState* rreg_state; /* [0 .. n_rregs-1] */ 432ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj Int n_rregs; 433ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 434ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* .. and the redundant backward map */ 435ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. 436ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj This inplies n_rregs must be <= 32768. */ 437ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj Short* vreg_state; /* [0 .. n_vregs-1] */ 438432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 439432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* The vreg -> rreg map constructed and then applied to each 440432b1c9af7e4421b67644811b94489b57f3cfae1sewardj instr. */ 441432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HRegRemap remap; 442432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 443432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* The output array of instructions. */ 444432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HInstrArray* instrs_out; 445432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 446ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Sanity checks are expensive. They are only done periodically, 447ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj not at each insn processed. */ 448ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj Bool do_sanity_check; 449ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 4505074b493ae4af5e6017fac208f823d283c6123c0florian vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); 4515074b493ae4af5e6017fac208f823d283c6123c0florian vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); 45295a487bc73c0f8c9371ad500988a51c9e78ee34aflorian vassert(0 == (N_SPILL64S % 2)); 453432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 454432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* The live range numbers are signed shorts, and so limiting the 4558c50d7dcedc1efc0768548c88b7ce9001b7d4964sewardj number of insns to 15000 comfortably guards against them 456432b1c9af7e4421b67644811b94489b57f3cfae1sewardj overflowing 32k. */ 4578c50d7dcedc1efc0768548c88b7ce9001b7d4964sewardj vassert(instrs_in->arr_used <= 15000); 458432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 459432b1c9af7e4421b67644811b94489b57f3cfae1sewardj# define INVALID_INSTRNO (-2) 460432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 461432b1c9af7e4421b67644811b94489b57f3cfae1sewardj# define EMIT_INSTR(_instr) \ 462432b1c9af7e4421b67644811b94489b57f3cfae1sewardj do { \ 463432b1c9af7e4421b67644811b94489b57f3cfae1sewardj HInstr* _tmp = (_instr); \ 464432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (DEBUG_REGALLOC) { \ 465432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("** "); \ 46692b643609c5fa432b11fc726c2706ae3f3296eb4cerion (*ppInstr)(_tmp, mode64); \ 467432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("\n\n"); \ 468432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } \ 469432b1c9af7e4421b67644811b94489b57f3cfae1sewardj addHInstr ( instrs_out, _tmp ); \ 470432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } while (0) 471432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 472ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj# define PRINT_STATE \ 473ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj do { \ 474ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj Int z, q; \ 475ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj for (z = 0; z < n_rregs; z++) { \ 476ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf(" rreg_state[%2d] = ", z); \ 477a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(univ->regs[z]); \ 478ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf(" \t"); \ 479ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj switch (rreg_state[z].disp) { \ 480ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj case Free: vex_printf("Free\n"); break; \ 481ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj case Unavail: vex_printf("Unavail\n"); break; \ 482ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj case Bound: vex_printf("BoundTo "); \ 483ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj (*ppReg)(rreg_state[z].vreg); \ 484ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf("\n"); break; \ 485ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } \ 486ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } \ 487ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ 488ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj q = 0; \ 489ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj for (z = 0; z < n_vregs; z++) { \ 490ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (vreg_state[z] == INVALID_RREG_NO) \ 491ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj continue; \ 492ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf("[%d] -> %d ", z, vreg_state[z]); \ 493ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj q++; \ 494ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (q > 0 && (q % 6) == 0) \ 495ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf("\n "); \ 496ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } \ 497ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf("\n"); \ 498432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } while (0) 499432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 500432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 501ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* --------- Stage 0: set up output array --------- */ 502ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* --------- and allocate/initialise running state. --------- */ 503ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 504432b1c9af7e4421b67644811b94489b57f3cfae1sewardj instrs_out = newHInstrArray(); 505432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 506432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ... and initialise running state. */ 507ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* n_rregs is no more than a short name for n_available_real_regs. */ 508a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj n_rregs = univ->allocable; 509ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj n_vregs = instrs_in->n_vregs; 510ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 511ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* If this is not so, vreg_state entries will overflow. */ 512ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(n_vregs < 32767); 513ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 514a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* If this is not so, the universe we have is nonsensical. */ 515a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(n_rregs > 0); 516a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 517d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); 518d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short)); 519ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 520a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 521ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j].has_hlrs = False; 522ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j].disp = Free; 523ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j].vreg = INVALID_HREG; 524ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j].is_spill_cand = False; 525607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[j].eq_spill_slot = False; 526432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 527432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 528a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_vregs; j++) 529ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vreg_state[j] = INVALID_RREG_NO; 530ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 531ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 532432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* --------- Stage 1: compute vreg live ranges. --------- */ 533432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* --------- Stage 2: compute rreg live ranges. --------- */ 534432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 535432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 536432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 537432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* This is relatively simple, because (1) we only seek the complete 538432b1c9af7e4421b67644811b94489b57f3cfae1sewardj end-to-end live range of each vreg, and are not interested in 539432b1c9af7e4421b67644811b94489b57f3cfae1sewardj any holes in it, and (2) the vregs are conveniently numbered 0 540ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj .. n_vregs-1, so we can just dump the results in a 541ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj pre-allocated array. */ 542ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj 543ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs = NULL; 544ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (n_vregs > 0) 545d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs); 546ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj 547a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_vregs; j++) { 548ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs[j].live_after = INVALID_INSTRNO; 549ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs[j].dead_before = INVALID_INSTRNO; 550ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs[j].spill_offset = 0; 551ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs[j].spill_size = 0; 552ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs[j].reg_class = HRcINVALID; 553432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 554432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 555a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* An array to hold the reg-usage info for the incoming 556a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj instructions. */ 557a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj reg_usage_arr 558a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1); 559a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 560432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ 561432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 562432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 563432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 564432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* This is more complex than Stage 1, because we need to compute 565432b1c9af7e4421b67644811b94489b57f3cfae1sewardj exactly all the live ranges of all the allocatable real regs, 566432b1c9af7e4421b67644811b94489b57f3cfae1sewardj and we don't know in advance how many there will be. */ 567432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 568ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj rreg_lrs_used = 0; 569ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj rreg_lrs_size = 4; 570d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR)); 571c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_db = NULL; /* we'll create this later */ 572432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 573432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* We'll need to track live range start/end points seperately for 574432b1c9af7e4421b67644811b94489b57f3cfae1sewardj each rreg. Sigh. */ 575a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(n_rregs > 0); 576a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); 577a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); 578432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 579a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 580432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rreg_live_after[j] = 581432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rreg_dead_before[j] = INVALID_INSTRNO; 582432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 583432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 584432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ 585432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 586432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ start of ITERATE OVER INSNS ------ */ 587432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 588a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int ii = 0; ii < instrs_in->arr_used; ii++) { 589432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 590a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); 591432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 592a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (0) { 593a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n%d stage1: ", ii); 594a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppInstr)(instrs_in->arr[ii], mode64); 595a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 596a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ppHRegUsage(univ, ®_usage_arr[ii]); 597a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 598432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 599432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ 600432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 601a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* for each virtual reg mentioned in the insn ... */ 602a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { 603432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 604a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg vreg = reg_usage_arr[ii].vRegs[j]; 605a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(hregIsVirtual(vreg)); 606a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 607a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = hregIndex(vreg); 608ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (k < 0 || k >= n_vregs) { 609432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("\n"); 61092b643609c5fa432b11fc726c2706ae3f3296eb4cerion (*ppInstr)(instrs_in->arr[ii], mode64); 611432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("\n"); 612ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); 613432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vpanic("doRegisterAllocation: out-of-range vreg"); 614432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 615432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 616432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Take the opportunity to note its regclass. We'll need 617432b1c9af7e4421b67644811b94489b57f3cfae1sewardj that when allocating spill slots. */ 618ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[k].reg_class == HRcINVALID) { 619432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* First mention of this vreg. */ 620ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vreg_lrs[k].reg_class = hregClass(vreg); 621432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } else { 622432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Seen it before, so check for consistency. */ 623ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vassert(vreg_lrs[k].reg_class == hregClass(vreg)); 624432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 625432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 626432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Now consider live ranges. */ 627a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj switch (reg_usage_arr[ii].vMode[j]) { 628432b1c9af7e4421b67644811b94489b57f3cfae1sewardj case HRmRead: 629ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 630d08f2d71ec2efcdf5029b4b571c90452d12b1c72sewardj vex_printf("\n\nOFFENDING VREG = %d\n", k); 631432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vpanic("doRegisterAllocation: " 632432b1c9af7e4421b67644811b94489b57f3cfae1sewardj "first event for vreg is Read"); 633d08f2d71ec2efcdf5029b4b571c90452d12b1c72sewardj } 63440e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj vreg_lrs[k].dead_before = toShort(ii + 1); 635432b1c9af7e4421b67644811b94489b57f3cfae1sewardj break; 636432b1c9af7e4421b67644811b94489b57f3cfae1sewardj case HRmWrite: 637ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[k].live_after == INVALID_INSTRNO) 63840e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj vreg_lrs[k].live_after = toShort(ii); 63940e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj vreg_lrs[k].dead_before = toShort(ii + 1); 640432b1c9af7e4421b67644811b94489b57f3cfae1sewardj break; 641432b1c9af7e4421b67644811b94489b57f3cfae1sewardj case HRmModify: 642ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[k].live_after == INVALID_INSTRNO) { 643109ffdbb31ff652ae83d0ad400966f68c46cd4f1sewardj vex_printf("\n\nOFFENDING VREG = %d\n", k); 644432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vpanic("doRegisterAllocation: " 645432b1c9af7e4421b67644811b94489b57f3cfae1sewardj "first event for vreg is Modify"); 646109ffdbb31ff652ae83d0ad400966f68c46cd4f1sewardj } 64740e144d3c4b155ab30c71f5abc90ff67391e7a87sewardj vreg_lrs[k].dead_before = toShort(ii + 1); 648432b1c9af7e4421b67644811b94489b57f3cfae1sewardj break; 649432b1c9af7e4421b67644811b94489b57f3cfae1sewardj default: 650432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vpanic("doRegisterAllocation(1)"); 651432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } /* switch */ 652432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 653a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } /* iterate over virtual registers */ 654432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 655432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ 656432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 657432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ 658432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 659a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* If this doesn't hold, the following iteration over real registers 660a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj will fail miserably. */ 661a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(N_RREGUNIVERSE_REGS == 64); 662a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 663a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const ULong rRead = reg_usage_arr[ii].rRead; 664a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const ULong rWritten = reg_usage_arr[ii].rWritten; 665a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const ULong rMentioned = rRead | rWritten; 666a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 667a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UInt rReg_minIndex; 668a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UInt rReg_maxIndex; 669a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (rMentioned == 0) { 670a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* There are no real register uses in this insn. Set 671a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rReg_{min,max}Index so that the following loop doesn't iterate 672a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj at all, so as to avoid wasting time. */ 673a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rReg_minIndex = 1; 674a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rReg_maxIndex = 0; 675a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } else { 676a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rReg_minIndex = ULong__minIndex(rMentioned); 677a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rReg_maxIndex = ULong__maxIndex(rMentioned); 678a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* Don't bother to look at registers which are not available 679a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj to the allocator. We asserted above that n_rregs > 0, so 680a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj n_rregs-1 is safe. */ 681a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (rReg_maxIndex >= n_rregs) 682a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rReg_maxIndex = n_rregs-1; 683a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 684432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 685a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* for each allocator-available real reg mentioned in the insn ... */ 686a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* Note. We are allocating only over the real regs available to 687a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj the allocator. Others, eg the stack or baseblock pointers, 688a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj are unavailable to allocation and so we never visit them. 689a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Hence the iteration is cut off at n_rregs-1, since n_rregs == 690a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj univ->allocable. */ 691a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) { 692432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 693a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const ULong jMask = 1ULL << j; 694a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (LIKELY((rMentioned & jMask) == 0)) 695432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 696432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 697a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const Bool isR = (rRead & jMask) != 0; 698a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj const Bool isW = (rWritten & jMask) != 0; 699a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 700a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* Dummy initialisations of flush_la and flush_db to avoid 701a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj possible bogus uninit-var warnings from gcc. */ 702a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; 703a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Bool flush = False; 704a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 705a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (isW && !isR) { 706a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj flush_la = rreg_live_after[j]; 707a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj flush_db = rreg_dead_before[j]; 708a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO) 709a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj flush = True; 710a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_live_after[j] = ii; 711a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_dead_before[j] = ii+1; 712a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } else if (!isW && isR) { 713a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (rreg_live_after[j] == INVALID_INSTRNO) { 714a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\nOFFENDING RREG = "); 715a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(univ->regs[j]); 716a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 717a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\nOFFENDING instr = "); 718a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppInstr)(instrs_in->arr[ii], mode64); 719a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 720a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vpanic("doRegisterAllocation: " 721a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj "first event for rreg is Read"); 722a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 723a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_dead_before[j] = ii+1; 724a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } else { 725a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(isR && isW); 726a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (rreg_live_after[j] == INVALID_INSTRNO) { 727a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\nOFFENDING RREG = "); 728a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(univ->regs[j]); 729a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 730a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\nOFFENDING instr = "); 731a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppInstr)(instrs_in->arr[ii], mode64); 732a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 733a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vpanic("doRegisterAllocation: " 734a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj "first event for rreg is Modify"); 735a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 736a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_dead_before[j] = ii+1; 737432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 738432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 739432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (flush) { 740432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(flush_la != INVALID_INSTRNO); 741432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(flush_db != INVALID_INSTRNO); 742c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 743432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (0) 744432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); 745a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; 746c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); 747c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); 748ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj rreg_lrs_used++; 749432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 750432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 751a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } /* iterate over rregs in the instr */ 752432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 753432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ 754432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 755432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } /* iterate over insns */ 756432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 757432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ end of ITERATE OVER INSNS ------ */ 758432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 759432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ start of FINALISE RREG LIVE RANGES ------ */ 760432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 761432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Now finish up any live ranges left over. */ 762a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 763432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 764a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (0) { 765a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], 766a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_dead_before[j]); 767a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 768432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert( (rreg_live_after[j] == INVALID_INSTRNO 769a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj && rreg_dead_before[j] == INVALID_INSTRNO) 770432b1c9af7e4421b67644811b94489b57f3cfae1sewardj || 771a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (rreg_live_after[j] != INVALID_INSTRNO 772a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj && rreg_dead_before[j] != INVALID_INSTRNO) 773432b1c9af7e4421b67644811b94489b57f3cfae1sewardj ); 774432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 775432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (rreg_live_after[j] == INVALID_INSTRNO) 776432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 777432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 778c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); 779432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (0) 780432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("FLUSH 2 (%d,%d)\n", 781432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rreg_live_after[j], rreg_dead_before[j]); 782a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; 783c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); 784c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); 785ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj rreg_lrs_used++; 786432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 787432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 788432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Compute summary hints for choosing real regs. If a real reg is 789432b1c9af7e4421b67644811b94489b57f3cfae1sewardj involved in a hard live range, record that fact in the fixed 790ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj part of the running rreg_state. Later, when offered a choice between 791432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rregs, it's better to choose one which is not marked as having 792432b1c9af7e4421b67644811b94489b57f3cfae1sewardj any HLRs, since ones with HLRs may need to be spilled around 793432b1c9af7e4421b67644811b94489b57f3cfae1sewardj their HLRs. Correctness of final assignment is unaffected by 794ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj this mechanism -- it is only an optimisation. */ 795ee313664de48d0956d907b4ea7a954eb7f375683florian 796a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < rreg_lrs_used; j++) { 797a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg rreg = rreg_lrs_la[j].rreg; 798432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(!hregIsVirtual(rreg)); 799432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* rreg is involved in a HLR. Record this info in the array, if 800432b1c9af7e4421b67644811b94489b57f3cfae1sewardj there is space. */ 801a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UInt ix = hregIndex(rreg); 802a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(ix < n_rregs); 803a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_state[ix].has_hlrs = True; 804432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 805432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (0) { 806a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 807ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (!rreg_state[j].has_hlrs) 808432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 809a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ppReg(univ->regs[j]); 810432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf(" hinted\n"); 811432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 812432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 813432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 814c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* Finally, copy the _la variant into the _db variant and 815c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj sort both by their respective fields. */ 816d8e3ecaede8eccf5bc7be2a55d99a188a07b0a34florian rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR)); 817a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < rreg_lrs_used; j++) 818c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_db[j] = rreg_lrs_la[j]; 819c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 820c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); 821c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); 822c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 823c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* And set up the cursors. */ 824c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la_next = 0; 825c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_db_next = 0; 826c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 827a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 1; j < rreg_lrs_used; j++) { 828c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); 829c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); 830c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 831c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 832432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ end of FINALISE RREG LIVE RANGES ------ */ 833432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 834a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 835a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_vregs; j++) { 836a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("vreg %d: la = %d, db = %d\n", 837a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); 838a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 839c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj } 840a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 841a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 842a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("RRegLRs by LA:\n"); 843a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < rreg_lrs_used; j++) { 844a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf(" "); 845a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(rreg_lrs_la[j].rreg); 846a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf(" la = %d, db = %d\n", 847a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); 848a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 849a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("RRegLRs by DB:\n"); 850a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < rreg_lrs_used; j++) { 851a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf(" "); 852a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(rreg_lrs_db[j].rreg); 853a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf(" la = %d, db = %d\n", 854a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); 855a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 856432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 857432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 858432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* --------- Stage 3: allocate spill slots. --------- */ 859432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 8607fb65ebcac4ee8522f9b6823d857d87b04146c31sewardj /* Each spill slot is 8 bytes long. For vregs which take more than 8617fb65ebcac4ee8522f9b6823d857d87b04146c31sewardj 64 bits to spill (classes Flt64 and Vec128), we have to allocate 862c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj two consecutive spill slots. For 256 bit registers (class 863c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj Vec256), we have to allocate four consecutive spill slots. 864432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 865478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj For Vec128-class on PowerPC, the spill slot's actual address 866478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj must be 16-byte aligned. Since the spill slot's address is 867478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj computed as an offset from the guest state pointer, and since 868478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj the user of the generated code must set that pointer to a 869c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj 32-aligned value, we have the residual obligation here of 870478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj choosing a 16-aligned spill slot offset for Vec128-class values. 871478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj Since each spill slot is 8 bytes long, that means for 872478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj Vec128-class values we must allocated a spill slot number which 873478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj is zero mod 2. 874478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj 875a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Similarly, for Vec256 class on amd64, find a spill slot number 876c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj which is zero mod 4. This guarantees it will be 32 byte 877c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj aligned, which isn't actually necessary on amd64 (we use movUpd 878c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj etc to spill), but seems like good practice. 879c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj 880432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Do a rank-based allocation of vregs to spill slot numbers. We 881607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj put as few values as possible in spill slots, but nevertheless 882432b1c9af7e4421b67644811b94489b57f3cfae1sewardj need to have a spill slot available for all vregs, just in case. 883432b1c9af7e4421b67644811b94489b57f3cfae1sewardj */ 884a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* Int max_ss_no = -1; */ 885432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 886a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); 887432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 888a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_vregs; j++) { 889432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 890432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* True iff this vreg is unused. In which case we also expect 891432b1c9af7e4421b67644811b94489b57f3cfae1sewardj that the reg_class field for it has not been set. */ 892ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[j].live_after == INVALID_INSTRNO) { 893ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vassert(vreg_lrs[j].reg_class == HRcINVALID); 894432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 895432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 896432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 8977fb65ebcac4ee8522f9b6823d857d87b04146c31sewardj /* The spill slots are 64 bits in size. As per the comment on 898c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj definition of HRegClass in host_generic_regs.h, that means, 899c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj to spill a vreg of class Flt64 or Vec128, we'll need to find 900c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj two adjacent spill slots to use. For Vec256, we'll need to 901c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj find four adjacent slots to use. Note, this logic needs to 902c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj kept in sync with the size info on the definition of 903c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj HRegClass. */ 904a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int ss_no = -1; 905c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj switch (vreg_lrs[j].reg_class) { 906c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj 907c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj case HRcVec128: case HRcFlt64: 908c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj /* Find two adjacent free slots in which between them 909c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj provide up to 128 bits in which to spill the vreg. 910c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj Since we are trying to find an even:odd pair, move 911c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj along in steps of 2 (slots). */ 912a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2) 913a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after 914a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after) 915c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj break; 916a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (ss_no >= N_SPILL64S-1) { 917c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj vpanic("LibVEX_N_SPILL_BYTES is too low. " 918c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj "Increase and recompile."); 919c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj } 920a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before; 921a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before; 922c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj break; 923d08f2d71ec2efcdf5029b4b571c90452d12b1c72sewardj 924c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj default: 925c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj /* The ordinary case -- just find a single spill slot. */ 926c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj /* Find the lowest-numbered spill slot which is available 927c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj at the start point of this interval, and assign the 928c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj interval to it. */ 929a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) 930a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after) 931c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj break; 932a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (ss_no == N_SPILL64S) { 933c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj vpanic("LibVEX_N_SPILL_BYTES is too low. " 934c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj "Increase and recompile."); 935c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj } 936a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before; 937c4530ae9079b0c3f8d4e8df35073613d718cadecsewardj break; 938432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 939a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } /* switch (vreg_lrs[j].reg_class) */ 940432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 941432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* This reflects LibVEX's hard-wired knowledge of the baseBlock 942478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj layout: the guest state, then two equal sized areas following 943478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj it for two sets of shadow state, and then the spill area. */ 944a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); 945432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 946478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj /* Independent check that we've made a sane choice of slot */ 947478646f54befaba01cbceb40fd5e46cdf562fdb5sewardj sanity_check_spill_offset( &vreg_lrs[j] ); 948432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* if (j > max_ss_no) */ 949432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* max_ss_no = j; */ 950432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 951432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 952a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (0) { 953a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n\n"); 954a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_vregs; j++) 955a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("vreg %d --> spill offset %d\n", 956a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj j, vreg_lrs[j].spill_offset); 957a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 958432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 959432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* --------- Stage 4: establish rreg preferences --------- */ 960432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 961432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* It may be advantageous to allocating certain vregs to specific 962432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rregs, as a way of avoiding reg-reg moves later. Here we 963432b1c9af7e4421b67644811b94489b57f3cfae1sewardj establish which, if any, rreg each vreg would prefer to be in. 964432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Note that this constrains the allocator -- ideally we end up 9652fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj with as few as possible vregs expressing a preference. 966432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 9672fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj This is an optimisation: if the .preferred_rreg field is never 9682fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj set to anything different from INVALID_HREG, the allocator still 9692fa60a387c848499df6b7aa1f1c3de5a753a39dasewardj works. */ 97017bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj 97117bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj /* 30 Dec 04: removed this mechanism as it does not seem to 97217bbc21313cd903a6b44c912b4af44f14ce68ec4sewardj help. */ 973432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 974432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* --------- Stage 5: process instructions --------- */ 975432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 976432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* This is the main loop of the allocator. First, we need to 977432b1c9af7e4421b67644811b94489b57f3cfae1sewardj correctly set up our running state, which tracks the status of 978432b1c9af7e4421b67644811b94489b57f3cfae1sewardj each real register. */ 979432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 980432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ BEGIN: Process each insn in turn. ------ */ 981432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 982a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int ii = 0; ii < instrs_in->arr_used; ii++) { 983432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 984a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 985a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n====----====---- Insn %d ----====----====\n", ii); 986a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("---- "); 987a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppInstr)(instrs_in->arr[ii], mode64); 988a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n\nInitial state:\n"); 989a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj PRINT_STATE; 990a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 991a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 992432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 993432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------------ Sanity checks ------------ */ 994432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 995ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Sanity checks are expensive. So they are done only once 996a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes every 17 instructions, and just before the last 997ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj instruction. */ 998ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj do_sanity_check 999ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj = toBool( 1000a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj False /* Set to True for sanity checking of all insns. */ 1001ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj || ii == instrs_in->arr_used-1 1002a0664b9ca67b594bd6f570a61d3301167a24750cElliott Hughes || (ii > 0 && (ii % 17) == 0) 1003ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj ); 1004ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 1005ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (do_sanity_check) { 1006ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 1007ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Sanity check 1: all rregs with a hard live range crossing 1008ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj this insn must be marked as unavailable in the running 1009ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj state. */ 1010a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < rreg_lrs_used; j++) { 1011a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (rreg_lrs_la[j].live_after < ii 1012c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj && ii < rreg_lrs_la[j].dead_before) { 1013ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* ii is the middle of a hard live range for some real 1014ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj reg. Check it's marked as such in the running 1015ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj state. */ 1016a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg reg = rreg_lrs_la[j].rreg; 1017ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 1018a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (0) { 1019a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("considering la %d .. db %d reg = ", 1020a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_lrs_la[j].live_after, 1021a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_lrs_la[j].dead_before); 1022a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(reg); 1023a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 1024a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1025432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1026a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* assert that this rreg is marked as unavailable */ 1027a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(!hregIsVirtual(reg)); 1028a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(rreg_state[hregIndex(reg)].disp == Unavail); 1029ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } 1030432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1031432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1032ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Sanity check 2: conversely, all rregs marked as 1033ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj unavailable in the running rreg_state must have a 1034ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj corresponding hard live range entry in the rreg_lrs 1035ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj array. */ 1036a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 1037ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(rreg_state[j].disp == Bound 1038ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj || rreg_state[j].disp == Free 1039ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj || rreg_state[j].disp == Unavail); 1040ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (rreg_state[j].disp != Unavail) 1041ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj continue; 1042a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k; 1043a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (k = 0; k < rreg_lrs_used; k++) { 1044a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg reg = rreg_lrs_la[k].rreg; 1045a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(!hregIsVirtual(reg)); 1046a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (hregIndex(reg) == j 1047c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj && rreg_lrs_la[k].live_after < ii 1048c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj && ii < rreg_lrs_la[k].dead_before) 1049ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj break; 1050a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1051ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* If this vassertion fails, we couldn't find a 1052ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj corresponding HLR. */ 1053ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(k < rreg_lrs_used); 1054ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } 1055432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1056ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Sanity check 3: all vreg-rreg bindings must bind registers 1057ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj of the same class. */ 1058a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 1059607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj if (rreg_state[j].disp != Bound) { 1060607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj vassert(rreg_state[j].eq_spill_slot == False); 1061ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj continue; 1062607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj } 1063a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(hregClass(univ->regs[j]) 1064ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj == hregClass(rreg_state[j].vreg)); 1065ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert( hregIsVirtual(rreg_state[j].vreg)); 1066ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } 1067432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1068ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Sanity check 4: the vreg_state and rreg_state 1069ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj mutually-redundant mappings are consistent. If 1070ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j].vreg points at some vreg_state entry then 1071ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj that vreg_state entry should point back at 1072ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j]. */ 1073a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 1074ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (rreg_state[j].disp != Bound) 1075ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj continue; 1076a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = hregIndex(rreg_state[j].vreg); 1077ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(k)); 1078ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(vreg_state[k] == j); 1079ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } 1080a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_vregs; j++) { 1081a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = vreg_state[j]; 1082ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (k == INVALID_RREG_NO) 1083ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj continue; 1084ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_RREGNO(k)); 1085ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(rreg_state[k].disp == Bound); 1086a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(hregIndex(rreg_state[k].vreg) == j); 1087ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } 1088ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 1089ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } /* if (do_sanity_check) */ 1090432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1091432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------------ end of Sanity checks ------------ */ 1092432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1093432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Do various optimisations pertaining to register coalescing 1094432b1c9af7e4421b67644811b94489b57f3cfae1sewardj and preferencing: 1095432b1c9af7e4421b67644811b94489b57f3cfae1sewardj MOV v <-> v coalescing (done here). 1096432b1c9af7e4421b67644811b94489b57f3cfae1sewardj MOV v <-> r coalescing (not yet, if ever) 1097432b1c9af7e4421b67644811b94489b57f3cfae1sewardj */ 1098432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* If doing a reg-reg move between two vregs, and the src's live 1099432b1c9af7e4421b67644811b94489b57f3cfae1sewardj range ends here and the dst's live range starts here, bind 1100432b1c9af7e4421b67644811b94489b57f3cfae1sewardj the dst to the src's rreg, and that's all. */ 1101a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg vregS = INVALID_HREG; 1102a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg vregD = INVALID_HREG; 1103432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { 1104432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (!hregIsVirtual(vregS)) goto cannot_coalesce; 1105432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (!hregIsVirtual(vregD)) goto cannot_coalesce; 1106432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Check that *isMove is not telling us a bunch of lies ... */ 1107432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vassert(hregClass(vregS) == hregClass(vregD)); 1108a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = hregIndex(vregS); 1109a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int m = hregIndex(vregD); 1110ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(k)); 1111ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(m)); 1112ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; 1113ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; 1114a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 1115432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("COALESCE "); 1116a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(vregS); 1117a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf(" -> "); 1118a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(vregD); 1119a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n\n"); 1120a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1121432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Find the state entry for vregS. */ 1122a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int n = vreg_state[k]; /* k is the index of vregS */ 1123a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (n == INVALID_RREG_NO) { 1124a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* vregS is not currently in a real register. So we can't 1125a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj do the coalescing. Give up. */ 1126432b1c9af7e4421b67644811b94489b57f3cfae1sewardj goto cannot_coalesce; 1127a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1128a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(IS_VALID_RREGNO(n)); 1129432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1130432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Finally, we can do the coalescing. It's trivial -- merely 1131432b1c9af7e4421b67644811b94489b57f3cfae1sewardj claim vregS's register for vregD. */ 1132a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_state[n].vreg = vregD; 1133a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(IS_VALID_VREGNO(hregIndex(vregD))); 1134a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(IS_VALID_VREGNO(hregIndex(vregS))); 1135a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vreg_state[hregIndex(vregD)] = toShort(n); 1136a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vreg_state[hregIndex(vregS)] = INVALID_RREG_NO; 1137432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1138607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* This rreg has become associated with a different vreg and 1139607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj hence with a different spill slot. Play safe. */ 1140a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_state[n].eq_spill_slot = False; 1141607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj 1142432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Move on to the next insn. We skip the post-insn stuff for 1143432b1c9af7e4421b67644811b94489b57f3cfae1sewardj fixed registers, since this move should not interact with 1144432b1c9af7e4421b67644811b94489b57f3cfae1sewardj them in any way. */ 1145432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1146432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1147432b1c9af7e4421b67644811b94489b57f3cfae1sewardj cannot_coalesce: 1148432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1149432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ Free up rregs bound to dead vregs ------ */ 1150432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1151432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Look for vregs whose live range has just ended, and 1152432b1c9af7e4421b67644811b94489b57f3cfae1sewardj mark the associated rreg as free. */ 1153432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1154a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < n_rregs; j++) { 1155ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (rreg_state[j].disp != Bound) 1156432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1157a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj UInt vregno = hregIndex(rreg_state[j].vreg); 1158c17ce029081f7ad437538cf474c51de80ab50964florian vassert(IS_VALID_VREGNO(vregno)); 1159c17ce029081f7ad437538cf474c51de80ab50964florian if (vreg_lrs[vregno].dead_before <= ii) { 1160ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[j].disp = Free; 1161607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[j].eq_spill_slot = False; 1162a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int m = hregIndex(rreg_state[j].vreg); 1163ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(m)); 1164ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vreg_state[m] = INVALID_RREG_NO; 1165432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (DEBUG_REGALLOC) { 1166432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("free up "); 1167a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(univ->regs[j]); 1168432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("\n"); 1169432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1170432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1171432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1172432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1173432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ Pre-instruction actions for fixed rreg uses ------ */ 1174432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1175432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Now we have to deal with rregs which are about to be made 1176432b1c9af7e4421b67644811b94489b57f3cfae1sewardj live by this instruction -- in other words, are entering into 1177432b1c9af7e4421b67644811b94489b57f3cfae1sewardj one of their live ranges. If any such rreg holds a vreg, we 1178432b1c9af7e4421b67644811b94489b57f3cfae1sewardj will have to free up the rreg. The simplest solution which 1179432b1c9af7e4421b67644811b94489b57f3cfae1sewardj is correct is to spill the rreg. 1180432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1181432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Note we could do better: 1182432b1c9af7e4421b67644811b94489b57f3cfae1sewardj * Could move it into some other free rreg, if one is available 1183432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1184c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Do this efficiently, by incrementally stepping along an array 1185c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj of rreg HLRs that are known to be sorted by start point 1186c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj (their .live_after field). 1187432b1c9af7e4421b67644811b94489b57f3cfae1sewardj */ 1188c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj while (True) { 1189c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_la_next >= 0); 1190c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_la_next <= rreg_lrs_used); 1191c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (rreg_lrs_la_next == rreg_lrs_used) 1192c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj break; /* no more real reg live ranges to consider */ 1193c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) 1194c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj break; /* next live range does not yet start */ 1195c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); 1196c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. 1197c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Find the associated rreg_state entry. */ 1198c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. 1199c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Real register live ranges are guaranteed to be well-formed 1200c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj in that they start with a write to the register -- Stage 2 1201c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rejects any code not satisfying this. So the correct 1202c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj question to ask is whether 1203c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, 1204c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj whether the reg becomes live after this insn -- rather 1205c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj than before it. */ 1206a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 1207a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("need to free up rreg: "); 1208a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); 1209a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n\n"); 1210a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1211a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); 1212a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj 1213c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* If this fails, we don't have an entry for this rreg. 1214c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj Which we should. */ 1215c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(IS_VALID_RREGNO(k)); 1216a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int m = hregIndex(rreg_state[k].vreg); 1217c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (rreg_state[k].disp == Bound) { 1218c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* Yes, there is an associated vreg. Spill it if it's 1219c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj still live. */ 1220c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(IS_VALID_VREGNO(m)); 1221c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vreg_state[m] = INVALID_RREG_NO; 1222c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (vreg_lrs[m].dead_before > ii) { 1223c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(vreg_lrs[m].reg_class != HRcINVALID); 1224607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { 12256c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* spill1 = NULL; 12266c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* spill2 = NULL; 1227a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*genSpill)( &spill1, &spill2, univ->regs[k], 12286c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vreg_lrs[m].spill_offset, mode64 ); 12296c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vassert(spill1 || spill2); /* can't both be NULL */ 12306c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (spill1) 12316c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(spill1); 12326c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (spill2) 12336c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(spill2); 1234607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj } 1235607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[k].eq_spill_slot = True; 1236432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1237432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1238c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_state[k].disp = Unavail; 1239c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_state[k].vreg = INVALID_HREG; 1240607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[k].eq_spill_slot = False; 1241c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 1242c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* check for further rregs entering HLRs at this point */ 1243c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_la_next++; 1244432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1245432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1246a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 1247a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("After pre-insn actions for fixed regs:\n"); 1248a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj PRINT_STATE; 1249a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 1250a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1251432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1252432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ Deal with the current instruction. ------ */ 1253432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1254432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Finally we can begin the processing of this instruction 1255432b1c9af7e4421b67644811b94489b57f3cfae1sewardj itself. The aim is to free up enough rregs for this insn. 1256432b1c9af7e4421b67644811b94489b57f3cfae1sewardj This may generate spill stores since we may have to evict 1257432b1c9af7e4421b67644811b94489b57f3cfae1sewardj some vregs currently in rregs. Also generates spill loads. 1258432b1c9af7e4421b67644811b94489b57f3cfae1sewardj We also build up the final vreg->rreg mapping to be applied 1259432b1c9af7e4421b67644811b94489b57f3cfae1sewardj to the insn. */ 1260432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1261432b1c9af7e4421b67644811b94489b57f3cfae1sewardj initHRegRemap(&remap); 1262432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1263fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj /* ------------ BEGIN directReload optimisation ----------- */ 1264fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1265fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj /* If the instruction reads exactly one vreg which is currently 1266fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj in a spill slot, and this is last use of that vreg, see if we 1267a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj can convert the instruction into one that reads directly from 1268a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj the spill slot. This is clearly only possible for x86 and 1269a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj amd64 targets, since ppc and arm are load-store 1270a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj architectures. If successful, replace instrs_in->arr[ii] 1271a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj with this new instruction, and recompute its reg usage, so 1272a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj that the change is invisible to the standard-case handling 1273a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj that follows. */ 1274fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1275a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (directReload && reg_usage_arr[ii].n_vRegs <= 2) { 1276a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Bool debug_direct_reload = False; 1277fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj HReg cand = INVALID_HREG; 1278fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj Bool nreads = 0; 1279fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj Short spilloff = 0; 1280fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1281a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { 1282fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1283a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg vreg = reg_usage_arr[ii].vRegs[j]; 1284a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(hregIsVirtual(vreg)); 1285fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1286a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].vMode[j] == HRmRead) { 1287fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj nreads++; 1288a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int m = hregIndex(vreg); 1289fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj vassert(IS_VALID_VREGNO(m)); 1290a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = vreg_state[m]; 1291fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj if (!IS_VALID_RREGNO(k)) { 1292fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj /* ok, it is spilled. Now, is this its last use? */ 1293fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj vassert(vreg_lrs[m].dead_before >= ii+1); 1294fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj if (vreg_lrs[m].dead_before == ii+1 129579efdc6ea93db174395af845d4e21a4a7ad600ccflorian && hregIsInvalid(cand)) { 1296fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj spilloff = vreg_lrs[m].spill_offset; 1297fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj cand = vreg; 1298fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1299fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1300fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1301fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1302fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 130379efdc6ea93db174395af845d4e21a4a7ad600ccflorian if (nreads == 1 && ! hregIsInvalid(cand)) { 1304fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj HInstr* reloaded; 1305a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].n_vRegs == 2) 1306a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(! sameHReg(reg_usage_arr[ii].vRegs[0], 1307a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj reg_usage_arr[ii].vRegs[1])); 1308fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1309fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); 1310fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj if (debug_direct_reload && !reloaded) { 1311fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); 1312fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj ppInstr(instrs_in->arr[ii], mode64); 1313fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1314fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj if (reloaded) { 1315fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj /* Update info about the insn, so it looks as if it had 1316fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj been in this form all along. */ 1317fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj instrs_in->arr[ii] = reloaded; 1318a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); 1319fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj if (debug_direct_reload && !reloaded) { 1320fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj vex_printf(" --> "); 1321fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj ppInstr(reloaded, mode64); 1322fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1323fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1324fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1325fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj if (debug_direct_reload && !reloaded) 1326fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj vex_printf("\n"); 1327fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1328fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1329fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj } 1330fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1331fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj /* ------------ END directReload optimisation ------------ */ 1332fb7373aee5e8a3039f2916ecf09870f3ec0c1805sewardj 1333a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* for each virtual reg mentioned in the insn ... */ 1334a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { 1335432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1336a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg vreg = reg_usage_arr[ii].vRegs[j]; 1337a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(hregIsVirtual(vreg)); 1338432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1339a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (0) { 1340a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); 1341a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1342432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1343432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Now we're trying to find a rreg for "vreg". First of all, 1344432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if it already has an rreg assigned, we don't need to do 1345a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj anything more. Inspect the current state to find out. */ 1346a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int m = hregIndex(vreg); 1347ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(m)); 1348a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int n = vreg_state[m]; 1349a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (IS_VALID_RREGNO(n)) { 1350a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(rreg_state[n].disp == Bound); 1351a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj addToHRegRemap(&remap, vreg, univ->regs[n]); 1352607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* If this rreg is written or modified, mark it as different 1353607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj from any spill slot value. */ 1354a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].vMode[j] != HRmRead) 1355a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj rreg_state[n].eq_spill_slot = False; 1356432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1357ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj } else { 1358a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(n == INVALID_RREG_NO); 1359432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1360432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1361432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* No luck. The next thing to do is see if there is a 1362432b1c9af7e4421b67644811b94489b57f3cfae1sewardj currently free rreg available, of the correct class. If 1363432b1c9af7e4421b67644811b94489b57f3cfae1sewardj so, bag it. NOTE, we could improve this by selecting an 1364432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rreg for which the next live-range event is as far ahead 1365432b1c9af7e4421b67644811b94489b57f3cfae1sewardj as possible. */ 1366a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k_suboptimal = -1; 1367a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k; 1368ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj for (k = 0; k < n_rregs; k++) { 1369ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (rreg_state[k].disp != Free 1370a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj || hregClass(univ->regs[k]) != hregClass(vreg)) 1371432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1372ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (rreg_state[k].has_hlrs) { 1373432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Well, at least we can use k_suboptimal if we really 1374432b1c9af7e4421b67644811b94489b57f3cfae1sewardj have to. Keep on looking for a better candidate. */ 1375432b1c9af7e4421b67644811b94489b57f3cfae1sewardj k_suboptimal = k; 1376432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } else { 1377432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Found a preferable reg. Use it. */ 1378432b1c9af7e4421b67644811b94489b57f3cfae1sewardj k_suboptimal = -1; 1379432b1c9af7e4421b67644811b94489b57f3cfae1sewardj break; 1380432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1381432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1382432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (k_suboptimal >= 0) 1383432b1c9af7e4421b67644811b94489b57f3cfae1sewardj k = k_suboptimal; 1384432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1385ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (k < n_rregs) { 1386ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[k].disp = Bound; 1387ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[k].vreg = vreg; 1388a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int p = hregIndex(vreg); 1389a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(IS_VALID_VREGNO(p)); 1390a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vreg_state[p] = toShort(k); 1391a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj addToHRegRemap(&remap, vreg, univ->regs[k]); 1392607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj /* Generate a reload if needed. This only creates needed 1393607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj reloads because the live range builder for vregs will 1394607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj guarantee that the first event for a vreg is a write. 1395607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj Hence, if this reference is not a write, it cannot be 1396607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj the first reference for this vreg, and so a reload is 1397607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj indeed needed. */ 1398a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].vMode[j] != HRmWrite) { 1399a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(vreg_lrs[p].reg_class != HRcINVALID); 14006c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* reload1 = NULL; 14016c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* reload2 = NULL; 1402a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*genReload)( &reload1, &reload2, univ->regs[k], 1403a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vreg_lrs[p].spill_offset, mode64 ); 14046c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vassert(reload1 || reload2); /* can't both be NULL */ 14056c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (reload1) 14066c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(reload1); 14076c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (reload2) 14086c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(reload2); 14097342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj /* This rreg is read or modified by the instruction. 14107342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj If it's merely read we can claim it now equals the 14117342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj spill slot, but not so if it is modified. */ 1412a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].vMode[j] == HRmRead) { 14137342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj rreg_state[k].eq_spill_slot = True; 14147342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj } else { 1415a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(reg_usage_arr[ii].vMode[j] == HRmModify); 14167342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj rreg_state[k].eq_spill_slot = False; 14177342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj } 1418607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj } else { 1419607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[k].eq_spill_slot = False; 1420432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1421607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj 1422432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1423432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1424432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1425432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Well, now we have no option but to spill a vreg. It's 1426432b1c9af7e4421b67644811b94489b57f3cfae1sewardj important to make a good choice of vreg to spill, and of 1427432b1c9af7e4421b67644811b94489b57f3cfae1sewardj course we need to be careful not to spill a vreg which is 1428432b1c9af7e4421b67644811b94489b57f3cfae1sewardj needed by this insn. */ 1429432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1430ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* First, mark in the rreg_state, those rregs which are not spill 1431432b1c9af7e4421b67644811b94489b57f3cfae1sewardj candidates, due to holding a vreg mentioned by this 1432432b1c9af7e4421b67644811b94489b57f3cfae1sewardj instruction. Or being of the wrong class. */ 1433ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj for (k = 0; k < n_rregs; k++) { 1434ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[k].is_spill_cand = False; 1435ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj if (rreg_state[k].disp != Bound) 1436432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1437a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (hregClass(univ->regs[k]) != hregClass(vreg)) 1438432b1c9af7e4421b67644811b94489b57f3cfae1sewardj continue; 1439ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[k].is_spill_cand = True; 1440a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj /* Note, the following loop visits only the virtual regs 1441a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj mentioned by the instruction. */ 1442a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) { 1443a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) { 1444ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[k].is_spill_cand = False; 1445432b1c9af7e4421b67644811b94489b57f3cfae1sewardj break; 1446432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1447432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1448432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1449432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1450432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* We can choose to spill any rreg satisfying 1451ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[r].is_spill_cand (so to speak). Choose r so that 1452432b1c9af7e4421b67644811b94489b57f3cfae1sewardj the next use of its associated vreg is as far ahead as 1453432b1c9af7e4421b67644811b94489b57f3cfae1sewardj possible, in the hope that this will minimise the number 1454432b1c9af7e4421b67644811b94489b57f3cfae1sewardj of consequent reloads required. */ 1455a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int spillee 1456432b1c9af7e4421b67644811b94489b57f3cfae1sewardj = findMostDistantlyMentionedVReg ( 1457a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs ); 1458432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1459432b1c9af7e4421b67644811b94489b57f3cfae1sewardj if (spillee == -1) { 1460432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Hmmmmm. There don't appear to be any spill candidates. 1461432b1c9af7e4421b67644811b94489b57f3cfae1sewardj We're hosed. */ 1462432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("reg_alloc: can't find a register in class: "); 1463432b1c9af7e4421b67644811b94489b57f3cfae1sewardj ppHRegClass(hregClass(vreg)); 1464432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vex_printf("\n"); 1465432b1c9af7e4421b67644811b94489b57f3cfae1sewardj vpanic("reg_alloc: can't create a free register."); 1466432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1467432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1468ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Right. So we're going to spill rreg_state[spillee]. */ 1469ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_RREGNO(spillee)); 1470ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(rreg_state[spillee].disp == Bound); 1471432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* check it's the right class */ 1472a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(hregClass(univ->regs[spillee]) == hregClass(vreg)); 1473432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* check we're not ejecting the vreg for which we are trying 1474432b1c9af7e4421b67644811b94489b57f3cfae1sewardj to free up a register. */ 147579efdc6ea93db174395af845d4e21a4a7ad600ccflorian vassert(! sameHReg(rreg_state[spillee].vreg, vreg)); 1476432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1477a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj m = hregIndex(rreg_state[spillee].vreg); 1478ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(m)); 1479432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1480432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* So here's the spill store. Assert that we're spilling a 1481432b1c9af7e4421b67644811b94489b57f3cfae1sewardj live vreg. */ 1482ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vassert(vreg_lrs[m].dead_before > ii); 1483ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vassert(vreg_lrs[m].reg_class != HRcINVALID); 1484607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { 14856c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* spill1 = NULL; 14866c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* spill2 = NULL; 1487a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*genSpill)( &spill1, &spill2, univ->regs[spillee], 14886c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vreg_lrs[m].spill_offset, mode64 ); 14896c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vassert(spill1 || spill2); /* can't both be NULL */ 14906c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (spill1) 14916c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(spill1); 14926c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (spill2) 14936c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(spill2); 1494607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj } 1495432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1496ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* Update the rreg_state to reflect the new assignment for this 1497432b1c9af7e4421b67644811b94489b57f3cfae1sewardj rreg. */ 1498ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj rreg_state[spillee].vreg = vreg; 1499ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vreg_state[m] = INVALID_RREG_NO; 1500ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj 1501607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[spillee].eq_spill_slot = False; /* be safe */ 1502607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj 1503a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj m = hregIndex(vreg); 1504ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj vassert(IS_VALID_VREGNO(m)); 1505d4d3dd6ab27066963d21fca93394d6d0f0674a19sewardj vreg_state[m] = toShort(spillee); 1506432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1507432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Now, if this vreg is being read or modified (as opposed to 1508432b1c9af7e4421b67644811b94489b57f3cfae1sewardj written), we have to generate a reload for it. */ 1509a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].vMode[j] != HRmWrite) { 1510ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj vassert(vreg_lrs[m].reg_class != HRcINVALID); 15116c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* reload1 = NULL; 15126c299f3acab617581ea504e45fbb6cab24c2b29fsewardj HInstr* reload2 = NULL; 1513a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj (*genReload)( &reload1, &reload2, univ->regs[spillee], 15146c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vreg_lrs[m].spill_offset, mode64 ); 15156c299f3acab617581ea504e45fbb6cab24c2b29fsewardj vassert(reload1 || reload2); /* can't both be NULL */ 15166c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (reload1) 15176c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(reload1); 15186c299f3acab617581ea504e45fbb6cab24c2b29fsewardj if (reload2) 15196c299f3acab617581ea504e45fbb6cab24c2b29fsewardj EMIT_INSTR(reload2); 15207342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj /* This rreg is read or modified by the instruction. 15217342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj If it's merely read we can claim it now equals the 15227342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj spill slot, but not so if it is modified. */ 1523a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (reg_usage_arr[ii].vMode[j] == HRmRead) { 15247342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj rreg_state[spillee].eq_spill_slot = True; 15257342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj } else { 1526a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(reg_usage_arr[ii].vMode[j] == HRmModify); 15277342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj rreg_state[spillee].eq_spill_slot = False; 15287342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj } 1529432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1530432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1531432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* So after much twisting and turning, we have vreg mapped to 15327342b1b1666d4f47d5db8f042b22c5f115324f0bsewardj rreg_state[spillee].rreg. Note that in the map. */ 1533a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj addToHRegRemap(&remap, vreg, univ->regs[spillee]); 1534432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1535a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } /* iterate over virtual registers in this instruction. */ 1536432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1537432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* We've finished clowning around with registers in this instruction. 1538432b1c9af7e4421b67644811b94489b57f3cfae1sewardj Three results: 1539ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj - the running rreg_state[] has been updated 1540432b1c9af7e4421b67644811b94489b57f3cfae1sewardj - a suitable vreg->rreg mapping for this instruction has been 1541432b1c9af7e4421b67644811b94489b57f3cfae1sewardj constructed 1542432b1c9af7e4421b67644811b94489b57f3cfae1sewardj - spill and reload instructions may have been emitted. 1543432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1544432b1c9af7e4421b67644811b94489b57f3cfae1sewardj The final step is to apply the mapping to the instruction, 1545432b1c9af7e4421b67644811b94489b57f3cfae1sewardj and emit that. 1546432b1c9af7e4421b67644811b94489b57f3cfae1sewardj */ 1547432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1548432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ 154992b643609c5fa432b11fc726c2706ae3f3296eb4cerion (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); 1550432b1c9af7e4421b67644811b94489b57f3cfae1sewardj EMIT_INSTR( instrs_in->arr[ii] ); 1551432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1552a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 1553a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("After dealing with current insn:\n"); 1554a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj PRINT_STATE; 1555a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 1556a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1557432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1558432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ Post-instruction actions for fixed rreg uses ------ */ 1559432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1560432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Now we need to check for rregs exiting fixed live ranges 1561432b1c9af7e4421b67644811b94489b57f3cfae1sewardj after this instruction, and if so mark them as free. */ 1562c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj while (True) { 1563c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_db_next >= 0); 1564c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_db_next <= rreg_lrs_used); 1565c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (rreg_lrs_db_next == rreg_lrs_used) 1566c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj break; /* no more real reg live ranges to consider */ 1567c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) 1568c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj break; /* next live range does not yet start */ 1569c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); 1570c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live 1571c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj range. Mark it as such in the main rreg_state array. */ 1572a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg; 1573a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(!hregIsVirtual(reg)); 1574a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj Int k = hregIndex(reg); 1575a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vassert(IS_VALID_RREGNO(k)); 1576c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_state[k].disp == Unavail); 1577c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_state[k].disp = Free; 1578c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_state[k].vreg = INVALID_HREG; 1579607ca2d5fcdb6187d6b18bfe30bbbc999b9daa38sewardj rreg_state[k].eq_spill_slot = False; 1580c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 1581c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj /* check for further rregs leaving HLRs at this point */ 1582c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj rreg_lrs_db_next++; 1583432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } 1584432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1585a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj if (DEBUG_REGALLOC) { 1586a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("After post-insn actions for fixed regs:\n"); 1587a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj PRINT_STATE; 1588a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj vex_printf("\n"); 1589a5b502299bfc9d97f4c2c9f61cdc1a0a65e1da61sewardj } 1590432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1591432b1c9af7e4421b67644811b94489b57f3cfae1sewardj } /* iterate over insns */ 1592432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1593432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* ------ END: Process each insn in turn. ------ */ 1594432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1595ee7d2283d5263c020b7c12fd55b207c1ac556765sewardj /* free(rreg_state); */ 1596ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj /* free(rreg_lrs); */ 1597ad572dde4cc3e6f108d5793482ecb7f56f74f4a4sewardj /* if (vreg_lrs) free(vreg_lrs); */ 1598432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1599432b1c9af7e4421b67644811b94489b57f3cfae1sewardj /* Paranoia */ 1600c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_la_next == rreg_lrs_used); 1601c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj vassert(rreg_lrs_db_next == rreg_lrs_used); 1602c3bc60ab4089e8a0383ebca9bc7f7b14727aaa8asewardj 1603432b1c9af7e4421b67644811b94489b57f3cfae1sewardj return instrs_out; 1604432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1605432b1c9af7e4421b67644811b94489b57f3cfae1sewardj# undef INVALID_INSTRNO 1606432b1c9af7e4421b67644811b94489b57f3cfae1sewardj# undef EMIT_INSTR 1607432b1c9af7e4421b67644811b94489b57f3cfae1sewardj# undef PRINT_STATE 1608432b1c9af7e4421b67644811b94489b57f3cfae1sewardj} 1609432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1610432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1611432b1c9af7e4421b67644811b94489b57f3cfae1sewardj 1612432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/ 1613cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj/*--- host_reg_alloc2.c ---*/ 1614432b1c9af7e4421b67644811b94489b57f3cfae1sewardj/*---------------------------------------------------------------*/ 1615