120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/* 220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Copyright © 2012 Intel Corporation 320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Permission is hereby granted, free of charge, to any person obtaining a 520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * copy of this software and associated documentation files (the "Software"), 620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * to deal in the Software without restriction, including without limitation 720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * the rights to use, copy, modify, merge, publish, distribute, sublicense, 820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * and/or sell copies of the Software, and to permit persons to whom the 920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Software is furnished to do so, subject to the following conditions: 1020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 1120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * The above copyright notice and this permission notice (including the next 1220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * paragraph) shall be included in all copies or substantial portions of the 1320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Software. 1420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 1520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * IN THE SOFTWARE. 2220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 2320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Authors: 2420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Eric Anholt <eric@anholt.net> 2520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 2620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 2720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 2820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt#include "brw_cfg.h" 2920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt#include "brw_vec4_live_variables.h" 3020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 3120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtusing namespace brw; 3220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 3320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/** @file brw_vec4_live_variables.cpp 3420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 3520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Support for computing at the basic block level which variables 3620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * (virtual GRFs in our case) are live at entry and exit. 3720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 38503fe278b070285b75a4000408873973d8d5f2b1Matt Turner * See Muchnick's Advanced Compiler Design and Implementation, section 3920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 14.1 (p444). 4020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 4120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 4220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/** 4320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Sets up the use[] and def[] arrays. 4420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 4520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * The basic-block-level live variable analysis needs to know which 4620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * variables get used before they're completely defined, and which 4720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * variables are completely defined before they're used. 4820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 4920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * We independently track each channel of a vec4. This is because we need to 5020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * be able to recognize a sequence like: 5120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 5220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * ... 5320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * DP4 tmp.x a b; 5420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * DP4 tmp.y c d; 5520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * MUL result.xy tmp.xy e.xy 5620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * ... 5720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 5820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * as having tmp live only across that sequence (assuming it's used nowhere 5920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * else), because it's a common pattern. A more conservative approach that 6020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * doesn't get tmp marked a deffed in this block will tend to result in 6120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * spilling. 6220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 6320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvoid 6420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_live_variables::setup_def_use() 6520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{ 6620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt int ip = 0; 6720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 68596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner foreach_block (block, cfg) { 6920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt assert(ip == block->start_ip); 70596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner if (block->num > 0) 71596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner assert(cfg->blocks[block->num - 1]->end_ip == ip - 1); 7220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 73bc2fbbafd216676ccc7c3abd794ecb7dd1fa631fMatt Turner foreach_inst_in_block(vec4_instruction, inst, block) { 7413f660158573846d6b1bc30ed4c61d97405bea58Matt Turner struct block_data *bd = &block_data[block->num]; 7513f660158573846d6b1bc30ed4c61d97405bea58Matt Turner 7620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt /* Set use[] for this instruction */ 7720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt for (unsigned int i = 0; i < 3; i++) { 78b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner if (inst->src[i].file == VGRF) { 794ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) { 802babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez for (int c = 0; c < 4; c++) { 814ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero const unsigned v = var_from_reg(alloc, inst->src[i], c, j); 822babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez if (!BITSET_TEST(bd->def, v)) 832babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez BITSET_SET(bd->use, v); 842babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez } 8520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 8620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 8720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 88a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro for (unsigned c = 0; c < 4; c++) { 89a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro if (inst->reads_flag(c) && 90a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro !BITSET_TEST(bd->flag_def, c)) { 91a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro BITSET_SET(bd->flag_use, c); 927a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner } 937a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner } 9420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 9520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt /* Check for unconditional writes to whole registers. These 9620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * are the things that screen off preceding definitions of a 9720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * variable, and thus qualify for being in def[]. 9820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 99b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner if (inst->dst.file == VGRF && 100b298311d517017834841e53b7e641738e6067cdcIago Toral Quiroga (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) { 1014ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { 1022babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez for (int c = 0; c < 4; c++) { 1032babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez if (inst->dst.writemask & (1 << c)) { 1044ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero const unsigned v = var_from_reg(alloc, inst->dst, c, i); 1052babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez if (!BITSET_TEST(bd->use, v)) 1062babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez BITSET_SET(bd->def, v); 1072babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez } 10820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 10920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 11020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 1117a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner if (inst->writes_flag()) { 112a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro for (unsigned c = 0; c < 4; c++) { 113a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro if ((inst->dst.writemask & (1 << c)) && 114a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro !BITSET_TEST(bd->flag_use, c)) { 115a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro BITSET_SET(bd->flag_def, c); 116a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro } 1177a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner } 1187a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner } 11920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 12020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt ip++; 12120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 12220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 12320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt} 12420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 12520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/** 12620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * The algorithm incrementally sets bits in liveout and livein, 12720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * propagating it through control flow. It will eventually terminate 12820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * because it only ever adds bits, and stops when no bits are added in 12920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * a pass. 13020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 13120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvoid 13220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_live_variables::compute_live_variables() 13320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{ 13420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt bool cont = true; 13520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 13620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt while (cont) { 13720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt cont = false; 13820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 139316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand foreach_block_reverse (block, cfg) { 14013f660158573846d6b1bc30ed4c61d97405bea58Matt Turner struct block_data *bd = &block_data[block->num]; 14113f660158573846d6b1bc30ed4c61d97405bea58Matt Turner 14220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt /* Update liveout */ 143596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner foreach_list_typed(bblock_link, child_link, link, &block->children) { 14413f660158573846d6b1bc30ed4c61d97405bea58Matt Turner struct block_data *child_bd = &block_data[child_link->block->num]; 14520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 146415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81Eric Anholt for (int i = 0; i < bitset_words; i++) { 14713f660158573846d6b1bc30ed4c61d97405bea58Matt Turner BITSET_WORD new_liveout = (child_bd->livein[i] & 14813f660158573846d6b1bc30ed4c61d97405bea58Matt Turner ~bd->liveout[i]); 149415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81Eric Anholt if (new_liveout) { 15013f660158573846d6b1bc30ed4c61d97405bea58Matt Turner bd->liveout[i] |= new_liveout; 15120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt cont = true; 15220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 15320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 1547a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner BITSET_WORD new_liveout = (child_bd->flag_livein[0] & 1557a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner ~bd->flag_liveout[0]); 1567a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner if (new_liveout) { 1577a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner bd->flag_liveout[0] |= new_liveout; 1587a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner cont = true; 1597a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner } 16020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 161316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand 162316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand /* Update livein */ 163316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand for (int i = 0; i < bitset_words; i++) { 164316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand BITSET_WORD new_livein = (bd->use[i] | 165316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand (bd->liveout[i] & 166316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand ~bd->def[i])); 167316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand if (new_livein & ~bd->livein[i]) { 168316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand bd->livein[i] |= new_livein; 169316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand cont = true; 170316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand } 171316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand } 172316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand BITSET_WORD new_livein = (bd->flag_use[0] | 173316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand (bd->flag_liveout[0] & 174316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand ~bd->flag_def[0])); 175316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand if (new_livein & ~bd->flag_livein[0]) { 176316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand bd->flag_livein[0] |= new_livein; 177316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand cont = true; 178316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand } 17920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 18020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 18120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt} 18220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 183bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerezvec4_live_variables::vec4_live_variables(const simple_allocator &alloc, 184bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerez cfg_t *cfg) 185bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerez : alloc(alloc), cfg(cfg) 18620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{ 187db47074ac02e2b822dd118f4837b32732941b78bFrancisco Jerez mem_ctx = ralloc_context(NULL); 18820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 1894ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero num_vars = alloc.total_size * 8; 19013f660158573846d6b1bc30ed4c61d97405bea58Matt Turner block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); 19120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 192415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81Eric Anholt bitset_words = BITSET_WORDS(num_vars); 19320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt for (int i = 0; i < cfg->num_blocks; i++) { 19413f660158573846d6b1bc30ed4c61d97405bea58Matt Turner block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 19513f660158573846d6b1bc30ed4c61d97405bea58Matt Turner block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 19613f660158573846d6b1bc30ed4c61d97405bea58Matt Turner block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 19713f660158573846d6b1bc30ed4c61d97405bea58Matt Turner block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 1987a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner 1997a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner block_data[i].flag_def[0] = 0; 2007a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner block_data[i].flag_use[0] = 0; 2017a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner block_data[i].flag_livein[0] = 0; 2027a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner block_data[i].flag_liveout[0] = 0; 20320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 20420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 20520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt setup_def_use(); 20620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt compute_live_variables(); 20720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt} 20820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 20920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_live_variables::~vec4_live_variables() 21020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{ 21120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt ralloc_free(mem_ctx); 21220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt} 21320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 21420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt#define MAX_INSTRUCTION (1 << 30) 21520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 21620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/** 21720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Computes a conservative start/end of the live intervals for each virtual GRF. 21820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 21920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * We could expose per-channel live intervals to the consumer based on the 22020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * information we computed in vec4_live_variables, except that our only 22120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * current user is virtual_grf_interferes(). So we instead union the 2220f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt * per-channel ranges into a per-vgrf range for virtual_grf_start[] and 2230f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt * virtual_grf_end[]. 22420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 22520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * We could potentially have virtual_grf_interferes() do the test per-channel, 22620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * which would let some interesting register allocation occur (particularly on 22720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * code-generated GLSL sequences from the Cg compiler which does register 22820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * allocation at the GLSL level and thus reuses components of the variable 22920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * with distinct lifetimes). But right now the complexity of doing so doesn't 23020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * seem worth it, since having virtual_grf_interferes() be cheap is important 23120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * for register allocation performance. 23220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 23320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvoid 23420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_visitor::calculate_live_intervals() 23520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{ 236a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner if (this->live_intervals) 23720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt return; 23820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 2394ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 8); 2404ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 8); 2410f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt ralloc_free(this->virtual_grf_start); 2420f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt ralloc_free(this->virtual_grf_end); 2430f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt this->virtual_grf_start = start; 2440f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt this->virtual_grf_end = end; 24520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 2464ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero for (unsigned i = 0; i < this->alloc.total_size * 8; i++) { 2470f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt start[i] = MAX_INSTRUCTION; 2480f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt end[i] = -1; 24920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 25020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 25120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt /* Start by setting up the intervals with no knowledge of control 25220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * flow. 25320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 25420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt int ip = 0; 25590bfeb22444df6ce779251522e47bf169e130f8eMatt Turner foreach_block_and_inst(block, vec4_instruction, inst, cfg) { 25620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt for (unsigned int i = 0; i < 3; i++) { 257b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner if (inst->src[i].file == VGRF) { 2584ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) { 2592babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez for (int c = 0; c < 4; c++) { 2604ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero const unsigned v = var_from_reg(alloc, inst->src[i], c, j); 2612babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez start[v] = MIN2(start[v], ip); 2622babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez end[v] = ip; 2632babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez } 264dc0f5099fa3cb564c25eb892fde93cacd29df8f1Matt Turner } 26520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 26620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 26720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 268b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner if (inst->dst.file == VGRF) { 2694ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { 2702babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez for (int c = 0; c < 4; c++) { 2712babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez if (inst->dst.writemask & (1 << c)) { 2724ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero const unsigned v = var_from_reg(alloc, inst->dst, c, i); 2732babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez start[v] = MIN2(start[v], ip); 2742babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez end[v] = ip; 2752babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez } 276dc0f5099fa3cb564c25eb892fde93cacd29df8f1Matt Turner } 277dc0f5099fa3cb564c25eb892fde93cacd29df8f1Matt Turner } 27820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 27920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 28020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt ip++; 28120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 28220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 28320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt /* Now, extend those intervals using our analysis of control flow. 28420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * 28520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * The control flow-aware analysis was done at a channel level, while at 28620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * this point we're distilling it down to vgrfs. 28720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */ 288bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerez this->live_intervals = new(mem_ctx) vec4_live_variables(alloc, cfg); 28920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 290596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner foreach_block (block, cfg) { 29113f660158573846d6b1bc30ed4c61d97405bea58Matt Turner struct block_data *bd = &live_intervals->block_data[block->num]; 29213f660158573846d6b1bc30ed4c61d97405bea58Matt Turner 293a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner for (int i = 0; i < live_intervals->num_vars; i++) { 294be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga if (BITSET_TEST(bd->livein, i)) { 295be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga start[i] = MIN2(start[i], block->start_ip); 296be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga end[i] = MAX2(end[i], block->start_ip); 297be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga } 29820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 299be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga if (BITSET_TEST(bd->liveout, i)) { 300be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga start[i] = MIN2(start[i], block->end_ip); 301be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga end[i] = MAX2(end[i], block->end_ip); 302be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga } 30320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 30420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt } 30520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt} 30620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt 3071c263f8f4f767df0511e63377c17a95ebebba944Matt Turnervoid 308072ea414d04f1b9a7bf06a00b9011e8ad521c878Matt Turnervec4_visitor::invalidate_live_intervals() 3091c263f8f4f767df0511e63377c17a95ebebba944Matt Turner{ 310a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner ralloc_free(live_intervals); 311a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner live_intervals = NULL; 3121c263f8f4f767df0511e63377c17a95ebebba944Matt Turner} 3131c263f8f4f767df0511e63377c17a95ebebba944Matt Turner 314e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezint 315e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezvec4_visitor::var_range_start(unsigned v, unsigned n) const 316e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez{ 317e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez int start = INT_MAX; 318e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez 319e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez for (unsigned i = 0; i < n; i++) 320e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez start = MIN2(start, virtual_grf_start[v + i]); 321e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez 322e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez return start; 323e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez} 324e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez 325e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezint 326e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezvec4_visitor::var_range_end(unsigned v, unsigned n) const 327e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez{ 328e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez int end = INT_MIN; 329e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez 330e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez for (unsigned i = 0; i < n; i++) 331e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez end = MAX2(end, virtual_grf_end[v + i]); 332e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez 333e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez return end; 334e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez} 335e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez 33620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtbool 33720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_visitor::virtual_grf_interferes(int a, int b) 33820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{ 3394ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <= 3404ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) || 3414ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <= 3424ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a]))); 34320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt} 344