/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */
2720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
2820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt#include "brw_cfg.h"
2920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt#include "brw_vec4_live_variables.h"
3020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
3120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtusing namespace brw;
3220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
/** @file brw_vec4_live_variables.cpp
 *
 * Support for computing at the basic block level which variables
 * (virtual GRFs in our case) are live at entry and exit.
 *
 * See Muchnick's Advanced Compiler Design and Implementation, section
 * 14.1 (p444).
 */
4120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
4220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/**
4320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Sets up the use[] and def[] arrays.
4420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt *
4520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * The basic-block-level live variable analysis needs to know which
4620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * variables get used before they're completely defined, and which
4720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * variables are completely defined before they're used.
4820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt *
4920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * We independently track each channel of a vec4.  This is because we need to
5020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * be able to recognize a sequence like:
5120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt *
5220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * ...
5320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * DP4 tmp.x a b;
5420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * DP4 tmp.y c d;
5520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * MUL result.xy tmp.xy e.xy
5620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * ...
5720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt *
5820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * as having tmp live only across that sequence (assuming it's used nowhere
5920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * else), because it's a common pattern.  A more conservative approach that
6020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * doesn't get tmp marked a deffed in this block will tend to result in
6120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * spilling.
6220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */
6320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvoid
6420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_live_variables::setup_def_use()
6520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{
6620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   int ip = 0;
6720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
68596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner   foreach_block (block, cfg) {
6920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      assert(ip == block->start_ip);
70596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner      if (block->num > 0)
71596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner	 assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);
7220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
73bc2fbbafd216676ccc7c3abd794ecb7dd1fa631fMatt Turner      foreach_inst_in_block(vec4_instruction, inst, block) {
7413f660158573846d6b1bc30ed4c61d97405bea58Matt Turner         struct block_data *bd = &block_data[block->num];
7513f660158573846d6b1bc30ed4c61d97405bea58Matt Turner
7620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 /* Set use[] for this instruction */
7720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 for (unsigned int i = 0; i < 3; i++) {
78b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner	    if (inst->src[i].file == VGRF) {
794ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero               for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
802babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  for (int c = 0; c < 4; c++) {
814ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero                     const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
822babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                     if (!BITSET_TEST(bd->def, v))
832babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                        BITSET_SET(bd->use, v);
842babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  }
8520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt               }
8620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	    }
8720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 }
88a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro         for (unsigned c = 0; c < 4; c++) {
89a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro            if (inst->reads_flag(c) &&
90a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro                !BITSET_TEST(bd->flag_def, c)) {
91a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro               BITSET_SET(bd->flag_use, c);
927a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner            }
937a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner         }
9420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
9520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 /* Check for unconditional writes to whole registers. These
9620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	  * are the things that screen off preceding definitions of a
9720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	  * variable, and thus qualify for being in def[].
9820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	  */
99b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner	 if (inst->dst.file == VGRF &&
100b298311d517017834841e53b7e641738e6067cdcIago Toral Quiroga	     (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
1014ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero            for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
1022babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez               for (int c = 0; c < 4; c++) {
1032babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  if (inst->dst.writemask & (1 << c)) {
1044ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero                     const unsigned v = var_from_reg(alloc, inst->dst, c, i);
1052babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                     if (!BITSET_TEST(bd->use, v))
1062babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                        BITSET_SET(bd->def, v);
1072babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  }
10820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt               }
10920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt            }
11020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt         }
1117a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner         if (inst->writes_flag()) {
112a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro            for (unsigned c = 0; c < 4; c++) {
113a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro               if ((inst->dst.writemask & (1 << c)) &&
114a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro                   !BITSET_TEST(bd->flag_use, c)) {
115a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro                  BITSET_SET(bd->flag_def, c);
116a59359ecd22154cc2b3f88bb8c599f21af8a3934Alejandro Piñeiro               }
1177a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner            }
1187a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner         }
11920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
12020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 ip++;
12120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      }
12220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   }
12320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt}
12420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
12520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/**
12620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * The algorithm incrementally sets bits in liveout and livein,
12720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * propagating it through control flow.  It will eventually terminate
12820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * because it only ever adds bits, and stops when no bits are added in
12920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * a pass.
13020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */
13120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvoid
13220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_live_variables::compute_live_variables()
13320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{
13420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   bool cont = true;
13520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
13620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   while (cont) {
13720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      cont = false;
13820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
139316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand      foreach_block_reverse (block, cfg) {
14013f660158573846d6b1bc30ed4c61d97405bea58Matt Turner         struct block_data *bd = &block_data[block->num];
14113f660158573846d6b1bc30ed4c61d97405bea58Matt Turner
14220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 /* Update liveout */
143596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner	 foreach_list_typed(bblock_link, child_link, link, &block->children) {
14413f660158573846d6b1bc30ed4c61d97405bea58Matt Turner            struct block_data *child_bd = &block_data[child_link->block->num];
14520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
146415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81Eric Anholt	    for (int i = 0; i < bitset_words; i++) {
14713f660158573846d6b1bc30ed4c61d97405bea58Matt Turner               BITSET_WORD new_liveout = (child_bd->livein[i] &
14813f660158573846d6b1bc30ed4c61d97405bea58Matt Turner                                          ~bd->liveout[i]);
149415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81Eric Anholt               if (new_liveout) {
15013f660158573846d6b1bc30ed4c61d97405bea58Matt Turner                  bd->liveout[i] |= new_liveout;
15120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt		  cont = true;
15220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	       }
15320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	    }
1547a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner            BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
1557a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner                                       ~bd->flag_liveout[0]);
1567a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner            if (new_liveout) {
1577a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner               bd->flag_liveout[0] |= new_liveout;
1587a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner               cont = true;
1597a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner            }
16020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 }
161316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand
162316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand         /* Update livein */
163316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand         for (int i = 0; i < bitset_words; i++) {
164316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand            BITSET_WORD new_livein = (bd->use[i] |
165316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand                                      (bd->liveout[i] &
166316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand                                       ~bd->def[i]));
167316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand            if (new_livein & ~bd->livein[i]) {
168316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand               bd->livein[i] |= new_livein;
169316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand               cont = true;
170316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand            }
171316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand         }
172316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand         BITSET_WORD new_livein = (bd->flag_use[0] |
173316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand                                   (bd->flag_liveout[0] &
174316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand                                    ~bd->flag_def[0]));
175316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand         if (new_livein & ~bd->flag_livein[0]) {
176316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand            bd->flag_livein[0] |= new_livein;
177316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand            cont = true;
178316206ee9ea06419c9a2ea6fe48d66a0b805319dJason Ekstrand         }
17920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      }
18020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   }
18120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt}
18220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
183bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerezvec4_live_variables::vec4_live_variables(const simple_allocator &alloc,
184bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerez                                         cfg_t *cfg)
185bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerez   : alloc(alloc), cfg(cfg)
18620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{
187db47074ac02e2b822dd118f4837b32732941b78bFrancisco Jerez   mem_ctx = ralloc_context(NULL);
18820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
1894ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero   num_vars = alloc.total_size * 8;
19013f660158573846d6b1bc30ed4c61d97405bea58Matt Turner   block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
19120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
192415d6dc5bd6915b0c17a1df0f9bd0ef4ca534a81Eric Anholt   bitset_words = BITSET_WORDS(num_vars);
19320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   for (int i = 0; i < cfg->num_blocks; i++) {
19413f660158573846d6b1bc30ed4c61d97405bea58Matt Turner      block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
19513f660158573846d6b1bc30ed4c61d97405bea58Matt Turner      block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
19613f660158573846d6b1bc30ed4c61d97405bea58Matt Turner      block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
19713f660158573846d6b1bc30ed4c61d97405bea58Matt Turner      block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
1987a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner
1997a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner      block_data[i].flag_def[0] = 0;
2007a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner      block_data[i].flag_use[0] = 0;
2017a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner      block_data[i].flag_livein[0] = 0;
2027a5cc789def94af7e5c364cce7b0884eee2bcc6bMatt Turner      block_data[i].flag_liveout[0] = 0;
20320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   }
20420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
20520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   setup_def_use();
20620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   compute_live_variables();
20720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt}
20820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
20920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_live_variables::~vec4_live_variables()
21020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{
21120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   ralloc_free(mem_ctx);
21220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt}
21320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
21420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt#define MAX_INSTRUCTION (1 << 30)
21520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
21620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt/**
21720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * Computes a conservative start/end of the live intervals for each virtual GRF.
21820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt *
21920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * We could expose per-channel live intervals to the consumer based on the
22020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * information we computed in vec4_live_variables, except that our only
22120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * current user is virtual_grf_interferes().  So we instead union the
2220f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt * per-channel ranges into a per-vgrf range for virtual_grf_start[] and
2230f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt * virtual_grf_end[].
22420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt *
22520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * We could potentially have virtual_grf_interferes() do the test per-channel,
22620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * which would let some interesting register allocation occur (particularly on
22720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * code-generated GLSL sequences from the Cg compiler which does register
22820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * allocation at the GLSL level and thus reuses components of the variable
22920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * with distinct lifetimes).  But right now the complexity of doing so doesn't
23020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * seem worth it, since having virtual_grf_interferes() be cheap is important
23120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt * for register allocation performance.
23220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt */
23320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvoid
23420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_visitor::calculate_live_intervals()
23520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{
236a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner   if (this->live_intervals)
23720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      return;
23820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
2394ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
2404ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
2410f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt   ralloc_free(this->virtual_grf_start);
2420f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt   ralloc_free(this->virtual_grf_end);
2430f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt   this->virtual_grf_start = start;
2440f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt   this->virtual_grf_end = end;
24520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
2464ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero   for (unsigned i = 0; i < this->alloc.total_size * 8; i++) {
2470f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt      start[i] = MAX_INSTRUCTION;
2480f3068a58bdbceb2cb93e3848b0e2145629cdf43Eric Anholt      end[i] = -1;
24920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   }
25020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
25120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   /* Start by setting up the intervals with no knowledge of control
25220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt    * flow.
25320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt    */
25420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   int ip = 0;
25590bfeb22444df6ce779251522e47bf169e130f8eMatt Turner   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
25620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      for (unsigned int i = 0; i < 3; i++) {
257b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner	 if (inst->src[i].file == VGRF) {
2584ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero            for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
2592babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez               for (int c = 0; c < 4; c++) {
2604ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero                  const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
2612babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  start[v] = MIN2(start[v], ip);
2622babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  end[v] = ip;
2632babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez               }
264dc0f5099fa3cb564c25eb892fde93cacd29df8f1Matt Turner            }
26520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt	 }
26620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      }
26720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
268b163aa01487ab5f9b22c48b7badc5d65999c4985Matt Turner      if (inst->dst.file == VGRF) {
2694ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero         for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
2702babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez            for (int c = 0; c < 4; c++) {
2712babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez               if (inst->dst.writemask & (1 << c)) {
2724ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero                  const unsigned v = var_from_reg(alloc, inst->dst, c, i);
2732babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  start[v] = MIN2(start[v], ip);
2742babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez                  end[v] = ip;
2752babde35b9a38a0561a87dc2d7cb431e9aabbd5aFrancisco Jerez               }
276dc0f5099fa3cb564c25eb892fde93cacd29df8f1Matt Turner            }
277dc0f5099fa3cb564c25eb892fde93cacd29df8f1Matt Turner         }
27820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      }
27920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
28020ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      ip++;
28120ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   }
28220ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
28320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   /* Now, extend those intervals using our analysis of control flow.
28420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt    *
28520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt    * The control flow-aware analysis was done at a channel level, while at
28620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt    * this point we're distilling it down to vgrfs.
28720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt    */
288bf6eb37e0b62fa61c01a32dc5ccb6a7ab00be5f4Francisco Jerez   this->live_intervals = new(mem_ctx) vec4_live_variables(alloc, cfg);
28920ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
290596990d91e2a4c4a3a303c6c2da623bf1840771bMatt Turner   foreach_block (block, cfg) {
29113f660158573846d6b1bc30ed4c61d97405bea58Matt Turner      struct block_data *bd = &live_intervals->block_data[block->num];
29213f660158573846d6b1bc30ed4c61d97405bea58Matt Turner
293a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner      for (int i = 0; i < live_intervals->num_vars; i++) {
294be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga         if (BITSET_TEST(bd->livein, i)) {
295be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga            start[i] = MIN2(start[i], block->start_ip);
296be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga            end[i] = MAX2(end[i], block->start_ip);
297be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga         }
29820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
299be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga         if (BITSET_TEST(bd->liveout, i)) {
300be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga            start[i] = MIN2(start[i], block->end_ip);
301be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga            end[i] = MAX2(end[i], block->end_ip);
302be800ef6d80fc43279780e652e611253428d7a78Iago Toral Quiroga         }
30320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt      }
30420ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt   }
30520ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt}
30620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt
3071c263f8f4f767df0511e63377c17a95ebebba944Matt Turnervoid
308072ea414d04f1b9a7bf06a00b9011e8ad521c878Matt Turnervec4_visitor::invalidate_live_intervals()
3091c263f8f4f767df0511e63377c17a95ebebba944Matt Turner{
310a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner   ralloc_free(live_intervals);
311a50915984fe1205a3479cc8a5d07a8b3bde7d6bcMatt Turner   live_intervals = NULL;
3121c263f8f4f767df0511e63377c17a95ebebba944Matt Turner}
3131c263f8f4f767df0511e63377c17a95ebebba944Matt Turner
314e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezint
315e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezvec4_visitor::var_range_start(unsigned v, unsigned n) const
316e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez{
317e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez   int start = INT_MAX;
318e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez
319e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez   for (unsigned i = 0; i < n; i++)
320e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez      start = MIN2(start, virtual_grf_start[v + i]);
321e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez
322e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez   return start;
323e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez}
324e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez
325e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezint
326e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerezvec4_visitor::var_range_end(unsigned v, unsigned n) const
327e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez{
328e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez   int end = INT_MIN;
329e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez
330e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez   for (unsigned i = 0; i < n; i++)
331e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez      end = MAX2(end, virtual_grf_end[v + i]);
332e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez
333e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez   return end;
334e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez}
335e6e655ef76bb22193b31af2841cb50fda0c39461Francisco Jerez
33620ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtbool
33720ebebac5153affcbd44350332678a2fb04d4c96Eric Anholtvec4_visitor::virtual_grf_interferes(int a, int b)
33820ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt{
3394ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero   return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
3404ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero             var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
3414ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero            (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
3424ea3bf8ebb56c8db6e885a77d81502a0b2adca4fJuan A. Suarez Romero             var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
34320ebebac5153affcbd44350332678a2fb04d4c96Eric Anholt}
344