prog_optimize.c revision 257cc48de2f4e472eb651a4c70042e5cb6b9fe0e
15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/* 25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Mesa 3-D graphics library 35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Version: 7.5 45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * 50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner * 75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Permission is hereby granted, free of charge, to any person obtaining a 85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * copy of this software and associated documentation files (the "Software"), 95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * to deal in the Software without restriction, including without limitation 105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * the rights to use, copy, modify, merge, publish, distribute, sublicense, 115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * and/or sell copies of the Software, and to permit persons to whom the 125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Software is furnished to do so, subject to the following conditions: 135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * 14c4a1dea2dc56bd1357ec91b829a0b9e68229a13eDaniel Dunbar * The above copyright notice and this permission notice shall be included 15a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner * in all copies or substantial portions of the Software. 162eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner * 17a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 1898cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19aaba5e346dffdbad5d1c42765a89e4a7afb0da67Douglas Gregor * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20e91593ef084479340582b2ba177b44be50a717b7Daniel Dunbar * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22da5a6b6d9fd52899499d5b7b46273ec844dcaa6eChris Lattner * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */ 245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "main/glheader.h" 285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "main/context.h" 29da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "main/macros.h" 30da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "program.h" 31da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "prog_instruction.h" 32da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "prog_optimize.h" 33da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "prog_print.h" 34ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen 35ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen 36ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen#define MAX_LOOP_NESTING 50 37da8249e57f3badecf925571881fe57243935c6c1Chris Lattner/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to 38da8249e57f3badecf925571881fe57243935c6c1Chris Lattner * register allocate many temporary values into that small number of 39da8249e57f3badecf925571881fe57243935c6c1Chris Lattner * temps. So allow large temporary indices coming into the register 40da8249e57f3badecf925571881fe57243935c6c1Chris Lattner * allocator. 416e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek */ 426e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek#define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1) 436e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek 445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic GLboolean dbg = GL_FALSE; 455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#define NO_MASK 0xf 476e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek 485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** 495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * are read from the given src in this instruction, We also provide 515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * one optional masks which may mask other components in the dst 525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * register 535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */ 545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic GLuint 555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerget_src_arg_mask(const struct prog_instruction *inst, 566e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek GLuint arg, GLuint dst_mask) 578189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek{ 58353ffceafc6bcebd5592cb9d93ea3f9242e5370aTed Kremenek GLuint read_mask, channel_mask; 59353ffceafc6bcebd5592cb9d93ea3f9242e5370aTed Kremenek GLuint comp; 605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode)); 625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Form the dst register, find the written channels */ 645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->CondUpdate) { 655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer channel_mask = WRITEMASK_XYZW; 665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else { 685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer switch (inst->Opcode) { 695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_MOV: 705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_MIN: 715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_MAX: 725a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_ABS: 735a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_ADD: 745a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_MAD: 755a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_MUL: 765a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_SUB: 775a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_CMP: 785a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_FLR: 795a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_FRC: 805a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_LRP: 815a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek case OPCODE_SEQ: 825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SGE: 835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SGT: 845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SLE: 855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SLT: 865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SNE: 875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SSG: 885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer channel_mask = inst->DstReg.WriteMask & dst_mask; 895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_RCP: 915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_SIN: 925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_COS: 935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_RSQ: 945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_POW: 955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_EX2: 965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_LOG: 975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer channel_mask = WRITEMASK_X; 985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_DP2: 10073d0d4fac161ed12926e010dcf8b448a8de6a2ecChris Lattner channel_mask = WRITEMASK_XY; 1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 1025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_DP3: 1035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_XPD: 1045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer channel_mask = WRITEMASK_XYZ; 1055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 1065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer default: 1075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer channel_mask = WRITEMASK_XYZW; 108668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek break; 1098189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek } 110898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor } 111898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor 112898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor /* Now, given the src swizzle and the written channels, find which 113898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor * components are actually read 114668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek */ 115668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek read_mask = 0x0; 116b4609806e9232593ece09ce08b630836e825865cDouglas Gregor for (comp = 0; comp < 4; ++comp) { 117b4609806e9232593ece09ce08b630836e825865cDouglas Gregor const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp); 118b4609806e9232593ece09ce08b630836e825865cDouglas Gregor ASSERT(coord < 4); 119668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek if (channel_mask & (1 << comp) && coord <= SWIZZLE_W) 120b4609806e9232593ece09ce08b630836e825865cDouglas Gregor read_mask |= 1 << coord; 121b4609806e9232593ece09ce08b630836e825865cDouglas Gregor } 122e2ce1d9440186cf3332368291cd884a6e3ae8946Nate Begeman 123668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek return read_mask; 124668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek} 125898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor 126898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor 127898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor/** 128898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor * For a MOV instruction, compute a write mask when src register also has 129668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek * a mask 130668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek */ 13177ed8e4edf6ed78c53fb20ec3210aff2a59c9d87Ted Kremenekstatic GLuint 1325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerget_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask) 13377ed8e4edf6ed78c53fb20ec3210aff2a59c9d87Ted Kremenek{ 134668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek const GLuint mask = mov->DstReg.WriteMask; 1355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint comp; 1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint updated_mask = 0x0; 1375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 138668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek ASSERT(mov->Opcode == OPCODE_MOV); 139668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek 140668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek for (comp = 0; comp < 4; ++comp) { 141668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek GLuint src_comp; 142668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek if ((mask & (1 << comp)) == 0) 143668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek continue; 144668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp); 145d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner if ((src_mask & (1 << src_comp)) == 0) 146d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner continue; 147d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner updated_mask |= 1 << comp; 1488189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek } 149d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner 150d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner return updated_mask; 151d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner} 152d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner 153d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner 154d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner/** 1558189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek * Ensure that the swizzle is regular. That is, all of the swizzle 156d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE. 157d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner */ 158d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattnerstatic GLboolean 159d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattneris_swizzle_regular(GLuint swz) 160d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner{ 1615549976193e34417d4474a5f4a514268ef6666c7Ted Kremenek return GET_SWZ(swz,0) <= SWIZZLE_W && 162d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner GET_SWZ(swz,1) <= SWIZZLE_W && 163d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner GET_SWZ(swz,2) <= SWIZZLE_W && 164d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner GET_SWZ(swz,3) <= SWIZZLE_W; 165d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner} 166d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner 167d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner 168d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner/** 1698189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. 170d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner * \return number of instructions removed 171d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner */ 172d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattnerstatic GLuint 173d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattnerremove_instructions(struct gl_program *prog, const GLboolean *removeFlags) 174cb888967400a03504c88acedd5248d6778a82f46Chris Lattner{ 175cb888967400a03504c88acedd5248d6778a82f46Chris Lattner GLint i, removeEnd = 0, removeCount = 0; 176cb888967400a03504c88acedd5248d6778a82f46Chris Lattner GLuint totalRemoved = 0; 177c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff 178c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff /* go backward */ 179c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff for (i = prog->NumInstructions - 1; i >= 0; i--) { 180c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff if (removeFlags[i]) { 181c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff totalRemoved++; 182cb888967400a03504c88acedd5248d6778a82f46Chris Lattner if (removeCount == 0) { 183cb888967400a03504c88acedd5248d6778a82f46Chris Lattner /* begin a run of instructions to remove */ 184c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff removeEnd = i; 185c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff removeCount = 1; 186cb888967400a03504c88acedd5248d6778a82f46Chris Lattner } 187cb888967400a03504c88acedd5248d6778a82f46Chris Lattner else { 188bcba201a1118d7852b8b97187d495ae2a4f49519Anders Carlsson /* extend the run of instructions to remove */ 189bcba201a1118d7852b8b97187d495ae2a4f49519Anders Carlsson removeCount++; 190cb888967400a03504c88acedd5248d6778a82f46Chris Lattner } 191cb888967400a03504c88acedd5248d6778a82f46Chris Lattner } 1924fcd399a52ae45ed8ebfdd3a25e01cfb76fa366dDouglas Gregor else { 1934fcd399a52ae45ed8ebfdd3a25e01cfb76fa366dDouglas Gregor /* don't remove this instruction, but check if the preceeding 1944fcd399a52ae45ed8ebfdd3a25e01cfb76fa366dDouglas Gregor * instructions are to be removed. 195cb888967400a03504c88acedd5248d6778a82f46Chris Lattner */ 196cb888967400a03504c88acedd5248d6778a82f46Chris Lattner if (removeCount > 0) { 197bcba201a1118d7852b8b97187d495ae2a4f49519Anders Carlsson GLint removeStart = removeEnd - removeCount + 1; 198cb888967400a03504c88acedd5248d6778a82f46Chris Lattner _mesa_delete_instructions(prog, removeStart, removeCount); 1995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer removeStart = removeCount = 0; /* reset removal info */ 2005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Finish removing if the first instruction was to be removed. */ 2045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (removeCount > 0) { 2055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLint removeStart = removeEnd - removeCount + 1; 2065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer _mesa_delete_instructions(prog, removeStart, removeCount); 2075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return totalRemoved; 2095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 2105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** 2135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Remap register indexes according to map. 2145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * \param prog the program to search/replace 2155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * \param file the type of register file to search/replace 2165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * \param map maps old register indexes to new indexes 2175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */ 2185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic void 2195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerreplace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) 2205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{ 2215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint i; 2225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (i = 0; i < prog->NumInstructions; i++) { 2245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer struct prog_instruction *inst = prog->Instructions + i; 2255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 2265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint j; 2275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (j = 0; j < numSrc; j++) { 2285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->SrcReg[j].File == file) { 2295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint index = inst->SrcReg[j].Index; 2305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(map[index] >= 0); 2315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer inst->SrcReg[j].Index = map[index]; 2325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->DstReg.File == file) { 2355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const GLuint index = inst->DstReg.Index; 2365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(map[index] >= 0); 23766b5a8a39088598c01a9fa6f032dc908612dc8ecAnders Carlsson inst->DstReg.Index = map[index]; 238418f6c7d142e5ff4607f70cd8431d008442bafe9Chris Lattner } 2394c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor } 240c5ae899b4bbf65488445316c63168079177db0edSteve Naroff} 2410bb76897bedb8b747efc6523efb432fc24966118Douglas Gregor 242a9c878086036de36482cc21e35a33cabe9699b0aDouglas Gregor 243418f6c7d142e5ff4607f70cd8431d008442bafe9Chris Lattner/** 244418f6c7d142e5ff4607f70cd8431d008442bafe9Chris Lattner * Remove dead instructions from the given program. 24566b5a8a39088598c01a9fa6f032dc908612dc8ecAnders Carlsson * This is very primitive for now. Basically look for temp registers 2465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * that are written to but never read. Remove any instructions that 2474c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor * write to such registers. Be careful with condition code setters. 2484c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor */ 2494c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregorstatic GLboolean 2504c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor_mesa_remove_dead_code_global(struct gl_program *prog) 2514c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor{ 2524c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4]; 2534c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor GLboolean *removeInst; /* per-instruction removal flag */ 2544c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor GLuint i, rem = 0, comp; 2554c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor 2564c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor memset(tempRead, 0, sizeof(tempRead)); 2574c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor 2584c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor if (dbg) { 2594c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor printf("Optimize: Begin dead code removal\n"); 2604c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor /*_mesa_print_program(prog);*/ 2614c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor } 2624c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor 2634c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor removeInst = (GLboolean *) 2644c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor calloc(1, prog->NumInstructions * sizeof(GLboolean)); 265bfdcae678d44906293e21c0cddc6537f3ee8b5a4Steve Naroff 2664eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff /* Determine which temps are read and written */ 2674eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff for (i = 0; i < prog->NumInstructions; i++) { 2684eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff const struct prog_instruction *inst = prog->Instructions + i; 2694eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 2704eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff GLuint j; 2714eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff 27256ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff /* check src regs */ 27356ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff for (j = 0; j < numSrc; j++) { 27456ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { 27556ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff const GLuint index = inst->SrcReg[j].Index; 27656ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff GLuint read_mask; 27756ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 27856ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff read_mask = get_src_arg_mask(inst, j, NO_MASK); 2795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->SrcReg[j].RelAddr) { 2815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (dbg) 2825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("abort remove dead code (indirect temp)\n"); 2835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer goto done; 2845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (comp = 0; comp < 4; comp++) { 2875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); 2885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(swz < 4); 2895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if ((read_mask & (1 << swz)) == 0) 2905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer continue; 2915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (swz <= SWIZZLE_W) 2925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer tempRead[index][swz] = GL_TRUE; 2935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 2965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 2975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* check dst reg */ 2985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->DstReg.File == PROGRAM_TEMPORARY) { 2995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const GLuint index = inst->DstReg.Index; 3005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 3015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 3025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->DstReg.RelAddr) { 3035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (dbg) 3045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("abort remove dead code (indirect temp)\n"); 3055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer goto done; 3065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 3075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 3085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->CondUpdate) { 3095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* If we're writing to this register and setting condition 3105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * codes we cannot remove the instruction. Prevent removal 3115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * by setting the 'read' flag. 3125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */ 3135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer tempRead[index][0] = GL_TRUE; 314e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner tempRead[index][1] = GL_TRUE; 315e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner tempRead[index][2] = GL_TRUE; 316e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner tempRead[index][3] = GL_TRUE; 317e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner } 318e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner } 319e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner } 320e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner 321e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner /* find instructions that write to dead registers, flag for removal */ 322e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner for (i = 0; i < prog->NumInstructions; i++) { 323eb14fe839ec24c2ca14e5f094be147a34e3d3339Chris Lattner struct prog_instruction *inst = prog->Instructions + i; 3241f683e9cf2e855b32d3fb4b142aa121cd3cf1088Chris Lattner const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode); 3255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 326ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) { 327ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian GLint chan, index = inst->DstReg.Index; 328ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian 329ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian for (chan = 0; chan < 4; chan++) { 330ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian if (!tempRead[index][chan] && 331ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian inst->DstReg.WriteMask & (1 << chan)) { 332ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian if (dbg) { 3335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("Remove writemask on %u.%c\n", i, 3345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer chan == 3 ? 'w' : 'x' + chan); 3355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 3365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer inst->DstReg.WriteMask &= ~(1 << chan); 3375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer rem++; 338211f6adf1301a1461015fb6cb08a05f0a35b65f3Eli Friedman } 3395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 340b4609806e9232593ece09ce08b630836e825865cDouglas Gregor 3415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->DstReg.WriteMask == 0) { 3425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* If we cleared all writes, the instruction can be removed. */ 3435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (dbg) 344a9c01021724b9b546d282b8609cbe559734812ecChris Lattner printf("Remove instruction %u: \n", i); 345a9c01021724b9b546d282b8609cbe559734812ecChris Lattner removeInst[i] = GL_TRUE; 346611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner } 347611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner } 348611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner } 349611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner 350611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner /* now remove the instructions which aren't needed */ 351611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner rem = remove_instructions(prog, removeInst); 352611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner 353611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner if (dbg) { 354611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner printf("Optimize: End dead code removal.\n"); 355611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner printf(" %u channel writes removed\n", rem); 356611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner printf(" %u instructions removed\n", rem); 357611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner /*_mesa_print_program(prog);*/ 3586eec8e883de118b431e3ead5b1e604a6ac68ff6bDouglas Gregor } 359987a14bf5883ef6e5d07f1c83eb6d41a8212a78cArgyrios Kyrtzidis 3605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerdone: 3615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer free(removeInst); 3625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return rem != 0; 3635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 3645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 36504421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner 3664be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedmanenum inst_use 3674be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedman{ 3684be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedman READ, 3694be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedman WRITE, 37004421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner FLOW, 37104421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner END 3724c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl}; 3734c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl 3744c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl 3754c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl/** 3764c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl * Scan forward in program from 'start' for the next occurances of TEMP[index]. 3774c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl * We look if an instruction reads the component given by the masks and if they 3784c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl * are overwritten. 3795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator 3805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * that we can't look further. 381ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor */ 382ba7e210a999275695f58be03ef402758cfec3635Douglas Gregorstatic enum inst_use 383ba7e210a999275695f58be03ef402758cfec3635Douglas Gregorfind_next_use(const struct gl_program *prog, 38472c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor GLuint start, 38572c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor GLuint index, 38672c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor GLuint mask) 38772c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor{ 38872c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor GLuint i; 38972c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor 39044b4321feab46299d3f5cfd404680884752a0fcfDouglas Gregor for (i = start; i < prog->NumInstructions; i++) { 391ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor const struct prog_instruction *inst = prog->Instructions + i; 392ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor switch (inst->Opcode) { 393ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor case OPCODE_BGNLOOP: 394ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor case OPCODE_BGNSUB: 395ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor case OPCODE_BRA: 3965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_CAL: 3975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_CONT: 3985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_IF: 3995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_ELSE: 4005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_ENDIF: 4015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_ENDLOOP: 4025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_ENDSUB: 4035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_RET: 4045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return FLOW; 4057da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner case OPCODE_END: 40608ad47cbd1f81fcb31dbc731c13b885a07e12704Bill Wendling return END; 4075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer default: 40828be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner { 40998cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 41098cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor GLuint j; 41198cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor for (j = 0; j < numSrc; j++) { 4125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->SrcReg[j].RelAddr || 4135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer (inst->SrcReg[j].File == PROGRAM_TEMPORARY && 414acb818a4f7d9b608826171094d6b5a555a8fe694Steve Naroff inst->SrcReg[j].Index == index && 41528be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner (get_src_arg_mask(inst,j,NO_MASK) & mask))) 416acb818a4f7d9b608826171094d6b5a555a8fe694Steve Naroff return READ; 417acb818a4f7d9b608826171094d6b5a555a8fe694Steve Naroff } 41898cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 && 41998cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor inst->DstReg.File == PROGRAM_TEMPORARY && 420cb4f9a6d0be445d84af35ba81323be6127816f9eChris Lattner inst->DstReg.Index == index) { 42108ad47cbd1f81fcb31dbc731c13b885a07e12704Bill Wendling mask &= ~inst->DstReg.WriteMask; 42208ad47cbd1f81fcb31dbc731c13b885a07e12704Bill Wendling if (mask == 0) 4235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return WRITE; 4245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 4255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 4267323a6297edad643c202594dcf3d9a174de96ca6Anders Carlsson } 4275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 4285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return END; 4295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 43028be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner 4315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 4321a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor/** 4331a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor * Is the given instruction opcode a flow-control opcode? 434ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor * XXX maybe move this into prog_instruction.[ch] 435ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor */ 4365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic GLboolean 4375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer_mesa_is_flow_control_opcode(enum prog_opcode opcode) 4384111024be81e7c0525e42dadcc126d27e5bf2425Chris Lattner{ 439dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff switch (opcode) { 440dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff case OPCODE_BGNLOOP: 4414f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff case OPCODE_BGNSUB: 442dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff case OPCODE_BRA: 443dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff case OPCODE_CAL: 444dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff case OPCODE_CONT: 44586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor case OPCODE_IF: 4465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer case OPCODE_ELSE: 44786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor case OPCODE_END: 44886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor case OPCODE_ENDIF: 44986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor case OPCODE_ENDLOOP: 45086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor case OPCODE_ENDSUB: 45186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor case OPCODE_RET: 45286f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor return GL_TRUE; 45386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor default: 45486f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor return GL_FALSE; 45586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor } 45686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor} 45786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor 45886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor 45986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor/** 46086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor * Test if the given instruction is a simple MOV (no conditional updating, 46186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor * not relative addressing, no negation/abs, etc). 46286f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor */ 46386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorstatic GLboolean 46486f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorcan_downward_mov_be_modifed(const struct prog_instruction *mov) 46586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor{ 46686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor return 46786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->Opcode == OPCODE_MOV && 46886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->CondUpdate == GL_FALSE && 46986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->SrcReg[0].RelAddr == 0 && 47086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->SrcReg[0].Negate == 0 && 47186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->SrcReg[0].Abs == 0 && 47286f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->SrcReg[0].HasIndex2 == 0 && 47386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->SrcReg[0].RelAddr2 == 0 && 47486f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->DstReg.RelAddr == 0 && 47586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->DstReg.CondMask == COND_TR && 47686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor mov->SaturateMode == SATURATE_OFF; 47786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor} 47886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor 47986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor 48086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorstatic GLboolean 48186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorcan_upward_mov_be_modifed(const struct prog_instruction *mov) 48228be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner{ 483fdd75663fffeb2058a7847975e50837e61200593Anton Korobeynikov return 4847da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner can_downward_mov_be_modifed(mov) && 4855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer mov->DstReg.File == PROGRAM_TEMPORARY; 4867da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner} 4877da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner 4887da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner 489baf0d6678418e0dd9309438c3e50274253cfc7b2Chris Lattner/** 490baf0d6678418e0dd9309438c3e50274253cfc7b2Chris Lattner * Try to remove use of extraneous MOV instructions, to free them up for dead 49128be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner * code removal. 49274253736184c0717a0649922551bf9d8b6815651Douglas Gregor */ 49374253736184c0717a0649922551bf9d8b6815651Douglas Gregorstatic void 49474253736184c0717a0649922551bf9d8b6815651Douglas Gregor_mesa_remove_extra_move_use(struct gl_program *prog) 49574253736184c0717a0649922551bf9d8b6815651Douglas Gregor{ 49674253736184c0717a0649922551bf9d8b6815651Douglas Gregor GLuint i, j; 4975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 498eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor if (dbg) { 499eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor printf("Optimize: Begin remove extra move use\n"); 500eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor _mesa_print_program(prog); 5015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 50228be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner 503eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor /* 504eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor * Look for sequences such as this: 505eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor * MOV tmpX, arg0; 506337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor * ... 507337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor * FOO tmpY, tmpX, arg1; 508337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor * and convert into: 509337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor * MOV tmpX, arg0; 510337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor * ... 511224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl * FOO tmpY, arg0, arg1; 512224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl */ 513224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl 514224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl for (i = 0; i + 1 < prog->NumInstructions; i++) { 515224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl const struct prog_instruction *mov = prog->Instructions + i; 516224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl GLuint dst_mask, src_mask; 517bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor if (can_upward_mov_be_modifed(mov) == GL_FALSE) 518eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor continue; 519eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor 520bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor /* Scanning the code, we maintain the components which are still active in 521bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor * these two masks 522bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor */ 523bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor dst_mask = mov->DstReg.WriteMask; 524bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor src_mask = get_src_arg_mask(mov, 0, NO_MASK); 525bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor 526bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor /* Walk through remaining instructions until the or src reg gets 527bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor * rewritten or we get into some flow-control, eliminating the use of 528bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor * this MOV. 529eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor */ 53059b5da6d853b4368b984700315adf7b37de05764Nate Begeman for (j = i + 1; j < prog->NumInstructions; j++) { 531b4609806e9232593ece09ce08b630836e825865cDouglas Gregor struct prog_instruction *inst2 = prog->Instructions + j; 53288a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor GLuint arg; 53388a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor 5349d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor if (_mesa_is_flow_control_opcode(inst2->Opcode)) 5359d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor break; 5369d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor 53727c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor /* First rewrite this instruction's args if appropriate. */ 5389d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { 53988a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor GLuint comp, read_mask; 54088a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor 54188a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor if (inst2->SrcReg[arg].File != mov->DstReg.File || 54288a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor inst2->SrcReg[arg].Index != mov->DstReg.Index || 5439d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor inst2->SrcReg[arg].RelAddr || 5449d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor inst2->SrcReg[arg].Abs) 5459d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor continue; 546e6386394677ed4f77b20e2e3d5446a3a2f628e53Steve Naroff read_mask = get_src_arg_mask(inst2, arg, NO_MASK); 547e6386394677ed4f77b20e2e3d5446a3a2f628e53Steve Naroff 548670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner /* Adjust the swizzles of inst2 to point at MOV's source if ALL the 549670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner * components read still come from the mov instructions 550670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner */ 551670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && 552670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner (read_mask & dst_mask) == read_mask) { 553670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner for (comp = 0; comp < 4; comp++) { 554670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner const GLuint inst2_swz = 555213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); 556213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); 557fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); 558fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff inst2->SrcReg[arg].Swizzle |= s << (3 * comp); 559027282d1c1ac151aa7b1b3b45babc918b8ad456aSteve Naroff inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> 560027282d1c1ac151aa7b1b3b45babc918b8ad456aSteve Naroff inst2_swz) & 0x1) << comp); 561799a6a6850af625946bb8d88ca960bb6604e3858Steve Naroff } 562799a6a6850af625946bb8d88ca960bb6604e3858Steve Naroff inst2->SrcReg[arg].File = mov->SrcReg[0].File; 5635daf570d0ce027e18ed5f9d66e6b2a14a40b720dFariborz Jahanian inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; 564670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner } 565d9f6910f4ef37c0e8eeee2a01287d9572c3176efChris Lattner } 566796da18402f286b897782a298ae3b20c459c102eDouglas Gregor 5679d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor /* The source of MOV is written. This potentially deactivates some 5689d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor * components from the src and dst of the MOV instruction 56904421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner */ 57028be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner if (inst2->DstReg.File == mov->DstReg.File && 57124b41fa8239c63b9eb570d3e83c4a82840656a65Argyrios Kyrtzidis (inst2->DstReg.RelAddr || 57224b41fa8239c63b9eb570d3e83c4a82840656a65Argyrios Kyrtzidis inst2->DstReg.Index == mov->DstReg.Index)) { 5736eec8e883de118b431e3ead5b1e604a6ac68ff6bDouglas Gregor dst_mask &= ~inst2->DstReg.WriteMask; 5749d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor src_mask = get_src_arg_mask(mov, 0, dst_mask); 5759d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor } 5769d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor 5779d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor /* Idem when the destination of mov is written */ 5789d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor if (inst2->DstReg.File == mov->SrcReg[0].File && 5799d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor (inst2->DstReg.RelAddr || 5809d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor inst2->DstReg.Index == mov->SrcReg[0].Index)) { 5819d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor src_mask &= ~inst2->DstReg.WriteMask; 5829d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor dst_mask &= get_dst_mask_for_mov(mov, src_mask); 5839d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor } 5849d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor if (dst_mask == 0) 5859d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor break; 586c42e1183846228a7fa5143ad76507d6d60f5c6f3Sebastian Redl } 587c42e1183846228a7fa5143ad76507d6d60f5c6f3Sebastian Redl } 588c42e1183846228a7fa5143ad76507d6d60f5c6f3Sebastian Redl 5895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (dbg) { 5905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("Optimize: End remove extra move use.\n"); 5915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /*_mesa_print_program(prog);*/ 5925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 5935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 5945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 5955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 5965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** 5975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Complements dead_code_global. Try to remove code in block of code by 5985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * carefully monitoring the swizzles. Both functions should be merged into one 5995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * with a proper control flow graph 60028be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner */ 60128be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattnerstatic GLboolean 6025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer_mesa_remove_dead_code_local(struct gl_program *prog) 6035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{ 604ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor GLboolean *removeInst; 605ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor GLuint i, arg, rem = 0; 606ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor 607ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor removeInst = (GLboolean *) 608ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor calloc(1, prog->NumInstructions * sizeof(GLboolean)); 609ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor 610ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor for (i = 0; i < prog->NumInstructions; i++) { 6115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const struct prog_instruction *inst = prog->Instructions + i; 6125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const GLuint index = inst->DstReg.Index; 613fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff const GLuint mask = inst->DstReg.WriteMask; 614ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner enum inst_use use; 615ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner 616ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner /* We must deactivate the pass as soon as some indirection is used */ 617ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner if (inst->DstReg.RelAddr) 618ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner goto done; 619ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) 620ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner if (inst->SrcReg[arg].RelAddr) 621ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner goto done; 622ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner 62386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor if (_mesa_is_flow_control_opcode(inst->Opcode) || 6245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer _mesa_num_inst_dst_regs(inst->Opcode) == 0 || 625c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner inst->DstReg.File != PROGRAM_TEMPORARY || 626c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner inst->DstReg.RelAddr) 627c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner continue; 628c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner 6295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer use = find_next_use(prog, i+1, index, mask); 630c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner if (use == WRITE || use == END) 6315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer removeInst[i] = GL_TRUE; 632c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner } 6335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 6345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer rem = remove_instructions(prog, removeInst); 635c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner 6365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerdone: 6375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer free(removeInst); 6385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return rem != 0; 6394f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff} 6404f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff 6414f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff 6424f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff/** 6434f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff * Try to inject the destination of mov as the destination of inst and recompute 6444f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff * the swizzles operators for the sources of inst if required. Return GL_TRUE 6454f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff * of the substitution was possible, GL_FALSE otherwise 6464f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff */ 6474f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroffstatic GLboolean 648d1fa6449e9dbdd667466e9e1e971aa17c9793e8aFariborz Jahanian_mesa_merge_mov_into_inst(struct prog_instruction *inst, 649ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian const struct prog_instruction *mov) 6506669db9d80d77d10f101aa9f8e488bbd2d98f76cFariborz Jahanian{ 651ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian /* Indirection table which associates destination and source components for 652ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian * the mov instruction 653ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian */ 654ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); 6555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 6565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Some components are not written by inst. We cannot remove the mov */ 6575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (mask != (inst->DstReg.WriteMask & mask)) 6582e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek return GL_FALSE; 6594cc627111453b75519d5130b57e06256da7b00e8Chris Lattner 6604cc627111453b75519d5130b57e06256da7b00e8Chris Lattner /* Depending on the instruction, we may need to recompute the swizzles. 6612e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek * Also, some other instructions (like TEX) are not linear. We will only 6621d09ecca892185ba067e47ba879f424de59950efChris Lattner * consider completely active sources and destinations 6631d09ecca892185ba067e47ba879f424de59950efChris Lattner */ 6641d09ecca892185ba067e47ba879f424de59950efChris Lattner switch (inst->Opcode) { 6654cc627111453b75519d5130b57e06256da7b00e8Chris Lattner 6662e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek /* Carstesian instructions: we compute the swizzle */ 6674cc627111453b75519d5130b57e06256da7b00e8Chris Lattner case OPCODE_MOV: 6682e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek case OPCODE_MIN: 669e9b12198c4cc7f5687960100351b4af006c14469Steve Naroff case OPCODE_MAX: 670e9b12198c4cc7f5687960100351b4af006c14469Steve Naroff case OPCODE_ABS: 6711a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor case OPCODE_ADD: 6721a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor case OPCODE_MAD: 6731d09ecca892185ba067e47ba879f424de59950efChris Lattner case OPCODE_MUL: 6741d09ecca892185ba067e47ba879f424de59950efChris Lattner case OPCODE_SUB: 6752e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek { 67663f067f5a75ae97b53dc7a6a6530e2c72c8bb7f8Seo Sanghyeon GLuint dst_to_src_comp[4] = {0,0,0,0}; 67763f067f5a75ae97b53dc7a6a6530e2c72c8bb7f8Seo Sanghyeon GLuint dst_comp, arg; 6781d09ecca892185ba067e47ba879f424de59950efChris Lattner for (dst_comp = 0; dst_comp < 4; ++dst_comp) { 6791d09ecca892185ba067e47ba879f424de59950efChris Lattner if (mov->DstReg.WriteMask & (1 << dst_comp)) { 680fb7080663a0f0abd687a6f58ac917d9a76318b73Chris Lattner const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); 6811d09ecca892185ba067e47ba879f424de59950efChris Lattner ASSERT(src_comp < 4); 6822e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek dst_to_src_comp[dst_comp] = src_comp; 683fb7080663a0f0abd687a6f58ac917d9a76318b73Chris Lattner } 6844cc627111453b75519d5130b57e06256da7b00e8Chris Lattner } 6852e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek 686d9f6910f4ef37c0e8eeee2a01287d9572c3176efChris Lattner /* Patch each source of the instruction */ 687fa28b30d5a1e93e5263be33e343532b900d2c643Chris Lattner for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { 68804421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner const GLuint arg_swz = inst->SrcReg[arg].Swizzle; 68904421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner inst->SrcReg[arg].Swizzle = 0; 6901d09ecca892185ba067e47ba879f424de59950efChris Lattner 6911d09ecca892185ba067e47ba879f424de59950efChris Lattner /* Reset each active component of the swizzle */ 6921d09ecca892185ba067e47ba879f424de59950efChris Lattner for (dst_comp = 0; dst_comp < 4; ++dst_comp) { 6934e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek GLuint src_comp, arg_comp; 6944e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) 6954e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek continue; 6964e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek src_comp = dst_to_src_comp[dst_comp]; 6974e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek ASSERT(src_comp < 4); 6984e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek arg_comp = GET_SWZ(arg_swz, src_comp); 6994e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek ASSERT(arg_comp < 4); 7004e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); 70156f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner } 70256f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner } 70356f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner inst->DstReg = mov->DstReg; 70456f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner return GL_TRUE; 70556f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner } 70656f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner 70756f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner /* Dot products and scalar instructions: we only change the destination */ 70856f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_RCP: 70956f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_SIN: 71056f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_COS: 71156f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_RSQ: 71256f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_POW: 71356f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_EX2: 71456f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner case OPCODE_LOG: 715898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor case OPCODE_DP2: 716898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor case OPCODE_DP3: 717898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor case OPCODE_DP4: 718898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor inst->DstReg = mov->DstReg; 719898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor return GL_TRUE; 720898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor 721898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor /* All other instructions require fully active components with no swizzle */ 722898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor default: 723898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || 724898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor inst->DstReg.WriteMask != WRITEMASK_XYZW) 725898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor return GL_FALSE; 726898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor inst->DstReg = mov->DstReg; 727898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor return GL_TRUE; 728898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor } 729898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor} 730898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor 731898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor 732898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor/** 733898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor * Try to remove extraneous MOV instructions from the given program. 734898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor */ 735c9e8f606787b0bc0c3b08e566b87cc1751694168Eli Friedmanstatic GLboolean 736c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman_mesa_remove_extra_moves(struct gl_program *prog) 737c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman{ 738c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman GLboolean *removeInst; /* per-instruction removal flag */ 739c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman GLuint i, rem = 0, nesting = 0; 740c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman 741c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman if (dbg) { 742e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson printf("Optimize: Begin remove extra moves\n"); 743c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman _mesa_print_program(prog); 744e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson } 745e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson 74659b5da6d853b4368b984700315adf7b37de05764Nate Begeman removeInst = (GLboolean *) 74759b5da6d853b4368b984700315adf7b37de05764Nate Begeman calloc(1, prog->NumInstructions * sizeof(GLboolean)); 748c9e8f606787b0bc0c3b08e566b87cc1751694168Eli Friedman 74959b5da6d853b4368b984700315adf7b37de05764Nate Begeman /* 750e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson * Look for sequences such as this: 751e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson * FOO tmpX, arg0, arg1; 752e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson * MOV tmpY, tmpX; 753e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson * and convert into: 754c9e8f606787b0bc0c3b08e566b87cc1751694168Eli Friedman * FOO tmpY, arg0, arg1; 755e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson */ 756e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson 757c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman for (i = 0; i < prog->NumInstructions; i++) { 758e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson const struct prog_instruction *mov = prog->Instructions + i; 7593498bdb9e9cb300de74c7b51c92608e2902b2348Douglas Gregor 7603498bdb9e9cb300de74c7b51c92608e2902b2348Douglas Gregor switch (mov->Opcode) { 761c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_BGNLOOP: 762c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_BGNSUB: 763c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_IF: 764c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman nesting++; 765c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman break; 766c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_ENDLOOP: 767c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_ENDSUB: 768c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_ENDIF: 769c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman nesting--; 770c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman break; 771c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman case OPCODE_MOV: 772c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman if (i > 0 && 773c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman can_downward_mov_be_modifed(mov) && 774c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman mov->SrcReg[0].File == PROGRAM_TEMPORARY && 775c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman nesting == 0) 776c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman { 77732a311e276cd4bcafddd38db679aff9804e66bd4Eli Friedman 7784e716e07d90a18ac83dd94d157ec676530bc78f9Sebastian Redl /* see if this MOV can be removed */ 7794e716e07d90a18ac83dd94d157ec676530bc78f9Sebastian Redl const GLuint id = mov->SrcReg[0].Index; 780e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson struct prog_instruction *prevInst; 781e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson GLuint prevI; 782c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman 78338374b05791ee93300b9fbe8ceb3957f54184b37Steve Naroff /* get pointer to previous instruction */ 78438374b05791ee93300b9fbe8ceb3957f54184b37Steve Naroff prevI = i - 1; 7855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (prevI > 0 && removeInst[prevI]) 7865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer prevI--; 7875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer prevInst = prog->Instructions + prevI; 7885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 7895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (prevInst->DstReg.File == PROGRAM_TEMPORARY && 7905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer prevInst->DstReg.Index == id && 7915f6b632391e24b08ead3e62ba2e2765e770382edNuno Lopes prevInst->DstReg.RelAddr == 0 && 7925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer prevInst->DstReg.CondSrc == 0 && 7935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer prevInst->DstReg.CondMask == COND_TR) { 7945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 7955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const GLuint dst_mask = prevInst->DstReg.WriteMask; 7965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); 7975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 7985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (next_use == WRITE || next_use == END) { 7995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* OK, we can safely remove this MOV instruction. 8005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Transform: 801ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner * prevI: FOO tempIndex, x, y; 802ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner * i: MOV z, tempIndex; 803ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner * Into: 804ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner * prevI: FOO z, x, y; 805ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner */ 806590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner if (_mesa_merge_mov_into_inst(prevInst, mov)) { 807590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner removeInst[i] = GL_TRUE; 808a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman if (dbg) { 809a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman printf("Remove MOV at %u\n", i); 810a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman printf("new prev inst %u: ", prevI); 811a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman _mesa_print_instruction(prevInst); 812a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman } 813a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman } 8145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 8155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 8165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 8175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 8185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer default: 8195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ; /* nothing */ 820590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner } 8215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 8225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 8235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* now remove the instructions which aren't needed */ 8242eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner rem = remove_instructions(prog, removeInst); 8252eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner 82698be4943e8dc4f3905629a7102668960873cf863Chris Lattner free(removeInst); 8272eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner 828f0fbcb356bc4eb5f6467dc4fc4bc447e9a630102Chris Lattner if (dbg) { 8295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("Optimize: End remove extra moves. %u instructions removed\n", rem); 8302eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner /*_mesa_print_program(prog);*/ 831b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson } 832b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson 833b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson return rem != 0; 834b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson} 835b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson 836b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson 837b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson/** A live register interval */ 8387267f7832e5f0c7f951765e201c5a2650eb1637bArgyrios Kyrtzidisstruct interval 8397267f7832e5f0c7f951765e201c5a2650eb1637bArgyrios Kyrtzidis{ 8407267f7832e5f0c7f951765e201c5a2650eb1637bArgyrios Kyrtzidis GLuint Reg; /** The temporary register index */ 8417b658aa315784fa489a32daeacfe400512ce468dSteve Naroff GLuint Start, End; /** Start/end instruction numbers */ 8427b658aa315784fa489a32daeacfe400512ce468dSteve Naroff}; 84398be4943e8dc4f3905629a7102668960873cf863Chris Lattner 844ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar 845ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar/** A list of register intervals */ 846ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbarstruct interval_list 847ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar{ 848ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar GLuint Num; 849ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 850ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar}; 851389cecc83f33e93c8ba6bf2e8269b8690404416fSteve Naroff 8527b658aa315784fa489a32daeacfe400512ce468dSteve Naroff 853b4609806e9232593ece09ce08b630836e825865cDouglas Gregorstatic void 854b4609806e9232593ece09ce08b630836e825865cDouglas Gregorappend_interval(struct interval_list *list, const struct interval *inv) 85513b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff{ 85698be4943e8dc4f3905629a7102668960873cf863Chris Lattner list->Intervals[list->Num++] = *inv; 857a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner} 858a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner 859a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner 860a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner/** Insert interval inv into list, sorted by interval end */ 8611c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlssonstatic void 8621c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlssoninsert_interval_by_end(struct interval_list *list, const struct interval *inv) 8631c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlsson{ 8641c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlsson /* XXX we could do a binary search insertion here since list is sorted */ 8651c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlsson GLint i = list->Num - 1; 866a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner while (i >= 0 && list->Intervals[i].End > inv->End) { 867a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner list->Intervals[i + 1] = list->Intervals[i]; 868a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner i--; 869a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner } 87013b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff list->Intervals[i + 1] = *inv; 87113b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff list->Num++; 87213b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff 8735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#ifdef DEBUG 8741a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor { 8755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint i; 8765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (i = 0; i + 1 < list->Num; i++) { 8775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); 8785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 8795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 8804a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl#endif 8814a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl} 8824a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl 8834a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl 8844a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl/** Remove the given interval from the interval list */ 8854a4251b9e719415f30db0f5170abf31296a62225Sebastian Redlstatic void 8864a4251b9e719415f30db0f5170abf31296a62225Sebastian Redlremove_interval(struct interval_list *list, const struct interval *inv) 8874a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl{ 8884a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl /* XXX we could binary search since list is sorted */ 8894a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl GLuint k; 8904a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl for (k = 0; k < list->Num; k++) { 8915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (list->Intervals[k].Reg == inv->Reg) { 8925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* found, remove it */ 8935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(list->Intervals[k].Start == inv->Start); 8945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(list->Intervals[k].End == inv->End); 8955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer while (k < list->Num - 1) { 8960518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl list->Intervals[k] = list->Intervals[k + 1]; 8975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer k++; 8980518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl } 8990518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl list->Num--; 9005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return; 9015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 9025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 9035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 9045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 9055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 9065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** called by qsort() */ 9075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic int 9085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencercompare_start(const void *a, const void *b) 90976e773a443be9f006610f46529e07d4c8d857680Chris Lattner{ 9105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const struct interval *ia = (const struct interval *) a; 911bf7553889594886213e05cd3f94a6d6f7747bf7bChris Lattner const struct interval *ib = (const struct interval *) b; 91298be4943e8dc4f3905629a7102668960873cf863Chris Lattner if (ia->Start < ib->Start) 9135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return -1; 9145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else if (ia->Start > ib->Start) 9155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return +1; 9165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else 9175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return 0; 9185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 9195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 9205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 9215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** sort the interval list according to interval starts */ 9225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic void 9235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencersort_interval_list_by_start(struct interval_list *list) 9245a1deb8d9c0722beae28d693fa137bbb942bd11fAnders Carlsson{ 925aa1f9f1d50adf294674b74510b62e863b68572bcDaniel Dunbar qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); 9265a1deb8d9c0722beae28d693fa137bbb942bd11fAnders Carlsson#ifdef DEBUG 9275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer { 9285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint i; 9295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (i = 0; i + 1 < list->Num; i++) { 9300518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); 9310518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl } 932a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner } 933a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner#endif 93498be4943e8dc4f3905629a7102668960873cf863Chris Lattner} 935a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner 9360518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redlstruct loop_info 937a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner{ 9380518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl GLuint Start, End; /**< Start, end instructions of loop */ 939a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner}; 940a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner 941a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner/** 942a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner * Update the intermediate interval info for register 'index' and 943a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner * instruction 'ic'. 9440518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl */ 94565383479cb2caf0f136f58fecdbdbaf9c497b7a1Chris Lattnerstatic void 9465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerupdate_interval(GLint intBegin[], GLint intEnd[], 94765383479cb2caf0f136f58fecdbdbaf9c497b7a1Chris Lattner struct loop_info *loopStack, GLuint loopStackDepth, 9485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint index, GLuint ic) 94976e773a443be9f006610f46529e07d4c8d857680Chris Lattner{ 9500518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl int i; 951efdd1574e3cca00ef0410aee8b7ee36eda5a2f71Chris Lattner GLuint begin = ic; 952efdd1574e3cca00ef0410aee8b7ee36eda5a2f71Chris Lattner GLuint end = ic; 9536a24acbb3dbb3bea9426716bee6ad6023ad15f3fAnders Carlsson 95498be4943e8dc4f3905629a7102668960873cf863Chris Lattner /* If the register is used in a loop, extend its lifetime through the end 9556a24acbb3dbb3bea9426716bee6ad6023ad15f3fAnders Carlsson * of the outermost loop that doesn't contain its definition. 9560518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl */ 9576a24acbb3dbb3bea9426716bee6ad6023ad15f3fAnders Carlsson for (i = 0; i < loopStackDepth; i++) { 9580518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl if (intBegin[index] < loopStack[i].Start) { 959060e470e53b73167114284eb0b8c3e25bb3dad99Ted Kremenek end = loopStack[i].End; 9605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 9615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 9625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 9635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 964e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar /* Variables that are live at the end of a loop will also be live at the 965e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar * beginning, so an instruction inside of a loop should have its live 966e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar * interval begin at the start of the outermost loop. 967e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar */ 968b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) { 969b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman begin = loopStack[0].Start; 9705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 971e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar 9725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 9735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (intBegin[index] == -1) { 9745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer ASSERT(intEnd[index] == -1); 9755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer intBegin[index] = begin; 9765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer intEnd[index] = end; 9775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 9785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else { 979e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar intEnd[index] = end; 9805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 9815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 982e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar 9835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 9845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** 985590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner * Find first/last instruction that references each temporary register. 9865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */ 9875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerGLboolean 9885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer_mesa_find_temp_intervals(const struct prog_instruction *instructions, 989590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner GLuint numInstructions, 9905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS], 9915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) 9925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{ 9935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer struct loop_info loopStack[MAX_LOOP_NESTING]; 9945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint loopStackDepth = 0; 9955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint i; 9965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 9975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ 998e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar intBegin[i] = intEnd[i] = -1; 9995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 10005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Scan instructions looking for temporary registers */ 10025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (i = 0; i < numInstructions; i++) { 10035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const struct prog_instruction *inst = instructions + i; 10045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inst->Opcode == OPCODE_BGNLOOP) { 10055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer loopStack[loopStackDepth].Start = i; 1006e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar loopStack[loopStackDepth].End = inst->BranchTarget; 10075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer loopStackDepth++; 10085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 10095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else if (inst->Opcode == OPCODE_ENDLOOP) { 10105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer loopStackDepth--; 10115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 10125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else if (inst->Opcode == OPCODE_CAL) { 10135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return GL_FALSE; 1014e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar } 10155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer else { 1016e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ 1017e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar GLuint j; 10185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (j = 0; j < numSrc; j++) { 1019e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { 1020e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar const GLuint index = inst->SrcReg[j].Index; 1021e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar if (inst->SrcReg[j].RelAddr) 10225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return GL_FALSE; 1023e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar update_interval(intBegin, intEnd, loopStack, loopStackDepth, 1024e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar index, i); 10255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 1026e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar } 1027e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar if (inst->DstReg.File == PROGRAM_TEMPORARY) { 1028e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar const GLuint index = inst->DstReg.Index; 1029e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar if (inst->DstReg.RelAddr) 1030e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar return GL_FALSE; 1031e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar update_interval(intBegin, intEnd, loopStack, loopStackDepth, 1032e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar index, i); 1033e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar } 1034e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar } 10355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 1036e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar 10375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer return GL_TRUE; 10385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer} 1039e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar 10405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 1041b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman/** 1042b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * Find the live intervals for each temporary register in the program. 1043b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * For register R, the interval [A,B] indicates that R is referenced 1044b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * from instruction A through instruction B. 1045b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * Special consideration is needed for loops and subroutines. 1046b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason 1047b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman */ 1048b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedmanstatic GLboolean 1049b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedmanfind_live_intervals(struct gl_program *prog, 1050b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman struct interval_list *liveIntervals) 1051b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman{ 1052b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 1053b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 1054b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman GLuint i; 10555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* 10575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Note: we'll return GL_FALSE below if we find relative indexing 10585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * into the TEMP register file. We can't handle that yet. 10595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * We also give up on subroutines for now. 106026dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner */ 10616eec8e883de118b431e3ead5b1e604a6ac68ff6bDouglas Gregor 1062987a14bf5883ef6e5d07f1c83eb6d41a8212a78cArgyrios Kyrtzidis if (dbg) { 10630835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis printf("Optimize: Begin find intervals\n"); 10640835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis } 106526dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner 10665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* build intermediate arrays */ 106726dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, 106826dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner intBegin, intEnd)) 106926dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner return GL_FALSE; 10705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer 10715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Build live intervals list from intermediate arrays */ 1072987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner liveIntervals->Num = 0; 107398be4943e8dc4f3905629a7102668960873cf863Chris Lattner for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { 1074987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner if (intBegin[i] >= 0) { 10755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer struct interval inv; 107626dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner inv.Reg = i; 107726dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner inv.Start = intBegin[i]; 10785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer inv.End = intEnd[i]; 107926dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner append_interval(liveIntervals, &inv); 108026dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner } 108126dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner } 1082c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner 1083c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner /* Sort the list according to interval starts */ 1084c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner sort_interval_list_by_start(liveIntervals); 1085c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner 1086c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner if (dbg) { 108726dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner /* print interval info */ 108826dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner for (i = 0; i < liveIntervals->Num; i++) { 108926dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner const struct interval *inv = liveIntervals->Intervals + i; 10905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("Reg[%d] live [%d, %d]:", 10915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer inv->Reg, inv->Start, inv->End); 10925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (1) { 10935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint j; 10945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (j = 0; j < inv->Start; j++) 109526dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner printf(" "); 10965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (j = inv->Start; j <= inv->End; j++) 10975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer printf("x"); 1098987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner } 1099987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner printf("\n"); 1100987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner } 1101987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner } 1102987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner 1103987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner return GL_TRUE; 1104987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner} 1105c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner 1106c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner 1107c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner/** Scan the array of used register flags to find free entry */ 1108c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattnerstatic GLint 1109c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattneralloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) 1110c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner{ 1111c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner GLuint k; 11125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) { 1113987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner if (!usedRegs[k]) { 1114987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner usedRegs[k] = GL_TRUE; 1115ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner return k; 1116ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner } 1117ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner } 1118ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen return -1; 1119ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen} 1120ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner 1121ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen 1122ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen/** 1123987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner * This function implements "Linear Scan Register Allocation" to reduce 1124987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner * the number of temporary registers used by the program. 11255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * 11265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * We compute the "live interval" for all temporary registers then 11275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * examine the overlap of the intervals to allocate new registers. 11285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Basically, if two intervals do not overlap, they can use the same register. 112928daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner */ 113028daa53db73341b7ee7f269924ccfca1c6d179acChris Lattnerstatic void 113128daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner_mesa_reallocate_registers(struct gl_program *prog) 11325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{ 11335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer struct interval_list liveIntervals; 11345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 11355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 11365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLuint i; 11375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLint maxTemp = -1; 113828daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner 113928daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner if (dbg) { 114042b83dde7c700b34f9435ad746984169888ae705Chris Lattner printf("Optimize: Begin live-interval register reallocation\n"); 114142b83dde7c700b34f9435ad746984169888ae705Chris Lattner _mesa_print_program(prog); 114228daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner } 114342b83dde7c700b34f9435ad746984169888ae705Chris Lattner 114442b83dde7c700b34f9435ad746984169888ae705Chris Lattner for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ 114542b83dde7c700b34f9435ad746984169888ae705Chris Lattner registerMap[i] = -1; 114642b83dde7c700b34f9435ad746984169888ae705Chris Lattner usedRegs[i] = GL_FALSE; 114742b83dde7c700b34f9435ad746984169888ae705Chris Lattner } 114842b83dde7c700b34f9435ad746984169888ae705Chris Lattner 114942b83dde7c700b34f9435ad746984169888ae705Chris Lattner if (!find_live_intervals(prog, &liveIntervals)) { 115042b83dde7c700b34f9435ad746984169888ae705Chris Lattner if (dbg) 115142b83dde7c700b34f9435ad746984169888ae705Chris Lattner printf("Aborting register reallocation\n"); 115228daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner return; 11535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 11543907323dd6665c0c4e383435cb145233f4533406Anders Carlsson 11555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer { 11565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer struct interval_list activeIntervals; 11573907323dd6665c0c4e383435cb145233f4533406Anders Carlsson activeIntervals.Num = 0; 11583907323dd6665c0c4e383435cb145233f4533406Anders Carlsson 11595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* loop over live intervals, allocating a new register for each */ 11605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (i = 0; i < liveIntervals.Num; i++) { 11615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const struct interval *live = liveIntervals.Intervals + i; 116204421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner 116304421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner if (dbg) 116404421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner printf("Consider register %u\n", live->Reg); 116564b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl 116664b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl /* Expire old intervals. Intervals which have ended with respect 116764b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl * to the live interval can have their remapped registers freed. 116864b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl */ 11695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer { 11705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer GLint j; 11715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer for (j = 0; j < (GLint) activeIntervals.Num; j++) { 11725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer const struct interval *inv = activeIntervals.Intervals + j; 11735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (inv->End >= live->Start) { 11745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Stop now. Since the activeInterval list is sorted 11755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * we know we don't have to go further. 11765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */ 11775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer break; 11785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer } 1179efa9b3877ef298bcb792600ac33521827e1f7fafAnders Carlsson else { 1180efa9b3877ef298bcb792600ac33521827e1f7fafAnders Carlsson /* Interval 'inv' has expired */ 11810777972d38a3125efed962b045704c30ae6965cfSebastian Redl const GLint regNew = registerMap[inv->Reg]; 11820835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis ASSERT(regNew >= 0); 11836215dee86c0e715b9f2b0d401ab2a5fcf629f1afSebastian Redl 11840777972d38a3125efed962b045704c30ae6965cfSebastian Redl if (dbg) 11850777972d38a3125efed962b045704c30ae6965cfSebastian Redl printf(" expire interval for reg %u\n", inv->Reg); 11860777972d38a3125efed962b045704c30ae6965cfSebastian Redl 11870777972d38a3125efed962b045704c30ae6965cfSebastian Redl /* remove interval j from active list */ 11880777972d38a3125efed962b045704c30ae6965cfSebastian Redl remove_interval(&activeIntervals, inv); 11890777972d38a3125efed962b045704c30ae6965cfSebastian Redl j--; /* counter-act j++ in for-loop above */ 1190d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson 11910777972d38a3125efed962b045704c30ae6965cfSebastian Redl /* return register regNew to the free pool */ 11925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer if (dbg) 1193aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff printf(" free reg %d\n", regNew); 1194aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff ASSERT(usedRegs[regNew] == GL_TRUE); 1195d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson usedRegs[regNew] = GL_FALSE; 1196aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff } 1197aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff } 1198aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff } 1199d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson 12008123a95c33b792d35c2e4992ba6e27882748fb0dChris Lattner /* find a free register for this live interval */ 12018123a95c33b792d35c2e4992ba6e27882748fb0dChris Lattner { 120204421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner const GLint k = alloc_register(usedRegs); 1203d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson if (k < 0) { 12042d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor /* out of registers, give up */ 12052d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor return; 12062d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor } 1207aaffbf7c790a324ed114184db771aae2d2e9151cSteve Naroff registerMap[live->Reg] = k; 12082d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor maxTemp = MAX2(maxTemp, k); 1209aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff if (dbg) 1210aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff printf(" remap register %u -> %d\n", live->Reg, k); 1211aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff } 1212aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff 12135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* Insert this live interval into the active list which is sorted 12145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * by increasing end points. 1215d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson */ 1216d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson insert_interval_by_end(&activeIntervals, live); 1217d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson } 1218d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson } 1219efa9b3877ef298bcb792600ac33521827e1f7fafAnders Carlsson 1220d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson if (maxTemp + 1 < (GLint) liveIntervals.Num) { 12215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer /* OK, we've reduced the number of registers needed. 122231a458462c6cf417a84e0c47852b18fb22d79acbSteve Naroff * Scan the program and replace all the old temporary register 122327c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor * indexes with the new indexes. 122427c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor */ 122527c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor replace_regs(prog, PROGRAM_TEMPORARY, registerMap); 122627c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor 122786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor prog->NumTemporaries = maxTemp + 1; 122886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor } 122927c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor 123027c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor if (dbg) { 123127c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor printf("Optimize: End live-interval register reallocation\n"); 1232213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman printf("Num temp regs before: %u after: %u\n", 12338a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman liveIntervals.Num, maxTemp + 1); 12348a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman _mesa_print_program(prog); 12358a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman } 12364d0ac88428b3ed7c6f3a2f4e758ea5424ecd70aeChris Lattner} 12374d0ac88428b3ed7c6f3a2f4e758ea5424ecd70aeChris Lattner 12388a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman 1239213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman#if 0 1240fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroffstatic void 12417e3e9b152e06edf329ceb32190d3255f248d4d5fChris Lattnerprint_it(struct gl_context *ctx, struct gl_program *program, const char *txt) { 1242190d6a25393995b42e32086949a68285ee423fb9Nate Begeman fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions); 1243190d6a25393995b42e32086949a68285ee423fb9Nate Begeman _mesa_print_program(program); 1244190d6a25393995b42e32086949a68285ee423fb9Nate Begeman _mesa_print_program_parameters(ctx, program); 1245190d6a25393995b42e32086949a68285ee423fb9Nate Begeman fprintf(stderr, "\n\n"); 1246190d6a25393995b42e32086949a68285ee423fb9Nate Begeman} 1247190d6a25393995b42e32086949a68285ee423fb9Nate Begeman#endif 1248190d6a25393995b42e32086949a68285ee423fb9Nate Begeman 1249190d6a25393995b42e32086949a68285ee423fb9Nate Begeman 1250190d6a25393995b42e32086949a68285ee423fb9Nate Begeman/** 1251190d6a25393995b42e32086949a68285ee423fb9Nate Begeman * Apply optimizations to the given program to eliminate unnecessary 1252190d6a25393995b42e32086949a68285ee423fb9Nate Begeman * instructions, temp regs, etc. 1253fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff */ 12547e3e9b152e06edf329ceb32190d3255f248d4d5fChris Lattnervoid 1255fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) 1256fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff{ 1257fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff GLboolean any_change; 1258fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff 1259fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff /* Stop when no modifications were output */ 1260fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff do { 1261fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff any_change = GL_FALSE; 1262b8f849da3cedee2f61ad98389115ddd04e439d60Chris Lattner _mesa_remove_extra_move_use(program); 12638a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman if (_mesa_remove_dead_code_global(program)) 12643b8d116703db8018f855cbb4733ace426422623bNate Begeman any_change = GL_TRUE; 12653b8d116703db8018f855cbb4733ace426422623bNate Begeman if (_mesa_remove_extra_moves(program)) 12667e3e9b152e06edf329ceb32190d3255f248d4d5fChris Lattner any_change = GL_TRUE; 1267353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman if (_mesa_remove_dead_code_local(program)) 1268353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman any_change = GL_TRUE; 1269353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman _mesa_reallocate_registers(program); 1270353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman } while (any_change); 1271353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman} 1272353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman 1273353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman