prog_optimize.c revision 257cc48de2f4e472eb651a4c70042e5cb6b9fe0e
15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/*
25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Mesa 3-D graphics library
35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Version:  7.5
45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer *
50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner *
75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Permission is hereby granted, free of charge, to any person obtaining a
85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * copy of this software and associated documentation files (the "Software"),
95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * to deal in the Software without restriction, including without limitation
105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * the rights to use, copy, modify, merge, publish, distribute, sublicense,
115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * and/or sell copies of the Software, and to permit persons to whom the
125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Software is furnished to do so, subject to the following conditions:
135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer *
14c4a1dea2dc56bd1357ec91b829a0b9e68229a13eDaniel Dunbar * The above copyright notice and this permission notice shall be included
15a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner * in all copies or substantial portions of the Software.
162eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner *
17a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
1898cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19aaba5e346dffdbad5d1c42765a89e4a7afb0da67Douglas Gregor * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20e91593ef084479340582b2ba177b44be50a717b7Daniel Dunbar * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22da5a6b6d9fd52899499d5b7b46273ec844dcaa6eChris Lattner * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */
245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "main/glheader.h"
285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "main/context.h"
29da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "main/macros.h"
30da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "program.h"
31da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "prog_instruction.h"
32da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "prog_optimize.h"
33da8249e57f3badecf925571881fe57243935c6c1Chris Lattner#include "prog_print.h"
34ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen
35ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen
36ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen#define MAX_LOOP_NESTING 50
37da8249e57f3badecf925571881fe57243935c6c1Chris Lattner/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
38da8249e57f3badecf925571881fe57243935c6c1Chris Lattner * register allocate many temporary values into that small number of
39da8249e57f3badecf925571881fe57243935c6c1Chris Lattner * temps.  So allow large temporary indices coming into the register
40da8249e57f3badecf925571881fe57243935c6c1Chris Lattner * allocator.
416e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek */
426e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek#define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
436e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek
445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic GLboolean dbg = GL_FALSE;
455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#define NO_MASK 0xf
476e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek
485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/**
495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * are read from the given src in this instruction, We also provide
515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * one optional masks which may mask other components in the dst
525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * register
535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */
545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic GLuint
555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerget_src_arg_mask(const struct prog_instruction *inst,
566e94ef5696cfb005d3fc7bbac8dcf7690b64f0a5Ted Kremenek                 GLuint arg, GLuint dst_mask)
578189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek{
58353ffceafc6bcebd5592cb9d93ea3f9242e5370aTed Kremenek   GLuint read_mask, channel_mask;
59353ffceafc6bcebd5592cb9d93ea3f9242e5370aTed Kremenek   GLuint comp;
605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* Form the dst register, find the written channels */
645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   if (inst->CondUpdate) {
655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      channel_mask = WRITEMASK_XYZW;
665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   else {
685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      switch (inst->Opcode) {
695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_MOV:
705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_MIN:
715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_MAX:
725a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_ABS:
735a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_ADD:
745a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_MAD:
755a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_MUL:
765a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_SUB:
775a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_CMP:
785a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_FLR:
795a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_FRC:
805a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_LRP:
815a56ac30d04e8f0431a08980885662a47a6308aaTed Kremenek      case OPCODE_SEQ:
825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SGE:
835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SGT:
845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SLE:
855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SLT:
865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SNE:
875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SSG:
885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         channel_mask = inst->DstReg.WriteMask & dst_mask;
895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         break;
905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_RCP:
915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_SIN:
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_COS:
935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_RSQ:
945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_POW:
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_EX2:
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_LOG:
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         channel_mask = WRITEMASK_X;
985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         break;
995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_DP2:
10073d0d4fac161ed12926e010dcf8b448a8de6a2ecChris Lattner         channel_mask = WRITEMASK_XY;
1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         break;
1025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_DP3:
1035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_XPD:
1045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         channel_mask = WRITEMASK_XYZ;
1055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         break;
1065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      default:
1075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         channel_mask = WRITEMASK_XYZW;
108668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek         break;
1098189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek      }
110898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   }
111898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor
112898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   /* Now, given the src swizzle and the written channels, find which
113898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor    * components are actually read
114668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek    */
115668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek   read_mask = 0x0;
116b4609806e9232593ece09ce08b630836e825865cDouglas Gregor   for (comp = 0; comp < 4; ++comp) {
117b4609806e9232593ece09ce08b630836e825865cDouglas Gregor      const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
118b4609806e9232593ece09ce08b630836e825865cDouglas Gregor      ASSERT(coord < 4);
119668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek      if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
120b4609806e9232593ece09ce08b630836e825865cDouglas Gregor         read_mask |= 1 << coord;
121b4609806e9232593ece09ce08b630836e825865cDouglas Gregor   }
122e2ce1d9440186cf3332368291cd884a6e3ae8946Nate Begeman
123668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek   return read_mask;
124668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek}
125898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor
126898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor
127898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor/**
128898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor * For a MOV instruction, compute a write mask when src register also has
129668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek * a mask
130668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek */
13177ed8e4edf6ed78c53fb20ec3210aff2a59c9d87Ted Kremenekstatic GLuint
1325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerget_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
13377ed8e4edf6ed78c53fb20ec3210aff2a59c9d87Ted Kremenek{
134668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek   const GLuint mask = mov->DstReg.WriteMask;
1355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLuint comp;
1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLuint updated_mask = 0x0;
1375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
138668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek   ASSERT(mov->Opcode == OPCODE_MOV);
139668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek
140668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek   for (comp = 0; comp < 4; ++comp) {
141668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek      GLuint src_comp;
142668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek      if ((mask & (1 << comp)) == 0)
143668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek         continue;
144668bf91d31265b6ea8c3eb854ba450857701f269Ted Kremenek      src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
145d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner      if ((src_mask & (1 << src_comp)) == 0)
146d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner         continue;
147d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner      updated_mask |= 1 << comp;
1488189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek   }
149d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner
150d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner   return updated_mask;
151d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner}
152d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner
153d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner
154d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner/**
1558189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek * Ensure that the swizzle is regular.  That is, all of the swizzle
156d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
157d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner */
158d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattnerstatic GLboolean
159d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattneris_swizzle_regular(GLuint swz)
160d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner{
1615549976193e34417d4474a5f4a514268ef6666c7Ted Kremenek   return GET_SWZ(swz,0) <= SWIZZLE_W &&
162d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner          GET_SWZ(swz,1) <= SWIZZLE_W &&
163d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner          GET_SWZ(swz,2) <= SWIZZLE_W &&
164d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner          GET_SWZ(swz,3) <= SWIZZLE_W;
165d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner}
166d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner
167d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner
168d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner/**
1698189cde56b4f6f938cd65f53c932fe1860d0204cTed Kremenek * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
170d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner * \return number of instructions removed
171d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattner */
172d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattnerstatic GLuint
173d18b3299debb7b0dbd9d34d9369189dc98c87f53Chris Lattnerremove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
174cb888967400a03504c88acedd5248d6778a82f46Chris Lattner{
175cb888967400a03504c88acedd5248d6778a82f46Chris Lattner   GLint i, removeEnd = 0, removeCount = 0;
176cb888967400a03504c88acedd5248d6778a82f46Chris Lattner   GLuint totalRemoved = 0;
177c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff
178c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff   /* go backward */
179c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff   for (i = prog->NumInstructions - 1; i >= 0; i--) {
180c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff      if (removeFlags[i]) {
181c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff         totalRemoved++;
182cb888967400a03504c88acedd5248d6778a82f46Chris Lattner         if (removeCount == 0) {
183cb888967400a03504c88acedd5248d6778a82f46Chris Lattner            /* begin a run of instructions to remove */
184c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff            removeEnd = i;
185c4f8e8b645b8e0e685c089dde0674c6f29a24168Steve Naroff            removeCount = 1;
186cb888967400a03504c88acedd5248d6778a82f46Chris Lattner         }
187cb888967400a03504c88acedd5248d6778a82f46Chris Lattner         else {
188bcba201a1118d7852b8b97187d495ae2a4f49519Anders Carlsson            /* extend the run of instructions to remove */
189bcba201a1118d7852b8b97187d495ae2a4f49519Anders Carlsson            removeCount++;
190cb888967400a03504c88acedd5248d6778a82f46Chris Lattner         }
191cb888967400a03504c88acedd5248d6778a82f46Chris Lattner      }
1924fcd399a52ae45ed8ebfdd3a25e01cfb76fa366dDouglas Gregor      else {
1934fcd399a52ae45ed8ebfdd3a25e01cfb76fa366dDouglas Gregor         /* don't remove this instruction, but check if the preceeding
1944fcd399a52ae45ed8ebfdd3a25e01cfb76fa366dDouglas Gregor          * instructions are to be removed.
195cb888967400a03504c88acedd5248d6778a82f46Chris Lattner          */
196cb888967400a03504c88acedd5248d6778a82f46Chris Lattner         if (removeCount > 0) {
197bcba201a1118d7852b8b97187d495ae2a4f49519Anders Carlsson            GLint removeStart = removeEnd - removeCount + 1;
198cb888967400a03504c88acedd5248d6778a82f46Chris Lattner            _mesa_delete_instructions(prog, removeStart, removeCount);
1995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            removeStart = removeCount = 0; /* reset removal info */
2005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         }
2015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
2025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
2035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* Finish removing if the first instruction was to be removed. */
2045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   if (removeCount > 0) {
2055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      GLint removeStart = removeEnd - removeCount + 1;
2065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      _mesa_delete_instructions(prog, removeStart, removeCount);
2075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
2085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   return totalRemoved;
2095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
2105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/**
2135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Remap register indexes according to map.
2145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * \param prog  the program to search/replace
2155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * \param file  the type of register file to search/replace
2165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * \param map  maps old register indexes to new indexes
2175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */
2185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic void
2195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerreplace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
2205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{
2215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLuint i;
2225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   for (i = 0; i < prog->NumInstructions; i++) {
2245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      struct prog_instruction *inst = prog->Instructions + i;
2255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
2265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      GLuint j;
2275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      for (j = 0; j < numSrc; j++) {
2285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         if (inst->SrcReg[j].File == file) {
2295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            GLuint index = inst->SrcReg[j].Index;
2305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            ASSERT(map[index] >= 0);
2315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            inst->SrcReg[j].Index = map[index];
2325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         }
2335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
2345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (inst->DstReg.File == file) {
2355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         const GLuint index = inst->DstReg.Index;
2365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         ASSERT(map[index] >= 0);
23766b5a8a39088598c01a9fa6f032dc908612dc8ecAnders Carlsson         inst->DstReg.Index = map[index];
238418f6c7d142e5ff4607f70cd8431d008442bafe9Chris Lattner      }
2394c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   }
240c5ae899b4bbf65488445316c63168079177db0edSteve Naroff}
2410bb76897bedb8b747efc6523efb432fc24966118Douglas Gregor
242a9c878086036de36482cc21e35a33cabe9699b0aDouglas Gregor
243418f6c7d142e5ff4607f70cd8431d008442bafe9Chris Lattner/**
244418f6c7d142e5ff4607f70cd8431d008442bafe9Chris Lattner * Remove dead instructions from the given program.
24566b5a8a39088598c01a9fa6f032dc908612dc8ecAnders Carlsson * This is very primitive for now.  Basically look for temp registers
2465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * that are written to but never read.  Remove any instructions that
2474c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor * write to such registers.  Be careful with condition code setters.
2484c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor */
2494c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregorstatic GLboolean
2504c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor_mesa_remove_dead_code_global(struct gl_program *prog)
2514c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor{
2524c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
2534c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   GLboolean *removeInst; /* per-instruction removal flag */
2544c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   GLuint i, rem = 0, comp;
2554c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor
2564c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   memset(tempRead, 0, sizeof(tempRead));
2574c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor
2584c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   if (dbg) {
2594c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor      printf("Optimize: Begin dead code removal\n");
2604c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor      /*_mesa_print_program(prog);*/
2614c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   }
2624c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor
2634c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor   removeInst = (GLboolean *)
2644c67834407ca6ab344dcf44fc599ad4938cfa96dDouglas Gregor      calloc(1, prog->NumInstructions * sizeof(GLboolean));
265bfdcae678d44906293e21c0cddc6537f3ee8b5a4Steve Naroff
2664eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff   /* Determine which temps are read and written */
2674eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff   for (i = 0; i < prog->NumInstructions; i++) {
2684eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff      const struct prog_instruction *inst = prog->Instructions + i;
2694eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
2704eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff      GLuint j;
2714eb206bebcdab28ababe8df55c6185cec2cdc071Steve Naroff
27256ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff      /* check src regs */
27356ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff      for (j = 0; j < numSrc; j++) {
27456ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
27556ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff            const GLuint index = inst->SrcReg[j].Index;
27656ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff            GLuint read_mask;
27756ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff            ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
27856ee6896f2efebffb4a2cce5a7610cdf1eddbbbeSteve Naroff	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
2795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            if (inst->SrcReg[j].RelAddr) {
2815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               if (dbg)
2825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  printf("abort remove dead code (indirect temp)\n");
2835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               goto done;
2845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            }
2855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	    for (comp = 0; comp < 4; comp++) {
2875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
2885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	       ASSERT(swz < 4);
2895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               if ((read_mask & (1 << swz)) == 0)
2905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer		  continue;
2915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               if (swz <= SWIZZLE_W)
2925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  tempRead[index][swz] = GL_TRUE;
2935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	    }
2945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         }
2955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
2965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      /* check dst reg */
2985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
2995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         const GLuint index = inst->DstReg.Index;
3005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
3015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         if (inst->DstReg.RelAddr) {
3035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            if (dbg)
3045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               printf("abort remove dead code (indirect temp)\n");
3055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            goto done;
3065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         }
3075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         if (inst->CondUpdate) {
3095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            /* If we're writing to this register and setting condition
3105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             * codes we cannot remove the instruction.  Prevent removal
3115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             * by setting the 'read' flag.
3125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             */
3135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            tempRead[index][0] = GL_TRUE;
314e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner            tempRead[index][1] = GL_TRUE;
315e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner            tempRead[index][2] = GL_TRUE;
316e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner            tempRead[index][3] = GL_TRUE;
317e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner         }
318e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner      }
319e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner   }
320e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner
321e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner   /* find instructions that write to dead registers, flag for removal */
322e7716e6133e23e4a89248a65a388bc840d8c130cChris Lattner   for (i = 0; i < prog->NumInstructions; i++) {
323eb14fe839ec24c2ca14e5f094be147a34e3d3339Chris Lattner      struct prog_instruction *inst = prog->Instructions + i;
3241f683e9cf2e855b32d3fb4b142aa121cd3cf1088Chris Lattner      const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
3255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
326ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian      if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
327ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian         GLint chan, index = inst->DstReg.Index;
328ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian
329ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian	 for (chan = 0; chan < 4; chan++) {
330ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian	    if (!tempRead[index][chan] &&
331ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian		inst->DstReg.WriteMask & (1 << chan)) {
332ab38e4b50268633f037a10841fdfb612513f8d33Fariborz Jahanian	       if (dbg) {
3335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer		  printf("Remove writemask on %u.%c\n", i,
3345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer			       chan == 3 ? 'w' : 'x' + chan);
3355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	       }
3365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	       inst->DstReg.WriteMask &= ~(1 << chan);
3375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	       rem++;
338211f6adf1301a1461015fb6cb08a05f0a35b65f3Eli Friedman	    }
3395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	 }
340b4609806e9232593ece09ce08b630836e825865cDouglas Gregor
3415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	 if (inst->DstReg.WriteMask == 0) {
3425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	    /* If we cleared all writes, the instruction can be removed. */
3435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	    if (dbg)
344a9c01021724b9b546d282b8609cbe559734812ecChris Lattner	       printf("Remove instruction %u: \n", i);
345a9c01021724b9b546d282b8609cbe559734812ecChris Lattner	    removeInst[i] = GL_TRUE;
346611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner	 }
347611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner      }
348611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner   }
349611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner
350611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner   /* now remove the instructions which aren't needed */
351611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner   rem = remove_instructions(prog, removeInst);
352611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner
353611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner   if (dbg) {
354611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner      printf("Optimize: End dead code removal.\n");
355611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner      printf("  %u channel writes removed\n", rem);
356611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner      printf("  %u instructions removed\n", rem);
357611b2eccaf3869f32de51ecc02985426d1c0aaefChris Lattner      /*_mesa_print_program(prog);*/
3586eec8e883de118b431e3ead5b1e604a6ac68ff6bDouglas Gregor   }
359987a14bf5883ef6e5d07f1c83eb6d41a8212a78cArgyrios Kyrtzidis
3605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerdone:
3615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   free(removeInst);
3625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   return rem != 0;
3635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
3645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
36504421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner
3664be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedmanenum inst_use
3674be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedman{
3684be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedman   READ,
3694be1f47de20525ad90f02ba8682a7e2cbd3205d1Eli Friedman   WRITE,
37004421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner   FLOW,
37104421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner   END
3724c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl};
3734c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl
3744c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl
3754c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl/**
3764c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl * Scan forward in program from 'start' for the next occurances of TEMP[index].
3774c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl * We look if an instruction reads the component given by the masks and if they
3784c5d320a7581f4b80b151630c91cea5727fa9923Sebastian Redl * are overwritten.
3795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
3805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * that we can't look further.
381ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor */
382ba7e210a999275695f58be03ef402758cfec3635Douglas Gregorstatic enum inst_use
383ba7e210a999275695f58be03ef402758cfec3635Douglas Gregorfind_next_use(const struct gl_program *prog,
38472c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor              GLuint start,
38572c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor              GLuint index,
38672c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor              GLuint mask)
38772c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor{
38872c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor   GLuint i;
38972c3f314d92d65c050ee1c07b7753623c044d6c7Douglas Gregor
39044b4321feab46299d3f5cfd404680884752a0fcfDouglas Gregor   for (i = start; i < prog->NumInstructions; i++) {
391ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor      const struct prog_instruction *inst = prog->Instructions + i;
392ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor      switch (inst->Opcode) {
393ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor      case OPCODE_BGNLOOP:
394ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor      case OPCODE_BGNSUB:
395ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor      case OPCODE_BRA:
3965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_CAL:
3975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_CONT:
3985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_IF:
3995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_ELSE:
4005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_ENDIF:
4015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_ENDLOOP:
4025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_ENDSUB:
4035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      case OPCODE_RET:
4045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         return FLOW;
4057da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner      case OPCODE_END:
40608ad47cbd1f81fcb31dbc731c13b885a07e12704Bill Wendling         return END;
4075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      default:
40828be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner         {
40998cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor            const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
41098cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor            GLuint j;
41198cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor            for (j = 0; j < numSrc; j++) {
4125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               if (inst->SrcReg[j].RelAddr ||
4135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                   (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
414acb818a4f7d9b608826171094d6b5a555a8fe694Steve Naroff                   inst->SrcReg[j].Index == index &&
41528be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner                   (get_src_arg_mask(inst,j,NO_MASK) & mask)))
416acb818a4f7d9b608826171094d6b5a555a8fe694Steve Naroff                  return READ;
417acb818a4f7d9b608826171094d6b5a555a8fe694Steve Naroff            }
41898cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor            if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
41998cd599ee8a9b259ed7388ee2921a20d97658864Douglas Gregor                inst->DstReg.File == PROGRAM_TEMPORARY &&
420cb4f9a6d0be445d84af35ba81323be6127816f9eChris Lattner                inst->DstReg.Index == index) {
42108ad47cbd1f81fcb31dbc731c13b885a07e12704Bill Wendling               mask &= ~inst->DstReg.WriteMask;
42208ad47cbd1f81fcb31dbc731c13b885a07e12704Bill Wendling               if (mask == 0)
4235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  return WRITE;
4245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            }
4255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         }
4267323a6297edad643c202594dcf3d9a174de96ca6Anders Carlsson      }
4275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
4285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   return END;
4295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
43028be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner
4315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
4321a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor/**
4331a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor * Is the given instruction opcode a flow-control opcode?
434ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor * XXX maybe move this into prog_instruction.[ch]
435ba7e210a999275695f58be03ef402758cfec3635Douglas Gregor */
4365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic GLboolean
4375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer_mesa_is_flow_control_opcode(enum prog_opcode opcode)
4384111024be81e7c0525e42dadcc126d27e5bf2425Chris Lattner{
439dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff   switch (opcode) {
440dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff   case OPCODE_BGNLOOP:
4414f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff   case OPCODE_BGNSUB:
442dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff   case OPCODE_BRA:
443dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff   case OPCODE_CAL:
444dd972f20dc2bd3609d833893e5c6544ac09b59a9Steve Naroff   case OPCODE_CONT:
44586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   case OPCODE_IF:
4465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   case OPCODE_ELSE:
44786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   case OPCODE_END:
44886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   case OPCODE_ENDIF:
44986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   case OPCODE_ENDLOOP:
45086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   case OPCODE_ENDSUB:
45186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   case OPCODE_RET:
45286f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      return GL_TRUE;
45386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   default:
45486f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      return GL_FALSE;
45586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   }
45686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor}
45786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor
45886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor
45986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor/**
46086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor * Test if the given instruction is a simple MOV (no conditional updating,
46186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor * not relative addressing, no negation/abs, etc).
46286f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor */
46386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorstatic GLboolean
46486f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorcan_downward_mov_be_modifed(const struct prog_instruction *mov)
46586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor{
46686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   return
46786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->Opcode == OPCODE_MOV &&
46886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->CondUpdate == GL_FALSE &&
46986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->SrcReg[0].RelAddr == 0 &&
47086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->SrcReg[0].Negate == 0 &&
47186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->SrcReg[0].Abs == 0 &&
47286f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->SrcReg[0].HasIndex2 == 0 &&
47386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->SrcReg[0].RelAddr2 == 0 &&
47486f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->DstReg.RelAddr == 0 &&
47586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->DstReg.CondMask == COND_TR &&
47686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      mov->SaturateMode == SATURATE_OFF;
47786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor}
47886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor
47986f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor
48086f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorstatic GLboolean
48186f194083504938df72135b5b66bf0c5cafd9498Douglas Gregorcan_upward_mov_be_modifed(const struct prog_instruction *mov)
48228be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner{
483fdd75663fffeb2058a7847975e50837e61200593Anton Korobeynikov   return
4847da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner      can_downward_mov_be_modifed(mov) &&
4855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      mov->DstReg.File == PROGRAM_TEMPORARY;
4867da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner}
4877da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner
4887da36f642e907ff5a5ba4b18b5bfebfabf36ecc7Chris Lattner
489baf0d6678418e0dd9309438c3e50274253cfc7b2Chris Lattner/**
490baf0d6678418e0dd9309438c3e50274253cfc7b2Chris Lattner * Try to remove use of extraneous MOV instructions, to free them up for dead
49128be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner * code removal.
49274253736184c0717a0649922551bf9d8b6815651Douglas Gregor */
49374253736184c0717a0649922551bf9d8b6815651Douglas Gregorstatic void
49474253736184c0717a0649922551bf9d8b6815651Douglas Gregor_mesa_remove_extra_move_use(struct gl_program *prog)
49574253736184c0717a0649922551bf9d8b6815651Douglas Gregor{
49674253736184c0717a0649922551bf9d8b6815651Douglas Gregor   GLuint i, j;
4975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
498eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor   if (dbg) {
499eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor      printf("Optimize: Begin remove extra move use\n");
500eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor      _mesa_print_program(prog);
5015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
50228be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner
503eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor   /*
504eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor    * Look for sequences such as this:
505eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor    *    MOV tmpX, arg0;
506337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor    *    ...
507337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor    *    FOO tmpY, tmpX, arg1;
508337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor    * and convert into:
509337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor    *    MOV tmpX, arg0;
510337c6b9f5d502dc1c5acea628bf7bf9e828efc0eDouglas Gregor    *    ...
511224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl    *    FOO tmpY, arg0, arg1;
512224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl    */
513224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl
514224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl   for (i = 0; i + 1 < prog->NumInstructions; i++) {
515224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl      const struct prog_instruction *mov = prog->Instructions + i;
516224605064a4ef87d1c3d35ad1cb363f8b534012bSebastian Redl      GLuint dst_mask, src_mask;
517bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor      if (can_upward_mov_be_modifed(mov) == GL_FALSE)
518eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor         continue;
519eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor
520bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor      /* Scanning the code, we maintain the components which are still active in
521bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor       * these two masks
522bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor       */
523bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor      dst_mask = mov->DstReg.WriteMask;
524bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor      src_mask = get_src_arg_mask(mov, 0, NO_MASK);
525bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor
526bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor      /* Walk through remaining instructions until the or src reg gets
527bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor       * rewritten or we get into some flow-control, eliminating the use of
528bf3af056289893f58d37b05a2c80970708781d61Douglas Gregor       * this MOV.
529eb8f3063257a392f15aea48d42fb73ec51afc548Douglas Gregor       */
53059b5da6d853b4368b984700315adf7b37de05764Nate Begeman      for (j = i + 1; j < prog->NumInstructions; j++) {
531b4609806e9232593ece09ce08b630836e825865cDouglas Gregor	 struct prog_instruction *inst2 = prog->Instructions + j;
53288a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor         GLuint arg;
53388a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor
5349d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
5359d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	     break;
5369d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor
53727c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor	 /* First rewrite this instruction's args if appropriate. */
5389d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
53988a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor	    GLuint comp, read_mask;
54088a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor
54188a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
54288a3514f36de96b19cdf50141c640df1a5f13f6cDouglas Gregor		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
5439d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor		inst2->SrcReg[arg].RelAddr ||
5449d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor		inst2->SrcReg[arg].Abs)
5459d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	       continue;
546e6386394677ed4f77b20e2e3d5446a3a2f628e53Steve Naroff            read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
547e6386394677ed4f77b20e2e3d5446a3a2f628e53Steve Naroff
548670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
549670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner             * components read still come from the mov instructions
550670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner             */
551670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner            if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
552670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner               (read_mask & dst_mask) == read_mask) {
553670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner               for (comp = 0; comp < 4; comp++) {
554670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner                  const GLuint inst2_swz =
555213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman                     GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
556213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman                  const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
557fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff                  inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
558fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff                  inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
559027282d1c1ac151aa7b1b3b45babc918b8ad456aSteve Naroff                  inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
560027282d1c1ac151aa7b1b3b45babc918b8ad456aSteve Naroff                                                  inst2_swz) & 0x1) << comp);
561799a6a6850af625946bb8d88ca960bb6604e3858Steve Naroff               }
562799a6a6850af625946bb8d88ca960bb6604e3858Steve Naroff               inst2->SrcReg[arg].File = mov->SrcReg[0].File;
5635daf570d0ce027e18ed5f9d66e6b2a14a40b720dFariborz Jahanian               inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
564670a62cd1d51042ea076cda5e93f26a1d8327fb3Chris Lattner            }
565d9f6910f4ef37c0e8eeee2a01287d9572c3176efChris Lattner	 }
566796da18402f286b897782a298ae3b20c459c102eDouglas Gregor
5679d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	 /* The source of MOV is written. This potentially deactivates some
5689d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor          * components from the src and dst of the MOV instruction
56904421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner          */
57028be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner	 if (inst2->DstReg.File == mov->DstReg.File &&
57124b41fa8239c63b9eb570d3e83c4a82840656a65Argyrios Kyrtzidis	     (inst2->DstReg.RelAddr ||
57224b41fa8239c63b9eb570d3e83c4a82840656a65Argyrios Kyrtzidis	      inst2->DstReg.Index == mov->DstReg.Index)) {
5736eec8e883de118b431e3ead5b1e604a6ac68ff6bDouglas Gregor            dst_mask &= ~inst2->DstReg.WriteMask;
5749d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor            src_mask = get_src_arg_mask(mov, 0, dst_mask);
5759d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor         }
5769d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor
5779d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor         /* Idem when the destination of mov is written */
5789d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
5799d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	     (inst2->DstReg.RelAddr ||
5809d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
5819d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor            src_mask &= ~inst2->DstReg.WriteMask;
5829d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor            dst_mask &= get_dst_mask_for_mov(mov, src_mask);
5839d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor         }
5849d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor         if (dst_mask == 0)
5859d293dfc0ad7c44ae0b5eb9517f1ed8c8d8b7ff7Douglas Gregor            break;
586c42e1183846228a7fa5143ad76507d6d60f5c6f3Sebastian Redl      }
587c42e1183846228a7fa5143ad76507d6d60f5c6f3Sebastian Redl   }
588c42e1183846228a7fa5143ad76507d6d60f5c6f3Sebastian Redl
5895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   if (dbg) {
5905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      printf("Optimize: End remove extra move use.\n");
5915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      /*_mesa_print_program(prog);*/
5925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
5935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
5945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
5955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
5965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/**
5975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Complements dead_code_global. Try to remove code in block of code by
5985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * carefully monitoring the swizzles. Both functions should be merged into one
5995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * with a proper control flow graph
60028be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattner */
60128be73f74c9e241a23ea24fe5756623de6bf1084Chris Lattnerstatic GLboolean
6025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer_mesa_remove_dead_code_local(struct gl_program *prog)
6035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{
604ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor   GLboolean *removeInst;
605ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor   GLuint i, arg, rem = 0;
606ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor
607ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor   removeInst = (GLboolean *)
608ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor      calloc(1, prog->NumInstructions * sizeof(GLboolean));
609ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor
610ae8d467e75a4e72b19e1eca199bf93dfaab47acfDouglas Gregor   for (i = 0; i < prog->NumInstructions; i++) {
6115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const struct prog_instruction *inst = prog->Instructions + i;
6125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const GLuint index = inst->DstReg.Index;
613fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff      const GLuint mask = inst->DstReg.WriteMask;
614ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner      enum inst_use use;
615ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner
616ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner      /* We must deactivate the pass as soon as some indirection is used */
617ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner      if (inst->DstReg.RelAddr)
618ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner         goto done;
619ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
620ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner         if (inst->SrcReg[arg].RelAddr)
621ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner            goto done;
622ca354faa7e9b99af17070c82b9662a5fca76422cChris Lattner
62386f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      if (_mesa_is_flow_control_opcode(inst->Opcode) ||
6245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
625c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner          inst->DstReg.File != PROGRAM_TEMPORARY ||
626c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner          inst->DstReg.RelAddr)
627c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner         continue;
628c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner
6295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      use = find_next_use(prog, i+1, index, mask);
630c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner      if (use == WRITE || use == END)
6315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         removeInst[i] = GL_TRUE;
632c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner   }
6335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   rem = remove_instructions(prog, removeInst);
635c63a1f276f7b324fd9a4be82098b1c8f7bf30733Chris Lattner
6365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerdone:
6375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   free(removeInst);
6385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   return rem != 0;
6394f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff}
6404f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff
6414f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff
6424f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff/**
6434f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff * Try to inject the destination of mov as the destination of inst and recompute
6444f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff * the swizzles operators for the sources of inst if required. Return GL_TRUE
6454f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff * of the substitution was possible, GL_FALSE otherwise
6464f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroff */
6474f6a7d7ead09b439216c32f2de806a998aeb222aSteve Naroffstatic GLboolean
648d1fa6449e9dbdd667466e9e1e971aa17c9793e8aFariborz Jahanian_mesa_merge_mov_into_inst(struct prog_instruction *inst,
649ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian                          const struct prog_instruction *mov)
6506669db9d80d77d10f101aa9f8e488bbd2d98f76cFariborz Jahanian{
651ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian   /* Indirection table which associates destination and source components for
652ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian    * the mov instruction
653ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian    */
654ba8d2d684e74a20bef03828c21c991d222c7e9e5Fariborz Jahanian   const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
6555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* Some components are not written by inst. We cannot remove the mov */
6575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   if (mask != (inst->DstReg.WriteMask & mask))
6582e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek      return GL_FALSE;
6594cc627111453b75519d5130b57e06256da7b00e8Chris Lattner
6604cc627111453b75519d5130b57e06256da7b00e8Chris Lattner   /* Depending on the instruction, we may need to recompute the swizzles.
6612e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek    * Also, some other instructions (like TEX) are not linear. We will only
6621d09ecca892185ba067e47ba879f424de59950efChris Lattner    * consider completely active sources and destinations
6631d09ecca892185ba067e47ba879f424de59950efChris Lattner    */
6641d09ecca892185ba067e47ba879f424de59950efChris Lattner   switch (inst->Opcode) {
6654cc627111453b75519d5130b57e06256da7b00e8Chris Lattner
6662e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek   /* Carstesian instructions: we compute the swizzle */
6674cc627111453b75519d5130b57e06256da7b00e8Chris Lattner   case OPCODE_MOV:
6682e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek   case OPCODE_MIN:
669e9b12198c4cc7f5687960100351b4af006c14469Steve Naroff   case OPCODE_MAX:
670e9b12198c4cc7f5687960100351b4af006c14469Steve Naroff   case OPCODE_ABS:
6711a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor   case OPCODE_ADD:
6721a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor   case OPCODE_MAD:
6731d09ecca892185ba067e47ba879f424de59950efChris Lattner   case OPCODE_MUL:
6741d09ecca892185ba067e47ba879f424de59950efChris Lattner   case OPCODE_SUB:
6752e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek   {
67663f067f5a75ae97b53dc7a6a6530e2c72c8bb7f8Seo Sanghyeon      GLuint dst_to_src_comp[4] = {0,0,0,0};
67763f067f5a75ae97b53dc7a6a6530e2c72c8bb7f8Seo Sanghyeon      GLuint dst_comp, arg;
6781d09ecca892185ba067e47ba879f424de59950efChris Lattner      for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
6791d09ecca892185ba067e47ba879f424de59950efChris Lattner         if (mov->DstReg.WriteMask & (1 << dst_comp)) {
680fb7080663a0f0abd687a6f58ac917d9a76318b73Chris Lattner            const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
6811d09ecca892185ba067e47ba879f424de59950efChris Lattner            ASSERT(src_comp < 4);
6822e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek            dst_to_src_comp[dst_comp] = src_comp;
683fb7080663a0f0abd687a6f58ac917d9a76318b73Chris Lattner         }
6844cc627111453b75519d5130b57e06256da7b00e8Chris Lattner      }
6852e5f54aa1dd15a62c34a9d1d24a5a0692f43934aTed Kremenek
686d9f6910f4ef37c0e8eeee2a01287d9572c3176efChris Lattner      /* Patch each source of the instruction */
687fa28b30d5a1e93e5263be33e343532b900d2c643Chris Lattner      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
68804421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner         const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
68904421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner         inst->SrcReg[arg].Swizzle = 0;
6901d09ecca892185ba067e47ba879f424de59950efChris Lattner
6911d09ecca892185ba067e47ba879f424de59950efChris Lattner         /* Reset each active component of the swizzle */
6921d09ecca892185ba067e47ba879f424de59950efChris Lattner         for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
6934e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            GLuint src_comp, arg_comp;
6944e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
6954e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek               continue;
6964e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            src_comp = dst_to_src_comp[dst_comp];
6974e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            ASSERT(src_comp < 4);
6984e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            arg_comp = GET_SWZ(arg_swz, src_comp);
6994e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            ASSERT(arg_comp < 4);
7004e99a5fc3b203397a91136c6e695e405fb8fc606Ted Kremenek            inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
70156f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner         }
70256f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner      }
70356f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner      inst->DstReg = mov->DstReg;
70456f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner      return GL_TRUE;
70556f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   }
70656f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner
70756f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   /* Dot products and scalar instructions: we only change the destination */
70856f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_RCP:
70956f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_SIN:
71056f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_COS:
71156f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_RSQ:
71256f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_POW:
71356f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_EX2:
71456f349400c5932a196509c0480ff6f99a9a0b48fChris Lattner   case OPCODE_LOG:
715898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   case OPCODE_DP2:
716898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   case OPCODE_DP3:
717898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   case OPCODE_DP4:
718898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor      inst->DstReg = mov->DstReg;
719898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor      return GL_TRUE;
720898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor
721898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   /* All other instructions require fully active components with no swizzle */
722898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   default:
723898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor      if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
724898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor          inst->DstReg.WriteMask != WRITEMASK_XYZW)
725898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor         return GL_FALSE;
726898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor      inst->DstReg = mov->DstReg;
727898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor      return GL_TRUE;
728898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor   }
729898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor}
730898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor
731898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor
732898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor/**
733898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor * Try to remove extraneous MOV instructions from the given program.
734898574e7496ba8fd76290079d3a9d06954992734Douglas Gregor */
735c9e8f606787b0bc0c3b08e566b87cc1751694168Eli Friedmanstatic GLboolean
736c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman_mesa_remove_extra_moves(struct gl_program *prog)
737c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman{
738c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman   GLboolean *removeInst; /* per-instruction removal flag */
739c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman   GLuint i, rem = 0, nesting = 0;
740c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman
741c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman   if (dbg) {
742e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson      printf("Optimize: Begin remove extra moves\n");
743c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      _mesa_print_program(prog);
744e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson   }
745e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson
74659b5da6d853b4368b984700315adf7b37de05764Nate Begeman   removeInst = (GLboolean *)
74759b5da6d853b4368b984700315adf7b37de05764Nate Begeman      calloc(1, prog->NumInstructions * sizeof(GLboolean));
748c9e8f606787b0bc0c3b08e566b87cc1751694168Eli Friedman
74959b5da6d853b4368b984700315adf7b37de05764Nate Begeman   /*
750e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson    * Look for sequences such as this:
751e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson    *    FOO tmpX, arg0, arg1;
752e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson    *    MOV tmpY, tmpX;
753e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson    * and convert into:
754c9e8f606787b0bc0c3b08e566b87cc1751694168Eli Friedman    *    FOO tmpY, arg0, arg1;
755e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson    */
756e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson
757c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman   for (i = 0; i < prog->NumInstructions; i++) {
758e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson      const struct prog_instruction *mov = prog->Instructions + i;
7593498bdb9e9cb300de74c7b51c92608e2902b2348Douglas Gregor
7603498bdb9e9cb300de74c7b51c92608e2902b2348Douglas Gregor      switch (mov->Opcode) {
761c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_BGNLOOP:
762c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_BGNSUB:
763c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_IF:
764c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman         nesting++;
765c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman         break;
766c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_ENDLOOP:
767c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_ENDSUB:
768c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_ENDIF:
769c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman         nesting--;
770c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman         break;
771c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman      case OPCODE_MOV:
772c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman         if (i > 0 &&
773c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman             can_downward_mov_be_modifed(mov) &&
774c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman             mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
775c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman             nesting == 0)
776c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman         {
77732a311e276cd4bcafddd38db679aff9804e66bd4Eli Friedman
7784e716e07d90a18ac83dd94d157ec676530bc78f9Sebastian Redl            /* see if this MOV can be removed */
7794e716e07d90a18ac83dd94d157ec676530bc78f9Sebastian Redl            const GLuint id = mov->SrcReg[0].Index;
780e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson            struct prog_instruction *prevInst;
781e8a32b855ce4e8580a191f8d29d2f3f459834302Anders Carlsson            GLuint prevI;
782c39dc9a25a9d74a5302e8567a4d3fc008212024cEli Friedman
78338374b05791ee93300b9fbe8ceb3957f54184b37Steve Naroff            /* get pointer to previous instruction */
78438374b05791ee93300b9fbe8ceb3957f54184b37Steve Naroff            prevI = i - 1;
7855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            while (prevI > 0 && removeInst[prevI])
7865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               prevI--;
7875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            prevInst = prog->Instructions + prevI;
7885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
7905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                prevInst->DstReg.Index == id &&
7915f6b632391e24b08ead3e62ba2e2765e770382edNuno Lopes                prevInst->DstReg.RelAddr == 0 &&
7925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                prevInst->DstReg.CondSrc == 0 &&
7935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                prevInst->DstReg.CondMask == COND_TR) {
7945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               const GLuint dst_mask = prevInst->DstReg.WriteMask;
7965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
7975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               if (next_use == WRITE || next_use == END) {
7995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  /* OK, we can safely remove this MOV instruction.
8005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                   * Transform:
801ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner                   *   prevI: FOO tempIndex, x, y;
802ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner                   *       i: MOV z, tempIndex;
803ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner                   * Into:
804ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner                   *   prevI: FOO z, x, y;
805ce0afc0b09accbf7370b2ba87574b2c4be7fb032Chris Lattner                   */
806590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner                  if (_mesa_merge_mov_into_inst(prevInst, mov)) {
807590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner                     removeInst[i] = GL_TRUE;
808a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman                     if (dbg) {
809a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman                        printf("Remove MOV at %u\n", i);
810a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman                        printf("new prev inst %u: ", prevI);
811a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman                        _mesa_print_instruction(prevInst);
812a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman                     }
813a6afa768aa7bd3102a2807aa720917e4a1771e4eEli Friedman                  }
8145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               }
8155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            }
8165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         }
8175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         break;
8185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      default:
8195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         ; /* nothing */
820590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner      }
8215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
8225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* now remove the instructions which aren't needed */
8242eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner   rem = remove_instructions(prog, removeInst);
8252eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner
82698be4943e8dc4f3905629a7102668960873cf863Chris Lattner   free(removeInst);
8272eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner
828f0fbcb356bc4eb5f6467dc4fc4bc447e9a630102Chris Lattner   if (dbg) {
8295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
8302eadfb638eb1bb6ccfd6fd0453e764d47e27eed9Chris Lattner      /*_mesa_print_program(prog);*/
831b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson   }
832b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson
833b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson   return rem != 0;
834b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson}
835b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson
836b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson
837b88d45ea7eb835d36c4a4b3ea84b1260b120dd0aAnders Carlsson/** A live register interval */
8387267f7832e5f0c7f951765e201c5a2650eb1637bArgyrios Kyrtzidisstruct interval
8397267f7832e5f0c7f951765e201c5a2650eb1637bArgyrios Kyrtzidis{
8407267f7832e5f0c7f951765e201c5a2650eb1637bArgyrios Kyrtzidis   GLuint Reg;         /** The temporary register index */
8417b658aa315784fa489a32daeacfe400512ce468dSteve Naroff   GLuint Start, End;  /** Start/end instruction numbers */
8427b658aa315784fa489a32daeacfe400512ce468dSteve Naroff};
84398be4943e8dc4f3905629a7102668960873cf863Chris Lattner
844ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar
845ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar/** A list of register intervals */
846ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbarstruct interval_list
847ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar{
848ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar   GLuint Num;
849ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar   struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
850ac620decb68aad1a2cf6c0c191b56d78981d9aaaDaniel Dunbar};
851389cecc83f33e93c8ba6bf2e8269b8690404416fSteve Naroff
8527b658aa315784fa489a32daeacfe400512ce468dSteve Naroff
853b4609806e9232593ece09ce08b630836e825865cDouglas Gregorstatic void
854b4609806e9232593ece09ce08b630836e825865cDouglas Gregorappend_interval(struct interval_list *list, const struct interval *inv)
85513b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff{
85698be4943e8dc4f3905629a7102668960873cf863Chris Lattner   list->Intervals[list->Num++] = *inv;
857a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner}
858a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner
859a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner
860a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner/** Insert interval inv into list, sorted by interval end */
8611c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlssonstatic void
8621c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlssoninsert_interval_by_end(struct interval_list *list, const struct interval *inv)
8631c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlsson{
8641c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlsson   /* XXX we could do a binary search insertion here since list is sorted */
8651c0cfd4599e816cfd7a8f348286bf0ad79652ffcAnders Carlsson   GLint i = list->Num - 1;
866a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner   while (i >= 0 && list->Intervals[i].End > inv->End) {
867a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner      list->Intervals[i + 1] = list->Intervals[i];
868a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner      i--;
869a4d55d89c8076b402bb168e3edeef0c2cd2a78c3Chris Lattner   }
87013b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff   list->Intervals[i + 1] = *inv;
87113b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff   list->Num++;
87213b7c5ff42d6077a8d59e2c9ec9e7fedd0150ae6Steve Naroff
8735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#ifdef DEBUG
8741a49af9681c350fef58e677f85ccb9a77e8e9d0aDouglas Gregor   {
8755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      GLuint i;
8765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      for (i = 0; i + 1 < list->Num; i++) {
8775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
8785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
8795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
8804a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl#endif
8814a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl}
8824a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl
8834a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl
8844a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl/** Remove the given interval from the interval list */
8854a4251b9e719415f30db0f5170abf31296a62225Sebastian Redlstatic void
8864a4251b9e719415f30db0f5170abf31296a62225Sebastian Redlremove_interval(struct interval_list *list, const struct interval *inv)
8874a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl{
8884a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl   /* XXX we could binary search since list is sorted */
8894a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl   GLuint k;
8904a4251b9e719415f30db0f5170abf31296a62225Sebastian Redl   for (k = 0; k < list->Num; k++) {
8915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (list->Intervals[k].Reg == inv->Reg) {
8925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         /* found, remove it */
8935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         ASSERT(list->Intervals[k].Start == inv->Start);
8945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         ASSERT(list->Intervals[k].End == inv->End);
8955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         while (k < list->Num - 1) {
8960518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl            list->Intervals[k] = list->Intervals[k + 1];
8975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            k++;
8980518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl         }
8990518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl         list->Num--;
9005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         return;
9015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
9025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
9035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
9045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** called by qsort() */
9075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic int
9085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencercompare_start(const void *a, const void *b)
90976e773a443be9f006610f46529e07d4c8d857680Chris Lattner{
9105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   const struct interval *ia = (const struct interval *) a;
911bf7553889594886213e05cd3f94a6d6f7747bf7bChris Lattner   const struct interval *ib = (const struct interval *) b;
91298be4943e8dc4f3905629a7102668960873cf863Chris Lattner   if (ia->Start < ib->Start)
9135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return -1;
9145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   else if (ia->Start > ib->Start)
9155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return +1;
9165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   else
9175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return 0;
9185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
9195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/** sort the interval list according to interval starts */
9225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic void
9235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencersort_interval_list_by_start(struct interval_list *list)
9245a1deb8d9c0722beae28d693fa137bbb942bd11fAnders Carlsson{
925aa1f9f1d50adf294674b74510b62e863b68572bcDaniel Dunbar   qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
9265a1deb8d9c0722beae28d693fa137bbb942bd11fAnders Carlsson#ifdef DEBUG
9275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   {
9285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      GLuint i;
9295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      for (i = 0; i + 1 < list->Num; i++) {
9300518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl         ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
9310518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl      }
932a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner   }
933a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner#endif
93498be4943e8dc4f3905629a7102668960873cf863Chris Lattner}
935a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner
9360518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redlstruct loop_info
937a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner{
9380518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl   GLuint Start, End;  /**< Start, end instructions of loop */
939a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner};
940a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner
941a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner/**
942a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner * Update the intermediate interval info for register 'index' and
943a269ebfd91c4fa47e051fa1fa904833a022fe025Chris Lattner * instruction 'ic'.
9440518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl */
94565383479cb2caf0f136f58fecdbdbaf9c497b7a1Chris Lattnerstatic void
9465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerupdate_interval(GLint intBegin[], GLint intEnd[],
94765383479cb2caf0f136f58fecdbdbaf9c497b7a1Chris Lattner		struct loop_info *loopStack, GLuint loopStackDepth,
9485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer		GLuint index, GLuint ic)
94976e773a443be9f006610f46529e07d4c8d857680Chris Lattner{
9500518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl   int i;
951efdd1574e3cca00ef0410aee8b7ee36eda5a2f71Chris Lattner   GLuint begin = ic;
952efdd1574e3cca00ef0410aee8b7ee36eda5a2f71Chris Lattner   GLuint end = ic;
9536a24acbb3dbb3bea9426716bee6ad6023ad15f3fAnders Carlsson
95498be4943e8dc4f3905629a7102668960873cf863Chris Lattner   /* If the register is used in a loop, extend its lifetime through the end
9556a24acbb3dbb3bea9426716bee6ad6023ad15f3fAnders Carlsson    * of the outermost loop that doesn't contain its definition.
9560518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl    */
9576a24acbb3dbb3bea9426716bee6ad6023ad15f3fAnders Carlsson   for (i = 0; i < loopStackDepth; i++) {
9580518999d3adcc289997bd974dce90cc97f5c1c44Sebastian Redl      if (intBegin[index] < loopStack[i].Start) {
959060e470e53b73167114284eb0b8c3e25bb3dad99Ted Kremenek	 end = loopStack[i].End;
9605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer	 break;
9615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
9625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
9635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
964e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar   /* Variables that are live at the end of a loop will also be live at the
965e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar    * beginning, so an instruction inside of a loop should have its live
966e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar    * interval begin at the start of the outermost loop.
967e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar    */
968b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman   if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
969b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman      begin = loopStack[0].Start;
9705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
971e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar
9725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
9735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   if (intBegin[index] == -1) {
9745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ASSERT(intEnd[index] == -1);
9755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      intBegin[index] = begin;
9765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      intEnd[index] = end;
9775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
9785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   else {
979e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar      intEnd[index] = end;
9805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
9815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
982e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar
9835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/**
985590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner * Find first/last instruction that references each temporary register.
9865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer */
9875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerGLboolean
9885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer_mesa_find_temp_intervals(const struct prog_instruction *instructions,
989590b6646ef747d2f7b42e5f40487ff07642d7b6fChris Lattner                          GLuint numInstructions,
9905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                          GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
9915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                          GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
9925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{
9935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   struct loop_info loopStack[MAX_LOOP_NESTING];
9945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLuint loopStackDepth = 0;
9955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLuint i;
9965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
998e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar      intBegin[i] = intEnd[i] = -1;
9995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
10005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
10015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* Scan instructions looking for temporary registers */
10025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   for (i = 0; i < numInstructions; i++) {
10035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const struct prog_instruction *inst = instructions + i;
10045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (inst->Opcode == OPCODE_BGNLOOP) {
10055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         loopStack[loopStackDepth].Start = i;
1006e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar         loopStack[loopStackDepth].End = inst->BranchTarget;
10075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         loopStackDepth++;
10085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
10095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      else if (inst->Opcode == OPCODE_ENDLOOP) {
10105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         loopStackDepth--;
10115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
10125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      else if (inst->Opcode == OPCODE_CAL) {
10135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         return GL_FALSE;
1014e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar      }
10155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      else {
1016e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar         const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
1017e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar         GLuint j;
10185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         for (j = 0; j < numSrc; j++) {
1019e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
1020e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar               const GLuint index = inst->SrcReg[j].Index;
1021e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar               if (inst->SrcReg[j].RelAddr)
10225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  return GL_FALSE;
1023e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar               update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1024e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar			       index, i);
10255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            }
1026e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar         }
1027e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1028e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar            const GLuint index = inst->DstReg.Index;
1029e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar            if (inst->DstReg.RelAddr)
1030e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar               return GL_FALSE;
1031e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar            update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1032e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar			    index, i);
1033e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar         }
1034e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar      }
10355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
1036e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar
10375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   return GL_TRUE;
10385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1039e1226d24d23510e422160eb2413d9bb90de9b144Daniel Dunbar
10405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1041b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman/**
1042b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * Find the live intervals for each temporary register in the program.
1043b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * For register R, the interval [A,B] indicates that R is referenced
1044b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * from instruction A through instruction B.
1045b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * Special consideration is needed for loops and subroutines.
1046b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
1047b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman */
1048b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedmanstatic GLboolean
1049b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedmanfind_live_intervals(struct gl_program *prog,
1050b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman                    struct interval_list *liveIntervals)
1051b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman{
1052b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman   GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1053b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman   GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1054b11e77836dd0867955c5abf32baf1c3e6c7f81e1Eli Friedman   GLuint i;
10555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
10565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /*
10575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    * Note: we'll return GL_FALSE below if we find relative indexing
10585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    * into the TEMP register file.  We can't handle that yet.
10595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    * We also give up on subroutines for now.
106026dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner    */
10616eec8e883de118b431e3ead5b1e604a6ac68ff6bDouglas Gregor
1062987a14bf5883ef6e5d07f1c83eb6d41a8212a78cArgyrios Kyrtzidis   if (dbg) {
10630835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis      printf("Optimize: Begin find intervals\n");
10640835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis   }
106526dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner
10665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* build intermediate arrays */
106726dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner   if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
106826dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner                                  intBegin, intEnd))
106926dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner      return GL_FALSE;
10705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
10715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   /* Build live intervals list from intermediate arrays */
1072987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner   liveIntervals->Num = 0;
107398be4943e8dc4f3905629a7102668960873cf863Chris Lattner   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1074987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner      if (intBegin[i] >= 0) {
10755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         struct interval inv;
107626dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner         inv.Reg = i;
107726dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner         inv.Start = intBegin[i];
10785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         inv.End = intEnd[i];
107926dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner         append_interval(liveIntervals, &inv);
108026dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner      }
108126dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner   }
1082c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner
1083c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner   /* Sort the list according to interval starts */
1084c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner   sort_interval_list_by_start(liveIntervals);
1085c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner
1086c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner   if (dbg) {
108726dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner      /* print interval info */
108826dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner      for (i = 0; i < liveIntervals->Num; i++) {
108926dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner         const struct interval *inv = liveIntervals->Intervals + i;
10905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         printf("Reg[%d] live [%d, %d]:",
10915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                      inv->Reg, inv->Start, inv->End);
10925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         if (1) {
10935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            GLuint j;
10945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            for (j = 0; j < inv->Start; j++)
109526dc7b39267b7d29b35a7887a5b8e54697192192Chris Lattner               printf(" ");
10965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            for (j = inv->Start; j <= inv->End; j++)
10975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               printf("x");
1098987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner         }
1099987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner         printf("\n");
1100987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner      }
1101987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner   }
1102987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner
1103987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner   return GL_TRUE;
1104987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner}
1105c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner
1106c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner
1107c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner/** Scan the array of used register flags to find free entry */
1108c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattnerstatic GLint
1109c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattneralloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
1110c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner{
1111c0a356b4fa35bf3f7d6031c229c071eef1a8f7a9Chris Lattner   GLuint k;
11125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
1113987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner      if (!usedRegs[k]) {
1114987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner         usedRegs[k] = GL_TRUE;
1115ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner         return k;
1116ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner      }
1117ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner   }
1118ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen   return -1;
1119ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen}
1120ccc213f5d4034bb6fcda14cad112d9029a508b66Chris Lattner
1121ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen
1122ee5a700af3fe9ae1a639c271f093f40677dddc04Dale Johannesen/**
1123987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner * This function implements "Linear Scan Register Allocation" to reduce
1124987b15db39745cd7fb2e634ba1a4b85469ac9131Chris Lattner * the number of temporary registers used by the program.
11255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer *
11265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * We compute the "live interval" for all temporary registers then
11275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * examine the overlap of the intervals to allocate new registers.
11285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer * Basically, if two intervals do not overlap, they can use the same register.
112928daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner */
113028daa53db73341b7ee7f269924ccfca1c6d179acChris Lattnerstatic void
113128daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner_mesa_reallocate_registers(struct gl_program *prog)
11325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer{
11335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   struct interval_list liveIntervals;
11345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
11355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
11365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLuint i;
11375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   GLint maxTemp = -1;
113828daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner
113928daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner   if (dbg) {
114042b83dde7c700b34f9435ad746984169888ae705Chris Lattner      printf("Optimize: Begin live-interval register reallocation\n");
114142b83dde7c700b34f9435ad746984169888ae705Chris Lattner      _mesa_print_program(prog);
114228daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner   }
114342b83dde7c700b34f9435ad746984169888ae705Chris Lattner
114442b83dde7c700b34f9435ad746984169888ae705Chris Lattner   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
114542b83dde7c700b34f9435ad746984169888ae705Chris Lattner      registerMap[i] = -1;
114642b83dde7c700b34f9435ad746984169888ae705Chris Lattner      usedRegs[i] = GL_FALSE;
114742b83dde7c700b34f9435ad746984169888ae705Chris Lattner   }
114842b83dde7c700b34f9435ad746984169888ae705Chris Lattner
114942b83dde7c700b34f9435ad746984169888ae705Chris Lattner   if (!find_live_intervals(prog, &liveIntervals)) {
115042b83dde7c700b34f9435ad746984169888ae705Chris Lattner      if (dbg)
115142b83dde7c700b34f9435ad746984169888ae705Chris Lattner         printf("Aborting register reallocation\n");
115228daa53db73341b7ee7f269924ccfca1c6d179acChris Lattner      return;
11535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   }
11543907323dd6665c0c4e383435cb145233f4533406Anders Carlsson
11555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer   {
11565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      struct interval_list activeIntervals;
11573907323dd6665c0c4e383435cb145233f4533406Anders Carlsson      activeIntervals.Num = 0;
11583907323dd6665c0c4e383435cb145233f4533406Anders Carlsson
11595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      /* loop over live intervals, allocating a new register for each */
11605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      for (i = 0; i < liveIntervals.Num; i++) {
11615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         const struct interval *live = liveIntervals.Intervals + i;
116204421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner
116304421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner         if (dbg)
116404421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner            printf("Consider register %u\n", live->Reg);
116564b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl
116664b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl         /* Expire old intervals.  Intervals which have ended with respect
116764b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl          * to the live interval can have their remapped registers freed.
116864b45f7e0d3167f040841ac2920aead7f080730dSebastian Redl          */
11695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         {
11705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            GLint j;
11715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer            for (j = 0; j < (GLint) activeIntervals.Num; j++) {
11725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               const struct interval *inv = activeIntervals.Intervals + j;
11735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               if (inv->End >= live->Start) {
11745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  /* Stop now.  Since the activeInterval list is sorted
11755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                   * we know we don't have to go further.
11765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                   */
11775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  break;
11785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer               }
1179efa9b3877ef298bcb792600ac33521827e1f7fafAnders Carlsson               else {
1180efa9b3877ef298bcb792600ac33521827e1f7fafAnders Carlsson                  /* Interval 'inv' has expired */
11810777972d38a3125efed962b045704c30ae6965cfSebastian Redl                  const GLint regNew = registerMap[inv->Reg];
11820835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis                  ASSERT(regNew >= 0);
11836215dee86c0e715b9f2b0d401ab2a5fcf629f1afSebastian Redl
11840777972d38a3125efed962b045704c30ae6965cfSebastian Redl                  if (dbg)
11850777972d38a3125efed962b045704c30ae6965cfSebastian Redl                     printf("  expire interval for reg %u\n", inv->Reg);
11860777972d38a3125efed962b045704c30ae6965cfSebastian Redl
11870777972d38a3125efed962b045704c30ae6965cfSebastian Redl                  /* remove interval j from active list */
11880777972d38a3125efed962b045704c30ae6965cfSebastian Redl                  remove_interval(&activeIntervals, inv);
11890777972d38a3125efed962b045704c30ae6965cfSebastian Redl                  j--;  /* counter-act j++ in for-loop above */
1190d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson
11910777972d38a3125efed962b045704c30ae6965cfSebastian Redl                  /* return register regNew to the free pool */
11925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                  if (dbg)
1193aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff                     printf("  free reg %d\n", regNew);
1194aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff                  ASSERT(usedRegs[regNew] == GL_TRUE);
1195d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson                  usedRegs[regNew] = GL_FALSE;
1196aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff               }
1197aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff            }
1198aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff         }
1199d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson
12008123a95c33b792d35c2e4992ba6e27882748fb0dChris Lattner         /* find a free register for this live interval */
12018123a95c33b792d35c2e4992ba6e27882748fb0dChris Lattner         {
120204421087832a031c90bd58f128c7c0e741db8dd2Chris Lattner            const GLint k = alloc_register(usedRegs);
1203d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson            if (k < 0) {
12042d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor               /* out of registers, give up */
12052d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor               return;
12062d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor            }
1207aaffbf7c790a324ed114184db771aae2d2e9151cSteve Naroff            registerMap[live->Reg] = k;
12082d8b273470684a9cd47f0ce24743cc1f71ef7cbcDouglas Gregor            maxTemp = MAX2(maxTemp, k);
1209aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff            if (dbg)
1210aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff               printf("  remap register %u -> %d\n", live->Reg, k);
1211aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff         }
1212aa58f00ebba5f14955001736b7aea20bb5bd91e6Steve Naroff
12135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         /* Insert this live interval into the active list which is sorted
12145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          * by increasing end points.
1215d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson          */
1216d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson         insert_interval_by_end(&activeIntervals, live);
1217d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson      }
1218d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson   }
1219efa9b3877ef298bcb792600ac33521827e1f7fafAnders Carlsson
1220d26527708b2b2f3b1d747f570efd10149d48364eAnders Carlsson   if (maxTemp + 1 < (GLint) liveIntervals.Num) {
12215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      /* OK, we've reduced the number of registers needed.
122231a458462c6cf417a84e0c47852b18fb22d79acbSteve Naroff       * Scan the program and replace all the old temporary register
122327c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor       * indexes with the new indexes.
122427c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor       */
122527c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor      replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
122627c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor
122786f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor      prog->NumTemporaries = maxTemp + 1;
122886f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor   }
122927c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor
123027c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor   if (dbg) {
123127c8dc06f65d7abcf6a7e7f64a7960c9a150ca01Douglas Gregor      printf("Optimize: End live-interval register reallocation\n");
1232213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman      printf("Num temp regs before: %u  after: %u\n",
12338a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman                   liveIntervals.Num, maxTemp + 1);
12348a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman      _mesa_print_program(prog);
12358a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman   }
12364d0ac88428b3ed7c6f3a2f4e758ea5424ecd70aeChris Lattner}
12374d0ac88428b3ed7c6f3a2f4e758ea5424ecd70aeChris Lattner
12388a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman
1239213541a68a3e137d11d2cefb612c6cdb410d7e8eNate Begeman#if 0
1240fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroffstatic void
12417e3e9b152e06edf329ceb32190d3255f248d4d5fChris Lattnerprint_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
1242190d6a25393995b42e32086949a68285ee423fb9Nate Begeman   fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
1243190d6a25393995b42e32086949a68285ee423fb9Nate Begeman   _mesa_print_program(program);
1244190d6a25393995b42e32086949a68285ee423fb9Nate Begeman   _mesa_print_program_parameters(ctx, program);
1245190d6a25393995b42e32086949a68285ee423fb9Nate Begeman   fprintf(stderr, "\n\n");
1246190d6a25393995b42e32086949a68285ee423fb9Nate Begeman}
1247190d6a25393995b42e32086949a68285ee423fb9Nate Begeman#endif
1248190d6a25393995b42e32086949a68285ee423fb9Nate Begeman
1249190d6a25393995b42e32086949a68285ee423fb9Nate Begeman
1250190d6a25393995b42e32086949a68285ee423fb9Nate Begeman/**
1251190d6a25393995b42e32086949a68285ee423fb9Nate Begeman * Apply optimizations to the given program to eliminate unnecessary
1252190d6a25393995b42e32086949a68285ee423fb9Nate Begeman * instructions, temp regs, etc.
1253fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff */
12547e3e9b152e06edf329ceb32190d3255f248d4d5fChris Lattnervoid
1255fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
1256fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff{
1257fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff   GLboolean any_change;
1258fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff
1259fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff   /* Stop when no modifications were output */
1260fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff   do {
1261fec0b49c3fa621fbf63e420f3d54a5bb3a0265d2Steve Naroff      any_change = GL_FALSE;
1262b8f849da3cedee2f61ad98389115ddd04e439d60Chris Lattner      _mesa_remove_extra_move_use(program);
12638a99764f9b778a54e7440b1ee06a1e48f25d76d8Nate Begeman      if (_mesa_remove_dead_code_global(program))
12643b8d116703db8018f855cbb4733ace426422623bNate Begeman         any_change = GL_TRUE;
12653b8d116703db8018f855cbb4733ace426422623bNate Begeman      if (_mesa_remove_extra_moves(program))
12667e3e9b152e06edf329ceb32190d3255f248d4d5fChris Lattner         any_change = GL_TRUE;
1267353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman      if (_mesa_remove_dead_code_local(program))
1268353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman         any_change = GL_TRUE;
1269353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman      _mesa_reallocate_registers(program);
1270353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman   } while (any_change);
1271353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman}
1272353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman
1273353417af9d254d4fd0eb7d0a3ff71c4d8594ac58Nate Begeman