/**************************************************************************
 *
 * Copyright 2011 The Chromium OS authors.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27
28#include "i915_reg.h"
29#include "i915_context.h"
30#include "i915_fpc.h"
31
32#include "pipe/p_shader_tokens.h"
33#include "util/u_math.h"
34#include "util/u_memory.h"
35#include "util/u_string.h"
36#include "tgsi/tgsi_parse.h"
37#include "tgsi/tgsi_dump.h"
38#include "tgsi/tgsi_exec.h"
39
40struct i915_optimize_context
41{
42   int first_write[TGSI_EXEC_NUM_TEMPS];
43   int last_read[TGSI_EXEC_NUM_TEMPS];
44};
45
46static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
47{
48   return (s1->Register.File == d1->Register.File &&
49           s1->Register.Indirect == d1->Register.Indirect &&
50           s1->Register.Dimension == d1->Register.Dimension &&
51           s1->Register.Index == d1->Register.Index);
52}
53
54static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
55{
56   return (d1->Register.File == d2->Register.File &&
57           d1->Register.Indirect == d2->Register.Indirect &&
58           d1->Register.Dimension == d2->Register.Dimension &&
59           d1->Register.Index == d2->Register.Index);
60}
61
62static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
63{
64   return (d1->Register.File == d2->Register.File &&
65           d1->Register.Indirect == d2->Register.Indirect &&
66           d1->Register.Dimension == d2->Register.Dimension &&
67           d1->Register.Index == d2->Register.Index &&
68           d1->Register.Absolute == d2->Register.Absolute &&
69           d1->Register.Negate == d2->Register.Negate);
70}
71
72static const struct {
73   boolean is_texture;
74   boolean commutes;
75   unsigned neutral_element;
76   unsigned num_dst;
77   unsigned num_src;
78} op_table [TGSI_OPCODE_LAST] = {
79   [ TGSI_OPCODE_ADD     ] = { false,   true,  TGSI_SWIZZLE_ZERO,  1,  2 },
80   [ TGSI_OPCODE_CEIL    ] = { false,  false,                  0,  1,  1 },
81   [ TGSI_OPCODE_CMP     ] = { false,  false,                  0,  1,  2 },
82   [ TGSI_OPCODE_COS     ] = { false,  false,                  0,  1,  1 },
83   [ TGSI_OPCODE_DDX     ] = { false,  false,                  0,  1,  0 },
84   [ TGSI_OPCODE_DDY     ] = { false,  false,                  0,  1,  0 },
85   [ TGSI_OPCODE_DP2     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
86   [ TGSI_OPCODE_DP3     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
87   [ TGSI_OPCODE_DP4     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
88   [ TGSI_OPCODE_DPH     ] = { false,  false,                  0,  1,  2 },
89   [ TGSI_OPCODE_DST     ] = { false,  false,                  0,  1,  2 },
90   [ TGSI_OPCODE_END     ] = { false,  false,                  0,  0,  0 },
91   [ TGSI_OPCODE_EX2     ] = { false,  false,                  0,  1,  1 },
92   [ TGSI_OPCODE_FLR     ] = { false,  false,                  0,  1,  1 },
93   [ TGSI_OPCODE_FRC     ] = { false,  false,                  0,  1,  1 },
94   [ TGSI_OPCODE_KILL_IF ] = { false,  false,                  0,  0,  1 },
95   [ TGSI_OPCODE_KILL    ] = { false,  false,                  0,  0,  0 },
96   [ TGSI_OPCODE_LG2     ] = { false,  false,                  0,  1,  1 },
97   [ TGSI_OPCODE_LIT     ] = { false,  false,                  0,  1,  1 },
98   [ TGSI_OPCODE_LRP     ] = { false,  false,                  0,  1,  3 },
99   [ TGSI_OPCODE_MAX     ] = { false,  false,                  0,  1,  2 },
100   [ TGSI_OPCODE_MAD     ] = { false,  false,                  0,  1,  3 },
101   [ TGSI_OPCODE_MIN     ] = { false,  false,                  0,  1,  2 },
102   [ TGSI_OPCODE_MOV     ] = { false,  false,                  0,  1,  1 },
103   [ TGSI_OPCODE_MUL     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
104   [ TGSI_OPCODE_NOP     ] = { false,  false,                  0,  0,  0 },
105   [ TGSI_OPCODE_POW     ] = { false,  false,                  0,  1,  2 },
106   [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
107   [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
108   [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
109   [ TGSI_OPCODE_SCS     ] = { false,  false,                  0,  1,  1 },
110   [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
111   [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
112   [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
113   [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
114   [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
115   [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
116   [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
117   [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
118   [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
119   [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
120   [ TGSI_OPCODE_TXB     ] = {  true,  false,                  0,  1,  2 },
121   [ TGSI_OPCODE_TXP     ] = {  true,  false,                  0,  1,  2 },
122   [ TGSI_OPCODE_XPD     ] = { false,  false,                  0,  1,  2 },
123};
124
125static boolean op_has_dst(unsigned opcode)
126{
127   return (op_table[opcode].num_dst > 0);
128}
129
130static int op_num_dst(unsigned opcode)
131{
132   return op_table[opcode].num_dst;
133}
134
135static int op_num_src(unsigned opcode)
136{
137   return op_table[opcode].num_src;
138}
139
140static boolean op_commutes(unsigned opcode)
141{
142   return op_table[opcode].commutes;
143}
144
145static unsigned mask_for_unswizzled(int num_components)
146{
147   unsigned mask = 0;
148   switch(num_components)
149   {
150      case 4:
151         mask |= TGSI_WRITEMASK_W;
152      case 3:
153         mask |= TGSI_WRITEMASK_Z;
154      case 2:
155         mask |= TGSI_WRITEMASK_Y;
156      case 1:
157         mask |= TGSI_WRITEMASK_X;
158   }
159   return mask;
160}
161
162static boolean is_unswizzled(struct i915_full_src_register *r,
163                             unsigned write_mask)
164{
165   if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
166      return FALSE;
167   if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
168      return FALSE;
169   if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
170      return FALSE;
171   if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
172      return FALSE;
173   return TRUE;
174}
175
176static boolean op_is_texture(unsigned opcode)
177{
178   return op_table[opcode].is_texture;
179}
180
181static unsigned op_neutral_element(unsigned opcode)
182{
183   unsigned ne = op_table[opcode].neutral_element;
184   if (!ne) {
185      debug_printf("No neutral element for opcode %d\n",opcode);
186      ne = TGSI_SWIZZLE_ZERO;
187   }
188   return ne;
189}
190
191/*
192 * Sets the swizzle to the neutral element for the operation for the bits
193 * of writemask which are set, swizzle to identity otherwise.
194 */
195static void set_neutral_element_swizzle(struct i915_full_src_register *r,
196                                        unsigned write_mask,
197                                        unsigned neutral)
198{
199   if ( write_mask & TGSI_WRITEMASK_X )
200      r->Register.SwizzleX = neutral;
201   else
202      r->Register.SwizzleX = TGSI_SWIZZLE_X;
203
204   if ( write_mask & TGSI_WRITEMASK_Y )
205      r->Register.SwizzleY = neutral;
206   else
207      r->Register.SwizzleY = TGSI_SWIZZLE_Y;
208
209   if ( write_mask & TGSI_WRITEMASK_Z )
210      r->Register.SwizzleZ = neutral;
211   else
212      r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
213
214   if ( write_mask & TGSI_WRITEMASK_W )
215      r->Register.SwizzleW = neutral;
216   else
217      r->Register.SwizzleW = TGSI_SWIZZLE_W;
218}
219
220static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
221{
222   o->File      = i->File;
223   o->Indirect  = i->Indirect;
224   o->Dimension = i->Dimension;
225   o->Index     = i->Index;
226   o->SwizzleX  = i->SwizzleX;
227   o->SwizzleY  = i->SwizzleY;
228   o->SwizzleZ  = i->SwizzleZ;
229   o->SwizzleW  = i->SwizzleW;
230   o->Absolute  = i->Absolute;
231   o->Negate    = i->Negate;
232}
233
234static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
235{
236   o->File      = i->File;
237   o->WriteMask = i->WriteMask;
238   o->Indirect  = i->Indirect;
239   o->Dimension = i->Dimension;
240   o->Index     = i->Index;
241}
242
243static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
244{
245   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
246   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
247
248   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
249
250   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
251   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
252   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
253}
254
255static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
256{
257   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
258      memcpy(o, i, sizeof(*o));
259   else
260      copy_instruction(&o->FullInstruction, &i->FullInstruction);
261
262}
263
264static void liveness_mark_written(struct i915_optimize_context *ctx,
265                                  struct i915_full_dst_register *dst_reg,
266                                  int pos)
267{
268   int dst_reg_index;
269   if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
270      dst_reg_index = dst_reg->Register.Index;
271      assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
272      /* dead -> live transition */
273      if (ctx->first_write[dst_reg_index] != -1)
274         ctx->first_write[dst_reg_index] = pos;
275   }
276}
277
278static void liveness_mark_read(struct i915_optimize_context *ctx,
279                               struct i915_full_src_register *src_reg,
280                               int pos)
281{
282   int src_reg_index;
283   if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
284      src_reg_index = src_reg->Register.Index;
285      assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
286      /* live -> dead transition */
287      if (ctx->last_read[src_reg_index] != -1)
288         ctx->last_read[src_reg_index] = pos;
289   }
290}
291
292static void liveness_analysis(struct i915_optimize_context *ctx,
293                              struct i915_token_list *tokens)
294{
295   struct i915_full_dst_register *dst_reg;
296   struct i915_full_src_register *src_reg;
297   union i915_full_token *current;
298   unsigned opcode;
299   int num_dst, num_src;
300   int i = 0;
301
302   for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
303   {
304      ctx->first_write[i] = -1;
305      ctx->last_read[i] = -1;
306   }
307
308   for(i = 0; i < tokens->NumTokens; i++)
309   {
310      current = &tokens->Tokens[i];
311
312      if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
313         continue;
314
315      opcode = current->FullInstruction.Instruction.Opcode;
316      num_dst = op_num_dst(opcode);
317
318      switch(num_dst)
319      {
320         case 1:
321            dst_reg = &current->FullInstruction.Dst[0];
322            liveness_mark_written(ctx, dst_reg, i);
323         case 0:
324            break;
325         default:
326            debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
327            break;
328      }
329   }
330
331   for(i = tokens->NumTokens - 1; i >= 0; i--)
332   {
333      current = &tokens->Tokens[i];
334
335      if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
336         continue;
337
338      opcode = current->FullInstruction.Instruction.Opcode;
339      num_src = op_num_src(opcode);
340
341      switch(num_src)
342      {
343         case 3:
344            src_reg = &current->FullInstruction.Src[2];
345            liveness_mark_read(ctx, src_reg, i);
346         case 2:
347            src_reg = &current->FullInstruction.Src[1];
348            liveness_mark_read(ctx, src_reg, i);
349         case 1:
350            src_reg = &current->FullInstruction.Src[0];
351            liveness_mark_read(ctx, src_reg, i);
352         case 0:
353            break;
354         default:
355            debug_printf("Op %d has %d src regs\n", opcode, num_src);
356            break;
357      }
358   }
359}
360
361static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
362{
363   int dst_reg_index = dst_reg->Register.Index;
364   assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
365   return (from >= ctx->last_read[dst_reg_index]);
366}
367
368/* Returns a mask with the components used for a texture access instruction */
369static unsigned i915_tex_mask(union i915_full_token *instr)
370{
371   unsigned mask;
372
373   /* Get the number of coords */
374   mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
375
376   /* Add the W component if projective */
377   if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
378      mask |= TGSI_WRITEMASK_W;
379
380   return mask;
381}
382
383static boolean target_is_texture2d(uint tex)
384{
385   switch (tex) {
386   case TGSI_TEXTURE_2D:
387   case TGSI_TEXTURE_RECT:
388      return true;
389   default:
390      return false;
391   }
392}
393
394
395/*
396 * Optimize away useless indirect texture reads:
397 *    MOV TEMP[0].xy, IN[0].xyyy
398 *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
399 * into:
400 *    TEX TEMP[1], IN[0], SAMP[0], 2D
401 *
402 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
403 */
404static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
405                                             struct i915_token_list *tokens,
406                                             int index)
407{
408   union i915_full_token *current = &tokens->Tokens[index - 1];
409   union i915_full_token *next = &tokens->Tokens[index];
410
411   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
412        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
413        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
414        op_is_texture(next->FullInstruction.Instruction.Opcode) &&
415        target_is_texture2d(next->FullInstruction.Texture.Texture) &&
416        same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
417        is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
418        unused_from(ctx, &current->FullInstruction.Dst[0], index))
419   {
420      memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
421      current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
422   }
423}
424
425/*
426 * Optimize away things like:
427 *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
428 *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
429 * into:
430 *    NOP
431 *    MOV OUT[0].xyw, TEMP[1].xyww
432 */
433static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
434{
435   struct i915_full_src_register *src_reg1, *src_reg2;
436   struct i915_full_dst_register *dst_reg1, *dst_reg2;
437   unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
438
439   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
440        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
441        current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
442        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
443        current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
444        same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
445        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
446        !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
447   {
448      src_reg1 = &current->FullInstruction.Src[0];
449      dst_reg1 = &current->FullInstruction.Dst[0];
450      src_reg2 = &next->FullInstruction.Src[0];
451      dst_reg2 = &next->FullInstruction.Dst[0];
452
453      /* Start with swizzles from the first mov */
454      swizzle_x = src_reg1->Register.SwizzleX;
455      swizzle_y = src_reg1->Register.SwizzleY;
456      swizzle_z = src_reg1->Register.SwizzleZ;
457      swizzle_w = src_reg1->Register.SwizzleW;
458
459      /* Pile the second mov on top */
460      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
461         swizzle_x = src_reg2->Register.SwizzleX;
462      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
463         swizzle_y = src_reg2->Register.SwizzleY;
464      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
465         swizzle_z = src_reg2->Register.SwizzleZ;
466      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
467         swizzle_w = src_reg2->Register.SwizzleW;
468
469      dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
470      src_reg2->Register.SwizzleX = swizzle_x;
471      src_reg2->Register.SwizzleY = swizzle_y;
472      src_reg2->Register.SwizzleZ = swizzle_z;
473      src_reg2->Register.SwizzleW = swizzle_w;
474
475      current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
476
477      return;
478   }
479}
480
481/*
482 * Optimize away things like:
483 *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
484 *    MOV OUT[0].w, TEMP[2]
485 * into:
486 *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
487 * This is useful for optimizing texenv.
488 */
489static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
490{
491   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
492        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
493        op_commutes(current->FullInstruction.Instruction.Opcode) &&
494        current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
495        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
496        same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
497        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
498        !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
499        is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
500        is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
501        is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
502   {
503      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
504
505      set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
506      set_neutral_element_swizzle(&current->FullInstruction.Src[0],
507                                  next->FullInstruction.Dst[0].Register.WriteMask,
508                                  op_neutral_element(current->FullInstruction.Instruction.Opcode));
509
510      current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
511                                                           next->FullInstruction.Dst[0].Register.WriteMask;
512      return;
513   }
514
515   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
516        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
517        op_commutes(current->FullInstruction.Instruction.Opcode) &&
518        current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
519        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
520        same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
521        same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
522        !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
523        is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
524        is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
525        is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
526   {
527      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
528
529      set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
530      set_neutral_element_swizzle(&current->FullInstruction.Src[1],
531                                  next->FullInstruction.Dst[0].Register.WriteMask,
532                                  op_neutral_element(current->FullInstruction.Instruction.Opcode));
533
534      current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
535                                                           next->FullInstruction.Dst[0].Register.WriteMask;
536      return;
537   }
538}
539
540/*
541 * Optimize away things like:
542 *    MOV TEMP[0].xyz TEMP[0].xyzx
543 * into:
544 *    NOP
545 */
546static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
547{
548   union i915_full_token current;
549   copy_token(&current , tgsi_current);
550   if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
551        current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
552        op_has_dst(current.FullInstruction.Instruction.Opcode) &&
553        !current.FullInstruction.Instruction.Saturate &&
554        current.FullInstruction.Src[0].Register.Absolute == 0 &&
555        current.FullInstruction.Src[0].Register.Negate == 0 &&
556        is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
557        same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
558   {
559      return TRUE;
560   }
561   return FALSE;
562}
563
564/*
565 * Optimize away things like:
566 *    *** TEMP[0], TEMP[1], TEMP[2]
567 *    MOV OUT[0] TEMP[0]
568 * into:
569 *    *** OUT[0], TEMP[1], TEMP[2]
570 */
571static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
572                                                     struct i915_token_list *tokens,
573                                                     int index)
574{
575   union i915_full_token *current = &tokens->Tokens[index - 1];
576   union i915_full_token *next = &tokens->Tokens[index];
577
578   // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
579   if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
580        next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
581        next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
582        op_has_dst(current->FullInstruction.Instruction.Opcode) &&
583        !next->FullInstruction.Instruction.Saturate &&
584        next->FullInstruction.Src[0].Register.Absolute == 0 &&
585        next->FullInstruction.Src[0].Register.Negate == 0 &&
586        unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
587        current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
588        is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
589        current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
590        same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
591   {
592      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
593
594      current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
595      return;
596   }
597}
598
599struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
600{
601   struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
602   struct tgsi_parse_context parse;
603   struct i915_optimize_context *ctx;
604   int i = 0;
605
606   ctx = malloc(sizeof(*ctx));
607
608   out_tokens->NumTokens = 0;
609
610   /* Count the tokens */
611   tgsi_parse_init( &parse, tokens );
612   while( !tgsi_parse_end_of_tokens( &parse ) ) {
613      tgsi_parse_token( &parse );
614      out_tokens->NumTokens++;
615   }
616   tgsi_parse_free (&parse);
617
618   /* Allocate our tokens */
619   out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
620
621   tgsi_parse_init( &parse, tokens );
622   while( !tgsi_parse_end_of_tokens( &parse ) ) {
623      tgsi_parse_token( &parse );
624
625      if (i915_fpc_useless_mov(&parse.FullToken)) {
626         out_tokens->NumTokens--;
627         continue;
628      }
629
630      copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
631
632      i++;
633   }
634   tgsi_parse_free (&parse);
635
636   liveness_analysis(ctx, out_tokens);
637
638   i = 1;
639   while( i < out_tokens->NumTokens) {
640      i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
641      i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
642      i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
643      i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
644      i++;
645   }
646
647   free(ctx);
648
649   return out_tokens;
650}
651
652void i915_optimize_free(struct i915_token_list *tokens)
653{
654   free(tokens->Tokens);
655   free(tokens);
656}
657
658
659