1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "main/glheader.h"
25#include "main/context.h"
26#include "main/macros.h"
27#include "program.h"
28#include "prog_instruction.h"
29#include "prog_optimize.h"
30#include "prog_parameter.h"
31#include <stdbool.h>
32
33static bool
34src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35{
36   unsigned i;
37
38   for (i = 0; i < num_srcs; i++) {
39      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40	 return false;
41   }
42
43   return true;
44}
45
46static struct prog_src_register
47src_reg_for_float(struct gl_program *prog, float val)
48{
49   struct prog_src_register src;
50   unsigned swiz;
51
52   memset(&src, 0, sizeof(src));
53
54   src.File = PROGRAM_CONSTANT;
55   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
56					  (gl_constant_value *) &val, 1, &swiz);
57   src.Swizzle = swiz;
58   return src;
59}
60
61static struct prog_src_register
62src_reg_for_vec4(struct gl_program *prog, const float *val)
63{
64   struct prog_src_register src;
65   unsigned swiz;
66
67   memset(&src, 0, sizeof(src));
68
69   src.File = PROGRAM_CONSTANT;
70   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
71					  (gl_constant_value *) val, 4, &swiz);
72   src.Swizzle = swiz;
73   return src;
74}
75
76static bool
77src_regs_are_same(const struct prog_src_register *a,
78		  const struct prog_src_register *b)
79{
80   return (a->File == b->File)
81      && (a->Index == b->Index)
82      && (a->Swizzle == b->Swizzle)
83      && (a->Abs == b->Abs)
84      && (a->Negate == b->Negate)
85      && (a->RelAddr == 0)
86      && (b->RelAddr == 0);
87}
88
89static void
90get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
91{
92   const gl_constant_value *const value =
93      prog->Parameters->ParameterValues[r->Index];
94
95   data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
96   data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
97   data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
98   data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
99
100   if (r->Abs) {
101      data[0] = fabsf(data[0]);
102      data[1] = fabsf(data[1]);
103      data[2] = fabsf(data[2]);
104      data[3] = fabsf(data[3]);
105   }
106
107   if (r->Negate & 0x01) {
108      data[0] = -data[0];
109   }
110
111   if (r->Negate & 0x02) {
112      data[1] = -data[1];
113   }
114
115   if (r->Negate & 0x04) {
116      data[2] = -data[2];
117   }
118
119   if (r->Negate & 0x08) {
120      data[3] = -data[3];
121   }
122}
123
124/**
125 * Try to replace instructions that produce a constant result with simple moves
126 *
127 * The hope is that a following copy propagation pass will eliminate the
128 * unnecessary move instructions.
129 */
130GLboolean
131_mesa_constant_fold(struct gl_program *prog)
132{
133   bool progress = false;
134   unsigned i;
135
136   for (i = 0; i < prog->NumInstructions; i++) {
137      struct prog_instruction *const inst = &prog->Instructions[i];
138
139      switch (inst->Opcode) {
140      case OPCODE_ADD:
141	 if (src_regs_are_constant(inst, 2)) {
142	    float a[4];
143	    float b[4];
144	    float result[4];
145
146	    get_value(prog, &inst->SrcReg[0], a);
147	    get_value(prog, &inst->SrcReg[1], b);
148
149	    result[0] = a[0] + b[0];
150	    result[1] = a[1] + b[1];
151	    result[2] = a[2] + b[2];
152	    result[3] = a[3] + b[3];
153
154	    inst->Opcode = OPCODE_MOV;
155	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
156
157	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
158	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
159
160	    progress = true;
161	 }
162	 break;
163
164      case OPCODE_CMP:
165	 /* FINISHME: We could also optimize CMP instructions where the first
166	  * FINISHME: source is a constant that is either all < 0.0 or all
167	  * FINISHME: >= 0.0.
168	  */
169	 if (src_regs_are_constant(inst, 3)) {
170	    float a[4];
171	    float b[4];
172	    float c[4];
173	    float result[4];
174
175	    get_value(prog, &inst->SrcReg[0], a);
176	    get_value(prog, &inst->SrcReg[1], b);
177	    get_value(prog, &inst->SrcReg[2], c);
178
179            result[0] = a[0] < 0.0f ? b[0] : c[0];
180            result[1] = a[1] < 0.0f ? b[1] : c[1];
181            result[2] = a[2] < 0.0f ? b[2] : c[2];
182            result[3] = a[3] < 0.0f ? b[3] : c[3];
183
184	    inst->Opcode = OPCODE_MOV;
185	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
186
187	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
188	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
189	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
190	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
191
192	    progress = true;
193	 }
194	 break;
195
196      case OPCODE_DP2:
197      case OPCODE_DP3:
198      case OPCODE_DP4:
199	 if (src_regs_are_constant(inst, 2)) {
200	    float a[4];
201	    float b[4];
202	    float result;
203
204	    get_value(prog, &inst->SrcReg[0], a);
205	    get_value(prog, &inst->SrcReg[1], b);
206
207	    /* It seems like a loop could be used here, but we cleverly put
208	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
209	     * the opcode results in various failures of the loop control.
210	     */
211	    result = (a[0] * b[0]) + (a[1] * b[1]);
212
213	    if (inst->Opcode >= OPCODE_DP3)
214	       result += a[2] * b[2];
215
216	    if (inst->Opcode == OPCODE_DP4)
217	       result += a[3] * b[3];
218
219	    inst->Opcode = OPCODE_MOV;
220	    inst->SrcReg[0] = src_reg_for_float(prog, result);
221
222	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
223	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
224
225	    progress = true;
226	 }
227	 break;
228
229      case OPCODE_MUL:
230	 if (src_regs_are_constant(inst, 2)) {
231	    float a[4];
232	    float b[4];
233	    float result[4];
234
235	    get_value(prog, &inst->SrcReg[0], a);
236	    get_value(prog, &inst->SrcReg[1], b);
237
238	    result[0] = a[0] * b[0];
239	    result[1] = a[1] * b[1];
240	    result[2] = a[2] * b[2];
241	    result[3] = a[3] * b[3];
242
243	    inst->Opcode = OPCODE_MOV;
244	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
245
246	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
247	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
248
249	    progress = true;
250	 }
251	 break;
252
253      case OPCODE_SEQ:
254	 if (src_regs_are_constant(inst, 2)) {
255	    float a[4];
256	    float b[4];
257	    float result[4];
258
259	    get_value(prog, &inst->SrcReg[0], a);
260	    get_value(prog, &inst->SrcReg[1], b);
261
262	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
263	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
264	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
265	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
266
267	    inst->Opcode = OPCODE_MOV;
268	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
269
270	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
271	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
272
273	    progress = true;
274	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
275	    inst->Opcode = OPCODE_MOV;
276	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
277
278	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
279	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
280
281	    progress = true;
282	 }
283	 break;
284
285      case OPCODE_SGE:
286	 if (src_regs_are_constant(inst, 2)) {
287	    float a[4];
288	    float b[4];
289	    float result[4];
290
291	    get_value(prog, &inst->SrcReg[0], a);
292	    get_value(prog, &inst->SrcReg[1], b);
293
294	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
295	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
296	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
297	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
298
299	    inst->Opcode = OPCODE_MOV;
300	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
301
302	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
303	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
304
305	    progress = true;
306	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
307	    inst->Opcode = OPCODE_MOV;
308	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
309
310	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
311	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
312
313	    progress = true;
314	 }
315	 break;
316
317      case OPCODE_SGT:
318	 if (src_regs_are_constant(inst, 2)) {
319	    float a[4];
320	    float b[4];
321	    float result[4];
322
323	    get_value(prog, &inst->SrcReg[0], a);
324	    get_value(prog, &inst->SrcReg[1], b);
325
326	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
327	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
328	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
329	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
330
331	    inst->Opcode = OPCODE_MOV;
332	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
333
334	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
335	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
336
337	    progress = true;
338	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
339	    inst->Opcode = OPCODE_MOV;
340	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
341
342	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
343	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
344
345	    progress = true;
346	 }
347	 break;
348
349      case OPCODE_SLE:
350	 if (src_regs_are_constant(inst, 2)) {
351	    float a[4];
352	    float b[4];
353	    float result[4];
354
355	    get_value(prog, &inst->SrcReg[0], a);
356	    get_value(prog, &inst->SrcReg[1], b);
357
358	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
359	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
360	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
361	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
362
363	    inst->Opcode = OPCODE_MOV;
364	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
365
366	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
367	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
368
369	    progress = true;
370	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
371	    inst->Opcode = OPCODE_MOV;
372	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
373
374	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
375	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
376
377	    progress = true;
378	 }
379	 break;
380
381      case OPCODE_SLT:
382	 if (src_regs_are_constant(inst, 2)) {
383	    float a[4];
384	    float b[4];
385	    float result[4];
386
387	    get_value(prog, &inst->SrcReg[0], a);
388	    get_value(prog, &inst->SrcReg[1], b);
389
390	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
391	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
392	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
393	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
394
395	    inst->Opcode = OPCODE_MOV;
396	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
397
398	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
399	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
400
401	    progress = true;
402	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
403	    inst->Opcode = OPCODE_MOV;
404	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
405
406	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
407	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
408
409	    progress = true;
410	 }
411	 break;
412
413      case OPCODE_SNE:
414	 if (src_regs_are_constant(inst, 2)) {
415	    float a[4];
416	    float b[4];
417	    float result[4];
418
419	    get_value(prog, &inst->SrcReg[0], a);
420	    get_value(prog, &inst->SrcReg[1], b);
421
422	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
423	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
424	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
425	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
426
427	    inst->Opcode = OPCODE_MOV;
428	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
429
430	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
431	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
432
433	    progress = true;
434	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
435	    inst->Opcode = OPCODE_MOV;
436	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
437
438	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
439	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
440
441	    progress = true;
442	 }
443	 break;
444
445      default:
446	 break;
447      }
448   }
449
450   return progress;
451}
452