1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *    Chia-I Wu <olv@lunarg.com>
26 */
27
28#include "pipe/p_shader_tokens.h"
29#include "toy_compiler.h"
30#include "toy_tgsi.h"
31#include "toy_helpers.h"
32#include "toy_legalize.h"
33
34/**
35 * Lower an instruction to GEN6_OPCODE_SEND(C).
36 */
37void
38toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
39                           bool sendc, unsigned sfid)
40{
41   assert(inst->opcode >= 128);
42
43   inst->opcode = (sendc) ? GEN6_OPCODE_SENDC : GEN6_OPCODE_SEND;
44
45   /* thread control is reserved */
46   assert(inst->thread_ctrl == 0);
47
48   assert(inst->cond_modifier == GEN6_COND_NONE);
49   inst->cond_modifier = sfid;
50}
51
52static int
53math_op_to_func(unsigned opcode)
54{
55   switch (opcode) {
56   case TOY_OPCODE_INV:    return GEN6_MATH_INV;
57   case TOY_OPCODE_LOG:    return GEN6_MATH_LOG;
58   case TOY_OPCODE_EXP:    return GEN6_MATH_EXP;
59   case TOY_OPCODE_SQRT:   return GEN6_MATH_SQRT;
60   case TOY_OPCODE_RSQ:    return GEN6_MATH_RSQ;
61   case TOY_OPCODE_SIN:    return GEN6_MATH_SIN;
62   case TOY_OPCODE_COS:    return GEN6_MATH_COS;
63   case TOY_OPCODE_FDIV:   return GEN6_MATH_FDIV;
64   case TOY_OPCODE_POW:    return GEN6_MATH_POW;
65   case TOY_OPCODE_INT_DIV_QUOTIENT:   return GEN6_MATH_INT_DIV_QUOTIENT;
66   case TOY_OPCODE_INT_DIV_REMAINDER:  return GEN6_MATH_INT_DIV_REMAINDER;
67   default:
68       assert(!"unknown math opcode");
69       return -1;
70   }
71}
72
73/**
74 * Lower virtual math opcodes to GEN6_OPCODE_MATH.
75 */
76void
77toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
78{
79   struct toy_dst tmp;
80   int i;
81
82   /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
83   for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
84      if (tsrc_is_null(inst->src[i]))
85         break;
86
87      /* no swizzling in align1 */
88      /* XXX how about source modifiers? */
89      if (toy_file_is_virtual(inst->src[i].file) &&
90          !tsrc_is_swizzled(inst->src[i]) &&
91          !inst->src[i].absolute &&
92          !inst->src[i].negate)
93         continue;
94
95      tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
96      tc_MOV(tc, tmp, inst->src[i]);
97      inst->src[i] = tsrc_from(tmp);
98   }
99
100   /* FC[0:3] */
101   assert(inst->cond_modifier == GEN6_COND_NONE);
102   inst->cond_modifier = math_op_to_func(inst->opcode);
103   /* FC[4:5] */
104   assert(inst->thread_ctrl == 0);
105   inst->thread_ctrl = 0;
106
107   inst->opcode = GEN6_OPCODE_MATH;
108   tc_move_inst(tc, inst);
109
110   /* no writemask in align1 */
111   if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
112      struct toy_dst dst = inst->dst;
113      struct toy_inst *inst2;
114
115      tmp = tc_alloc_tmp(tc);
116      tmp.type = inst->dst.type;
117      inst->dst = tmp;
118
119      inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
120      inst2->pred_ctrl = inst->pred_ctrl;
121   }
122}
123
124static uint32_t
125absolute_imm(uint32_t imm32, enum toy_type type)
126{
127   union fi val = { .ui = imm32 };
128
129   switch (type) {
130   case TOY_TYPE_F:
131      val.f = fabs(val.f);
132      break;
133   case TOY_TYPE_D:
134      if (val.i < 0)
135         val.i = -val.i;
136      break;
137   case TOY_TYPE_W:
138      if ((int16_t) (val.ui & 0xffff) < 0)
139         val.i = -((int16_t) (val.ui & 0xffff));
140      break;
141   case TOY_TYPE_V:
142      assert(!"cannot take absoulte of immediates of type V");
143      break;
144   default:
145      break;
146   }
147
148   return val.ui;
149}
150
151static uint32_t
152negate_imm(uint32_t imm32, enum toy_type type)
153{
154   union fi val = { .ui = imm32 };
155
156   switch (type) {
157   case TOY_TYPE_F:
158      val.f = -val.f;
159      break;
160   case TOY_TYPE_D:
161   case TOY_TYPE_UD:
162      val.i = -val.i;
163      break;
164   case TOY_TYPE_W:
165   case TOY_TYPE_UW:
166      val.i = -((int16_t) (val.ui & 0xffff));
167      break;
168   default:
169      assert(!"negate immediate of unknown type");
170      break;
171   }
172
173   return val.ui;
174}
175
176static void
177validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
178{
179   bool move_inst = false;
180   int i;
181
182   for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
183      struct toy_dst tmp;
184
185      if (tsrc_is_null(inst->src[i]))
186         break;
187
188      if (inst->src[i].file != TOY_FILE_IMM)
189         continue;
190
191      if (inst->src[i].absolute) {
192         inst->src[i].val32 =
193            absolute_imm(inst->src[i].val32, inst->src[i].type);
194         inst->src[i].absolute = false;
195      }
196
197      if (inst->src[i].negate) {
198         inst->src[i].val32 =
199            negate_imm(inst->src[i].val32, inst->src[i].type);
200         inst->src[i].negate = false;
201      }
202
203      /* this is the last operand */
204      if (i + 1 == ARRAY_SIZE(inst->src) || tsrc_is_null(inst->src[i + 1]))
205         break;
206
207      /* need to use a temp if this imm is not the last operand */
208      /* TODO we should simply swap the operands if the op is commutative */
209      tmp = tc_alloc_tmp(tc);
210      tmp = tdst_type(tmp, inst->src[i].type);
211      tc_MOV(tc, tmp, inst->src[i]);
212      inst->src[i] = tsrc_from(tmp);
213
214      move_inst = true;
215   }
216
217   if (move_inst)
218      tc_move_inst(tc, inst);
219}
220
221static void
222lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
223{
224   const enum toy_type inst_type = inst->dst.type;
225   const struct toy_dst acc0 =
226      tdst_type(tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0), inst_type);
227   struct toy_inst *inst2;
228
229   /* only need to take care of integer multiplications */
230   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
231      return;
232
233   /* acc0 = (src0 & 0x0000ffff) * src1 */
234   tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
235
236   /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
237   inst2 = tc_add2(tc, GEN6_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
238         inst->src[0], inst->src[1]);
239   inst2->acc_wr_ctrl = true;
240
241   /* dst = acc0 & 0xffffffff */
242   tc_MOV(tc, inst->dst, tsrc_from(acc0));
243
244   tc_discard_inst(tc, inst);
245}
246
247static void
248lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
249{
250   const enum toy_type inst_type = inst->dst.type;
251
252   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
253      const struct toy_dst acc0 = tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0);
254
255      tc_MOV(tc, acc0, inst->src[2]);
256      inst->src[2] = tsrc_null();
257      tc_move_inst(tc, inst);
258   }
259   else {
260      struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
261      struct toy_inst *inst2;
262
263      inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
264      lower_opcode_mul(tc, inst2);
265
266      tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);
267
268      tc_discard_inst(tc, inst);
269   }
270}
271
272/**
273 * Legalize the instructions for register allocation.
274 */
275void
276toy_compiler_legalize_for_ra(struct toy_compiler *tc)
277{
278   struct toy_inst *inst;
279
280   tc_head(tc);
281   while ((inst = tc_next(tc)) != NULL) {
282      switch (inst->opcode) {
283      case GEN6_OPCODE_MAC:
284         lower_opcode_mac(tc, inst);
285         break;
286      case GEN6_OPCODE_MAD:
287         /* TODO operands must be floats */
288         break;
289      case GEN6_OPCODE_MUL:
290         lower_opcode_mul(tc, inst);
291         break;
292      default:
293         if (inst->opcode > TOY_OPCODE_LAST_HW)
294            tc_fail(tc, "internal opcodes not lowered");
295      }
296   }
297
298   /* loop again as the previous pass may add new instructions */
299   tc_head(tc);
300   while ((inst = tc_next(tc)) != NULL) {
301      validate_imm(tc, inst);
302   }
303}
304
305static void
306patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
307{
308   struct toy_inst *inst2;
309   int nest_level, dist;
310
311   nest_level = 0;
312   dist = -1;
313
314   /* search backward */
315   LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
316         &tc->instructions, list) {
317      if (inst2->marker) {
318         if (inst2->opcode == TOY_OPCODE_DO) {
319            if (nest_level) {
320               nest_level--;
321            }
322            else {
323               /* the following instruction */
324               dist++;
325               break;
326            }
327         }
328
329         continue;
330      }
331
332      if (inst2->opcode == GEN6_OPCODE_WHILE)
333         nest_level++;
334
335      dist--;
336   }
337
338   if (ilo_dev_gen(tc->dev) >= ILO_GEN(8))
339      inst->src[1] = tsrc_imm_d(dist * 16);
340   else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
341      inst->src[1] = tsrc_imm_w(dist * 2);
342   else
343      inst->dst = tdst_imm_w(dist * 2);
344}
345
346static void
347patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
348{
349   struct toy_inst *inst2;
350   int nest_level, dist;
351   int jip, uip;
352
353   nest_level = 0;
354   dist = 1;
355   jip = 0;
356   uip = 0;
357
358   /* search forward */
359   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
360      if (inst2->marker)
361         continue;
362
363      if (inst2->opcode == GEN6_OPCODE_ENDIF) {
364         if (nest_level) {
365            nest_level--;
366         }
367         else {
368            uip = dist * 2;
369            if (!jip)
370               jip = uip;
371            break;
372         }
373      }
374      else if (inst2->opcode == GEN6_OPCODE_ELSE &&
375               inst->opcode == GEN6_OPCODE_IF) {
376         if (!nest_level) {
377            /* the following instruction */
378            jip = (dist + 1) * 2;
379
380            if (ilo_dev_gen(tc->dev) == ILO_GEN(6)) {
381               uip = jip;
382               break;
383            }
384         }
385      }
386      else if (inst2->opcode == GEN6_OPCODE_IF) {
387         nest_level++;
388      }
389
390      dist++;
391   }
392
393   if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) {
394      inst->dst.type = TOY_TYPE_D;
395      inst->src[0] = tsrc_imm_d(uip * 8);
396      inst->src[1] = tsrc_imm_d(jip * 8);
397   } else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
398      /* what should the type be? */
399      inst->dst.type = TOY_TYPE_D;
400      inst->src[0].type = TOY_TYPE_D;
401      inst->src[1] = tsrc_imm_d(uip << 16 | jip);
402   } else {
403      inst->dst = tdst_imm_w(jip);
404   }
405}
406
407static void
408patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
409{
410   struct toy_inst *inst2;
411   bool found = false;
412   int dist = 1;
413
414   /* search forward for instructions that may enable channels */
415   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
416      if (inst2->marker)
417         continue;
418
419      switch (inst2->opcode) {
420      case GEN6_OPCODE_ENDIF:
421      case GEN6_OPCODE_ELSE:
422      case GEN6_OPCODE_WHILE:
423         found = true;
424         break;
425      default:
426         break;
427      }
428
429      if (found)
430         break;
431
432      dist++;
433   }
434
435   /* should we set dist to (dist - 1) or 1? */
436   if (!found)
437      dist = 1;
438
439   if (ilo_dev_gen(tc->dev) >= ILO_GEN(8))
440      inst->src[1] = tsrc_imm_d(dist * 16);
441   else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
442      inst->src[1] = tsrc_imm_w(dist * 2);
443   else
444      inst->dst = tdst_imm_w(dist * 2);
445}
446
447static void
448patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
449{
450   struct toy_inst *inst2, *inst3;
451   int nest_level, dist, jip, uip;
452
453   nest_level = 0;
454   dist = 1;
455   jip = 1 * 2;
456   uip = 1 * 2;
457
458   /* search forward */
459   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
460      if (inst2->marker) {
461         if (inst2->opcode == TOY_OPCODE_DO)
462            nest_level++;
463         continue;
464      }
465
466      if (inst2->opcode == GEN6_OPCODE_ELSE ||
467          inst2->opcode == GEN6_OPCODE_ENDIF ||
468          inst2->opcode == GEN6_OPCODE_WHILE) {
469         jip = dist * 2;
470         break;
471      }
472
473      dist++;
474   }
475
476   /* go on to determine uip */
477   inst3 = inst2;
478   LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
479      if (inst2->marker) {
480         if (inst2->opcode == TOY_OPCODE_DO)
481            nest_level++;
482         continue;
483      }
484
485      if (inst2->opcode == GEN6_OPCODE_WHILE) {
486         if (nest_level) {
487            nest_level--;
488         }
489         else {
490            /* the following instruction */
491            if (ilo_dev_gen(tc->dev) == ILO_GEN(6) &&
492                inst->opcode == GEN6_OPCODE_BREAK)
493               dist++;
494
495            uip = dist * 2;
496            break;
497         }
498      }
499
500      dist++;
501   }
502
503   /* should the type be D or W? */
504   inst->dst.type = TOY_TYPE_D;
505   if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) {
506      inst->src[0] = tsrc_imm_d(uip * 8);
507      inst->src[1] = tsrc_imm_d(jip * 8);
508   } else {
509      inst->src[0].type = TOY_TYPE_D;
510      inst->src[1] = tsrc_imm_d(uip << 16 | jip);
511   }
512}
513
514/**
515 * Legalize the instructions for assembling.
516 */
517void
518toy_compiler_legalize_for_asm(struct toy_compiler *tc)
519{
520   struct toy_inst *inst;
521   int pc = 0;
522
523   tc_head(tc);
524   while ((inst = tc_next(tc)) != NULL) {
525      int i;
526
527      pc++;
528
529      /*
530       * From the Sandy Bridge PRM, volume 4 part 2, page 112:
531       *
532       *     "Specifically, for instructions with a single source, it only
533       *      uses the first source operand <src0>. In this case, the second
534       *      source operand <src1> must be set to null and also with the same
535       *      type as the first source operand <src0>.  It is a special case
536       *      when <src0> is an immediate, as an immediate <src0> uses DW3 of
537       *      the instruction word, which is normally used by <src1>.  In this
538       *      case, <src1> must be programmed with register file ARF and the
539       *      same data type as <src0>."
540       *
541       * Since we already fill unused operands with null, we only need to take
542       * care of the type.
543       */
544      if (tsrc_is_null(inst->src[1]))
545         inst->src[1].type = inst->src[0].type;
546
547      switch (inst->opcode) {
548      case GEN6_OPCODE_MATH:
549         /* math does not support align16 nor exec_size > 8 */
550         inst->access_mode = GEN6_ALIGN_1;
551
552         if (inst->exec_size == GEN6_EXECSIZE_16) {
553            /*
554             * From the Ivy Bridge PRM, volume 4 part 3, page 192:
555             *
556             *     "INT DIV function does not support SIMD16."
557             */
558            if (ilo_dev_gen(tc->dev) < ILO_GEN(7) ||
559                inst->cond_modifier == GEN6_MATH_INT_DIV_QUOTIENT ||
560                inst->cond_modifier == GEN6_MATH_INT_DIV_REMAINDER) {
561               struct toy_inst *inst2;
562
563               inst->exec_size = GEN6_EXECSIZE_8;
564               inst->qtr_ctrl = GEN6_QTRCTRL_1Q;
565
566               inst2 = tc_duplicate_inst(tc, inst);
567               inst2->qtr_ctrl = GEN6_QTRCTRL_2Q;
568               inst2->dst = tdst_offset(inst2->dst, 1, 0);
569               inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
570               if (!tsrc_is_null(inst2->src[1]))
571                  inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
572
573               pc++;
574            }
575         }
576         break;
577      case GEN6_OPCODE_IF:
578         if (ilo_dev_gen(tc->dev) >= ILO_GEN(7) &&
579             inst->cond_modifier != GEN6_COND_NONE) {
580            struct toy_inst *inst2;
581
582            inst2 = tc_duplicate_inst(tc, inst);
583
584            /* replace the original IF by CMP */
585            inst->opcode = GEN6_OPCODE_CMP;
586
587            /* predicate control instead of condition modifier */
588            inst2->dst = tdst_null();
589            inst2->src[0] = tsrc_null();
590            inst2->src[1] = tsrc_null();
591            inst2->cond_modifier = GEN6_COND_NONE;
592            inst2->pred_ctrl = GEN6_PREDCTRL_NORMAL;
593
594            pc++;
595         }
596         break;
597      default:
598         break;
599      }
600
601      /* MRF to GRF */
602      if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
603         for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
604            if (inst->src[i].file != TOY_FILE_MRF)
605               continue;
606            else if (tsrc_is_null(inst->src[i]))
607               break;
608
609            inst->src[i].file = TOY_FILE_GRF;
610         }
611
612         if (inst->dst.file == TOY_FILE_MRF)
613            inst->dst.file = TOY_FILE_GRF;
614      }
615   }
616
617   tc->num_instructions = pc;
618
619   /* set JIP/UIP */
620   tc_head(tc);
621   while ((inst = tc_next(tc)) != NULL) {
622      switch (inst->opcode) {
623      case GEN6_OPCODE_IF:
624      case GEN6_OPCODE_ELSE:
625         patch_if_else_jip(tc, inst);
626         break;
627      case GEN6_OPCODE_ENDIF:
628         patch_endif_jip(tc, inst);
629         break;
630      case GEN6_OPCODE_WHILE:
631         patch_while_jip(tc, inst);
632         break;
633      case GEN6_OPCODE_BREAK:
634      case GEN6_OPCODE_CONT:
635         patch_break_continue_jip(tc, inst);
636         break;
637      default:
638         break;
639      }
640   }
641}
642