1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *    Chia-I Wu <olv@lunarg.com>
26 */
27
28#include "tgsi/tgsi_dump.h"
29#include "tgsi/tgsi_util.h"
30#include "toy_compiler.h"
31#include "toy_tgsi.h"
32#include "toy_legalize.h"
33#include "toy_optimize.h"
34#include "toy_helpers.h"
35#include "ilo_shader_internal.h"
36
37struct fs_compile_context {
38   struct ilo_shader *shader;
39   const struct ilo_shader_variant *variant;
40
41   struct toy_compiler tc;
42   struct toy_tgsi tgsi;
43
44   int const_cache;
45   int dispatch_mode;
46
47   struct {
48      int interp_perspective_pixel;
49      int interp_perspective_centroid;
50      int interp_perspective_sample;
51      int interp_nonperspective_pixel;
52      int interp_nonperspective_centroid;
53      int interp_nonperspective_sample;
54      int source_depth;
55      int source_w;
56      int pos_offset;
57   } payloads[2];
58
59   int first_const_grf;
60   int first_attr_grf;
61   int first_free_grf;
62   int last_free_grf;
63
64   int num_grf_per_vrf;
65
66   int first_free_mrf;
67   int last_free_mrf;
68};
69
70static void
71fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
72{
73   struct toy_compiler *tc = &fcc->tc;
74   const struct toy_src src_z =
75      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
76   const struct toy_src src_w =
77      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
78   const int fb_height =
79      (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
80   const bool origin_upper_left =
81      (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
82   const bool pixel_center_integer =
83      (fcc->tgsi.props.fs_coord_pixel_center ==
84       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
85   struct toy_src subspan_x, subspan_y;
86   struct toy_dst tmp, tmp_uw;
87   struct toy_dst real_dst[4];
88
89   tdst_transpose(dst, real_dst);
90
91   subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
92   subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
93
94   subspan_y = tsrc_offset(subspan_x, 0, 1);
95
96   tmp_uw = tdst_uw(tc_alloc_tmp(tc));
97   tmp = tc_alloc_tmp(tc);
98
99   /* X */
100   tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
101   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
102   if (pixel_center_integer)
103      tc_MOV(tc, real_dst[0], tsrc_from(tmp));
104   else
105      tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
106
107   /* Y */
108   tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
109   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
110   if (origin_upper_left && pixel_center_integer) {
111      tc_MOV(tc, real_dst[1], tsrc_from(tmp));
112   }
113   else {
114      struct toy_src y = tsrc_from(tmp);
115      float offset = 0.0f;
116
117      if (!pixel_center_integer)
118         offset += 0.5f;
119
120      if (!origin_upper_left) {
121         offset += (float) (fb_height - 1);
122         y = tsrc_negate(y);
123      }
124
125      tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
126   }
127
128   /* Z and W */
129   tc_MOV(tc, real_dst[2], src_z);
130   tc_INV(tc, real_dst[3], src_w);
131}
132
133static void
134fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
135{
136   struct toy_compiler *tc = &fcc->tc;
137   const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
138   struct toy_dst tmp_f, tmp;
139   struct toy_dst real_dst[4];
140
141   tdst_transpose(dst, real_dst);
142
143   tmp_f = tc_alloc_tmp(tc);
144   tmp = tdst_d(tmp_f);
145   tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
146   tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
147   tc_MOV(tc, tmp_f, tsrc_from(tmp));
148
149   /* convert to 1.0 and -1.0 */
150   tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
151   tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
152
153   tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
154   tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
155   tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
156}
157
158static void
159fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
160{
161   struct toy_compiler *tc = &fcc->tc;
162   struct toy_dst real_dst[4];
163   bool is_const = false;
164   int grf, interp, ch;
165
166   tdst_transpose(dst, real_dst);
167
168   grf = fcc->first_attr_grf + slot * 2;
169
170   switch (fcc->tgsi.inputs[slot].interp) {
171   case TGSI_INTERPOLATE_CONSTANT:
172      is_const = true;
173      break;
174   case TGSI_INTERPOLATE_LINEAR:
175      if (fcc->tgsi.inputs[slot].centroid)
176         interp = fcc->payloads[0].interp_nonperspective_centroid;
177      else
178         interp = fcc->payloads[0].interp_nonperspective_pixel;
179      break;
180   case TGSI_INTERPOLATE_COLOR:
181      if (fcc->variant->u.fs.flatshade) {
182         is_const = true;
183         break;
184      }
185      /* fall through */
186   case TGSI_INTERPOLATE_PERSPECTIVE:
187      if (fcc->tgsi.inputs[slot].centroid)
188         interp = fcc->payloads[0].interp_perspective_centroid;
189      else
190         interp = fcc->payloads[0].interp_perspective_pixel;
191      break;
192   default:
193      assert(!"unexpected FS interpolation");
194      interp = fcc->payloads[0].interp_perspective_pixel;
195      break;
196   }
197
198   if (is_const) {
199      struct toy_src a0[4];
200
201      a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
202      a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
203      a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
204      a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
205
206      for (ch = 0; ch < 4; ch++)
207         tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
208   }
209   else {
210      struct toy_src attr[4], uv;
211
212      attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
213      attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
214      attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
215      attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
216
217      uv = tsrc(TOY_FILE_GRF, interp, 0);
218
219      for (ch = 0; ch < 4; ch++) {
220         tc_add2(tc, GEN6_OPCODE_PLN, real_dst[ch],
221               tsrc_rect(attr[ch], TOY_RECT_010), uv);
222      }
223   }
224
225   if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
226      tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
227      tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
228      tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
229   }
230}
231
232static void
233fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
234                        struct toy_dst dst, int dim, int idx)
235{
236   int slot;
237
238   assert(!dim);
239
240   slot = toy_tgsi_find_input(&fcc->tgsi, idx);
241   if (slot < 0)
242      return;
243
244   switch (fcc->tgsi.inputs[slot].semantic_name) {
245   case TGSI_SEMANTIC_POSITION:
246      fetch_position(fcc, dst);
247      break;
248   case TGSI_SEMANTIC_FACE:
249      fetch_face(fcc, dst);
250      break;
251   default:
252      fetch_attr(fcc, dst, slot);
253      break;
254   }
255}
256
257static void
258fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
259                                    struct toy_dst dst, int dim,
260                                    struct toy_src idx)
261{
262   const struct toy_dst offset =
263      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
264   struct toy_compiler *tc = &fcc->tc;
265   unsigned simd_mode, param_size;
266   struct toy_inst *inst;
267   struct toy_src desc, real_src[4];
268   struct toy_dst tmp, real_dst[4];
269   unsigned i;
270
271   tsrc_transpose(idx, real_src);
272
273   /* set offset */
274   inst = tc_MOV(tc, offset, real_src[0]);
275   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
276
277   switch (inst->exec_size) {
278   case GEN6_EXECSIZE_8:
279      simd_mode = GEN6_MSG_SAMPLER_SIMD8;
280      param_size = 1;
281      break;
282   case GEN6_EXECSIZE_16:
283      simd_mode = GEN6_MSG_SAMPLER_SIMD16;
284      param_size = 2;
285      break;
286   default:
287      assert(!"unsupported execution size");
288      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
289      return;
290      break;
291   }
292
293   desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
294         simd_mode,
295         GEN6_MSG_SAMPLER_LD,
296         0,
297         fcc->shader->bt.const_base + dim);
298
299   tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
300   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
301   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
302
303   tdst_transpose(dst, real_dst);
304   for (i = 0; i < 4; i++) {
305      const struct toy_src src =
306         tsrc_offset(tsrc_from(tmp), param_size * i, 0);
307
308      /* cast to type D to make sure these are raw moves */
309      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
310   }
311}
312
313static bool
314fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
315                               struct toy_dst dst, int dim,
316                               struct toy_src idx)
317{
318   const int grf = fcc->first_const_grf + idx.val32 / 2;
319   const int grf_subreg = (idx.val32 & 1) * 16;
320   struct toy_src src;
321   struct toy_dst real_dst[4];
322   unsigned i;
323
324   if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
325       grf >= fcc->first_attr_grf)
326      return false;
327
328   src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
329
330   tdst_transpose(dst, real_dst);
331   for (i = 0; i < 4; i++) {
332      /* cast to type D to make sure these are raw moves */
333      tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
334   }
335
336   return true;
337}
338
339static void
340fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
341                                struct toy_dst dst, int dim, struct toy_src idx)
342{
343   const struct toy_dst header =
344      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
345   const struct toy_dst global_offset =
346      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
347   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
348   struct toy_compiler *tc = &fcc->tc;
349   unsigned msg_type, msg_ctrl, msg_len;
350   struct toy_inst *inst;
351   struct toy_src desc;
352   struct toy_dst tmp, real_dst[4];
353   unsigned i;
354
355   if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
356      return;
357
358   /* set message header */
359   inst = tc_MOV(tc, header, r0);
360   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
361
362   /* set global offset */
363   inst = tc_MOV(tc, global_offset, idx);
364   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
365   inst->exec_size = GEN6_EXECSIZE_1;
366   inst->src[0].rect = TOY_RECT_010;
367
368   msg_type = GEN6_MSG_DP_OWORD_BLOCK_READ;
369   msg_ctrl = GEN6_MSG_DP_OWORD_BLOCK_SIZE_1_LO;
370   msg_len = 1;
371
372   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
373         msg_type, msg_ctrl, fcc->shader->bt.const_base + dim);
374
375   tmp = tc_alloc_tmp(tc);
376
377   tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
378
379   tdst_transpose(dst, real_dst);
380   for (i = 0; i < 4; i++) {
381      const struct toy_src src =
382         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
383
384      /* cast to type D to make sure these are raw moves */
385      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
386   }
387}
388
389static void
390fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
391                                struct toy_dst dst, int dim, struct toy_src idx)
392{
393   struct toy_compiler *tc = &fcc->tc;
394   const struct toy_dst offset =
395      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
396   struct toy_src desc;
397   struct toy_inst *inst;
398   struct toy_dst tmp, real_dst[4];
399   unsigned i;
400
401   if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
402      return;
403
404   /*
405    * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
406    * changed from OWord Block Read to ld to increase performance in the
407    * classic driver.  Since we use the constant cache instead of the data
408    * cache, I wonder if we still want to follow the classic driver.
409    */
410
411   /* set offset */
412   inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
413   inst->exec_size = GEN6_EXECSIZE_8;
414   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
415
416   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
417         GEN6_MSG_SAMPLER_SIMD4X2,
418         GEN6_MSG_SAMPLER_LD,
419         0,
420         fcc->shader->bt.const_base + dim);
421
422   tmp = tc_alloc_tmp(tc);
423   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
424   inst->exec_size = GEN6_EXECSIZE_8;
425   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
426
427   tdst_transpose(dst, real_dst);
428   for (i = 0; i < 4; i++) {
429      const struct toy_src src =
430         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
431
432      /* cast to type D to make sure these are raw moves */
433      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
434   }
435}
436
437static void
438fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
439                         struct toy_dst dst, int idx)
440{
441   const uint32_t *imm;
442   struct toy_dst real_dst[4];
443   int ch;
444
445   imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
446
447   tdst_transpose(dst, real_dst);
448   /* raw moves */
449   for (ch = 0; ch < 4; ch++)
450      tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
451}
452
453static void
454fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
455                        struct toy_dst dst, int dim, int idx)
456{
457   struct toy_compiler *tc = &fcc->tc;
458   const struct toy_tgsi *tgsi = &fcc->tgsi;
459   int slot;
460
461   assert(!dim);
462
463   slot = toy_tgsi_find_system_value(tgsi, idx);
464   if (slot < 0)
465      return;
466
467   switch (tgsi->system_values[slot].semantic_name) {
468   case TGSI_SEMANTIC_PRIMID:
469   case TGSI_SEMANTIC_INSTANCEID:
470   case TGSI_SEMANTIC_VERTEXID:
471   default:
472      tc_fail(tc, "unhandled system value");
473      tc_MOV(tc, dst, tsrc_imm_d(0));
474      break;
475   }
476}
477
478static void
479fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
480                            struct toy_inst *inst)
481{
482   struct toy_compiler *tc = &fcc->tc;
483   int dim, idx;
484
485   assert(inst->src[0].file == TOY_FILE_IMM);
486   dim = inst->src[0].val32;
487
488   assert(inst->src[1].file == TOY_FILE_IMM);
489   idx = inst->src[1].val32;
490
491   switch (inst->opcode) {
492   case TOY_OPCODE_TGSI_IN:
493      fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
494      break;
495   case TOY_OPCODE_TGSI_CONST:
496      if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
497         fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
498      else
499         fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
500      break;
501   case TOY_OPCODE_TGSI_SV:
502      fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
503      break;
504   case TOY_OPCODE_TGSI_IMM:
505      assert(!dim);
506      fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
507      break;
508   default:
509      tc_fail(tc, "unhandled TGSI fetch");
510      break;
511   }
512
513   tc_discard_inst(tc, inst);
514}
515
516static void
517fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
518                              struct toy_inst *inst)
519{
520   struct toy_compiler *tc = &fcc->tc;
521   enum tgsi_file_type file;
522   int dim, idx;
523   struct toy_src indirect_dim, indirect_idx;
524
525   assert(inst->src[0].file == TOY_FILE_IMM);
526   file = inst->src[0].val32;
527
528   assert(inst->src[1].file == TOY_FILE_IMM);
529   dim = inst->src[1].val32;
530   indirect_dim = inst->src[2];
531
532   assert(inst->src[3].file == TOY_FILE_IMM);
533   idx = inst->src[3].val32;
534   indirect_idx = inst->src[4];
535
536   /* no dimension indirection */
537   assert(indirect_dim.file == TOY_FILE_IMM);
538   dim += indirect_dim.val32;
539
540   switch (inst->opcode) {
541   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
542      if (file == TGSI_FILE_CONSTANT) {
543         if (idx) {
544            struct toy_dst tmp = tc_alloc_tmp(tc);
545
546            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
547            indirect_idx = tsrc_from(tmp);
548         }
549
550         fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
551         break;
552      }
553      /* fall through */
554   case TOY_OPCODE_TGSI_INDIRECT_STORE:
555   default:
556      tc_fail(tc, "unhandled TGSI indirection");
557      break;
558   }
559
560   tc_discard_inst(tc, inst);
561}
562
563/**
564 * Emit instructions to move sampling parameters to the message registers.
565 */
566static int
567fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
568                           int base_mrf, int param_size,
569                           struct toy_src *coords, int num_coords,
570                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
571                           struct toy_src *ddx, struct toy_src *ddy,
572                           int num_derivs)
573{
574   int num_params, i;
575
576   assert(num_coords <= 4);
577   assert(num_derivs <= 3 && num_derivs <= num_coords);
578
579#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
580   switch (msg_type) {
581   case GEN6_MSG_SAMPLER_SAMPLE:
582      for (i = 0; i < num_coords; i++)
583         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
584      num_params = num_coords;
585      break;
586   case GEN6_MSG_SAMPLER_SAMPLE_B:
587   case GEN6_MSG_SAMPLER_SAMPLE_L:
588      for (i = 0; i < num_coords; i++)
589         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
590      tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
591      num_params = 5;
592      break;
593   case GEN6_MSG_SAMPLER_SAMPLE_C:
594      for (i = 0; i < num_coords; i++)
595         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
596      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
597      num_params = 5;
598      break;
599   case GEN6_MSG_SAMPLER_SAMPLE_D:
600      for (i = 0; i < num_coords; i++)
601         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
602      for (i = 0; i < num_derivs; i++) {
603         tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
604         tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
605      }
606      num_params = 4 + num_derivs * 2;
607      break;
608   case GEN6_MSG_SAMPLER_SAMPLE_B_C:
609   case GEN6_MSG_SAMPLER_SAMPLE_L_C:
610      for (i = 0; i < num_coords; i++)
611         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
612      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
613      tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
614      num_params = 6;
615      break;
616   case GEN6_MSG_SAMPLER_LD:
617      assert(num_coords <= 3);
618
619      for (i = 0; i < num_coords; i++)
620         tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
621      tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
622      tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
623      num_params = 5;
624      break;
625   case GEN6_MSG_SAMPLER_RESINFO:
626      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
627      num_params = 1;
628      break;
629   default:
630      tc_fail(tc, "unknown sampler opcode");
631      num_params = 0;
632      break;
633   }
634#undef SAMPLER_PARAM
635
636   return num_params * param_size;
637}
638
639static int
640fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
641                           int base_mrf, int param_size,
642                           struct toy_src *coords, int num_coords,
643                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
644                           struct toy_src *ddx, struct toy_src *ddy,
645                           int num_derivs)
646{
647   int num_params, i;
648
649   assert(num_coords <= 4);
650   assert(num_derivs <= 3 && num_derivs <= num_coords);
651
652#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
653   switch (msg_type) {
654   case GEN6_MSG_SAMPLER_SAMPLE:
655      for (i = 0; i < num_coords; i++)
656         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
657      num_params = num_coords;
658      break;
659   case GEN6_MSG_SAMPLER_SAMPLE_B:
660   case GEN6_MSG_SAMPLER_SAMPLE_L:
661      tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
662      for (i = 0; i < num_coords; i++)
663         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
664      num_params = 1 + num_coords;
665      break;
666   case GEN6_MSG_SAMPLER_SAMPLE_C:
667      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
668      for (i = 0; i < num_coords; i++)
669         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
670      num_params = 1 + num_coords;
671      break;
672   case GEN6_MSG_SAMPLER_SAMPLE_D:
673      for (i = 0; i < num_coords; i++) {
674         tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
675         if (i < num_derivs) {
676            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
677            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
678         }
679      }
680      num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
681      break;
682   case GEN6_MSG_SAMPLER_SAMPLE_B_C:
683   case GEN6_MSG_SAMPLER_SAMPLE_L_C:
684      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
685      tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
686      for (i = 0; i < num_coords; i++)
687         tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
688      num_params = 2 + num_coords;
689      break;
690   case GEN6_MSG_SAMPLER_LD:
691      assert(num_coords >= 1 && num_coords <= 3);
692
693      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
694      tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
695      for (i = 1; i < num_coords; i++)
696         tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
697      num_params = 1 + num_coords;
698      break;
699   case GEN6_MSG_SAMPLER_RESINFO:
700      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
701      num_params = 1;
702      break;
703   default:
704      tc_fail(tc, "unknown sampler opcode");
705      num_params = 0;
706      break;
707   }
708#undef SAMPLER_PARAM
709
710   return num_params * param_size;
711}
712
713/**
714 * Set up message registers and return the message descriptor for sampling.
715 */
716static struct toy_src
717fs_prepare_tgsi_sampling(struct fs_compile_context *fcc,
718                         const struct toy_inst *inst,
719                         int base_mrf, const uint32_t *saturate_coords,
720                         unsigned *ret_sampler_index)
721{
722   struct toy_compiler *tc = &fcc->tc;
723   unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
724   struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
725   int num_coords, ref_pos, num_derivs;
726   int sampler_src, param_size, i;
727
728   switch (inst->exec_size) {
729   case GEN6_EXECSIZE_8:
730      simd_mode = GEN6_MSG_SAMPLER_SIMD8;
731      param_size = 1;
732      break;
733   case GEN6_EXECSIZE_16:
734      simd_mode = GEN6_MSG_SAMPLER_SIMD16;
735      param_size = 2;
736      break;
737   default:
738      tc_fail(tc, "unsupported execute size for sampling");
739      return tsrc_null();
740      break;
741   }
742
743   num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target);
744   ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target);
745
746   tsrc_transpose(inst->src[0], coords);
747   bias_or_lod = tsrc_null();
748   ref_or_si = tsrc_null();
749   num_derivs = 0;
750   sampler_src = 1;
751
752   /*
753    * For TXD,
754    *
755    *   src0 := (x, y, z, w)
756    *   src1 := ddx
757    *   src2 := ddy
758    *   src3 := sampler
759    *
760    * For TEX2, TXB2, and TXL2,
761    *
762    *   src0 := (x, y, z, w)
763    *   src1 := (v or bias or lod, ...)
764    *   src2 := sampler
765    *
766    * For TEX, TXB, TXL, and TXP,
767    *
768    *   src0 := (x, y, z, w or bias or lod or projection)
769    *   src1 := sampler
770    *
771    * For TXQ,
772    *
773    *   src0 := (lod, ...)
774    *   src1 := sampler
775    *
776    * For TXQ_LZ,
777    *
778    *   src0 := sampler
779    *
780    * And for TXF,
781    *
782    *   src0 := (x, y, z, w or lod)
783    *   src1 := sampler
784    *
785    * State trackers should not generate opcode+texture combinations with
786    * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
787    */
788   switch (inst->opcode) {
789   case TOY_OPCODE_TGSI_TEX:
790      if (ref_pos >= 0) {
791         assert(ref_pos < 4);
792
793         msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
794         ref_or_si = coords[ref_pos];
795      }
796      else {
797         msg_type = GEN6_MSG_SAMPLER_SAMPLE;
798      }
799      break;
800   case TOY_OPCODE_TGSI_TXD:
801      if (ref_pos >= 0) {
802         assert(ref_pos < 4);
803
804         msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C;
805         ref_or_si = coords[ref_pos];
806
807         if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5))
808            tc_fail(tc, "TXD with shadow sampler not supported");
809      }
810      else {
811         msg_type = GEN6_MSG_SAMPLER_SAMPLE_D;
812      }
813
814      tsrc_transpose(inst->src[1], ddx);
815      tsrc_transpose(inst->src[2], ddy);
816      num_derivs = num_coords;
817      sampler_src = 3;
818      break;
819   case TOY_OPCODE_TGSI_TXP:
820      if (ref_pos >= 0) {
821         assert(ref_pos < 3);
822
823         msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
824         ref_or_si = coords[ref_pos];
825      }
826      else {
827         msg_type = GEN6_MSG_SAMPLER_SAMPLE;
828      }
829
830      /* project the coordinates */
831      {
832         struct toy_dst tmp[4];
833
834         tc_alloc_tmp4(tc, tmp);
835
836         tc_INV(tc, tmp[3], coords[3]);
837         for (i = 0; i < num_coords && i < 3; i++) {
838            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
839            coords[i] = tsrc_from(tmp[i]);
840         }
841
842         if (ref_pos >= i) {
843            tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
844            ref_or_si = tsrc_from(tmp[ref_pos]);
845         }
846      }
847      break;
848   case TOY_OPCODE_TGSI_TXB:
849      if (ref_pos >= 0) {
850         assert(ref_pos < 3);
851
852         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
853         ref_or_si = coords[ref_pos];
854      }
855      else {
856         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
857      }
858
859      bias_or_lod = coords[3];
860      break;
861   case TOY_OPCODE_TGSI_TXL:
862      if (ref_pos >= 0) {
863         assert(ref_pos < 3);
864
865         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
866         ref_or_si = coords[ref_pos];
867      }
868      else {
869         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
870      }
871
872      bias_or_lod = coords[3];
873      break;
874   case TOY_OPCODE_TGSI_TXF:
875      msg_type = GEN6_MSG_SAMPLER_LD;
876
877      switch (inst->tex.target) {
878      case TGSI_TEXTURE_2D_MSAA:
879      case TGSI_TEXTURE_2D_ARRAY_MSAA:
880         assert(ref_pos >= 0 && ref_pos < 4);
881         /* lod is always 0 */
882         bias_or_lod = tsrc_imm_d(0);
883         ref_or_si = coords[ref_pos];
884         break;
885      default:
886         bias_or_lod = coords[3];
887         break;
888      }
889
890      /* offset the coordinates */
891      if (!tsrc_is_null(inst->tex.offsets[0])) {
892         struct toy_dst tmp[4];
893         struct toy_src offsets[4];
894
895         tc_alloc_tmp4(tc, tmp);
896         tsrc_transpose(inst->tex.offsets[0], offsets);
897
898         for (i = 0; i < num_coords; i++) {
899            tc_ADD(tc, tmp[i], coords[i], offsets[i]);
900            coords[i] = tsrc_from(tmp[i]);
901         }
902      }
903
904      sampler_src = 1;
905      break;
906   case TOY_OPCODE_TGSI_TXQ:
907      msg_type = GEN6_MSG_SAMPLER_RESINFO;
908      num_coords = 0;
909      bias_or_lod = coords[0];
910      break;
911   case TOY_OPCODE_TGSI_TXQ_LZ:
912      msg_type = GEN6_MSG_SAMPLER_RESINFO;
913      num_coords = 0;
914      sampler_src = 0;
915      break;
916   case TOY_OPCODE_TGSI_TEX2:
917      if (ref_pos >= 0) {
918         assert(ref_pos < 5);
919
920         msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
921
922         if (ref_pos >= 4) {
923            struct toy_src src1[4];
924            tsrc_transpose(inst->src[1], src1);
925            ref_or_si = src1[ref_pos - 4];
926         }
927         else {
928            ref_or_si = coords[ref_pos];
929         }
930      }
931      else {
932         msg_type = GEN6_MSG_SAMPLER_SAMPLE;
933      }
934
935      sampler_src = 2;
936      break;
937   case TOY_OPCODE_TGSI_TXB2:
938      if (ref_pos >= 0) {
939         assert(ref_pos < 4);
940
941         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
942         ref_or_si = coords[ref_pos];
943      }
944      else {
945         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
946      }
947
948      {
949         struct toy_src src1[4];
950         tsrc_transpose(inst->src[1], src1);
951         bias_or_lod = src1[0];
952      }
953
954      sampler_src = 2;
955      break;
956   case TOY_OPCODE_TGSI_TXL2:
957      if (ref_pos >= 0) {
958         assert(ref_pos < 4);
959
960         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
961         ref_or_si = coords[ref_pos];
962      }
963      else {
964         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
965      }
966
967      {
968         struct toy_src src1[4];
969         tsrc_transpose(inst->src[1], src1);
970         bias_or_lod = src1[0];
971      }
972
973      sampler_src = 2;
974      break;
975   default:
976      assert(!"unhandled sampling opcode");
977      return tsrc_null();
978      break;
979   }
980
981   assert(inst->src[sampler_src].file == TOY_FILE_IMM);
982   sampler_index = inst->src[sampler_src].val32;
983   binding_table_index = fcc->shader->bt.tex_base + sampler_index;
984
985   /*
986    * From the Sandy Bridge PRM, volume 4 part 1, page 18:
987    *
988    *     "Note that the (cube map) coordinates delivered to the sampling
989    *      engine must already have been divided by the component with the
990    *      largest absolute value."
991    */
992   switch (inst->tex.target) {
993   case TGSI_TEXTURE_CUBE:
994   case TGSI_TEXTURE_SHADOWCUBE:
995   case TGSI_TEXTURE_CUBE_ARRAY:
996   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
997      /* TXQ does not need coordinates */
998      if (num_coords >= 3) {
999         struct toy_dst tmp[4];
1000
1001         tc_alloc_tmp4(tc, tmp);
1002
1003         tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
1004               tsrc_absolute(coords[1]), GEN6_COND_GE);
1005         tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
1006               tsrc_absolute(coords[2]), GEN6_COND_GE);
1007         tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
1008
1009         for (i = 0; i < 3; i++) {
1010            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
1011            coords[i] = tsrc_from(tmp[i]);
1012         }
1013      }
1014      break;
1015   }
1016
1017   /*
1018    * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
1019    * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
1020    * so that sampling outside the border gets the correct colors.
1021    */
1022   for (i = 0; i < MIN2(num_coords, 3); i++) {
1023      bool is_rect;
1024
1025      if (!(saturate_coords[i] & (1 << sampler_index)))
1026         continue;
1027
1028      switch (inst->tex.target) {
1029      case TGSI_TEXTURE_RECT:
1030      case TGSI_TEXTURE_SHADOWRECT:
1031         is_rect = true;
1032         break;
1033      default:
1034         is_rect = false;
1035         break;
1036      }
1037
1038      if (is_rect) {
1039         struct toy_src min, max;
1040         struct toy_dst tmp;
1041
1042         tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
1043         tmp = tc_alloc_tmp(tc);
1044
1045         /* saturate to [0, width] or [0, height] */
1046         /* TODO TXQ? */
1047         min = tsrc_imm_f(0.0f);
1048         max = tsrc_imm_f(2048.0f);
1049
1050         tc_SEL(tc, tmp, coords[i], min, GEN6_COND_G);
1051         tc_SEL(tc, tmp, tsrc_from(tmp), max, GEN6_COND_L);
1052
1053         coords[i] = tsrc_from(tmp);
1054      }
1055      else {
1056         struct toy_dst tmp;
1057         struct toy_inst *inst2;
1058
1059         tmp = tc_alloc_tmp(tc);
1060
1061         /* saturate to [0.0f, 1.0f] */
1062         inst2 = tc_MOV(tc, tmp, coords[i]);
1063         inst2->saturate = true;
1064
1065         coords[i] = tsrc_from(tmp);
1066      }
1067   }
1068
1069   /* set up sampler parameters */
1070   if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
1071      msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
1072            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1073   }
1074   else {
1075      msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
1076            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1077   }
1078
1079   /*
1080    * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1081    *
1082    *     "The maximum message length allowed to the sampler is 11. This would
1083    *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1084    *      SIMD16."
1085    */
1086   if (msg_len > 11)
1087      tc_fail(tc, "maximum length for messages to the sampler is 11");
1088
1089   if (ret_sampler_index)
1090      *ret_sampler_index = sampler_index;
1091
1092   return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
1093         false, simd_mode, msg_type, sampler_index, binding_table_index);
1094}
1095
1096static void
1097fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
1098                              struct toy_inst *inst)
1099{
1100   struct toy_compiler *tc = &fcc->tc;
1101   struct toy_dst dst[4], tmp[4];
1102   struct toy_src desc;
1103   unsigned sampler_index;
1104   int swizzles[4], i;
1105   bool need_filter;
1106
1107   desc = fs_prepare_tgsi_sampling(fcc, inst,
1108         fcc->first_free_mrf,
1109         fcc->variant->saturate_tex_coords,
1110         &sampler_index);
1111
1112   switch (inst->opcode) {
1113   case TOY_OPCODE_TGSI_TXF:
1114   case TOY_OPCODE_TGSI_TXQ:
1115   case TOY_OPCODE_TGSI_TXQ_LZ:
1116      need_filter = false;
1117      break;
1118   default:
1119      need_filter = true;
1120      break;
1121   }
1122
1123   toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER);
1124   inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
1125   inst->src[1] = desc;
1126   for (i = 2; i < ARRAY_SIZE(inst->src); i++)
1127      inst->src[i] = tsrc_null();
1128
1129   /* write to temps first */
1130   tc_alloc_tmp4(tc, tmp);
1131   for (i = 0; i < 4; i++)
1132      tmp[i].type = inst->dst.type;
1133   tdst_transpose(inst->dst, dst);
1134   inst->dst = tmp[0];
1135
1136   tc_move_inst(tc, inst);
1137
1138   if (need_filter) {
1139      assert(sampler_index < fcc->variant->num_sampler_views);
1140      swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
1141      swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
1142      swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
1143      swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
1144   }
1145   else {
1146      swizzles[0] = PIPE_SWIZZLE_X;
1147      swizzles[1] = PIPE_SWIZZLE_Y;
1148      swizzles[2] = PIPE_SWIZZLE_Z;
1149      swizzles[3] = PIPE_SWIZZLE_W;
1150   }
1151
1152   /* swizzle the results */
1153   for (i = 0; i < 4; i++) {
1154      switch (swizzles[i]) {
1155      case PIPE_SWIZZLE_0:
1156         tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1157         break;
1158      case PIPE_SWIZZLE_1:
1159         tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1160         break;
1161      default:
1162         tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1163         break;
1164      }
1165   }
1166}
1167
1168static void
1169fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1170{
1171   struct toy_dst dst[4];
1172   struct toy_src src[4];
1173   unsigned i;
1174
1175   tdst_transpose(inst->dst, dst);
1176   tsrc_transpose(inst->src[0], src);
1177
1178   /*
1179    * Every four fragments are from a 2x2 subspan, with
1180    *
1181    *   fragment 1 on the top-left,
1182    *   fragment 2 on the top-right,
1183    *   fragment 3 on the bottom-left,
1184    *   fragment 4 on the bottom-right.
1185    *
1186    * DDX should thus produce
1187    *
1188    *   dst = src.yyww - src.xxzz
1189    *
1190    * and DDY should produce
1191    *
1192    *   dst = src.zzww - src.xxyy
1193    *
1194    * But since we are in GEN6_ALIGN_1, swizzling does not work and we have to
1195    * play with the region parameters.
1196    */
1197   if (inst->opcode == TOY_OPCODE_DDX) {
1198      for (i = 0; i < 4; i++) {
1199         struct toy_src left, right;
1200
1201         left = tsrc_rect(src[i], TOY_RECT_220);
1202         right = tsrc_offset(left, 0, 1);
1203
1204         tc_ADD(tc, dst[i], right, tsrc_negate(left));
1205      }
1206   }
1207   else {
1208      for (i = 0; i < 4; i++) {
1209         struct toy_src top, bottom;
1210
1211         /* approximate with dst = src.zzzz - src.xxxx */
1212         top = tsrc_rect(src[i], TOY_RECT_440);
1213         bottom = tsrc_offset(top, 0, 2);
1214
1215         tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1216      }
1217   }
1218
1219   tc_discard_inst(tc, inst);
1220}
1221
1222static void
1223fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1224{
1225   /* fs_write_fb() has set up the message registers */
1226   toy_compiler_lower_to_send(tc, inst, true,
1227         GEN6_SFID_DP_RC);
1228}
1229
1230static void
1231fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1232{
1233   struct toy_dst pixel_mask_dst;
1234   struct toy_src f0, pixel_mask;
1235   struct toy_inst *tmp;
1236
1237   /* lower half of r1.7:ud */
1238   pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1239   pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1240
1241   f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, GEN6_ARF_F0, 0)), TOY_RECT_010);
1242
1243   /* KILL or KILL_IF */
1244   if (tsrc_is_null(inst->src[0])) {
1245      struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1246      struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, GEN6_ARF_F0, 0));
1247
1248      /* create a mask that masks out all pixels */
1249      tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1250      tmp->exec_size = GEN6_EXECSIZE_1;
1251      tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1252
1253      tc_CMP(tc, tdst_null(), dummy, dummy, GEN6_COND_NZ);
1254
1255      /* swapping the two src operands breaks glBitmap()!? */
1256      tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1257      tmp->exec_size = GEN6_EXECSIZE_1;
1258      tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1259   }
1260   else {
1261      struct toy_src src[4];
1262      unsigned i;
1263
1264      tsrc_transpose(inst->src[0], src);
1265      /* mask out killed pixels */
1266      for (i = 0; i < 4; i++) {
1267         tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1268               GEN6_COND_GE);
1269
1270         /* swapping the two src operands breaks glBitmap()!? */
1271         tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1272         tmp->exec_size = GEN6_EXECSIZE_1;
1273         tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1274      }
1275   }
1276
1277   tc_discard_inst(tc, inst);
1278}
1279
1280static void
1281fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1282{
1283   struct toy_compiler *tc = &fcc->tc;
1284   struct toy_inst *inst;
1285
1286   /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1287   tc_head(tc);
1288   while ((inst = tc_next(tc)) != NULL) {
1289      switch (inst->opcode) {
1290      case TOY_OPCODE_TGSI_IN:
1291      case TOY_OPCODE_TGSI_CONST:
1292      case TOY_OPCODE_TGSI_SV:
1293      case TOY_OPCODE_TGSI_IMM:
1294         fs_lower_opcode_tgsi_direct(fcc, inst);
1295         break;
1296      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1297      case TOY_OPCODE_TGSI_INDIRECT_STORE:
1298         fs_lower_opcode_tgsi_indirect(fcc, inst);
1299         break;
1300      case TOY_OPCODE_TGSI_TEX:
1301      case TOY_OPCODE_TGSI_TXB:
1302      case TOY_OPCODE_TGSI_TXD:
1303      case TOY_OPCODE_TGSI_TXL:
1304      case TOY_OPCODE_TGSI_TXP:
1305      case TOY_OPCODE_TGSI_TXF:
1306      case TOY_OPCODE_TGSI_TXQ:
1307      case TOY_OPCODE_TGSI_TXQ_LZ:
1308      case TOY_OPCODE_TGSI_TEX2:
1309      case TOY_OPCODE_TGSI_TXB2:
1310      case TOY_OPCODE_TGSI_TXL2:
1311      case TOY_OPCODE_TGSI_SAMPLE:
1312      case TOY_OPCODE_TGSI_SAMPLE_I:
1313      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1314      case TOY_OPCODE_TGSI_SAMPLE_B:
1315      case TOY_OPCODE_TGSI_SAMPLE_C:
1316      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1317      case TOY_OPCODE_TGSI_SAMPLE_D:
1318      case TOY_OPCODE_TGSI_SAMPLE_L:
1319      case TOY_OPCODE_TGSI_GATHER4:
1320      case TOY_OPCODE_TGSI_SVIEWINFO:
1321      case TOY_OPCODE_TGSI_SAMPLE_POS:
1322      case TOY_OPCODE_TGSI_SAMPLE_INFO:
1323         fs_lower_opcode_tgsi_sampling(fcc, inst);
1324         break;
1325      }
1326   }
1327
1328   tc_head(tc);
1329   while ((inst = tc_next(tc)) != NULL) {
1330      switch (inst->opcode) {
1331      case TOY_OPCODE_INV:
1332      case TOY_OPCODE_LOG:
1333      case TOY_OPCODE_EXP:
1334      case TOY_OPCODE_SQRT:
1335      case TOY_OPCODE_RSQ:
1336      case TOY_OPCODE_SIN:
1337      case TOY_OPCODE_COS:
1338      case TOY_OPCODE_FDIV:
1339      case TOY_OPCODE_POW:
1340      case TOY_OPCODE_INT_DIV_QUOTIENT:
1341      case TOY_OPCODE_INT_DIV_REMAINDER:
1342         toy_compiler_lower_math(tc, inst);
1343         break;
1344      case TOY_OPCODE_DDX:
1345      case TOY_OPCODE_DDY:
1346         fs_lower_opcode_derivative(tc, inst);
1347         break;
1348      case TOY_OPCODE_FB_WRITE:
1349         fs_lower_opcode_fb_write(tc, inst);
1350         break;
1351      case TOY_OPCODE_KIL:
1352         fs_lower_opcode_kil(tc, inst);
1353         break;
1354      default:
1355         if (inst->opcode > 127)
1356            tc_fail(tc, "unhandled virtual opcode");
1357         break;
1358      }
1359   }
1360}
1361
1362/**
1363 * Compile the shader.
1364 */
1365static bool
1366fs_compile(struct fs_compile_context *fcc)
1367{
1368   struct toy_compiler *tc = &fcc->tc;
1369   struct ilo_shader *sh = fcc->shader;
1370
1371   fs_lower_virtual_opcodes(fcc);
1372   toy_compiler_legalize_for_ra(tc);
1373   toy_compiler_optimize(tc);
1374   toy_compiler_allocate_registers(tc,
1375         fcc->first_free_grf,
1376         fcc->last_free_grf,
1377         fcc->num_grf_per_vrf);
1378   toy_compiler_legalize_for_asm(tc);
1379
1380   if (tc->fail) {
1381      ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1382      return false;
1383   }
1384
1385   if (ilo_debug & ILO_DEBUG_FS) {
1386      ilo_printf("legalized instructions:\n");
1387      toy_compiler_dump(tc);
1388      ilo_printf("\n");
1389   }
1390
1391   if (true) {
1392      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1393   }
1394   else {
1395      static const uint32_t microcode[] = {
1396         /* fill in the microcode here */
1397         0x0, 0x0, 0x0, 0x0,
1398      };
1399      const bool swap = true;
1400
1401      sh->kernel_size = sizeof(microcode);
1402      sh->kernel = MALLOC(sh->kernel_size);
1403
1404      if (sh->kernel) {
1405         const int num_dwords = sizeof(microcode) / 4;
1406         const uint32_t *src = microcode;
1407         uint32_t *dst = (uint32_t *) sh->kernel;
1408         int i;
1409
1410         for (i = 0; i < num_dwords; i += 4) {
1411            if (swap) {
1412               dst[i + 0] = src[i + 3];
1413               dst[i + 1] = src[i + 2];
1414               dst[i + 2] = src[i + 1];
1415               dst[i + 3] = src[i + 0];
1416            }
1417            else {
1418               memcpy(dst, src, 16);
1419            }
1420         }
1421      }
1422   }
1423
1424   if (!sh->kernel) {
1425      ilo_err("failed to compile FS: %s\n", tc->reason);
1426      return false;
1427   }
1428
1429   if (ilo_debug & ILO_DEBUG_FS) {
1430      ilo_printf("disassembly:\n");
1431      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
1432      ilo_printf("\n");
1433   }
1434
1435   return true;
1436}
1437
1438/**
1439 * Emit instructions to write the color buffers (and the depth buffer).
1440 */
1441static void
1442fs_write_fb(struct fs_compile_context *fcc)
1443{
1444   struct toy_compiler *tc = &fcc->tc;
1445   int base_mrf = fcc->first_free_mrf;
1446   const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1447   bool header_present = false;
1448   struct toy_src desc;
1449   unsigned msg_type, ctrl;
1450   int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1451   int pos_slot = -1, cbuf, i;
1452
1453   for (i = 0; i < ARRAY_SIZE(color_slots); i++)
1454      color_slots[i] = -1;
1455
1456   for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1457      if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1458         assert(fcc->tgsi.outputs[i].semantic_index < ARRAY_SIZE(color_slots));
1459         color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1460      }
1461      else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1462         pos_slot = i;
1463      }
1464   }
1465
1466   num_cbufs = fcc->variant->u.fs.num_cbufs;
1467   /* still need to send EOT (and probably depth) */
1468   if (!num_cbufs)
1469      num_cbufs = 1;
1470
1471   /* we need the header to specify the pixel mask or render target */
1472   if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1473      const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1474      struct toy_inst *inst;
1475
1476      inst = tc_MOV(tc, header, r0);
1477      inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1478      base_mrf += fcc->num_grf_per_vrf;
1479
1480      /* this is a two-register header */
1481      if (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) {
1482         inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1483         inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1484         base_mrf += fcc->num_grf_per_vrf;
1485      }
1486
1487      header_present = true;
1488   }
1489
1490   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1491      const int slot =
1492         color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1493      int mrf = base_mrf, vrf;
1494      struct toy_src src[4];
1495
1496      if (slot >= 0) {
1497         const unsigned undefined_mask =
1498            fcc->tgsi.outputs[slot].undefined_mask;
1499         const int index = fcc->tgsi.outputs[slot].index;
1500
1501         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1502         if (vrf >= 0) {
1503            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1504            tsrc_transpose(tmp, src);
1505         }
1506         else {
1507            /* use (0, 0, 0, 0) */
1508            tsrc_transpose(tsrc_imm_f(0.0f), src);
1509         }
1510
1511         for (i = 0; i < 4; i++) {
1512            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1513
1514            if (undefined_mask & (1 << i))
1515               src[i] = tsrc_imm_f(0.0f);
1516
1517            tc_MOV(tc, dst, src[i]);
1518
1519            mrf += fcc->num_grf_per_vrf;
1520         }
1521      }
1522      else {
1523         /* use (0, 0, 0, 0) */
1524         for (i = 0; i < 4; i++) {
1525            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1526
1527            tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1528            mrf += fcc->num_grf_per_vrf;
1529         }
1530      }
1531
1532      /* select BLEND_STATE[rt] */
1533      if (cbuf > 0) {
1534         struct toy_inst *inst;
1535
1536         inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1537         inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1538         inst->exec_size = GEN6_EXECSIZE_1;
1539         inst->src[0].rect = TOY_RECT_010;
1540      }
1541
1542      if (cbuf == 0 && pos_slot >= 0) {
1543         const int index = fcc->tgsi.outputs[pos_slot].index;
1544         const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1545         struct toy_src src[4];
1546         int vrf;
1547
1548         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1549         if (vrf >= 0) {
1550            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1551            tsrc_transpose(tmp, src);
1552         }
1553         else {
1554            /* use (0, 0, 0, 0) */
1555            tsrc_transpose(tsrc_imm_f(0.0f), src);
1556         }
1557
1558         /* only Z */
1559         tc_MOV(tc, dst, src[2]);
1560
1561         mrf += fcc->num_grf_per_vrf;
1562      }
1563
1564      msg_type = (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ?
1565         GEN6_MSG_DP_RT_MODE_SIMD16 >> 8 :
1566         GEN6_MSG_DP_RT_MODE_SIMD8_LO >> 8;
1567
1568      ctrl = (cbuf == num_cbufs - 1) << 12 |
1569             msg_type << 8;
1570
1571      desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1572            mrf - fcc->first_free_mrf, 0,
1573            header_present, false,
1574            GEN6_MSG_DP_RT_WRITE,
1575            ctrl, fcc->shader->bt.rt_base + cbuf);
1576
1577      tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1578            tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1579   }
1580}
1581
1582/**
1583 * Set up shader outputs for fixed-function units.
1584 */
1585static void
1586fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1587{
1588   unsigned i;
1589
1590   sh->out.count = tgsi->num_outputs;
1591   for (i = 0; i < tgsi->num_outputs; i++) {
1592      sh->out.register_indices[i] = tgsi->outputs[i].index;
1593      sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1594      sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1595
1596      if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1597         sh->out.has_pos = true;
1598   }
1599}
1600
1601/**
1602 * Set up shader inputs for fixed-function units.
1603 */
1604static void
1605fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1606                   bool flatshade)
1607{
1608   unsigned i;
1609
1610   sh->in.count = tgsi->num_inputs;
1611   for (i = 0; i < tgsi->num_inputs; i++) {
1612      sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1613      sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1614      sh->in.interp[i] = tgsi->inputs[i].interp;
1615      sh->in.centroid[i] = tgsi->inputs[i].centroid;
1616
1617      if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1618         sh->in.has_pos = true;
1619         continue;
1620      }
1621      else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1622         continue;
1623      }
1624
1625      switch (tgsi->inputs[i].interp) {
1626      case TGSI_INTERPOLATE_CONSTANT:
1627         sh->in.const_interp_enable |= 1 << i;
1628         break;
1629      case TGSI_INTERPOLATE_LINEAR:
1630         sh->in.has_linear_interp = true;
1631
1632         if (tgsi->inputs[i].centroid) {
1633            sh->in.barycentric_interpolation_mode |=
1634               GEN6_INTERP_NONPERSPECTIVE_CENTROID;
1635         }
1636         else {
1637            sh->in.barycentric_interpolation_mode |=
1638               GEN6_INTERP_NONPERSPECTIVE_PIXEL;
1639         }
1640         break;
1641      case TGSI_INTERPOLATE_COLOR:
1642         if (flatshade) {
1643            sh->in.const_interp_enable |= 1 << i;
1644            break;
1645         }
1646         /* fall through */
1647      case TGSI_INTERPOLATE_PERSPECTIVE:
1648         if (tgsi->inputs[i].centroid) {
1649            sh->in.barycentric_interpolation_mode |=
1650               GEN6_INTERP_PERSPECTIVE_CENTROID;
1651         }
1652         else {
1653            sh->in.barycentric_interpolation_mode |=
1654               GEN6_INTERP_PERSPECTIVE_PIXEL;
1655         }
1656         break;
1657      default:
1658         break;
1659      }
1660   }
1661}
1662
1663static int
1664fs_setup_payloads(struct fs_compile_context *fcc)
1665{
1666   const struct ilo_shader *sh = fcc->shader;
1667   int grf, i;
1668
1669   grf = 0;
1670
1671   /* r0: header */
1672   grf++;
1673
1674   /* r1-r2: coordinates and etc. */
1675   grf += (fcc->dispatch_mode == GEN6_PS_DISPATCH_32) ? 2 : 1;
1676
1677   for (i = 0; i < ARRAY_SIZE(fcc->payloads); i++) {
1678      const int reg_scale =
1679         (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) ? 1 : 2;
1680
1681      /* r3-r26 or r32-r55: barycentric interpolation parameters */
1682      if (sh->in.barycentric_interpolation_mode &
1683            (GEN6_INTERP_PERSPECTIVE_PIXEL)) {
1684         fcc->payloads[i].interp_perspective_pixel = grf;
1685         grf += 2 * reg_scale;
1686      }
1687      if (sh->in.barycentric_interpolation_mode &
1688            (GEN6_INTERP_PERSPECTIVE_CENTROID)) {
1689         fcc->payloads[i].interp_perspective_centroid = grf;
1690         grf += 2 * reg_scale;
1691      }
1692      if (sh->in.barycentric_interpolation_mode &
1693            (GEN6_INTERP_PERSPECTIVE_SAMPLE)) {
1694         fcc->payloads[i].interp_perspective_sample = grf;
1695         grf += 2 * reg_scale;
1696      }
1697      if (sh->in.barycentric_interpolation_mode &
1698            (GEN6_INTERP_NONPERSPECTIVE_PIXEL)) {
1699         fcc->payloads[i].interp_nonperspective_pixel = grf;
1700         grf += 2 * reg_scale;
1701      }
1702      if (sh->in.barycentric_interpolation_mode &
1703            (GEN6_INTERP_NONPERSPECTIVE_CENTROID)) {
1704         fcc->payloads[i].interp_nonperspective_centroid = grf;
1705         grf += 2 * reg_scale;
1706      }
1707      if (sh->in.barycentric_interpolation_mode &
1708            (GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) {
1709         fcc->payloads[i].interp_nonperspective_sample = grf;
1710         grf += 2 * reg_scale;
1711      }
1712
1713      /* r27-r28 or r56-r57: interpoloated depth */
1714      if (sh->in.has_pos) {
1715         fcc->payloads[i].source_depth = grf;
1716         grf += 1 * reg_scale;
1717      }
1718
1719      /* r29-r30 or r58-r59: interpoloated w */
1720      if (sh->in.has_pos) {
1721         fcc->payloads[i].source_w = grf;
1722         grf += 1 * reg_scale;
1723      }
1724
1725      /* r31 or r60: position offset */
1726      if (false) {
1727         fcc->payloads[i].pos_offset = grf;
1728         grf++;
1729      }
1730
1731      if (fcc->dispatch_mode != GEN6_PS_DISPATCH_32)
1732         break;
1733   }
1734
1735   return grf;
1736}
1737
1738/**
1739 * Translate the TGSI tokens.
1740 */
1741static bool
1742fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1743              struct toy_tgsi *tgsi)
1744{
1745   if (ilo_debug & ILO_DEBUG_FS) {
1746      ilo_printf("dumping fragment shader\n");
1747      ilo_printf("\n");
1748
1749      tgsi_dump(tokens, 0);
1750      ilo_printf("\n");
1751   }
1752
1753   toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1754   if (tc->fail) {
1755      ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1756      return false;
1757   }
1758
1759   if (ilo_debug & ILO_DEBUG_FS) {
1760      ilo_printf("TGSI translator:\n");
1761      toy_tgsi_dump(tgsi);
1762      ilo_printf("\n");
1763      toy_compiler_dump(tc);
1764      ilo_printf("\n");
1765   }
1766
1767   return true;
1768}
1769
1770/**
1771 * Set up FS compile context.  This includes translating the TGSI tokens.
1772 */
1773static bool
1774fs_setup(struct fs_compile_context *fcc,
1775         const struct ilo_shader_state *state,
1776         const struct ilo_shader_variant *variant)
1777{
1778   int num_consts;
1779
1780   memset(fcc, 0, sizeof(*fcc));
1781
1782   fcc->shader = CALLOC_STRUCT(ilo_shader);
1783   if (!fcc->shader)
1784      return false;
1785
1786   fcc->variant = variant;
1787
1788   toy_compiler_init(&fcc->tc, state->info.dev);
1789
1790   fcc->dispatch_mode = GEN6_PS_DISPATCH_8;
1791
1792   fcc->tc.templ.access_mode = GEN6_ALIGN_1;
1793   if (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) {
1794      fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1H;
1795      fcc->tc.templ.exec_size = GEN6_EXECSIZE_16;
1796   }
1797   else {
1798      fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1Q;
1799      fcc->tc.templ.exec_size = GEN6_EXECSIZE_8;
1800   }
1801
1802   fcc->tc.rect_linear_width = 8;
1803
1804   /*
1805    * The classic driver uses the sampler cache (gen6) or the data cache
1806    * (gen7).  Why?
1807    */
1808   fcc->const_cache = GEN6_SFID_DP_CC;
1809
1810   if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1811      toy_compiler_cleanup(&fcc->tc);
1812      FREE(fcc->shader);
1813      return false;
1814   }
1815
1816   fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1817   fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1818
1819   if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
1820      num_consts = (fcc->tgsi.const_count + 1) / 2;
1821
1822      /*
1823       * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1824       *
1825       *     "The sum of all four read length fields (each incremented to
1826       *      represent the actual read length) must be less than or equal to
1827       *      64"
1828       *
1829       * Since we are usually under a high register pressure, do not allow
1830       * for more than 8.
1831       */
1832      if (num_consts > 8)
1833         num_consts = 0;
1834   }
1835   else {
1836      num_consts = 0;
1837   }
1838
1839   fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
1840   fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1841
1842   fcc->first_const_grf = fs_setup_payloads(fcc);
1843   fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1844   fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1845   fcc->last_free_grf = 127;
1846
1847   /* m0 is reserved for system routines */
1848   fcc->first_free_mrf = 1;
1849   fcc->last_free_mrf = 15;
1850
1851   /* instructions are compressed with GEN6_EXECSIZE_16 */
1852   fcc->num_grf_per_vrf =
1853      (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 2 : 1;
1854
1855   if (ilo_dev_gen(fcc->tc.dev) >= ILO_GEN(7)) {
1856      fcc->last_free_grf -= 15;
1857      fcc->first_free_mrf = fcc->last_free_grf + 1;
1858      fcc->last_free_mrf = fcc->first_free_mrf + 14;
1859   }
1860
1861   fcc->shader->in.start_grf = fcc->first_const_grf;
1862   fcc->shader->has_kill = fcc->tgsi.uses_kill;
1863   fcc->shader->dispatch_16 =
1864      (fcc->dispatch_mode == GEN6_PS_DISPATCH_16);
1865
1866   fcc->shader->bt.rt_base = 0;
1867   fcc->shader->bt.rt_count = fcc->variant->u.fs.num_cbufs;
1868   /* to send EOT */
1869   if (!fcc->shader->bt.rt_count)
1870      fcc->shader->bt.rt_count = 1;
1871
1872   fcc->shader->bt.tex_base = fcc->shader->bt.rt_base +
1873                              fcc->shader->bt.rt_count;
1874   fcc->shader->bt.tex_count = fcc->variant->num_sampler_views;
1875
1876   fcc->shader->bt.const_base = fcc->shader->bt.tex_base +
1877                                fcc->shader->bt.tex_count;
1878   fcc->shader->bt.const_count = state->info.constant_buffer_count;
1879
1880   fcc->shader->bt.total_count = fcc->shader->bt.const_base +
1881                                 fcc->shader->bt.const_count;
1882
1883   return true;
1884}
1885
1886/**
1887 * Compile the fragment shader.
1888 */
1889struct ilo_shader *
1890ilo_shader_compile_fs(const struct ilo_shader_state *state,
1891                      const struct ilo_shader_variant *variant)
1892{
1893   struct fs_compile_context fcc;
1894
1895   if (!fs_setup(&fcc, state, variant))
1896      return NULL;
1897
1898   fs_write_fb(&fcc);
1899
1900   if (!fs_compile(&fcc)) {
1901      FREE(fcc.shader);
1902      fcc.shader = NULL;
1903   }
1904
1905   toy_tgsi_cleanup(&fcc.tgsi);
1906   toy_compiler_cleanup(&fcc.tc);
1907
1908   return fcc.shader;
1909}
1910