nv50_program.c revision d2d19ea51fa3575a8d014a69a9b835c335728817
1/*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv50_program.h"
24#include "nv50_pc.h"
25#include "nv50_context.h"
26
27#include "pipe/p_shader_tokens.h"
28#include "tgsi/tgsi_parse.h"
29#include "tgsi/tgsi_util.h"
30#include "tgsi/tgsi_dump.h"
31
32#include "codegen/nv50_ir_driver.h"
33
34static INLINE unsigned
35bitcount4(const uint32_t val)
36{
37   static const unsigned cnt[16]
38   = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
39   return cnt[val & 0xf];
40}
41
42static unsigned
43nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
44{
45   unsigned mask = inst->Dst[0].Register.WriteMask;
46
47   switch (inst->Instruction.Opcode) {
48   case TGSI_OPCODE_COS:
49   case TGSI_OPCODE_SIN:
50      return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
51   case TGSI_OPCODE_DP3:
52      return 0x7;
53   case TGSI_OPCODE_DP4:
54   case TGSI_OPCODE_DPH:
55   case TGSI_OPCODE_KIL: /* WriteMask ignored */
56      return 0xf;
57   case TGSI_OPCODE_DST:
58      return mask & (c ? 0xa : 0x6);
59   case TGSI_OPCODE_EX2:
60   case TGSI_OPCODE_EXP:
61   case TGSI_OPCODE_LG2:
62   case TGSI_OPCODE_LOG:
63   case TGSI_OPCODE_POW:
64   case TGSI_OPCODE_RCP:
65   case TGSI_OPCODE_RSQ:
66   case TGSI_OPCODE_SCS:
67      return 0x1;
68   case TGSI_OPCODE_IF:
69      return 0x1;
70   case TGSI_OPCODE_LIT:
71      return 0xb;
72   case TGSI_OPCODE_TEX:
73   case TGSI_OPCODE_TXB:
74   case TGSI_OPCODE_TXL:
75   case TGSI_OPCODE_TXP:
76   {
77      const struct tgsi_instruction_texture *tex;
78
79      assert(inst->Instruction.Texture);
80      tex = &inst->Texture;
81
82      mask = 0x7;
83      if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
84          inst->Instruction.Opcode != TGSI_OPCODE_TXD)
85         mask |= 0x8; /* bias, lod or proj */
86
87      switch (tex->Texture) {
88      case TGSI_TEXTURE_1D:
89         mask &= 0x9;
90         break;
91      case TGSI_TEXTURE_SHADOW1D:
92         mask &= 0x5;
93         break;
94      case TGSI_TEXTURE_2D:
95         mask &= 0xb;
96         break;
97      default:
98         break;
99      }
100   }
101  	   return mask;
102   case TGSI_OPCODE_XPD:
103   {
104      unsigned x = 0;
105      if (mask & 1) x |= 0x6;
106      if (mask & 2) x |= 0x5;
107      if (mask & 4) x |= 0x3;
108      return x;
109   }
110   default:
111      break;
112   }
113
114   return mask;
115}
116
117static void
118nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
119{
120   int i, c;
121
122   for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
123      for (c = 0; c < 4; ++c)
124         ti->input_access[i][c] = id;
125
126   ti->indirect_inputs = TRUE;
127}
128
129static void
130nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
131{
132   int i, c;
133
134   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
135      for (c = 0; c < 4; ++c)
136         ti->output_access[i][c] = id;
137
138   ti->indirect_outputs = TRUE;
139}
140
141static void
142prog_inst(struct nv50_translation_info *ti,
143          const struct tgsi_full_instruction *inst, int id)
144{
145   const struct tgsi_dst_register *dst;
146   const struct tgsi_src_register *src;
147   int s, c, k;
148   unsigned mask;
149
150   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
151      ti->subr[ti->subr_nr].pos = id - 1;
152      ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
153      ++ti->subr_nr;
154   }
155
156   if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
157      dst = &inst->Dst[0].Register;
158
159      for (c = 0; c < 4; ++c) {
160         if (dst->Indirect)
161            nv50_indirect_outputs(ti, id);
162         if (!(dst->WriteMask & (1 << c)))
163            continue;
164         ti->output_access[dst->Index][c] = id;
165      }
166
167      if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
168          inst->Src[0].Register.File == TGSI_FILE_INPUT &&
169          dst->Index == ti->edgeflag_out)
170         ti->p->vp.edgeflag = inst->Src[0].Register.Index;
171   } else
172   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
173      if (inst->Dst[0].Register.Indirect)
174         ti->store_to_memory = TRUE;
175   }
176
177   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
178      src = &inst->Src[s].Register;
179      if (src->File == TGSI_FILE_TEMPORARY)
180         if (inst->Src[s].Register.Indirect)
181            ti->store_to_memory = TRUE;
182      if (src->File != TGSI_FILE_INPUT)
183         continue;
184      mask = nv50_tgsi_src_mask(inst, s);
185
186      if (inst->Src[s].Register.Indirect)
187         nv50_indirect_inputs(ti, id);
188
189      for (c = 0; c < 4; ++c) {
190         if (!(mask & (1 << c)))
191            continue;
192         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
193         if (k <= TGSI_SWIZZLE_W)
194            ti->input_access[src->Index][k] = id;
195      }
196   }
197}
198
199/* Probably should introduce something like struct tgsi_function_declaration
200 * instead of trying to guess inputs/outputs.
201 */
202static void
203prog_subroutine_inst(struct nv50_subroutine *subr,
204                     const struct tgsi_full_instruction *inst)
205{
206   const struct tgsi_dst_register *dst;
207   const struct tgsi_src_register *src;
208   int s, c, k;
209   unsigned mask;
210
211   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
212      src = &inst->Src[s].Register;
213      if (src->File != TGSI_FILE_TEMPORARY)
214         continue;
215      mask = nv50_tgsi_src_mask(inst, s);
216
217      assert(!inst->Src[s].Register.Indirect);
218
219      for (c = 0; c < 4; ++c) {
220         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
221
222         if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
223            if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
224               subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
225      }
226   }
227
228   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
229      dst = &inst->Dst[0].Register;
230
231      for (c = 0; c < 4; ++c)
232         if (dst->WriteMask & (1 << c))
233            subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
234   }
235}
236
237static void
238prog_immediate(struct nv50_translation_info *ti,
239               const struct tgsi_full_immediate *imm)
240{
241   int c;
242   unsigned n = ti->immd32_nr++;
243
244   assert(ti->immd32_nr <= ti->scan.immediate_count);
245
246   for (c = 0; c < 4; ++c)
247      ti->immd32[n * 4 + c] = imm->u[c].Uint;
248
249   ti->immd32_ty[n] = imm->Immediate.DataType;
250}
251
252static INLINE unsigned
253translate_interpolate(const struct tgsi_full_declaration *decl)
254{
255   unsigned mode;
256
257   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
258      mode = NV50_INTERP_FLAT;
259   else
260   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
261      mode = 0;
262   else
263      mode = NV50_INTERP_LINEAR;
264
265   if (decl->Declaration.Centroid)
266      mode |= NV50_INTERP_CENTROID;
267
268   return mode;
269}
270
271static void
272prog_decl(struct nv50_translation_info *ti,
273          const struct tgsi_full_declaration *decl)
274{
275   unsigned i, first, last, sn = 0, si = 0;
276
277   first = decl->Range.First;
278   last = decl->Range.Last;
279
280   if (decl->Declaration.Semantic) {
281      sn = decl->Semantic.Name;
282      si = decl->Semantic.Index;
283   }
284
285   switch (decl->Declaration.File) {
286   case TGSI_FILE_INPUT:
287      for (i = first; i <= last; ++i)
288         ti->interp_mode[i] = translate_interpolate(decl);
289
290      if (!decl->Declaration.Semantic)
291         break;
292
293      for (i = first; i <= last; ++i) {
294         ti->p->in[i].sn = sn;
295         ti->p->in[i].si = si;
296      }
297
298      switch (sn) {
299      case TGSI_SEMANTIC_FACE:
300         break;
301      case TGSI_SEMANTIC_COLOR:
302         if (ti->p->type == PIPE_SHADER_FRAGMENT)
303            ti->p->vp.bfc[si] = first;
304         break;
305      }
306      break;
307   case TGSI_FILE_OUTPUT:
308      if (!decl->Declaration.Semantic)
309         break;
310
311      for (i = first; i <= last; ++i) {
312         ti->p->out[i].sn = sn;
313         ti->p->out[i].si = si;
314      }
315
316      switch (sn) {
317      case TGSI_SEMANTIC_BCOLOR:
318         ti->p->vp.bfc[si] = first;
319         break;
320      case TGSI_SEMANTIC_PSIZE:
321         ti->p->vp.psiz = first;
322         break;
323      case TGSI_SEMANTIC_EDGEFLAG:
324         ti->edgeflag_out = first;
325         break;
326      default:
327         break;
328      }
329      break;
330   case TGSI_FILE_SYSTEM_VALUE:
331      /* For VP/GP inputs, they are put in s[] after the last normal input.
332       * Let sysval_map reflect the order of the sysvals in s[] and fixup later.
333       */
334      switch (decl->Semantic.Name) {
335      case TGSI_SEMANTIC_FACE:
336         break;
337      case TGSI_SEMANTIC_INSTANCEID:
338         ti->p->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
339         ti->sysval_map[first] = 2;
340         break;
341      case TGSI_SEMANTIC_PRIMID:
342         break;
343         /*
344      case TGSI_SEMANTIC_PRIMIDIN:
345         break;
346      case TGSI_SEMANTIC_VERTEXID:
347         break;
348         */
349      default:
350         break;
351      }
352      break;
353   case TGSI_FILE_CONSTANT:
354      ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
355      break;
356   case TGSI_FILE_ADDRESS:
357   case TGSI_FILE_SAMPLER:
358   case TGSI_FILE_TEMPORARY:
359      break;
360   default:
361      assert(0);
362      break;
363   }
364}
365
366static int
367nv50_vertprog_prepare(struct nv50_translation_info *ti)
368{
369   struct nv50_program *p = ti->p;
370   int i, c;
371   unsigned num_inputs = 0;
372
373   ti->input_file = NV_FILE_MEM_S;
374   ti->output_file = NV_FILE_OUT;
375
376   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
377      p->in[i].id = i;
378      p->in[i].hw = num_inputs;
379
380      for (c = 0; c < 4; ++c) {
381         if (!ti->input_access[i][c])
382            continue;
383         ti->input_map[i][c] = num_inputs++;
384         p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
385      }
386   }
387
388   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
389      p->out[i].id = i;
390      p->out[i].hw = p->max_out;
391
392      for (c = 0; c < 4; ++c) {
393         if (!ti->output_access[i][c])
394            continue;
395         ti->output_map[i][c] = p->max_out++;
396         p->out[i].mask |= 1 << c;
397      }
398   }
399
400   p->vp.clpd = p->max_out;
401   p->max_out += p->vp.clpd_nr;
402
403   for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
404      switch (ti->sysval_map[i]) {
405      case 2:
406         if (!(ti->p->vp.attrs[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID))
407            ti->sysval_map[i] = 1;
408         ti->sysval_map[i] = (ti->sysval_map[i] - 1) + num_inputs;
409         break;
410      default:
411         break;
412      }
413   }
414
415   if (p->vp.psiz < 0x40)
416      p->vp.psiz = p->out[p->vp.psiz].hw;
417
418   return 0;
419}
420
421static int
422nv50_fragprog_prepare(struct nv50_translation_info *ti)
423{
424   struct nv50_program *p = ti->p;
425   int i, j, c;
426   unsigned nvary, nintp, depr;
427   unsigned n = 0, m = 0, skip = 0;
428   ubyte sn[16], si[16];
429
430   /* FP flags */
431
432   if (ti->scan.writes_z) {
433      p->fp.flags[1] = 0x11;
434      p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
435   }
436
437   if (ti->scan.uses_kill)
438      p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
439
440   /* FP inputs */
441
442   ti->input_file = NV_FILE_MEM_V;
443   ti->output_file = NV_FILE_GPR;
444
445   /* count non-flat inputs, save semantic info */
446   for (i = 0; i < p->in_nr; ++i) {
447      m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
448      sn[i] = p->in[i].sn;
449      si[i] = p->in[i].si;
450   }
451
452   /* reorder p->in[] so that non-flat inputs are first and
453    * kick out special inputs that don't use VP/GP_RESULT_MAP
454    */
455   nintp = 0;
456   for (i = 0; i < p->in_nr; ++i) {
457      if (sn[i] == TGSI_SEMANTIC_POSITION) {
458         for (c = 0; c < 4; ++c) {
459            ti->input_map[i][c] = nintp;
460            if (ti->input_access[i][c]) {
461               p->fp.interp |= 1 << (24 + c);
462               ++nintp;
463            }
464         }
465         skip++;
466         continue;
467      } else
468      if (sn[i] == TGSI_SEMANTIC_FACE) {
469         ti->input_map[i][0] = 255;
470         skip++;
471         continue;
472      }
473
474      j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
475
476      if (sn[i] == TGSI_SEMANTIC_COLOR)
477         p->vp.bfc[si[i]] = j;
478
479      p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
480      p->in[j].id = i;
481      p->in[j].sn = sn[i];
482      p->in[j].si = si[i];
483   }
484   assert(n <= m);
485   p->in_nr -= skip;
486
487   if (!(p->fp.interp & (8 << 24))) {
488      p->fp.interp |= (8 << 24);
489      ++nintp;
490   }
491
492   p->fp.colors = 4 << NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT; /* after HPOS */
493
494   for (i = 0; i < p->in_nr; ++i) {
495      int j = p->in[i].id;
496      p->in[i].hw = nintp;
497
498      for (c = 0; c < 4; ++c) {
499         if (!ti->input_access[j][c])
500            continue;
501         p->in[i].mask |= 1 << c;
502         ti->input_map[j][c] = nintp++;
503      }
504      /* count color inputs */
505      if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
506         p->fp.colors += bitcount4(p->in[i].mask) << 16;
507   }
508   nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
509   nvary = nintp;
510   if (n < m)
511      nvary -= p->in[n].hw;
512
513   p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
514   p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
515
516   /* FP outputs */
517
518   if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
519      p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
520
521   depr = p->out_nr;
522   for (i = 0; i < p->out_nr; ++i) {
523      p->out[i].id = i;
524      if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
525         depr = i;
526         continue;
527      }
528      p->out[i].hw = p->max_out;
529      p->out[i].mask = 0xf;
530
531      for (c = 0; c < 4; ++c)
532         ti->output_map[i][c] = p->max_out++;
533   }
534   if (depr < p->out_nr) {
535      p->out[depr].mask = 0x4;
536      p->out[depr].hw = ti->output_map[depr][2] = p->max_out++;
537   } else {
538      /* allowed values are 1, 4, 5, 8, 9, ... */
539      p->max_out = MAX2(4, p->max_out);
540   }
541
542   return 0;
543}
544
545static int
546nv50_geomprog_prepare(struct nv50_translation_info *ti)
547{
548   ti->input_file = NV_FILE_MEM_S;
549   ti->output_file = NV_FILE_OUT;
550
551   assert(0);
552   return 1;
553}
554
555static int
556nv50_prog_scan(struct nv50_translation_info *ti)
557{
558   struct nv50_program *p = ti->p;
559   struct tgsi_parse_context parse;
560   int ret, i;
561
562   p->vp.edgeflag = 0x40;
563   p->vp.psiz = 0x40;
564   p->vp.bfc[0] = 0x40;
565   p->vp.bfc[1] = 0x40;
566   p->gp.primid = 0x80;
567
568   tgsi_scan_shader(p->pipe.tokens, &ti->scan);
569
570#if NV50_DEBUG & NV50_DEBUG_SHADER
571   tgsi_dump(p->pipe.tokens, 0);
572#endif
573
574   ti->subr =
575      CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
576
577   ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
578   ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
579
580   ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
581
582   tgsi_parse_init(&parse, p->pipe.tokens);
583   while (!tgsi_parse_end_of_tokens(&parse)) {
584      tgsi_parse_token(&parse);
585
586      switch (parse.FullToken.Token.Type) {
587      case TGSI_TOKEN_TYPE_IMMEDIATE:
588         prog_immediate(ti, &parse.FullToken.FullImmediate);
589         break;
590      case TGSI_TOKEN_TYPE_DECLARATION:
591         prog_decl(ti, &parse.FullToken.FullDeclaration);
592         break;
593      case TGSI_TOKEN_TYPE_INSTRUCTION:
594         ti->insns[ti->inst_nr] = parse.FullToken.FullInstruction;
595         prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
596         break;
597      }
598   }
599
600   /* Scan to determine which registers are inputs/outputs of a subroutine. */
601   for (i = 0; i < ti->subr_nr; ++i) {
602      int pc = ti->subr[i].id;
603      while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
604         prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
605   }
606
607   p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
608   p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
609
610   switch (p->type) {
611   case PIPE_SHADER_VERTEX:
612      ret = nv50_vertprog_prepare(ti);
613      break;
614   case PIPE_SHADER_FRAGMENT:
615      ret = nv50_fragprog_prepare(ti);
616      break;
617   case PIPE_SHADER_GEOMETRY:
618      ret = nv50_geomprog_prepare(ti);
619      break;
620   default:
621      assert(!"unsupported program type");
622      ret = -1;
623      break;
624   }
625
626   assert(!ret);
627   return ret;
628}
629
630/* Temporary, need a reference to nv50_ir_generate_code in libnv50 or
631 * it "gets disappeared" and cannot be used in libnvc0 ...
632 */
633boolean
634nv50_program_translate_new(struct nv50_program *p)
635{
636   struct nv50_ir_prog_info info;
637
638   return nv50_ir_generate_code(&info);
639}
640
641boolean
642nv50_program_translate(struct nv50_program *p)
643{
644   struct nv50_translation_info *ti;
645   int ret;
646
647   ti = CALLOC_STRUCT(nv50_translation_info);
648   ti->p = p;
649
650   ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
651
652   ret = nv50_prog_scan(ti);
653   if (ret) {
654      NOUVEAU_ERR("unsupported shader program\n");
655      goto out;
656   }
657
658   ret = nv50_generate_code(ti);
659   if (ret) {
660      NOUVEAU_ERR("error during shader translation\n");
661      goto out;
662   }
663
664out:
665   if (ti->immd32)
666      FREE(ti->immd32);
667   if (ti->immd32_ty)
668      FREE(ti->immd32_ty);
669   if (ti->insns)
670      FREE(ti->insns);
671   if (ti->subr)
672      FREE(ti->subr);
673   FREE(ti);
674   return ret ? FALSE : TRUE;
675}
676
677void
678nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
679{
680   if (p->res)
681      nouveau_resource_free(&p->res);
682
683   if (p->code)
684      FREE(p->code);
685
686   if (p->fixups)
687      FREE(p->fixups);
688
689   p->translated = FALSE;
690}
691