nv50_program.c revision e08f70a41d1012a0270468866614485a3415168e
1/*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23/* #define NV50_PROGRAM_DEBUG */
24
25#include "nv50_program.h"
26#include "nv50_pc.h"
27#include "nv50_context.h"
28
29#include "pipe/p_shader_tokens.h"
30#include "tgsi/tgsi_parse.h"
31#include "tgsi/tgsi_util.h"
32#include "tgsi/tgsi_dump.h"
33
/* Count the bits set in the low nibble of val; bits above bit 3 are ignored. */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   const uint32_t nibble = val & 0xf;

   return (nibble & 1) + ((nibble >> 1) & 1) +
          ((nibble >> 2) & 1) + ((nibble >> 3) & 1);
}
41
42static unsigned
43nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
44{
45   unsigned mask = inst->Dst[0].Register.WriteMask;
46
47   switch (inst->Instruction.Opcode) {
48   case TGSI_OPCODE_COS:
49   case TGSI_OPCODE_SIN:
50      return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
51   case TGSI_OPCODE_DP3:
52      return 0x7;
53   case TGSI_OPCODE_DP4:
54   case TGSI_OPCODE_DPH:
55   case TGSI_OPCODE_KIL: /* WriteMask ignored */
56      return 0xf;
57   case TGSI_OPCODE_DST:
58      return mask & (c ? 0xa : 0x6);
59   case TGSI_OPCODE_EX2:
60   case TGSI_OPCODE_EXP:
61   case TGSI_OPCODE_LG2:
62   case TGSI_OPCODE_LOG:
63   case TGSI_OPCODE_POW:
64   case TGSI_OPCODE_RCP:
65   case TGSI_OPCODE_RSQ:
66   case TGSI_OPCODE_SCS:
67      return 0x1;
68   case TGSI_OPCODE_IF:
69      return 0x1;
70   case TGSI_OPCODE_LIT:
71      return 0xb;
72   case TGSI_OPCODE_TEX:
73   case TGSI_OPCODE_TXB:
74   case TGSI_OPCODE_TXL:
75   case TGSI_OPCODE_TXP:
76   {
77      const struct tgsi_instruction_texture *tex;
78
79      assert(inst->Instruction.Texture);
80      tex = &inst->Texture;
81
82      mask = 0x7;
83      if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
84          inst->Instruction.Opcode != TGSI_OPCODE_TXD)
85         mask |= 0x8; /* bias, lod or proj */
86
87      switch (tex->Texture) {
88      case TGSI_TEXTURE_1D:
89         mask &= 0x9;
90         break;
91      case TGSI_TEXTURE_SHADOW1D:
92         mask &= 0x5;
93         break;
94      case TGSI_TEXTURE_2D:
95         mask &= 0xb;
96         break;
97      default:
98         break;
99      }
100   }
101  	   return mask;
102   case TGSI_OPCODE_XPD:
103   {
104      unsigned x = 0;
105      if (mask & 1) x |= 0x6;
106      if (mask & 2) x |= 0x5;
107      if (mask & 4) x |= 0x3;
108      return x;
109   }
110   default:
111      break;
112   }
113
114   return mask;
115}
116
117static void
118nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
119{
120   int i, c;
121
122   for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
123      for (c = 0; c < 4; ++c)
124         ti->input_access[i][c] = id;
125
126   ti->indirect_inputs = TRUE;
127}
128
129static void
130nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
131{
132   int i, c;
133
134   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
135      for (c = 0; c < 4; ++c)
136         ti->output_access[i][c] = id;
137
138   ti->indirect_outputs = TRUE;
139}
140
141static void
142prog_inst(struct nv50_translation_info *ti,
143          const struct tgsi_full_instruction *inst, int id)
144{
145   const struct tgsi_dst_register *dst;
146   const struct tgsi_src_register *src;
147   int s, c, k;
148   unsigned mask;
149
150   if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
151      for (c = 0; c < 4; ++c) {
152         dst = &inst->Dst[0].Register;
153         if (inst->Dst[0].Register.Indirect)
154            nv50_indirect_outputs(ti, id);
155         if (!(dst->WriteMask & (1 << c)))
156            continue;
157         ti->output_access[dst->Index][c] = id;
158      }
159
160      if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
161          inst->Src[0].Register.File == TGSI_FILE_INPUT &&
162          dst->Index == ti->edgeflag_out)
163         ti->p->vp.edgeflag = inst->Src[0].Register.Index;
164   }
165
166   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
167      src = &inst->Src[s].Register;
168      if (src->File != TGSI_FILE_INPUT)
169         continue;
170      mask = nv50_tgsi_src_mask(inst, s);
171
172      if (inst->Src[s].Register.Indirect)
173         nv50_indirect_inputs(ti, id);
174
175      for (c = 0; c < 4; ++c) {
176         if (!(mask & (1 << c)))
177            continue;
178         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
179         if (k <= TGSI_SWIZZLE_W)
180            ti->input_access[src->Index][k] = id;
181      }
182   }
183}
184
185static void
186prog_immediate(struct nv50_translation_info *ti,
187               const struct tgsi_full_immediate *imm)
188{
189   int c;
190   unsigned n = ti->immd32_nr++;
191
192   assert(ti->immd32_nr <= ti->scan.immediate_count);
193
194   for (c = 0; c < 4; ++c)
195      ti->immd32[n * 4 + c] = imm->u[c].Uint;
196
197   ti->immd32_ty[n] = imm->Immediate.DataType;
198}
199
200static INLINE unsigned
201translate_interpolate(const struct tgsi_full_declaration *decl)
202{
203   unsigned mode;
204
205   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
206      mode = NV50_INTERP_FLAT;
207   else
208   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
209      mode = 0;
210   else
211      mode = NV50_INTERP_LINEAR;
212
213   if (decl->Declaration.Centroid)
214      mode |= NV50_INTERP_CENTROID;
215
216   return mode;
217}
218
219static void
220prog_decl(struct nv50_translation_info *ti,
221          const struct tgsi_full_declaration *decl)
222{
223   unsigned i, first, last, sn = 0, si = 0;
224
225   first = decl->Range.First;
226   last = decl->Range.Last;
227
228   if (decl->Declaration.Semantic) {
229      sn = decl->Semantic.Name;
230      si = decl->Semantic.Index;
231   }
232
233   switch (decl->Declaration.File) {
234   case TGSI_FILE_INPUT:
235      for (i = first; i <= last; ++i)
236         ti->interp_mode[i] = translate_interpolate(decl);
237
238      if (!decl->Declaration.Semantic)
239         break;
240
241      for (i = first; i <= last; ++i) {
242         ti->p->in[i].sn = sn;
243         ti->p->in[i].si = si;
244      }
245
246      switch (sn) {
247      case TGSI_SEMANTIC_FACE:
248         break;
249      case TGSI_SEMANTIC_COLOR:
250         if (ti->p->type == PIPE_SHADER_FRAGMENT)
251            ti->p->vp.bfc[si] = first;
252         break;
253      }
254      break;
255   case TGSI_FILE_OUTPUT:
256      if (!decl->Declaration.Semantic)
257         break;
258
259      for (i = first; i <= last; ++i) {
260         ti->p->out[i].sn = sn;
261         ti->p->out[i].si = si;
262      }
263
264      switch (sn) {
265      case TGSI_SEMANTIC_BCOLOR:
266         ti->p->vp.bfc[si] = first;
267         break;
268      case TGSI_SEMANTIC_PSIZE:
269         ti->p->vp.psiz = first;
270         break;
271      case TGSI_SEMANTIC_EDGEFLAG:
272         ti->edgeflag_out = first;
273         break;
274      default:
275         break;
276      }
277      break;
278   case TGSI_FILE_SYSTEM_VALUE:
279      switch (decl->Semantic.Name) {
280      case TGSI_SEMANTIC_FACE:
281         break;
282      case TGSI_SEMANTIC_INSTANCEID:
283         break;
284      case TGSI_SEMANTIC_PRIMID:
285         break;
286         /*
287      case TGSI_SEMANTIC_PRIMIDIN:
288         break;
289      case TGSI_SEMANTIC_VERTEXID:
290         break;
291         */
292      default:
293         break;
294      }
295      break;
296   case TGSI_FILE_CONSTANT:
297      ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
298      break;
299   case TGSI_FILE_ADDRESS:
300   case TGSI_FILE_SAMPLER:
301   case TGSI_FILE_TEMPORARY:
302      break;
303   default:
304      assert(0);
305      break;
306   }
307}
308
309static int
310nv50_vertprog_prepare(struct nv50_translation_info *ti)
311{
312   struct nv50_program *p = ti->p;
313   int i, c;
314   unsigned num_inputs = 0;
315
316   ti->input_file = NV_FILE_MEM_S;
317   ti->output_file = NV_FILE_OUT;
318
319   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
320      p->in[i].id = i;
321      p->in[i].hw = num_inputs;
322
323      for (c = 0; c < 4; ++c) {
324         if (!ti->input_access[i][c])
325            continue;
326         ti->input_map[i][c] = num_inputs++;
327         p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
328      }
329   }
330
331   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
332      p->out[i].id = i;
333      p->out[i].hw = p->max_out;
334
335      for (c = 0; c < 4; ++c) {
336         if (!ti->output_access[i][c])
337            continue;
338         ti->output_map[i][c] = p->max_out++;
339         p->out[i].mask |= 1 << c;
340      }
341   }
342
343   if (p->vp.psiz < 0x40)
344      p->vp.psiz = p->out[p->vp.psiz].hw;
345
346   return 0;
347}
348
349static int
350nv50_fragprog_prepare(struct nv50_translation_info *ti)
351{
352   struct nv50_program *p = ti->p;
353   int i, j, c;
354   unsigned nvary, nintp, depr;
355   unsigned n = 0, m = 0, skip = 0;
356   ubyte sn[16], si[16];
357
358   /* FP flags */
359
360   if (ti->scan.writes_z) {
361      p->fp.flags[1] = 0x11;
362      p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z;
363   }
364
365   if (ti->scan.uses_kill)
366      p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL;
367
368   /* FP inputs */
369
370   ti->input_file = NV_FILE_MEM_V;
371   ti->output_file = NV_FILE_GPR;
372
373   /* count non-flat inputs, save semantic info */
374   for (i = 0; i < p->in_nr; ++i) {
375      m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
376      sn[i] = p->in[i].sn;
377      si[i] = p->in[i].si;
378   }
379
380   /* reorder p->in[] so that non-flat inputs are first and
381    * kick out special inputs that don't use VP/GP_RESULT_MAP
382    */
383   nintp = 0;
384   for (i = 0; i < p->in_nr; ++i) {
385      if (sn[i] == TGSI_SEMANTIC_POSITION) {
386         for (c = 0; c < 4; ++c) {
387            ti->input_map[i][c] = nintp;
388            if (ti->input_access[i][c]) {
389               p->fp.interp |= 1 << (24 + c);
390               ++nintp;
391            }
392         }
393         skip++;
394         continue;
395      } else
396      if (sn[i] == TGSI_SEMANTIC_FACE) {
397         ti->input_map[i][0] = 255;
398         skip++;
399         continue;
400      }
401
402      j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
403
404      if (sn[i] == TGSI_SEMANTIC_COLOR)
405         p->vp.bfc[si[i]] = j;
406
407      p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
408      p->in[j].id = i;
409      p->in[j].sn = sn[i];
410      p->in[j].si = si[i];
411   }
412   assert(n <= m);
413   p->in_nr -= skip;
414
415   if (!(p->fp.interp & (8 << 24))) {
416      p->fp.interp |= (8 << 24);
417      ++nintp;
418   }
419
420   p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
421
422   for (i = 0; i < p->in_nr; ++i) {
423      int j = p->in[i].id;
424      p->in[i].hw = nintp;
425
426      for (c = 0; c < 4; ++c) {
427         if (!ti->input_access[j][c])
428            continue;
429         p->in[i].mask |= 1 << c;
430         ti->input_map[j][c] = nintp++;
431      }
432      /* count color inputs */
433      if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
434         p->fp.colors += bitcount4(p->in[i].mask) << 16;
435   }
436   nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
437   nvary = nintp;
438   if (n < m)
439      nvary -= p->in[n].hw;
440
441   p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT;
442   p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT;
443
444   /* FP outputs */
445
446   if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
447      p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS;
448
449   depr = p->out_nr;
450   for (i = 0; i < p->out_nr; ++i) {
451      p->out[i].id = i;
452      if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
453         depr = i;
454         continue;
455      }
456      p->out[i].hw = p->max_out;
457      p->out[i].mask = 0xf;
458
459      for (c = 0; c < 4; ++c)
460         ti->output_map[i][c] = p->max_out++;
461   }
462   if (depr < p->out_nr) {
463      p->out[depr].mask = 0x4;
464      p->out[depr].hw = ti->output_map[depr][2] = p->max_out++;
465   }
466
467   return 0;
468}
469
470static int
471nv50_geomprog_prepare(struct nv50_translation_info *ti)
472{
473   ti->input_file = NV_FILE_MEM_S;
474   ti->output_file = NV_FILE_OUT;
475
476   assert(0);
477   return 1;
478}
479
480static int
481nv50_prog_scan(struct nv50_translation_info *ti)
482{
483   struct nv50_program *p = ti->p;
484   struct tgsi_parse_context parse;
485   int ret;
486
487   p->vp.edgeflag = 0x40;
488   p->vp.psiz = 0x40;
489   p->vp.bfc[0] = 0x40;
490   p->vp.bfc[1] = 0x40;
491   p->gp.primid = 0x80;
492
493   tgsi_scan_shader(p->pipe.tokens, &ti->scan);
494
495#ifdef NV50_PROGRAM_DEBUG
496   tgsi_dump(p->pipe.tokens, 0);
497#endif
498
499   ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
500   ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
501
502   tgsi_parse_init(&parse, p->pipe.tokens);
503   while (!tgsi_parse_end_of_tokens(&parse)) {
504      tgsi_parse_token(&parse);
505
506      switch (parse.FullToken.Token.Type) {
507      case TGSI_TOKEN_TYPE_IMMEDIATE:
508         prog_immediate(ti, &parse.FullToken.FullImmediate);
509         break;
510      case TGSI_TOKEN_TYPE_DECLARATION:
511         prog_decl(ti, &parse.FullToken.FullDeclaration);
512         break;
513      case TGSI_TOKEN_TYPE_INSTRUCTION:
514         prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
515         break;
516      }
517   }
518
519   p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
520   p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
521
522   switch (p->type) {
523   case PIPE_SHADER_VERTEX:
524      ret = nv50_vertprog_prepare(ti);
525      break;
526   case PIPE_SHADER_FRAGMENT:
527      ret = nv50_fragprog_prepare(ti);
528      break;
529   case PIPE_SHADER_GEOMETRY:
530      ret = nv50_geomprog_prepare(ti);
531      break;
532   default:
533      assert(!"unsupported program type");
534      ret = -1;
535      break;
536   }
537
538   assert(!ret);
539   return ret;
540}
541
542boolean
543nv50_program_tx(struct nv50_program *p)
544{
545   struct nv50_translation_info *ti;
546   int ret;
547
548   ti = CALLOC_STRUCT(nv50_translation_info);
549   ti->p = p;
550
551   ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
552
553   ret = nv50_prog_scan(ti);
554   if (ret) {
555      NOUVEAU_ERR("unsupported shader program\n");
556      goto out;
557   }
558
559   ret = nv50_generate_code(ti);
560   if (ret) {
561      NOUVEAU_ERR("error during shader translation\n");
562      goto out;
563   }
564
565out:
566   if (ti->immd32)
567      FREE(ti->immd32);
568   if (ti->immd32_ty)
569      FREE(ti->immd32_ty);
570   FREE(ti);
571   return ret ? FALSE : TRUE;
572}
573
574void
575nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
576{
577   nouveau_bo_ref(NULL, &p->bo);
578
579   so_ref(NULL, &p->so);
580
581   if (p->code)
582      FREE(p->code);
583
584   p->translated = FALSE;
585}
586