1/*
2 * Copyright © 2016 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include <stdbool.h>
25
26#include "st_tgsi_lower_yuv.h"
27#include "tgsi/tgsi_transform.h"
28#include "tgsi/tgsi_scan.h"
29#include "tgsi/tgsi_dump.h"
30#include "util/u_debug.h"
31
32#include "util/bitscan.h"
33
34struct tgsi_yuv_transform {
35   struct tgsi_transform_context base;
36   struct tgsi_shader_info info;
37   struct tgsi_full_src_register imm[4];
38   struct {
39      struct tgsi_full_src_register src;
40      struct tgsi_full_dst_register dst;
41   } tmp[2];
42#define A 0
43#define B 1
44
45   /* Maps a primary sampler (used for Y) to the U or UV sampler.  In
46    * case of 3-plane YUV format, the V plane is next sampler after U.
47    */
48   unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
49
50   bool first_instruction_emitted;
51   unsigned free_slots;
52   unsigned lower_nv12;
53   unsigned lower_iyuv;
54};
55
/* Downcast a generic transform context to the YUV lowering context.  The
 * cast relies on 'base' being the first member of struct
 * tgsi_yuv_transform; the caller must pass a context embedded in one.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *ctx = (struct tgsi_yuv_transform *)tctx;

   return ctx;
}
61
62static void
63reg_dst(struct tgsi_full_dst_register *dst,
64        const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
65{
66   *dst = *orig_dst;
67   dst->Register.WriteMask &= wrmask;
68   assert(dst->Register.WriteMask);
69}
70
71static inline void
72get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
73{
74   swiz[0] = src->SwizzleX;
75   swiz[1] = src->SwizzleY;
76   swiz[2] = src->SwizzleZ;
77   swiz[3] = src->SwizzleW;
78}
79
80static void
81reg_src(struct tgsi_full_src_register *src,
82        const struct tgsi_full_src_register *orig_src,
83        unsigned sx, unsigned sy, unsigned sz, unsigned sw)
84{
85   unsigned swiz[4];
86   get_swiz(swiz, &orig_src->Register);
87   *src = *orig_src;
88   src->Register.SwizzleX = swiz[sx];
89   src->Register.SwizzleY = swiz[sy];
90   src->Register.SwizzleZ = swiz[sz];
91   src->Register.SwizzleW = swiz[sw];
92}
93
/* '_' marks a channel whose value does not matter; it is mapped to X so
 * that SWIZ() can paste it like any other component name.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expand four component letters (X, Y, Z, W or _) into the four
 * comma-separated TGSI_SWIZZLE_* arguments expected by reg_src().
 */
#define SWIZ(x,y,z,w)                           \
      TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y,   \
      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
97
98static inline struct tgsi_full_instruction
99tex_instruction(unsigned samp)
100{
101   struct tgsi_full_instruction inst;
102
103   inst = tgsi_default_full_instruction();
104   inst.Instruction.Opcode = TGSI_OPCODE_TEX;
105   inst.Instruction.Texture = 1;
106   inst.Texture.Texture = TGSI_TEXTURE_2D;
107   inst.Instruction.NumDstRegs = 1;
108   inst.Instruction.NumSrcRegs = 2;
109   inst.Src[1].Register.File  = TGSI_FILE_SAMPLER;
110   inst.Src[1].Register.Index = samp;
111
112   return inst;
113}
114
115static inline struct tgsi_full_instruction
116mov_instruction(void)
117{
118   struct tgsi_full_instruction inst;
119
120   inst = tgsi_default_full_instruction();
121   inst.Instruction.Opcode = TGSI_OPCODE_MOV;
122   inst.Instruction.Saturate = 0;
123   inst.Instruction.NumDstRegs = 1;
124   inst.Instruction.NumSrcRegs = 1;
125
126   return inst;
127}
128
129static inline struct tgsi_full_instruction
130dp3_instruction(void)
131{
132   struct tgsi_full_instruction inst;
133
134   inst = tgsi_default_full_instruction();
135   inst.Instruction.Opcode = TGSI_OPCODE_DP3;
136   inst.Instruction.NumDstRegs = 1;
137   inst.Instruction.NumSrcRegs = 2;
138
139   return inst;
140}
141
142
143
144static void
145emit_immed(struct tgsi_transform_context *tctx, int idx,
146           float x, float y, float z, float w)
147{
148   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
149   struct tgsi_shader_info *info = &ctx->info;
150   struct tgsi_full_immediate immed;
151
152   immed = tgsi_default_full_immediate();
153   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
154   immed.u[0].Float = x;
155   immed.u[1].Float = y;
156   immed.u[2].Float = z;
157   immed.u[3].Float = w;
158   tctx->emit_immediate(tctx, &immed);
159
160   ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
161   ctx->imm[idx].Register.Index = info->immediate_count + idx;
162   ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
163   ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
164   ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
165   ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
166}
167
168static void
169emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
170{
171   tgsi_transform_sampler_decl(tctx, samp);
172   tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
173                                    TGSI_RETURN_TYPE_FLOAT);
174}
175
176/* Emit extra declarations we need:
177 *  + 2 TEMP to hold intermediate results
178 *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
179 *    lowered YUV sampler
180 *  + extra immediates for doing CSC
181 */
182static void
183emit_decls(struct tgsi_transform_context *tctx)
184{
185   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
186   struct tgsi_shader_info *info = &ctx->info;
187   unsigned mask, tempbase, i;
188   struct tgsi_full_declaration decl;
189
190   /*
191    * Declare immediates for CSC conversion:
192    */
193
194   /* ITU-R BT.601 conversion */
195   emit_immed(tctx, 0, 1.164,  0.000,  1.596,  0.0);
196   emit_immed(tctx, 1, 1.164, -0.392, -0.813,  0.0);
197   emit_immed(tctx, 2, 1.164,  2.017,  0.000,  0.0);
198   emit_immed(tctx, 3, 0.0625, 0.500,  0.500,  1.0);
199
200   /*
201    * Declare extra samplers / sampler-views:
202    */
203
204   mask = ctx->lower_nv12 | ctx->lower_iyuv;
205   while (mask) {
206      unsigned extra, y_samp = u_bit_scan(&mask);
207
208      extra = u_bit_scan(&ctx->free_slots);
209      ctx->sampler_map[y_samp][0] = extra;
210      emit_samp(tctx, extra);
211
212      if (ctx->lower_iyuv & (1 << y_samp)) {
213         extra = u_bit_scan(&ctx->free_slots);
214         ctx->sampler_map[y_samp][1] = extra;
215         emit_samp(tctx, extra);
216      }
217   }
218
219   /*
220    * Declare extra temp:
221    */
222
223   tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
224
225   for (i = 0; i < 2; i++) {
226      decl = tgsi_default_full_declaration();
227      decl.Declaration.File = TGSI_FILE_TEMPORARY;
228      decl.Range.First = decl.Range.Last = tempbase + i;
229      tctx->emit_declaration(tctx, &decl);
230
231      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
232      ctx->tmp[i].src.Register.Index = tempbase + i;
233      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
234      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
235      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
236      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
237
238      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
239      ctx->tmp[i].dst.Register.Index = tempbase + i;
240      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
241   }
242}
243
244/* call with YUV in tmpA.xyz */
245static void
246yuv_to_rgb(struct tgsi_transform_context *tctx,
247           struct tgsi_full_dst_register *dst)
248{
249   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
250   struct tgsi_full_instruction inst;
251
252   /*
253    * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
254    * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
255    * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
256    * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
257    */
258
259   /* SUB tmpA.xyz, tmpA, imm[3] */
260   inst = tgsi_default_full_instruction();
261   inst.Instruction.Opcode = TGSI_OPCODE_ADD;
262   inst.Instruction.Saturate = 0;
263   inst.Instruction.NumDstRegs = 1;
264   inst.Instruction.NumSrcRegs = 2;
265   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
266   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
267   reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
268   inst.Src[1].Register.Negate = 1;
269   tctx->emit_instruction(tctx, &inst);
270
271   /* DP3 dst.x, tmpA, imm[0] */
272   inst = dp3_instruction();
273   reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
274   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
275   reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
276   tctx->emit_instruction(tctx, &inst);
277
278   /* DP3 dst.y, tmpA, imm[1] */
279   inst = dp3_instruction();
280   reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
281   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
282   reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
283   tctx->emit_instruction(tctx, &inst);
284
285   /* DP3 dst.z, tmpA, imm[2] */
286   inst = dp3_instruction();
287   reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
288   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
289   reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
290   tctx->emit_instruction(tctx, &inst);
291
292   /* MOV dst.w, imm[0].x */
293   inst = mov_instruction();
294   reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
295   reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
296   tctx->emit_instruction(tctx, &inst);
297}
298
299static void
300lower_nv12(struct tgsi_transform_context *tctx,
301           struct tgsi_full_instruction *originst)
302{
303   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
304   struct tgsi_full_instruction inst;
305   struct tgsi_full_src_register *coord = &originst->Src[0];
306   unsigned samp = originst->Src[1].Register.Index;
307
308   /* sample Y:
309    *    TEX tempA.x, coord, texture[samp], 2D;
310    */
311   inst = tex_instruction(samp);
312   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
313   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
314   tctx->emit_instruction(tctx, &inst);
315
316   /* sample UV:
317    *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
318    *    MOV tempA.yz, tempB._xy_
319    */
320   inst = tex_instruction(ctx->sampler_map[samp][0]);
321   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
322   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
323   tctx->emit_instruction(tctx, &inst);
324
325   inst = mov_instruction();
326   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
327   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
328   tctx->emit_instruction(tctx, &inst);
329
330   /* At this point, we have YUV in tempA.xyz, rest is common: */
331   yuv_to_rgb(tctx, &originst->Dst[0]);
332}
333
334static void
335lower_iyuv(struct tgsi_transform_context *tctx,
336           struct tgsi_full_instruction *originst)
337{
338   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
339   struct tgsi_full_instruction inst;
340   struct tgsi_full_src_register *coord = &originst->Src[0];
341   unsigned samp = originst->Src[1].Register.Index;
342
343   /* sample Y:
344    *    TEX tempA.x, coord, texture[samp], 2D;
345    */
346   inst = tex_instruction(samp);
347   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
348   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
349   tctx->emit_instruction(tctx, &inst);
350
351   /* sample U:
352    *    TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
353    *    MOV tempA.y, tempB._x__
354    */
355   inst = tex_instruction(ctx->sampler_map[samp][0]);
356   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
357   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
358   tctx->emit_instruction(tctx, &inst);
359
360   inst = mov_instruction();
361   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
362   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
363   tctx->emit_instruction(tctx, &inst);
364
365   /* sample V:
366    *    TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
367    *    MOV tempA.z, tempB.__x_
368    */
369   inst = tex_instruction(ctx->sampler_map[samp][1]);
370   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
371   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
372   tctx->emit_instruction(tctx, &inst);
373
374   inst = mov_instruction();
375   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
376   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
377   tctx->emit_instruction(tctx, &inst);
378
379   /* At this point, we have YUV in tempA.xyz, rest is common: */
380   yuv_to_rgb(tctx, &originst->Dst[0]);
381}
382
383static void
384transform_instr(struct tgsi_transform_context *tctx,
385                struct tgsi_full_instruction *inst)
386{
387   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
388
389   if (!ctx->first_instruction_emitted) {
390      emit_decls(tctx);
391      ctx->first_instruction_emitted = true;
392   }
393
394   switch (inst->Instruction.Opcode) {
395   /* TODO what other tex opcode's can be used w/ external eglimgs? */
396   case TGSI_OPCODE_TEX: {
397      unsigned samp = inst->Src[1].Register.Index;
398      if (ctx->lower_nv12 & (1 << samp)) {
399         lower_nv12(tctx, inst);
400      } else if (ctx->lower_iyuv & (1 << samp)) {
401         lower_iyuv(tctx, inst);
402      } else {
403         goto skip;
404      }
405      break;
406   }
407   default:
408   skip:
409      tctx->emit_instruction(tctx, inst);
410      return;
411   }
412}
413
414extern const struct tgsi_token *
415st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
416                  unsigned lower_nv12, unsigned lower_iyuv)
417{
418   struct tgsi_yuv_transform ctx;
419   struct tgsi_token *newtoks;
420   int newlen;
421
422   assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
423
424//   tgsi_dump(tokens, 0);
425//   debug_printf("\n");
426
427   memset(&ctx, 0, sizeof(ctx));
428   ctx.base.transform_instruction = transform_instr;
429   ctx.free_slots = free_slots;
430   ctx.lower_nv12 = lower_nv12;
431   ctx.lower_iyuv = lower_iyuv;
432   tgsi_scan_shader(tokens, &ctx.info);
433
434   /* TODO better job of figuring out how many extra tokens we need..
435    * this is a pain about tgsi_transform :-/
436    */
437   newlen = tgsi_num_tokens(tokens) + 120;
438   newtoks = tgsi_alloc_tokens(newlen);
439   if (!newtoks)
440      return NULL;
441
442   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
443
444//   tgsi_dump(newtoks, 0);
445//   debug_printf("\n");
446
447   return newtoks;
448}
449