asm_fill.h revision a19eaaa6c1956add5343295af7e9f682efa08d74
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.  All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27#ifndef ASM_FILL_H
28#define ASM_FILL_H
29
30#include "tgsi/tgsi_ureg.h"
31
/**
 * Signature shared by the shader-fragment emitters in this file.
 *
 * An emitter appends TGSI instructions to \p ureg.  \p out points to the
 * register the fragment writes its result to, while \p in, \p sampler,
 * \p temp and \p constant are arrays of input, sampler, temporary and
 * constant registers that the emitter indexes directly.
 */
typedef void (* ureg_func)( struct ureg_program *ureg,
                            struct ureg_dst *out,
                            struct ureg_src *in,
                            struct ureg_src *sampler,
                            struct ureg_dst *temp,
                            struct ureg_src *constant);
38
39static INLINE void
40solid_fill( struct ureg_program *ureg,
41            struct ureg_dst *out,
42            struct ureg_src *in,
43            struct ureg_src *sampler,
44            struct ureg_dst *temp,
45            struct ureg_src *constant)
46{
47   ureg_MOV(ureg, *out, constant[2]);
48}
49
/**
 * Perform frag-coord-to-paint-coord transform.  The transformation is in
 * CONST[4..6].
 *
 * The macro expects `ureg', `in', `temp' and `constant' to be in scope where
 * it is expanded.  It builds (in[0].x, in[0].y, CONST[3].y) in TEMP[0],
 * takes its dot product with CONST[4], CONST[5] and CONST[6], multiplies
 * the first two results by the reciprocal of the third, and leaves the
 * resulting coordinates in TEMP[4].xy.  TEMP[0..3] are clobbered.
 *
 * The expansion already ends with a semicolon, so the macro is written as a
 * bare statement without one.
 */
#define PAINT_TRANSFORM                                                 \
   ureg_MOV(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_XY), in[0]);   \
   ureg_MOV(ureg,                                                       \
            ureg_writemask(temp[0], TGSI_WRITEMASK_Z),                  \
            ureg_scalar(constant[3], TGSI_SWIZZLE_Y));                  \
   ureg_DP3(ureg, temp[1], constant[4], ureg_src(temp[0]));             \
   ureg_DP3(ureg, temp[2], constant[5], ureg_src(temp[0]));             \
   ureg_DP3(ureg, temp[3], constant[6], ureg_src(temp[0]));             \
   ureg_RCP(ureg, temp[3], ureg_src(temp[3]));                          \
   ureg_MUL(ureg, temp[1], ureg_src(temp[1]), ureg_src(temp[3]));       \
   ureg_MUL(ureg, temp[2], ureg_src(temp[2]), ureg_src(temp[3]));       \
   ureg_MOV(ureg,                                                       \
            ureg_writemask(temp[4], TGSI_WRITEMASK_X),                  \
            ureg_src(temp[1]));                                         \
   ureg_MOV(ureg,                                                       \
            ureg_writemask(temp[4], TGSI_WRITEMASK_Y),                  \
            ureg_src(temp[2]));
71
72static INLINE void
73linear_grad( struct ureg_program *ureg,
74             struct ureg_dst *out,
75             struct ureg_src *in,
76             struct ureg_src *sampler,
77             struct ureg_dst *temp,
78             struct ureg_src *constant)
79{
80   PAINT_TRANSFORM
81
82   /* grad = DP2((x, y), CONST[2].xy) * CONST[2].z */
83   ureg_MUL(ureg, temp[0],
84            ureg_scalar(constant[2], TGSI_SWIZZLE_Y),
85            ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_Y));
86   ureg_MAD(ureg, temp[1],
87            ureg_scalar(constant[2], TGSI_SWIZZLE_X),
88            ureg_scalar(ureg_src(temp[4]), TGSI_SWIZZLE_X),
89            ureg_src(temp[0]));
90   ureg_MUL(ureg, temp[2], ureg_src(temp[1]),
91            ureg_scalar(constant[2], TGSI_SWIZZLE_Z));
92
93   ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[2]), sampler[0]);
94}
95
96static INLINE void
97radial_grad( struct ureg_program *ureg,
98             struct ureg_dst *out,
99             struct ureg_src *in,
100             struct ureg_src *sampler,
101             struct ureg_dst *temp,
102             struct ureg_src *constant)
103{
104   PAINT_TRANSFORM
105
106   /*
107    * Calculate (sqrt(B^2 + AC) - B) / A, where
108    *
109    *   A is CONST[2].z,
110    *   B is DP2((x, y), CONST[2].xy), and
111    *   C is DP2((x, y), (x, y)).
112    */
113
114   /* B and C */
115   ureg_DP2(ureg, temp[0], ureg_src(temp[4]), constant[2]);
116   ureg_DP2(ureg, temp[1], ureg_src(temp[4]), ureg_src(temp[4]));
117
118   /* the square root */
119   ureg_MUL(ureg, temp[2], ureg_src(temp[0]), ureg_src(temp[0]));
120   ureg_MAD(ureg, temp[3], ureg_src(temp[1]),
121         ureg_scalar(constant[2], TGSI_SWIZZLE_Z), ureg_src(temp[2]));
122   ureg_RSQ(ureg, temp[3], ureg_src(temp[3]));
123   ureg_RCP(ureg, temp[3], ureg_src(temp[3]));
124
125   ureg_SUB(ureg, temp[3], ureg_src(temp[3]), ureg_src(temp[0]));
126   ureg_RCP(ureg, temp[0], ureg_scalar(constant[2], TGSI_SWIZZLE_Z));
127   ureg_MUL(ureg, temp[0], ureg_src(temp[0]), ureg_src(temp[3]));
128
129   ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[0]), sampler[0]);
130}
131
132
133static INLINE void
134pattern( struct ureg_program *ureg,
135         struct ureg_dst     *out,
136         struct ureg_src     *in,
137         struct ureg_src     *sampler,
138         struct ureg_dst     *temp,
139         struct ureg_src     *constant)
140{
141   PAINT_TRANSFORM
142
143   /* (s, t) = (x / tex_width, y / tex_height) */
144   ureg_RCP(ureg, temp[0],
145            ureg_swizzle(constant[3],
146                         TGSI_SWIZZLE_Z,
147                         TGSI_SWIZZLE_W,
148                         TGSI_SWIZZLE_Z,
149                         TGSI_SWIZZLE_W));
150   ureg_MOV(ureg, temp[1], ureg_src(temp[4]));
151   ureg_MUL(ureg,
152            ureg_writemask(temp[1], TGSI_WRITEMASK_X),
153            ureg_src(temp[1]),
154            ureg_src(temp[0]));
155   ureg_MUL(ureg,
156            ureg_writemask(temp[1], TGSI_WRITEMASK_Y),
157            ureg_src(temp[1]),
158            ureg_src(temp[0]));
159
160   ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, ureg_src(temp[1]), sampler[0]);
161}
162
163static INLINE void
164paint_degenerate( struct ureg_program *ureg,
165                  struct ureg_dst *out,
166                  struct ureg_src *in,
167                  struct ureg_src *sampler,
168                  struct ureg_dst *temp,
169                  struct ureg_src *constant)
170{
171   /* CONST[3].y is 1.0f */
172   ureg_MOV(ureg, temp[1], ureg_scalar(constant[3], TGSI_SWIZZLE_Y));
173   ureg_TEX(ureg, *out, TGSI_TEXTURE_1D, ureg_src(temp[1]), sampler[0]);
174}
175
176static INLINE void
177image_normal( struct ureg_program *ureg,
178              struct ureg_dst *out,
179              struct ureg_src *in,
180              struct ureg_src *sampler,
181              struct ureg_dst *temp,
182              struct ureg_src *constant)
183{
184   ureg_TEX(ureg, *out, TGSI_TEXTURE_2D, in[1], sampler[3]);
185}
186
187
188static INLINE void
189image_multiply( struct ureg_program *ureg,
190                struct ureg_dst *out,
191                struct ureg_src *in,
192                struct ureg_src *sampler,
193                struct ureg_dst *temp,
194                struct ureg_src *constant)
195{
196   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]);
197   ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1]));
198}
199
200
201static INLINE void
202image_stencil( struct ureg_program *ureg,
203               struct ureg_dst *out,
204               struct ureg_src *in,
205               struct ureg_src *sampler,
206               struct ureg_dst *temp,
207               struct ureg_src *constant)
208{
209   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[1], sampler[3]);
210   ureg_MUL(ureg, *out, ureg_src(temp[0]), ureg_src(temp[1]));
211}
212
213static INLINE void
214color_transform( struct ureg_program *ureg,
215                 struct ureg_dst *out,
216                 struct ureg_src *in,
217                 struct ureg_src *sampler,
218                 struct ureg_dst *temp,
219                 struct ureg_src *constant)
220{
221   ureg_MAD(ureg, temp[1], ureg_src(temp[0]), constant[0], constant[1]);
222   /* clamp to [0.0f, 1.0f] */
223   ureg_CLAMP(ureg, temp[1],
224              ureg_src(temp[1]),
225              ureg_scalar(constant[3], TGSI_SWIZZLE_X),
226              ureg_scalar(constant[3], TGSI_SWIZZLE_Y));
227   ureg_MOV(ureg, *out, ureg_src(temp[1]));
228}
229
230/**
231 * Emit instructions for the specified blend mode.  Colors should be
232 * premultiplied.  Two temporary registers are required.
233 *
234 * XXX callers do not pass premultiplied colors!
235 */
236static INLINE void
237blend_generic(struct ureg_program *ureg,
238              VGBlendMode mode,
239              struct ureg_dst out,
240              struct ureg_src src,
241              struct ureg_src dst,
242              struct ureg_src src_channel_alpha,
243              struct ureg_src one,
244              struct ureg_dst temp[2])
245{
246   switch (mode) {
247   case VG_BLEND_SRC:
248      ureg_MOV(ureg, out, src);
249      break;
250   case VG_BLEND_SRC_OVER:
251      /* RGBA_out = RGBA_src + (1 - A_src) * RGBA_dst */
252      ureg_SUB(ureg, temp[0], one, src_channel_alpha);
253      ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src);
254      break;
255   case VG_BLEND_DST_OVER:
256      /* RGBA_out = RGBA_dst + (1 - A_dst) * RGBA_src */
257      ureg_SUB(ureg, temp[0], one, ureg_scalar(dst, TGSI_SWIZZLE_W));
258      ureg_MAD(ureg, out, ureg_src(temp[0]), src, dst);
259      break;
260   case VG_BLEND_SRC_IN:
261      ureg_MUL(ureg, out, src, ureg_scalar(dst, TGSI_SWIZZLE_W));
262      break;
263   case VG_BLEND_DST_IN:
264      ureg_MUL(ureg, out, dst, src_channel_alpha);
265      break;
266   case VG_BLEND_MULTIPLY:
267      /*
268       * RGB_out = (1 - A_dst) * RGB_src + (1 - A_src) * RGB_dst +
269       *           RGB_src * RGB_dst
270       */
271      ureg_MAD(ureg, temp[0],
272            ureg_scalar(dst, TGSI_SWIZZLE_W), ureg_negate(src), src);
273      ureg_MAD(ureg, temp[1],
274            src_channel_alpha, ureg_negate(dst), dst);
275      ureg_MAD(ureg, temp[1], src, dst, ureg_src(temp[1]));
276      ureg_ADD(ureg, out, ureg_src(temp[0]), ureg_src(temp[1]));
277      /* alpha is src over */
278      ureg_ADD(ureg, ureg_writemask(out, TGSI_WRITEMASK_W),
279            src, ureg_src(temp[1]));
280      break;
281   case VG_BLEND_SCREEN:
282      /* RGBA_out = RGBA_src + (1 - RGBA_src) * RGBA_dst */
283      ureg_SUB(ureg, temp[0], one, src);
284      ureg_MAD(ureg, out, ureg_src(temp[0]), dst, src);
285      break;
286   case VG_BLEND_DARKEN:
287   case VG_BLEND_LIGHTEN:
288      /* src over */
289      ureg_SUB(ureg, temp[0], one, src_channel_alpha);
290      ureg_MAD(ureg, temp[0], ureg_src(temp[0]), dst, src);
291      /* dst over */
292      ureg_SUB(ureg, temp[1], one, ureg_scalar(dst, TGSI_SWIZZLE_W));
293      ureg_MAD(ureg, temp[1], ureg_src(temp[1]), src, dst);
294      /* take min/max for colors */
295      if (mode == VG_BLEND_DARKEN) {
296         ureg_MIN(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ),
297               ureg_src(temp[0]), ureg_src(temp[1]));
298      }
299      else {
300         ureg_MAX(ureg, ureg_writemask(out, TGSI_WRITEMASK_XYZ),
301               ureg_src(temp[0]), ureg_src(temp[1]));
302      }
303      break;
304   case VG_BLEND_ADDITIVE:
305      /* RGBA_out = RGBA_src + RGBA_dst */
306      ureg_ADD(ureg, temp[0], src, dst);
307      ureg_MIN(ureg, out, ureg_src(temp[0]), one);
308      break;
309   default:
310      assert(0);
311      break;
312   }
313}
314
315static INLINE void
316blend_multiply( struct ureg_program *ureg,
317                struct ureg_dst *out,
318                struct ureg_src *in,
319                struct ureg_src *sampler,
320                struct ureg_dst *temp,
321                struct ureg_src *constant)
322{
323   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
324   blend_generic(ureg, VG_BLEND_MULTIPLY, *out,
325                 ureg_src(temp[0]),
326                 ureg_src(temp[1]),
327                 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
328                 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
329                 temp + 2);
330}
331
332static INLINE void
333blend_screen( struct ureg_program *ureg,
334              struct ureg_dst     *out,
335              struct ureg_src     *in,
336              struct ureg_src     *sampler,
337              struct ureg_dst     *temp,
338              struct ureg_src     *constant)
339{
340   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
341   blend_generic(ureg, VG_BLEND_SCREEN, *out,
342                 ureg_src(temp[0]),
343                 ureg_src(temp[1]),
344                 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
345                 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
346                 temp + 2);
347}
348
349static INLINE void
350blend_darken( struct ureg_program *ureg,
351              struct ureg_dst     *out,
352              struct ureg_src     *in,
353              struct ureg_src     *sampler,
354              struct ureg_dst     *temp,
355              struct ureg_src     *constant)
356{
357   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
358   blend_generic(ureg, VG_BLEND_DARKEN, *out,
359                 ureg_src(temp[0]),
360                 ureg_src(temp[1]),
361                 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
362                 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
363                 temp + 2);
364}
365
366static INLINE void
367blend_lighten( struct ureg_program *ureg,
368               struct ureg_dst     *out,
369               struct ureg_src     *in,
370               struct ureg_src     *sampler,
371               struct ureg_dst *temp,
372               struct ureg_src     *constant)
373{
374   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[2]);
375   blend_generic(ureg, VG_BLEND_LIGHTEN, *out,
376                 ureg_src(temp[0]),
377                 ureg_src(temp[1]),
378                 ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
379                 ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
380                 temp + 2);
381}
382
383static INLINE void
384mask( struct ureg_program *ureg,
385      struct ureg_dst *out,
386      struct ureg_src *in,
387      struct ureg_src *sampler,
388      struct ureg_dst *temp,
389      struct ureg_src *constant)
390{
391   ureg_TEX(ureg, temp[1], TGSI_TEXTURE_2D, in[0], sampler[1]);
392   ureg_MUL(ureg, ureg_writemask(temp[0], TGSI_WRITEMASK_W),
393            ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
394            ureg_scalar(ureg_src(temp[1]), TGSI_SWIZZLE_W));
395   ureg_MOV(ureg, *out, ureg_src(temp[0]));
396}
397
398static INLINE void
399premultiply( struct ureg_program *ureg,
400                struct ureg_dst *out,
401                struct ureg_src *in,
402                struct ureg_src *sampler,
403                struct ureg_dst *temp,
404                struct ureg_src *constant)
405{
406   ureg_MUL(ureg,
407            ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ),
408            ureg_src(temp[0]),
409            ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W));
410}
411
412static INLINE void
413unpremultiply( struct ureg_program *ureg,
414                struct ureg_dst *out,
415                struct ureg_src *in,
416                struct ureg_src *sampler,
417                struct ureg_dst *temp,
418                struct ureg_src *constant)
419{
420   ureg_TEX(ureg, temp[0], TGSI_TEXTURE_2D, in[0], sampler[1]);
421}
422
423
424static INLINE void
425color_bw( struct ureg_program *ureg,
426                struct ureg_dst *out,
427                struct ureg_src *in,
428                struct ureg_src *sampler,
429                struct ureg_dst *temp,
430                struct ureg_src *constant)
431{
432   ureg_ADD(ureg, temp[1],
433            ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
434            ureg_scalar(constant[3], TGSI_SWIZZLE_Y));
435   ureg_RCP(ureg, temp[2], ureg_src(temp[1]));
436   ureg_ADD(ureg, temp[1],
437            ureg_scalar(constant[3], TGSI_SWIZZLE_Y),
438            ureg_src(temp[2]));
439   ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X),
440            ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X),
441            ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Y));
442   ureg_ADD(ureg, ureg_writemask(temp[2], TGSI_WRITEMASK_X),
443            ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_Z),
444            ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_X));
445   ureg_SGE(ureg,
446            ureg_writemask(temp[0], TGSI_WRITEMASK_XYZ),
447            ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_X),
448            ureg_src(temp[1]));
449  ureg_SGE(ureg,
450           ureg_writemask(temp[0], TGSI_WRITEMASK_W),
451           ureg_scalar(ureg_src(temp[0]), TGSI_SWIZZLE_W),
452           ureg_scalar(ureg_src(temp[2]), TGSI_SWIZZLE_Y));
453  ureg_MOV(ureg, *out, ureg_src(temp[0]));
454}
455
456
/**
 * Descriptor of one shader fragment: an identifier, the function that emits
 * the fragment's instructions, and the register ranges recorded for it.
 *
 * The tables in this file initialize this structure positionally, so the
 * order of the fields must not change.
 */
struct shader_asm_info {
   /* identifier of the fragment (a VEGA_*_SHADER value in the tables) */
   VGint id;
   /* function that emits the fragment's instructions */
   ureg_func func;

   /* NOTE(review): presumably whether the position input must be declared
    * for this fragment -- confirm against the code that consumes the tables */
   VGboolean needs_position;

   /* constant registers: first index and count */
   VGint start_const;
   VGint num_consts;

   /* sampler registers: first index and count */
   VGint start_sampler;
   VGint num_samplers;

   /* temporary registers: first index and count */
   VGint start_temp;
   VGint num_temps;
};
472
473
/*
 * paint types
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 */
static const struct shader_asm_info shaders_paint_asm[] = {
   {VEGA_SOLID_FILL_SHADER, solid_fill,
    VG_FALSE, 2, 1, 0, 0, 0, 0},
   {VEGA_LINEAR_GRADIENT_SHADER, linear_grad,
    VG_TRUE,  2, 5, 0, 1, 0, 5},
   {VEGA_RADIAL_GRADIENT_SHADER, radial_grad,
    VG_TRUE,  2, 5, 0, 1, 0, 5},
   {VEGA_PATTERN_SHADER, pattern,
    VG_TRUE,  3, 4, 0, 1, 0, 5},
   {VEGA_PAINT_DEGENERATE_SHADER, paint_degenerate,
    VG_FALSE,  3, 1, 0, 1, 0, 2}
};
487
/*
 * image draw modes
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 */
static const struct shader_asm_info shaders_image_asm[] = {
   {VEGA_IMAGE_NORMAL_SHADER, image_normal,
    VG_TRUE,  0, 0, 3, 1, 0, 0},
   {VEGA_IMAGE_MULTIPLY_SHADER, image_multiply,
    VG_TRUE,  0, 0, 3, 1, 0, 2},
   {VEGA_IMAGE_STENCIL_SHADER, image_stencil,
    VG_TRUE,  0, 0, 3, 1, 0, 2}
};
497
/*
 * color transform
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 */
static const struct shader_asm_info shaders_color_transform_asm[] = {
   {VEGA_COLOR_TRANSFORM_SHADER, color_transform,
    VG_FALSE, 0, 4, 0, 0, 0, 2}
};
502
/*
 * extra blend modes
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 */
static const struct shader_asm_info shaders_blend_asm[] = {
   {VEGA_BLEND_MULTIPLY_SHADER, blend_multiply,
    VG_TRUE,  3, 1, 2, 1, 0, 4},
   {VEGA_BLEND_SCREEN_SHADER, blend_screen,
    VG_TRUE,  3, 1, 2, 1, 0, 4},
   {VEGA_BLEND_DARKEN_SHADER, blend_darken,
    VG_TRUE,  3, 1, 2, 1, 0, 4},
   {VEGA_BLEND_LIGHTEN_SHADER, blend_lighten,
    VG_TRUE,  3, 1, 2, 1, 0, 4},
};
514
/*
 * mask
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 */
static const struct shader_asm_info shaders_mask_asm[] = {
   {VEGA_MASK_SHADER, mask,
    VG_TRUE,  0, 0, 1, 1, 0, 2}
};
519
/*
 * premultiply
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 *
 * NOTE(review): the unpremultiply entry declares no samplers and no
 * position, yet unpremultiply() reads in[0] and sampler[1] -- confirm that
 * this is intended.
 */
static const struct shader_asm_info shaders_premultiply_asm[] = {
   {VEGA_PREMULTIPLY_SHADER, premultiply,
    VG_FALSE,  0, 0, 0, 0, 0, 1},
   {VEGA_UNPREMULTIPLY_SHADER, unpremultiply,
    VG_FALSE,  0, 0, 0, 0, 0, 1},
};
527
/*
 * color transform to black and white
 *
 * Columns: id, func, needs_position, start_const, num_consts,
 * start_sampler, num_samplers, start_temp, num_temps.
 */
static const struct shader_asm_info shaders_bw_asm[] = {
   {VEGA_BW_SHADER, color_bw,
    VG_FALSE,  3, 1, 0, 0, 0, 3},
};
533
534#endif
535