draw_pipe_aapoint.c revision 102bf6e2a70f565f03d5e9c4995b29d61c0aa165
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * AA point stage:  AA points are converted to quads and rendered with a
30 * special fragment shader.  Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors:  Brian Paul
38 */
39
40
41#include "pipe/p_context.h"
42#include "pipe/p_defines.h"
43#include "pipe/p_shader_tokens.h"
44
45#include "tgsi/tgsi_transform.h"
46#include "tgsi/tgsi_dump.h"
47
48#include "util/u_math.h"
49#include "util/u_memory.h"
50
51#include "draw_context.h"
52#include "draw_vs.h"
53#include "draw_pipe.h"
54
55
56/** Approx number of new tokens for instructions in aa_transform_inst() */
57#define NUM_NEW_TOKENS 200
58
59
60/*
61 * Enabling NORMALIZE might give _slightly_ better results.
62 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
65 */
66#define NORMALIZE 0
67
68
69/**
70 * Subclass of pipe_shader_state to carry extra fragment shader info.
71 */
72struct aapoint_fragment_shader
73{
74   struct pipe_shader_state state;
75   void *driver_fs;   /**< the regular shader */
76   void *aapoint_fs;  /**< the aa point-augmented shader */
77   int generic_attrib; /**< The generic input attrib/texcoord we'll use */
78};
79
80
81/**
82 * Subclass of draw_stage
83 */
84struct aapoint_stage
85{
86   struct draw_stage stage;
87
88   /** half of pipe_rasterizer_state::point_size */
89   float radius;
90
91   /** vertex attrib slot containing point size */
92   int psize_slot;
93
94   /** this is the vertex attrib slot for the new texcoords */
95   uint tex_slot;
96
97   /** vertex attrib slot containing position */
98   uint pos_slot;
99
100   /** Currently bound fragment shader */
101   struct aapoint_fragment_shader *fs;
102
103   /*
104    * Driver interface/override functions
105    */
106   void * (*driver_create_fs_state)(struct pipe_context *,
107                                    const struct pipe_shader_state *);
108   void (*driver_bind_fs_state)(struct pipe_context *, void *);
109   void (*driver_delete_fs_state)(struct pipe_context *, void *);
110};
111
112
113
114/**
115 * Subclass of tgsi_transform_context, used for transforming the
116 * user's fragment shader to add the special AA instructions.
117 */
118struct aa_transform_context {
119   struct tgsi_transform_context base;
120   uint tempsUsed;  /**< bitmask */
121   int colorOutput; /**< which output is the primary color */
122   int maxInput, maxGeneric;  /**< max input index found */
123   int tmp0, colorTemp;  /**< temp registers */
124   boolean firstInstruction;
125};
126
127
128/**
129 * TGSI declaration transform callback.
130 * Look for two free temp regs and available input reg for new texcoords.
131 */
132static void
133aa_transform_decl(struct tgsi_transform_context *ctx,
134                  struct tgsi_full_declaration *decl)
135{
136   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
137
138   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
139       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
140       decl->Semantic.Index == 0) {
141      aactx->colorOutput = decl->Range.First;
142   }
143   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
144      if ((int) decl->Range.Last > aactx->maxInput)
145         aactx->maxInput = decl->Range.Last;
146      if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
147           (int) decl->Semantic.Index > aactx->maxGeneric) {
148         aactx->maxGeneric = decl->Semantic.Index;
149      }
150   }
151   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
152      uint i;
153      for (i = decl->Range.First;
154           i <= decl->Range.Last; i++) {
155         aactx->tempsUsed |= (1 << i);
156      }
157   }
158
159   ctx->emit_declaration(ctx, decl);
160}
161
162
163/**
164 * TGSI instruction transform callback.
165 * Replace writes to result.color w/ a temp reg.
166 * Upon END instruction, insert texture sampling code for antialiasing.
167 */
168static void
169aa_transform_inst(struct tgsi_transform_context *ctx,
170                  struct tgsi_full_instruction *inst)
171{
172   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
173   struct tgsi_full_instruction newInst;
174
175   if (aactx->firstInstruction) {
176      /* emit our new declarations before the first instruction */
177
178      struct tgsi_full_declaration decl;
179      const int texInput = aactx->maxInput + 1;
180      int tmp0;
181      uint i;
182
183      /* find two free temp regs */
184      for (i = 0; i < 32; i++) {
185         if ((aactx->tempsUsed & (1 << i)) == 0) {
186            /* found a free temp */
187            if (aactx->tmp0 < 0)
188               aactx->tmp0 = i;
189            else if (aactx->colorTemp < 0)
190               aactx->colorTemp = i;
191            else
192               break;
193         }
194      }
195
196      assert(aactx->colorTemp != aactx->tmp0);
197
198      tmp0 = aactx->tmp0;
199
200      /* declare new generic input/texcoord */
201      decl = tgsi_default_full_declaration();
202      decl.Declaration.File = TGSI_FILE_INPUT;
203      /* XXX this could be linear... */
204      decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
205      decl.Declaration.Semantic = 1;
206      decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
207      decl.Semantic.Index = aactx->maxGeneric + 1;
208      decl.Range.First =
209      decl.Range.Last = texInput;
210      ctx->emit_declaration(ctx, &decl);
211
212      /* declare new temp regs */
213      decl = tgsi_default_full_declaration();
214      decl.Declaration.File = TGSI_FILE_TEMPORARY;
215      decl.Range.First =
216      decl.Range.Last = tmp0;
217      ctx->emit_declaration(ctx, &decl);
218
219      decl = tgsi_default_full_declaration();
220      decl.Declaration.File = TGSI_FILE_TEMPORARY;
221      decl.Range.First =
222      decl.Range.Last = aactx->colorTemp;
223      ctx->emit_declaration(ctx, &decl);
224
225      aactx->firstInstruction = FALSE;
226
227
228      /*
229       * Emit code to compute fragment coverage, kill if outside point radius
230       *
231       * Temp reg0 usage:
232       *  t0.x = distance of fragment from center point
233       *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
234       *  t0.z = temporary for computing 1/(1-k) value
235       *  t0.w = final coverage value
236       */
237
238      /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
239      newInst = tgsi_default_full_instruction();
240      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
241      newInst.Instruction.NumDstRegs = 1;
242      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
243      newInst.Dst[0].Register.Index = tmp0;
244      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
245      newInst.Instruction.NumSrcRegs = 2;
246      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
247      newInst.Src[0].Register.Index = texInput;
248      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
249      newInst.Src[1].Register.Index = texInput;
250      ctx->emit_instruction(ctx, &newInst);
251
252      /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
253      newInst = tgsi_default_full_instruction();
254      newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
255      newInst.Instruction.NumDstRegs = 1;
256      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
257      newInst.Dst[0].Register.Index = tmp0;
258      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
259      newInst.Instruction.NumSrcRegs = 2;
260      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
261      newInst.Src[0].Register.Index = tmp0;
262      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
263      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
264      newInst.Src[1].Register.Index = tmp0;
265      newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y;
266      ctx->emit_instruction(ctx, &newInst);
267
268#if NORMALIZE  /* OPTIONAL normalization of length */
269      /* RSQ t0.x, t0.x; */
270      newInst = tgsi_default_full_instruction();
271      newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
272      newInst.Instruction.NumDstRegs = 1;
273      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
274      newInst.Dst[0].Register.Index = tmp0;
275      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
276      newInst.Instruction.NumSrcRegs = 1;
277      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
278      newInst.Src[0].Register.Index = tmp0;
279      ctx->emit_instruction(ctx, &newInst);
280
281      /* RCP t0.x, t0.x; */
282      newInst = tgsi_default_full_instruction();
283      newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
284      newInst.Instruction.NumDstRegs = 1;
285      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
286      newInst.Dst[0].Register.Index = tmp0;
287      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
288      newInst.Instruction.NumSrcRegs = 1;
289      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
290      newInst.Src[0].Register.Index = tmp0;
291      ctx->emit_instruction(ctx, &newInst);
292#endif
293
294      /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
295      newInst = tgsi_default_full_instruction();
296      newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
297      newInst.Instruction.NumDstRegs = 1;
298      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
299      newInst.Dst[0].Register.Index = tmp0;
300      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
301      newInst.Instruction.NumSrcRegs = 2;
302      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
303      newInst.Src[0].Register.Index = tmp0;
304      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
305      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
306      newInst.Src[1].Register.Index = texInput;
307      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
308      ctx->emit_instruction(ctx, &newInst);
309
310      /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
311      newInst = tgsi_default_full_instruction();
312      newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
313      newInst.Instruction.NumDstRegs = 0;
314      newInst.Instruction.NumSrcRegs = 1;
315      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
316      newInst.Src[0].Register.Index = tmp0;
317      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
318      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
319      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
320      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
321      newInst.Src[0].Register.Negate = 1;
322      ctx->emit_instruction(ctx, &newInst);
323
324
325      /* compute coverage factor = (1-d)/(1-k) */
326
327      /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
328      newInst = tgsi_default_full_instruction();
329      newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
330      newInst.Instruction.NumDstRegs = 1;
331      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
332      newInst.Dst[0].Register.Index = tmp0;
333      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
334      newInst.Instruction.NumSrcRegs = 2;
335      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
336      newInst.Src[0].Register.Index = texInput;
337      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W;
338      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
339      newInst.Src[1].Register.Index = texInput;
340      newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
341      ctx->emit_instruction(ctx, &newInst);
342
343      /* RCP t0.z, t0.z;  # t0.z = 1 / m */
344      newInst = tgsi_default_full_instruction();
345      newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
346      newInst.Instruction.NumDstRegs = 1;
347      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
348      newInst.Dst[0].Register.Index = tmp0;
349      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
350      newInst.Instruction.NumSrcRegs = 1;
351      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
352      newInst.Src[0].Register.Index = tmp0;
353      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
354      ctx->emit_instruction(ctx, &newInst);
355
356      /* SUB t0.y, 1, t0.x;  # d = 1 - d */
357      newInst = tgsi_default_full_instruction();
358      newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
359      newInst.Instruction.NumDstRegs = 1;
360      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
361      newInst.Dst[0].Register.Index = tmp0;
362      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
363      newInst.Instruction.NumSrcRegs = 2;
364      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
365      newInst.Src[0].Register.Index = texInput;
366      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W;
367      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
368      newInst.Src[1].Register.Index = tmp0;
369      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
370      ctx->emit_instruction(ctx, &newInst);
371
372      /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
373      newInst = tgsi_default_full_instruction();
374      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
375      newInst.Instruction.NumDstRegs = 1;
376      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
377      newInst.Dst[0].Register.Index = tmp0;
378      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
379      newInst.Instruction.NumSrcRegs = 2;
380      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
381      newInst.Src[0].Register.Index = tmp0;
382      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
383      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
384      newInst.Src[1].Register.Index = tmp0;
385      newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z;
386      ctx->emit_instruction(ctx, &newInst);
387
388      /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
389      newInst = tgsi_default_full_instruction();
390      newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
391      newInst.Instruction.NumDstRegs = 1;
392      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
393      newInst.Dst[0].Register.Index = tmp0;
394      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
395      newInst.Instruction.NumSrcRegs = 2;
396      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
397      newInst.Src[0].Register.Index = tmp0;
398      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
399      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
400      newInst.Src[1].Register.Index = texInput;
401      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z;
402      ctx->emit_instruction(ctx, &newInst);
403
404      /* CMP t0.w, -t0.y, tex.w, t0.w;
405       *  # if -t0.y < 0 then
406       *       t0.w = 1
407       *    else
408       *       t0.w = t0.w
409       */
410      newInst = tgsi_default_full_instruction();
411      newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
412      newInst.Instruction.NumDstRegs = 1;
413      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
414      newInst.Dst[0].Register.Index = tmp0;
415      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
416      newInst.Instruction.NumSrcRegs = 3;
417      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
418      newInst.Src[0].Register.Index = tmp0;
419      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
420      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
421      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
422      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
423      newInst.Src[0].Register.Negate = 1;
424      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
425      newInst.Src[1].Register.Index = texInput;
426      newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W;
427      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
428      newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W;
429      newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
430      newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY;
431      newInst.Src[2].Register.Index = tmp0;
432      newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W;
433      newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W;
434      newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W;
435      newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
436      ctx->emit_instruction(ctx, &newInst);
437
438   }
439
440   if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
441      /* add alpha modulation code at tail of program */
442
443      /* MOV result.color.xyz, colorTemp; */
444      newInst = tgsi_default_full_instruction();
445      newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
446      newInst.Instruction.NumDstRegs = 1;
447      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
448      newInst.Dst[0].Register.Index = aactx->colorOutput;
449      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
450      newInst.Instruction.NumSrcRegs = 1;
451      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
452      newInst.Src[0].Register.Index = aactx->colorTemp;
453      ctx->emit_instruction(ctx, &newInst);
454
455      /* MUL result.color.w, colorTemp, tmp0.w; */
456      newInst = tgsi_default_full_instruction();
457      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
458      newInst.Instruction.NumDstRegs = 1;
459      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
460      newInst.Dst[0].Register.Index = aactx->colorOutput;
461      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
462      newInst.Instruction.NumSrcRegs = 2;
463      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
464      newInst.Src[0].Register.Index = aactx->colorTemp;
465      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
466      newInst.Src[1].Register.Index = aactx->tmp0;
467      ctx->emit_instruction(ctx, &newInst);
468   }
469   else {
470      /* Not an END instruction.
471       * Look for writes to result.color and replace with colorTemp reg.
472       */
473      uint i;
474
475      for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
476         struct tgsi_full_dst_register *dst = &inst->Dst[i];
477         if (dst->Register.File == TGSI_FILE_OUTPUT &&
478             dst->Register.Index == aactx->colorOutput) {
479            dst->Register.File = TGSI_FILE_TEMPORARY;
480            dst->Register.Index = aactx->colorTemp;
481         }
482      }
483   }
484
485   ctx->emit_instruction(ctx, inst);
486}
487
488
489/**
490 * Generate the frag shader we'll use for drawing AA points.
491 * This will be the user's shader plus some texture/modulate instructions.
492 */
493static boolean
494generate_aapoint_fs(struct aapoint_stage *aapoint)
495{
496   const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
497   struct pipe_shader_state aapoint_fs;
498   struct aa_transform_context transform;
499   const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
500   struct pipe_context *pipe = aapoint->stage.draw->pipe;
501
502   aapoint_fs = *orig_fs; /* copy to init */
503   aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
504   if (aapoint_fs.tokens == NULL)
505      return FALSE;
506
507   memset(&transform, 0, sizeof(transform));
508   transform.colorOutput = -1;
509   transform.maxInput = -1;
510   transform.maxGeneric = -1;
511   transform.colorTemp = -1;
512   transform.tmp0 = -1;
513   transform.firstInstruction = TRUE;
514   transform.base.transform_instruction = aa_transform_inst;
515   transform.base.transform_declaration = aa_transform_decl;
516
517   tgsi_transform_shader(orig_fs->tokens,
518                         (struct tgsi_token *) aapoint_fs.tokens,
519                         newLen, &transform.base);
520
521#if 0 /* DEBUG */
522   debug_printf("draw_aapoint, orig shader:\n");
523   tgsi_dump(orig_fs->tokens, 0);
524   debug_printf("draw_aapoint, new shader:\n");
525   tgsi_dump(aapoint_fs.tokens, 0);
526#endif
527
528   aapoint->fs->aapoint_fs
529      = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
530   if (aapoint->fs->aapoint_fs == NULL)
531      goto fail;
532
533   aapoint->fs->generic_attrib = transform.maxGeneric + 1;
534   FREE((void *)aapoint_fs.tokens);
535   return TRUE;
536
537fail:
538   FREE((void *)aapoint_fs.tokens);
539   return FALSE;
540}
541
542
543/**
544 * When we're about to draw our first AA point in a batch, this function is
545 * called to tell the driver to bind our modified fragment shader.
546 */
547static boolean
548bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
549{
550   struct draw_context *draw = aapoint->stage.draw;
551   struct pipe_context *pipe = draw->pipe;
552
553   if (!aapoint->fs->aapoint_fs &&
554       !generate_aapoint_fs(aapoint))
555      return FALSE;
556
557   draw->suspend_flushing = TRUE;
558   aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
559   draw->suspend_flushing = FALSE;
560
561   return TRUE;
562}
563
564
565
566static INLINE struct aapoint_stage *
567aapoint_stage( struct draw_stage *stage )
568{
569   return (struct aapoint_stage *) stage;
570}
571
572
573
574
575/**
576 * Draw an AA point by drawing a quad.
577 */
578static void
579aapoint_point(struct draw_stage *stage, struct prim_header *header)
580{
581   const struct aapoint_stage *aapoint = aapoint_stage(stage);
582   struct prim_header tri;
583   struct vertex_header *v[4];
584   const uint tex_slot = aapoint->tex_slot;
585   const uint pos_slot = aapoint->pos_slot;
586   float radius, *pos, *tex;
587   uint i;
588   float k;
589
590   if (aapoint->psize_slot >= 0) {
591      radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
592   }
593   else {
594      radius = aapoint->radius;
595   }
596
597   /*
598    * Note: the texcoords (generic attrib, really) we use are special:
599    * The S and T components simply vary from -1 to +1.
600    * The R component is k, below.
601    * The Q component is 1.0 and will used as a handy constant in the
602    * fragment shader.
603    */
604
605   /*
606    * k is the threshold distance from the point's center at which
607    * we begin alpha attenuation (the coverage value).
608    * Operating within a unit circle, we'll compute the fragment's
609    * distance 'd' from the center point using the texcoords.
610    * IF d > 1.0 THEN
611    *    KILL fragment
612    * ELSE IF d > k THEN
613    *    compute coverage in [0,1] proportional to d in [k, 1].
614    * ELSE
615    *    coverage = 1.0;  // full coverage
616    * ENDIF
617    *
618    * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
619    * avoid using IF/ELSE/ENDIF TGSI opcodes.
620    */
621
622#if !NORMALIZE
623   k = 1.0f / radius;
624   k = 1.0f - 2.0f * k + k * k;
625#else
626   k = 1.0f - 1.0f / radius;
627#endif
628
629   /* allocate/dup new verts */
630   for (i = 0; i < 4; i++) {
631      v[i] = dup_vert(stage, header->v[0], i);
632   }
633
634   /* new verts */
635   pos = v[0]->data[pos_slot];
636   pos[0] -= radius;
637   pos[1] -= radius;
638
639   pos = v[1]->data[pos_slot];
640   pos[0] += radius;
641   pos[1] -= radius;
642
643   pos = v[2]->data[pos_slot];
644   pos[0] += radius;
645   pos[1] += radius;
646
647   pos = v[3]->data[pos_slot];
648   pos[0] -= radius;
649   pos[1] += radius;
650
651   /* new texcoords */
652   tex = v[0]->data[tex_slot];
653   ASSIGN_4V(tex, -1, -1, k, 1);
654
655   tex = v[1]->data[tex_slot];
656   ASSIGN_4V(tex,  1, -1, k, 1);
657
658   tex = v[2]->data[tex_slot];
659   ASSIGN_4V(tex,  1,  1, k, 1);
660
661   tex = v[3]->data[tex_slot];
662   ASSIGN_4V(tex, -1,  1, k, 1);
663
664   /* emit 2 tris for the quad strip */
665   tri.v[0] = v[0];
666   tri.v[1] = v[1];
667   tri.v[2] = v[2];
668   stage->next->tri( stage->next, &tri );
669
670   tri.v[0] = v[0];
671   tri.v[1] = v[2];
672   tri.v[2] = v[3];
673   stage->next->tri( stage->next, &tri );
674}
675
676
677static void
678aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
679{
680   auto struct aapoint_stage *aapoint = aapoint_stage(stage);
681   struct draw_context *draw = stage->draw;
682   struct pipe_context *pipe = draw->pipe;
683   const struct pipe_rasterizer_state *rast = draw->rasterizer;
684   void *r;
685
686   assert(draw->rasterizer->point_smooth);
687
688   if (draw->rasterizer->point_size <= 2.0)
689      aapoint->radius = 1.0;
690   else
691      aapoint->radius = 0.5f * draw->rasterizer->point_size;
692
693   /*
694    * Bind (generate) our fragprog.
695    */
696   bind_aapoint_fragment_shader(aapoint);
697
698   /* update vertex attrib info */
699   aapoint->pos_slot = draw_current_shader_position_output(draw);
700
701   /* allocate the extra post-transformed vertex attribute */
702   aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
703                                                      TGSI_SEMANTIC_GENERIC,
704                                                      aapoint->fs->generic_attrib);
705   assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
706
707   /* find psize slot in post-transform vertex */
708   aapoint->psize_slot = -1;
709   if (draw->rasterizer->point_size_per_vertex) {
710      const struct tgsi_shader_info *info = draw_get_shader_info(draw);
711      uint i;
712      /* find PSIZ vertex output */
713      for (i = 0; i < info->num_outputs; i++) {
714         if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
715            aapoint->psize_slot = i;
716            break;
717         }
718      }
719   }
720
721   draw->suspend_flushing = TRUE;
722
723   /* Disable triangle culling, stippling, unfilled mode etc. */
724   r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
725   pipe->bind_rasterizer_state(pipe, r);
726
727   draw->suspend_flushing = FALSE;
728
729   /* now really draw first point */
730   stage->point = aapoint_point;
731   stage->point(stage, header);
732}
733
734
735static void
736aapoint_flush(struct draw_stage *stage, unsigned flags)
737{
738   struct draw_context *draw = stage->draw;
739   struct aapoint_stage *aapoint = aapoint_stage(stage);
740   struct pipe_context *pipe = draw->pipe;
741
742   stage->point = aapoint_first_point;
743   stage->next->flush( stage->next, flags );
744
745   /* restore original frag shader */
746   draw->suspend_flushing = TRUE;
747   aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
748
749   /* restore original rasterizer state */
750   if (draw->rast_handle) {
751      pipe->bind_rasterizer_state(pipe, draw->rast_handle);
752   }
753
754   draw->suspend_flushing = FALSE;
755
756   draw_remove_extra_vertex_attribs(draw);
757}
758
759
760static void
761aapoint_reset_stipple_counter(struct draw_stage *stage)
762{
763   stage->next->reset_stipple_counter( stage->next );
764}
765
766
767static void
768aapoint_destroy(struct draw_stage *stage)
769{
770   struct aapoint_stage* aapoint = aapoint_stage(stage);
771   struct pipe_context *pipe = stage->draw->pipe;
772
773   draw_free_temp_verts( stage );
774
775   /* restore the old entry points */
776   pipe->create_fs_state = aapoint->driver_create_fs_state;
777   pipe->bind_fs_state = aapoint->driver_bind_fs_state;
778   pipe->delete_fs_state = aapoint->driver_delete_fs_state;
779
780   FREE( stage );
781}
782
783
784static struct aapoint_stage *
785draw_aapoint_stage(struct draw_context *draw)
786{
787   struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
788   if (aapoint == NULL)
789      goto fail;
790
791   aapoint->stage.draw = draw;
792   aapoint->stage.name = "aapoint";
793   aapoint->stage.next = NULL;
794   aapoint->stage.point = aapoint_first_point;
795   aapoint->stage.line = draw_pipe_passthrough_line;
796   aapoint->stage.tri = draw_pipe_passthrough_tri;
797   aapoint->stage.flush = aapoint_flush;
798   aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
799   aapoint->stage.destroy = aapoint_destroy;
800
801   if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
802      goto fail;
803
804   return aapoint;
805
806 fail:
807   if (aapoint)
808      aapoint->stage.destroy(&aapoint->stage);
809
810   return NULL;
811
812}
813
814
815static struct aapoint_stage *
816aapoint_stage_from_pipe(struct pipe_context *pipe)
817{
818   struct draw_context *draw = (struct draw_context *) pipe->draw;
819   return aapoint_stage(draw->pipeline.aapoint);
820}
821
822
823/**
824 * This function overrides the driver's create_fs_state() function and
825 * will typically be called by the state tracker.
826 */
827static void *
828aapoint_create_fs_state(struct pipe_context *pipe,
829                       const struct pipe_shader_state *fs)
830{
831   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
832   struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
833   if (aafs == NULL)
834      return NULL;
835
836   aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
837
838   /* pass-through */
839   aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
840
841   return aafs;
842}
843
844
845static void
846aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
847{
848   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
849   struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
850   /* save current */
851   aapoint->fs = aafs;
852   /* pass-through */
853   aapoint->driver_bind_fs_state(pipe,
854                                 (aafs ? aafs->driver_fs : NULL));
855}
856
857
858static void
859aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
860{
861   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
862   struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
863
864   /* pass-through */
865   aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
866
867   if (aafs->aapoint_fs)
868      aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
869
870   FREE((void*)aafs->state.tokens);
871
872   FREE(aafs);
873}
874
875
876/**
877 * Called by drivers that want to install this AA point prim stage
878 * into the draw module's pipeline.  This will not be used if the
879 * hardware has native support for AA points.
880 */
881boolean
882draw_install_aapoint_stage(struct draw_context *draw,
883                           struct pipe_context *pipe)
884{
885   struct aapoint_stage *aapoint;
886
887   pipe->draw = (void *) draw;
888
889   /*
890    * Create / install AA point drawing / prim stage
891    */
892   aapoint = draw_aapoint_stage( draw );
893   if (aapoint == NULL)
894      return FALSE;
895
896   /* save original driver functions */
897   aapoint->driver_create_fs_state = pipe->create_fs_state;
898   aapoint->driver_bind_fs_state = pipe->bind_fs_state;
899   aapoint->driver_delete_fs_state = pipe->delete_fs_state;
900
901   /* override the driver's functions */
902   pipe->create_fs_state = aapoint_create_fs_state;
903   pipe->bind_fs_state = aapoint_bind_fs_state;
904   pipe->delete_fs_state = aapoint_delete_fs_state;
905
906   draw->pipeline.aapoint = &aapoint->stage;
907
908   return TRUE;
909}
910