1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * AA point stage:  AA points are converted to quads and rendered with a
30 * special fragment shader.  Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors:  Brian Paul
38 */
39
40
41#include "pipe/p_context.h"
42#include "pipe/p_defines.h"
43#include "pipe/p_shader_tokens.h"
44
45#include "tgsi/tgsi_transform.h"
46#include "tgsi/tgsi_dump.h"
47
48#include "util/u_math.h"
49#include "util/u_memory.h"
50
51#include "draw_context.h"
52#include "draw_vs.h"
53#include "draw_pipe.h"
54
55
56/** Approx number of new tokens for instructions in aa_transform_inst() */
57#define NUM_NEW_TOKENS 200
58
59
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 */
66#define NORMALIZE 0
67
68
69/**
70 * Subclass of pipe_shader_state to carry extra fragment shader info.
71 */
72struct aapoint_fragment_shader
73{
74   struct pipe_shader_state state;
75   void *driver_fs;   /**< the regular shader */
76   void *aapoint_fs;  /**< the aa point-augmented shader */
77   int generic_attrib; /**< The generic input attrib/texcoord we'll use */
78};
79
80
81/**
82 * Subclass of draw_stage
83 */
84struct aapoint_stage
85{
86   struct draw_stage stage;
87
88   /** half of pipe_rasterizer_state::point_size */
89   float radius;
90
91   /** vertex attrib slot containing point size */
92   int psize_slot;
93
94   /** this is the vertex attrib slot for the new texcoords */
95   uint tex_slot;
96
97   /** vertex attrib slot containing position */
98   uint pos_slot;
99
100   /** Currently bound fragment shader */
101   struct aapoint_fragment_shader *fs;
102
103   /*
104    * Driver interface/override functions
105    */
106   void * (*driver_create_fs_state)(struct pipe_context *,
107                                    const struct pipe_shader_state *);
108   void (*driver_bind_fs_state)(struct pipe_context *, void *);
109   void (*driver_delete_fs_state)(struct pipe_context *, void *);
110};
111
112
113
114/**
115 * Subclass of tgsi_transform_context, used for transforming the
116 * user's fragment shader to add the special AA instructions.
117 */
118struct aa_transform_context {
119   struct tgsi_transform_context base;
120   uint tempsUsed;  /**< bitmask */
121   int colorOutput; /**< which output is the primary color */
122   int maxInput, maxGeneric;  /**< max input index found */
123   int tmp0, colorTemp;  /**< temp registers */
124   boolean firstInstruction;
125};
126
127
128/**
129 * TGSI declaration transform callback.
130 * Look for two free temp regs and available input reg for new texcoords.
131 */
132static void
133aa_transform_decl(struct tgsi_transform_context *ctx,
134                  struct tgsi_full_declaration *decl)
135{
136   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
137
138   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
139       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
140       decl->Semantic.Index == 0) {
141      aactx->colorOutput = decl->Range.First;
142   }
143   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
144      if ((int) decl->Range.Last > aactx->maxInput)
145         aactx->maxInput = decl->Range.Last;
146      if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
147           (int) decl->Semantic.Index > aactx->maxGeneric) {
148         aactx->maxGeneric = decl->Semantic.Index;
149      }
150   }
151   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
152      uint i;
153      for (i = decl->Range.First;
154           i <= decl->Range.Last; i++) {
155         aactx->tempsUsed |= (1 << i);
156      }
157   }
158
159   ctx->emit_declaration(ctx, decl);
160}
161
162
163/**
164 * TGSI instruction transform callback.
165 * Replace writes to result.color w/ a temp reg.
166 * Upon END instruction, insert texture sampling code for antialiasing.
167 */
168static void
169aa_transform_inst(struct tgsi_transform_context *ctx,
170                  struct tgsi_full_instruction *inst)
171{
172   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
173   struct tgsi_full_instruction newInst;
174
175   if (aactx->firstInstruction) {
176      /* emit our new declarations before the first instruction */
177
178      struct tgsi_full_declaration decl;
179      const int texInput = aactx->maxInput + 1;
180      int tmp0;
181      uint i;
182
183      /* find two free temp regs */
184      for (i = 0; i < 32; i++) {
185         if ((aactx->tempsUsed & (1 << i)) == 0) {
186            /* found a free temp */
187            if (aactx->tmp0 < 0)
188               aactx->tmp0 = i;
189            else if (aactx->colorTemp < 0)
190               aactx->colorTemp = i;
191            else
192               break;
193         }
194      }
195
196      assert(aactx->colorTemp != aactx->tmp0);
197
198      tmp0 = aactx->tmp0;
199
200      /* declare new generic input/texcoord */
201      decl = tgsi_default_full_declaration();
202      decl.Declaration.File = TGSI_FILE_INPUT;
203      /* XXX this could be linear... */
204      decl.Declaration.Interpolate = 1;
205      decl.Declaration.Semantic = 1;
206      decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
207      decl.Semantic.Index = aactx->maxGeneric + 1;
208      decl.Range.First =
209      decl.Range.Last = texInput;
210      decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
211      ctx->emit_declaration(ctx, &decl);
212
213      /* declare new temp regs */
214      decl = tgsi_default_full_declaration();
215      decl.Declaration.File = TGSI_FILE_TEMPORARY;
216      decl.Range.First =
217      decl.Range.Last = tmp0;
218      ctx->emit_declaration(ctx, &decl);
219
220      decl = tgsi_default_full_declaration();
221      decl.Declaration.File = TGSI_FILE_TEMPORARY;
222      decl.Range.First =
223      decl.Range.Last = aactx->colorTemp;
224      ctx->emit_declaration(ctx, &decl);
225
226      aactx->firstInstruction = FALSE;
227
228
229      /*
230       * Emit code to compute fragment coverage, kill if outside point radius
231       *
232       * Temp reg0 usage:
233       *  t0.x = distance of fragment from center point
234       *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
235       *  t0.z = temporary for computing 1/(1-k) value
236       *  t0.w = final coverage value
237       */
238
239      /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
240      newInst = tgsi_default_full_instruction();
241      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
242      newInst.Instruction.NumDstRegs = 1;
243      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
244      newInst.Dst[0].Register.Index = tmp0;
245      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
246      newInst.Instruction.NumSrcRegs = 2;
247      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
248      newInst.Src[0].Register.Index = texInput;
249      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
250      newInst.Src[1].Register.Index = texInput;
251      ctx->emit_instruction(ctx, &newInst);
252
253      /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
254      newInst = tgsi_default_full_instruction();
255      newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
256      newInst.Instruction.NumDstRegs = 1;
257      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
258      newInst.Dst[0].Register.Index = tmp0;
259      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
260      newInst.Instruction.NumSrcRegs = 2;
261      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
262      newInst.Src[0].Register.Index = tmp0;
263      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
264      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
265      newInst.Src[1].Register.Index = tmp0;
266      newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y;
267      ctx->emit_instruction(ctx, &newInst);
268
269#if NORMALIZE  /* OPTIONAL normalization of length */
270      /* RSQ t0.x, t0.x; */
271      newInst = tgsi_default_full_instruction();
272      newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
273      newInst.Instruction.NumDstRegs = 1;
274      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
275      newInst.Dst[0].Register.Index = tmp0;
276      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
277      newInst.Instruction.NumSrcRegs = 1;
278      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
279      newInst.Src[0].Register.Index = tmp0;
280      ctx->emit_instruction(ctx, &newInst);
281
282      /* RCP t0.x, t0.x; */
283      newInst = tgsi_default_full_instruction();
284      newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
285      newInst.Instruction.NumDstRegs = 1;
286      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
287      newInst.Dst[0].Register.Index = tmp0;
288      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
289      newInst.Instruction.NumSrcRegs = 1;
290      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
291      newInst.Src[0].Register.Index = tmp0;
292      ctx->emit_instruction(ctx, &newInst);
293#endif
294
295      /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
296      newInst = tgsi_default_full_instruction();
297      newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
298      newInst.Instruction.NumDstRegs = 1;
299      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
300      newInst.Dst[0].Register.Index = tmp0;
301      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
302      newInst.Instruction.NumSrcRegs = 2;
303      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
304      newInst.Src[0].Register.Index = tmp0;
305      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
306      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
307      newInst.Src[1].Register.Index = texInput;
308      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
309      ctx->emit_instruction(ctx, &newInst);
310
311      /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
312      newInst = tgsi_default_full_instruction();
313      newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
314      newInst.Instruction.NumDstRegs = 0;
315      newInst.Instruction.NumSrcRegs = 1;
316      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
317      newInst.Src[0].Register.Index = tmp0;
318      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
319      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
320      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
321      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
322      newInst.Src[0].Register.Negate = 1;
323      ctx->emit_instruction(ctx, &newInst);
324
325
326      /* compute coverage factor = (1-d)/(1-k) */
327
328      /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
329      newInst = tgsi_default_full_instruction();
330      newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
331      newInst.Instruction.NumDstRegs = 1;
332      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
333      newInst.Dst[0].Register.Index = tmp0;
334      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
335      newInst.Instruction.NumSrcRegs = 2;
336      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
337      newInst.Src[0].Register.Index = texInput;
338      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W;
339      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
340      newInst.Src[1].Register.Index = texInput;
341      newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
342      ctx->emit_instruction(ctx, &newInst);
343
344      /* RCP t0.z, t0.z;  # t0.z = 1 / m */
345      newInst = tgsi_default_full_instruction();
346      newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
347      newInst.Instruction.NumDstRegs = 1;
348      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
349      newInst.Dst[0].Register.Index = tmp0;
350      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
351      newInst.Instruction.NumSrcRegs = 1;
352      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
353      newInst.Src[0].Register.Index = tmp0;
354      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
355      ctx->emit_instruction(ctx, &newInst);
356
357      /* SUB t0.y, 1, t0.x;  # d = 1 - d */
358      newInst = tgsi_default_full_instruction();
359      newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
360      newInst.Instruction.NumDstRegs = 1;
361      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
362      newInst.Dst[0].Register.Index = tmp0;
363      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
364      newInst.Instruction.NumSrcRegs = 2;
365      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
366      newInst.Src[0].Register.Index = texInput;
367      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W;
368      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
369      newInst.Src[1].Register.Index = tmp0;
370      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
371      ctx->emit_instruction(ctx, &newInst);
372
373      /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
374      newInst = tgsi_default_full_instruction();
375      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
376      newInst.Instruction.NumDstRegs = 1;
377      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
378      newInst.Dst[0].Register.Index = tmp0;
379      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
380      newInst.Instruction.NumSrcRegs = 2;
381      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
382      newInst.Src[0].Register.Index = tmp0;
383      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
384      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
385      newInst.Src[1].Register.Index = tmp0;
386      newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z;
387      ctx->emit_instruction(ctx, &newInst);
388
389      /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
390      newInst = tgsi_default_full_instruction();
391      newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
392      newInst.Instruction.NumDstRegs = 1;
393      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
394      newInst.Dst[0].Register.Index = tmp0;
395      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
396      newInst.Instruction.NumSrcRegs = 2;
397      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
398      newInst.Src[0].Register.Index = tmp0;
399      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
400      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
401      newInst.Src[1].Register.Index = texInput;
402      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z;
403      ctx->emit_instruction(ctx, &newInst);
404
405      /* CMP t0.w, -t0.y, tex.w, t0.w;
406       *  # if -t0.y < 0 then
407       *       t0.w = 1
408       *    else
409       *       t0.w = t0.w
410       */
411      newInst = tgsi_default_full_instruction();
412      newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
413      newInst.Instruction.NumDstRegs = 1;
414      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
415      newInst.Dst[0].Register.Index = tmp0;
416      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
417      newInst.Instruction.NumSrcRegs = 3;
418      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
419      newInst.Src[0].Register.Index = tmp0;
420      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
421      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
422      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
423      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
424      newInst.Src[0].Register.Negate = 1;
425      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
426      newInst.Src[1].Register.Index = texInput;
427      newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W;
428      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
429      newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W;
430      newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
431      newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY;
432      newInst.Src[2].Register.Index = tmp0;
433      newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W;
434      newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W;
435      newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W;
436      newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
437      ctx->emit_instruction(ctx, &newInst);
438
439   }
440
441   if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
442      /* add alpha modulation code at tail of program */
443
444      /* MOV result.color.xyz, colorTemp; */
445      newInst = tgsi_default_full_instruction();
446      newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
447      newInst.Instruction.NumDstRegs = 1;
448      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
449      newInst.Dst[0].Register.Index = aactx->colorOutput;
450      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
451      newInst.Instruction.NumSrcRegs = 1;
452      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
453      newInst.Src[0].Register.Index = aactx->colorTemp;
454      ctx->emit_instruction(ctx, &newInst);
455
456      /* MUL result.color.w, colorTemp, tmp0.w; */
457      newInst = tgsi_default_full_instruction();
458      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
459      newInst.Instruction.NumDstRegs = 1;
460      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
461      newInst.Dst[0].Register.Index = aactx->colorOutput;
462      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
463      newInst.Instruction.NumSrcRegs = 2;
464      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
465      newInst.Src[0].Register.Index = aactx->colorTemp;
466      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
467      newInst.Src[1].Register.Index = aactx->tmp0;
468      ctx->emit_instruction(ctx, &newInst);
469   }
470   else {
471      /* Not an END instruction.
472       * Look for writes to result.color and replace with colorTemp reg.
473       */
474      uint i;
475
476      for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
477         struct tgsi_full_dst_register *dst = &inst->Dst[i];
478         if (dst->Register.File == TGSI_FILE_OUTPUT &&
479             dst->Register.Index == aactx->colorOutput) {
480            dst->Register.File = TGSI_FILE_TEMPORARY;
481            dst->Register.Index = aactx->colorTemp;
482         }
483      }
484   }
485
486   ctx->emit_instruction(ctx, inst);
487}
488
489
490/**
491 * Generate the frag shader we'll use for drawing AA points.
492 * This will be the user's shader plus some texture/modulate instructions.
493 */
494static boolean
495generate_aapoint_fs(struct aapoint_stage *aapoint)
496{
497   const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
498   struct pipe_shader_state aapoint_fs;
499   struct aa_transform_context transform;
500   const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
501   struct pipe_context *pipe = aapoint->stage.draw->pipe;
502
503   aapoint_fs = *orig_fs; /* copy to init */
504   aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
505   if (aapoint_fs.tokens == NULL)
506      return FALSE;
507
508   memset(&transform, 0, sizeof(transform));
509   transform.colorOutput = -1;
510   transform.maxInput = -1;
511   transform.maxGeneric = -1;
512   transform.colorTemp = -1;
513   transform.tmp0 = -1;
514   transform.firstInstruction = TRUE;
515   transform.base.transform_instruction = aa_transform_inst;
516   transform.base.transform_declaration = aa_transform_decl;
517
518   tgsi_transform_shader(orig_fs->tokens,
519                         (struct tgsi_token *) aapoint_fs.tokens,
520                         newLen, &transform.base);
521
522#if 0 /* DEBUG */
523   debug_printf("draw_aapoint, orig shader:\n");
524   tgsi_dump(orig_fs->tokens, 0);
525   debug_printf("draw_aapoint, new shader:\n");
526   tgsi_dump(aapoint_fs.tokens, 0);
527#endif
528
529   aapoint->fs->aapoint_fs
530      = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
531   if (aapoint->fs->aapoint_fs == NULL)
532      goto fail;
533
534   aapoint->fs->generic_attrib = transform.maxGeneric + 1;
535   FREE((void *)aapoint_fs.tokens);
536   return TRUE;
537
538fail:
539   FREE((void *)aapoint_fs.tokens);
540   return FALSE;
541}
542
543
544/**
545 * When we're about to draw our first AA point in a batch, this function is
546 * called to tell the driver to bind our modified fragment shader.
547 */
548static boolean
549bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
550{
551   struct draw_context *draw = aapoint->stage.draw;
552   struct pipe_context *pipe = draw->pipe;
553
554   if (!aapoint->fs->aapoint_fs &&
555       !generate_aapoint_fs(aapoint))
556      return FALSE;
557
558   draw->suspend_flushing = TRUE;
559   aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
560   draw->suspend_flushing = FALSE;
561
562   return TRUE;
563}
564
565
566
567static INLINE struct aapoint_stage *
568aapoint_stage( struct draw_stage *stage )
569{
570   return (struct aapoint_stage *) stage;
571}
572
573
574
575
576/**
577 * Draw an AA point by drawing a quad.
578 */
579static void
580aapoint_point(struct draw_stage *stage, struct prim_header *header)
581{
582   const struct aapoint_stage *aapoint = aapoint_stage(stage);
583   struct prim_header tri;
584   struct vertex_header *v[4];
585   const uint tex_slot = aapoint->tex_slot;
586   const uint pos_slot = aapoint->pos_slot;
587   float radius, *pos, *tex;
588   uint i;
589   float k;
590
591   if (aapoint->psize_slot >= 0) {
592      radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
593   }
594   else {
595      radius = aapoint->radius;
596   }
597
598   /*
599    * Note: the texcoords (generic attrib, really) we use are special:
600    * The S and T components simply vary from -1 to +1.
601    * The R component is k, below.
602    * The Q component is 1.0 and will used as a handy constant in the
603    * fragment shader.
604    */
605
606   /*
607    * k is the threshold distance from the point's center at which
608    * we begin alpha attenuation (the coverage value).
609    * Operating within a unit circle, we'll compute the fragment's
610    * distance 'd' from the center point using the texcoords.
611    * IF d > 1.0 THEN
612    *    KILL fragment
613    * ELSE IF d > k THEN
614    *    compute coverage in [0,1] proportional to d in [k, 1].
615    * ELSE
616    *    coverage = 1.0;  // full coverage
617    * ENDIF
618    *
619    * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
620    * avoid using IF/ELSE/ENDIF TGSI opcodes.
621    */
622
623#if !NORMALIZE
624   k = 1.0f / radius;
625   k = 1.0f - 2.0f * k + k * k;
626#else
627   k = 1.0f - 1.0f / radius;
628#endif
629
630   /* allocate/dup new verts */
631   for (i = 0; i < 4; i++) {
632      v[i] = dup_vert(stage, header->v[0], i);
633   }
634
635   /* new verts */
636   pos = v[0]->data[pos_slot];
637   pos[0] -= radius;
638   pos[1] -= radius;
639
640   pos = v[1]->data[pos_slot];
641   pos[0] += radius;
642   pos[1] -= radius;
643
644   pos = v[2]->data[pos_slot];
645   pos[0] += radius;
646   pos[1] += radius;
647
648   pos = v[3]->data[pos_slot];
649   pos[0] -= radius;
650   pos[1] += radius;
651
652   /* new texcoords */
653   tex = v[0]->data[tex_slot];
654   ASSIGN_4V(tex, -1, -1, k, 1);
655
656   tex = v[1]->data[tex_slot];
657   ASSIGN_4V(tex,  1, -1, k, 1);
658
659   tex = v[2]->data[tex_slot];
660   ASSIGN_4V(tex,  1,  1, k, 1);
661
662   tex = v[3]->data[tex_slot];
663   ASSIGN_4V(tex, -1,  1, k, 1);
664
665   /* emit 2 tris for the quad strip */
666   tri.v[0] = v[0];
667   tri.v[1] = v[1];
668   tri.v[2] = v[2];
669   stage->next->tri( stage->next, &tri );
670
671   tri.v[0] = v[0];
672   tri.v[1] = v[2];
673   tri.v[2] = v[3];
674   stage->next->tri( stage->next, &tri );
675}
676
677
678static void
679aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
680{
681   auto struct aapoint_stage *aapoint = aapoint_stage(stage);
682   struct draw_context *draw = stage->draw;
683   struct pipe_context *pipe = draw->pipe;
684   const struct pipe_rasterizer_state *rast = draw->rasterizer;
685   void *r;
686
687   assert(draw->rasterizer->point_smooth);
688
689   if (draw->rasterizer->point_size <= 2.0)
690      aapoint->radius = 1.0;
691   else
692      aapoint->radius = 0.5f * draw->rasterizer->point_size;
693
694   /*
695    * Bind (generate) our fragprog.
696    */
697   bind_aapoint_fragment_shader(aapoint);
698
699   /* update vertex attrib info */
700   aapoint->pos_slot = draw_current_shader_position_output(draw);
701
702   /* allocate the extra post-transformed vertex attribute */
703   aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
704                                                      TGSI_SEMANTIC_GENERIC,
705                                                      aapoint->fs->generic_attrib);
706   assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
707
708   /* find psize slot in post-transform vertex */
709   aapoint->psize_slot = -1;
710   if (draw->rasterizer->point_size_per_vertex) {
711      const struct tgsi_shader_info *info = draw_get_shader_info(draw);
712      uint i;
713      /* find PSIZ vertex output */
714      for (i = 0; i < info->num_outputs; i++) {
715         if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
716            aapoint->psize_slot = i;
717            break;
718         }
719      }
720   }
721
722   draw->suspend_flushing = TRUE;
723
724   /* Disable triangle culling, stippling, unfilled mode etc. */
725   r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
726   pipe->bind_rasterizer_state(pipe, r);
727
728   draw->suspend_flushing = FALSE;
729
730   /* now really draw first point */
731   stage->point = aapoint_point;
732   stage->point(stage, header);
733}
734
735
736static void
737aapoint_flush(struct draw_stage *stage, unsigned flags)
738{
739   struct draw_context *draw = stage->draw;
740   struct aapoint_stage *aapoint = aapoint_stage(stage);
741   struct pipe_context *pipe = draw->pipe;
742
743   stage->point = aapoint_first_point;
744   stage->next->flush( stage->next, flags );
745
746   /* restore original frag shader */
747   draw->suspend_flushing = TRUE;
748   aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
749
750   /* restore original rasterizer state */
751   if (draw->rast_handle) {
752      pipe->bind_rasterizer_state(pipe, draw->rast_handle);
753   }
754
755   draw->suspend_flushing = FALSE;
756
757   draw_remove_extra_vertex_attribs(draw);
758}
759
760
761static void
762aapoint_reset_stipple_counter(struct draw_stage *stage)
763{
764   stage->next->reset_stipple_counter( stage->next );
765}
766
767
768static void
769aapoint_destroy(struct draw_stage *stage)
770{
771   struct aapoint_stage* aapoint = aapoint_stage(stage);
772   struct pipe_context *pipe = stage->draw->pipe;
773
774   draw_free_temp_verts( stage );
775
776   /* restore the old entry points */
777   pipe->create_fs_state = aapoint->driver_create_fs_state;
778   pipe->bind_fs_state = aapoint->driver_bind_fs_state;
779   pipe->delete_fs_state = aapoint->driver_delete_fs_state;
780
781   FREE( stage );
782}
783
784
785static struct aapoint_stage *
786draw_aapoint_stage(struct draw_context *draw)
787{
788   struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
789   if (aapoint == NULL)
790      goto fail;
791
792   aapoint->stage.draw = draw;
793   aapoint->stage.name = "aapoint";
794   aapoint->stage.next = NULL;
795   aapoint->stage.point = aapoint_first_point;
796   aapoint->stage.line = draw_pipe_passthrough_line;
797   aapoint->stage.tri = draw_pipe_passthrough_tri;
798   aapoint->stage.flush = aapoint_flush;
799   aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
800   aapoint->stage.destroy = aapoint_destroy;
801
802   if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
803      goto fail;
804
805   return aapoint;
806
807 fail:
808   if (aapoint)
809      aapoint->stage.destroy(&aapoint->stage);
810
811   return NULL;
812
813}
814
815
816static struct aapoint_stage *
817aapoint_stage_from_pipe(struct pipe_context *pipe)
818{
819   struct draw_context *draw = (struct draw_context *) pipe->draw;
820   return aapoint_stage(draw->pipeline.aapoint);
821}
822
823
824/**
825 * This function overrides the driver's create_fs_state() function and
826 * will typically be called by the state tracker.
827 */
828static void *
829aapoint_create_fs_state(struct pipe_context *pipe,
830                       const struct pipe_shader_state *fs)
831{
832   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
833   struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
834   if (aafs == NULL)
835      return NULL;
836
837   aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
838
839   /* pass-through */
840   aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
841
842   return aafs;
843}
844
845
846static void
847aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
848{
849   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
850   struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
851   /* save current */
852   aapoint->fs = aafs;
853   /* pass-through */
854   aapoint->driver_bind_fs_state(pipe,
855                                 (aafs ? aafs->driver_fs : NULL));
856}
857
858
859static void
860aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
861{
862   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
863   struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
864
865   /* pass-through */
866   aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
867
868   if (aafs->aapoint_fs)
869      aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
870
871   FREE((void*)aafs->state.tokens);
872
873   FREE(aafs);
874}
875
876
877/**
878 * Called by drivers that want to install this AA point prim stage
879 * into the draw module's pipeline.  This will not be used if the
880 * hardware has native support for AA points.
881 */
882boolean
883draw_install_aapoint_stage(struct draw_context *draw,
884                           struct pipe_context *pipe)
885{
886   struct aapoint_stage *aapoint;
887
888   pipe->draw = (void *) draw;
889
890   /*
891    * Create / install AA point drawing / prim stage
892    */
893   aapoint = draw_aapoint_stage( draw );
894   if (aapoint == NULL)
895      return FALSE;
896
897   /* save original driver functions */
898   aapoint->driver_create_fs_state = pipe->create_fs_state;
899   aapoint->driver_bind_fs_state = pipe->bind_fs_state;
900   aapoint->driver_delete_fs_state = pipe->delete_fs_state;
901
902   /* override the driver's functions */
903   pipe->create_fs_state = aapoint_create_fs_state;
904   pipe->bind_fs_state = aapoint_bind_fs_state;
905   pipe->delete_fs_state = aapoint_delete_fs_state;
906
907   draw->pipeline.aapoint = &aapoint->stage;
908
909   return TRUE;
910}
911