1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36#include "pipe/p_shader_tokens.h"
37#include "util/u_debug.h"
38#include "util/u_memory.h"
39#include "util/u_math.h"
40#include "tgsi/tgsi_scan.h"
41#include "gallivm/lp_bld_debug.h"
42#include "gallivm/lp_bld_const.h"
43#include "gallivm/lp_bld_arit.h"
44#include "gallivm/lp_bld_swizzle.h"
45#include "gallivm/lp_bld_flow.h"
46#include "lp_bld_interp.h"
47
48
49/*
50 * The shader JIT function operates on blocks of quads.
51 * Each block has 2x2 quads and each quad has 2x2 pixels.
52 *
53 * We iterate over the quads in order 0, 1, 2, 3:
54 *
55 * #################
56 * #   |   #   |   #
57 * #---0---#---1---#
58 * #   |   #   |   #
59 * #################
60 * #   |   #   |   #
61 * #---2---#---3---#
62 * #   |   #   |   #
63 * #################
64 *
65 * If we iterate over multiple quads at once, quads 01 and 23 are processed
66 * together.
67 *
68 * Within each quad, we have four pixels which are represented in SOA
69 * order:
70 *
71 * #########
72 * # 0 | 1 #
73 * #---+---#
74 * # 2 | 3 #
75 * #########
76 *
77 * So the green channel (for example) of the four pixels is stored in
78 * a single vector register: {g0, g1, g2, g3}.
79 * The order stays the same even with multiple quads:
80 * 0 1 4 5
81 * 2 3 6 7
82 * is stored as g0..g7
83 */
84
85
86/**
87 * Do one perspective divide per quad.
88 *
89 * For perspective interpolation, the final attribute value is given
90 *
91 *  a' = a/w = a * oow
92 *
93 * where
94 *
95 *  a = a0 + dadx*x + dady*y
96 *  w = w0 + dwdx*x + dwdy*y
97 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
98 *
99 * Instead of computing the division per pixel, with this macro we compute the
100 * division on the upper left pixel of each quad, and use a linear
101 * approximation in the remaining pixels, given by:
102 *
103 *  da'dx = (dadx - dwdx*a)*oow
104 *  da'dy = (dady - dwdy*a)*oow
105 *
106 * Ironically, this actually makes things slower -- probably because the
107 * divide hardware unit is rarely used, whereas the multiply unit is typically
108 * already saturated.
109 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Per-element pixel offsets from the upper-left corner of a 4x4 pixel
 * block, indexed by SOA element number.  Entries come in groups of four
 * (one group per quad) and follow the quad / pixel ordering described in
 * the comment near the top of this file.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,    /* quad 0 */
   2, 3, 2, 3,    /* quad 1 */
   0, 1, 0, 1,    /* quad 2 */
   2, 3, 2, 3     /* quad 3 */
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,    /* quad 0 */
   0, 0, 1, 1,    /* quad 1 */
   2, 2, 3, 3,    /* quad 2 */
   2, 2, 3, 3     /* quad 3 */
};
115
116
117static void
118attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
119{
120   if(attrib == 0)
121      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
122   else
123      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
124}
125
126static void
127calc_offsets(struct lp_build_context *coeff_bld,
128             unsigned quad_start_index,
129             LLVMValueRef *pixoffx,
130             LLVMValueRef *pixoffy)
131{
132   unsigned i;
133   unsigned num_pix = coeff_bld->type.length;
134   struct gallivm_state *gallivm = coeff_bld->gallivm;
135   LLVMBuilderRef builder = coeff_bld->gallivm->builder;
136   LLVMValueRef nr, pixxf, pixyf;
137
138   *pixoffx = coeff_bld->undef;
139   *pixoffy = coeff_bld->undef;
140
141   for (i = 0; i < num_pix; i++) {
142      nr = lp_build_const_int32(gallivm, i);
143      pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
144                                   (quad_start_index & 1) * 2);
145      pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
146                                   (quad_start_index & 2));
147      *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
148      *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
149   }
150}
151
152
153/* Much easier, and significantly less instructions in the per-stamp
154 * part (less than half) but overall more instructions so a loss if
155 * most quads are active. Might be a win though with larger vectors.
156 * No ability to do per-quad divide (doable but not implemented)
157 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
158 */
159static void
160coeffs_init_simple(struct lp_build_interp_soa_context *bld,
161                   LLVMValueRef a0_ptr,
162                   LLVMValueRef dadx_ptr,
163                   LLVMValueRef dady_ptr)
164{
165   struct lp_build_context *coeff_bld = &bld->coeff_bld;
166   struct lp_build_context *setup_bld = &bld->setup_bld;
167   struct gallivm_state *gallivm = coeff_bld->gallivm;
168   LLVMBuilderRef builder = gallivm->builder;
169   unsigned attrib;
170
171   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
172      /*
173       * always fetch all 4 values for performance/simplicity
174       * Note: we do that here because it seems to generate better
175       * code. It generates a lot of moves initially but less
176       * moves later. As far as I can tell this looks like a
177       * llvm issue, instead of simply reloading the values from
178       * the passed in pointers it if it runs out of registers
179       * it spills/reloads them. Maybe some optimization passes
180       * would help.
181       * Might want to investigate this again later.
182       */
183      const unsigned interp = bld->interp[attrib];
184      LLVMValueRef index = lp_build_const_int32(gallivm,
185                                attrib * TGSI_NUM_CHANNELS);
186      LLVMValueRef ptr;
187      LLVMValueRef dadxaos = setup_bld->zero;
188      LLVMValueRef dadyaos = setup_bld->zero;
189      LLVMValueRef a0aos = setup_bld->zero;
190
191      switch (interp) {
192      case LP_INTERP_PERSPECTIVE:
193         /* fall-through */
194
195      case LP_INTERP_LINEAR:
196         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
197         ptr = LLVMBuildBitCast(builder, ptr,
198               LLVMPointerType(setup_bld->vec_type, 0), "");
199         dadxaos = LLVMBuildLoad(builder, ptr, "");
200
201         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
202         ptr = LLVMBuildBitCast(builder, ptr,
203               LLVMPointerType(setup_bld->vec_type, 0), "");
204         dadyaos = LLVMBuildLoad(builder, ptr, "");
205
206         attrib_name(dadxaos, attrib, 0, ".dadxaos");
207         attrib_name(dadyaos, attrib, 0, ".dadyaos");
208         /* fall-through */
209
210      case LP_INTERP_CONSTANT:
211      case LP_INTERP_FACING:
212         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
213         ptr = LLVMBuildBitCast(builder, ptr,
214               LLVMPointerType(setup_bld->vec_type, 0), "");
215         a0aos = LLVMBuildLoad(builder, ptr, "");
216         attrib_name(a0aos, attrib, 0, ".a0aos");
217         break;
218
219      case LP_INTERP_POSITION:
220         /* Nothing to do as the position coeffs are already setup in slot 0 */
221         continue;
222
223      default:
224         assert(0);
225         break;
226      }
227      bld->a0aos[attrib] = a0aos;
228      bld->dadxaos[attrib] = dadxaos;
229      bld->dadyaos[attrib] = dadyaos;
230   }
231}
232
233/**
234 * Interpolate the shader input attribute values.
235 * This is called for each (group of) quad(s).
236 */
237static void
238attribs_update_simple(struct lp_build_interp_soa_context *bld,
239                      struct gallivm_state *gallivm,
240                      int quad_start_index,
241                      LLVMValueRef loop_iter,
242                      int start,
243                      int end)
244{
245   LLVMBuilderRef builder = gallivm->builder;
246   struct lp_build_context *coeff_bld = &bld->coeff_bld;
247   struct lp_build_context *setup_bld = &bld->setup_bld;
248   LLVMValueRef oow = NULL;
249   unsigned attrib;
250   LLVMValueRef pixoffx;
251   LLVMValueRef pixoffy;
252
253   /* could do this with code-generated passed in pixel offsets too */
254   if (bld->dynamic_offsets) {
255      LLVMValueRef ptr;
256
257      assert(loop_iter);
258      ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
259      pixoffx = LLVMBuildLoad(builder, ptr, "");
260      ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
261      pixoffy = LLVMBuildLoad(builder, ptr, "");
262   }
263   else {
264      calc_offsets(coeff_bld, quad_start_index, &pixoffx, &pixoffy);
265   }
266
267   pixoffx = LLVMBuildFAdd(builder, pixoffx,
268                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
269   pixoffy = LLVMBuildFAdd(builder, pixoffy,
270                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
271
272   for (attrib = start; attrib < end; attrib++) {
273      const unsigned mask = bld->mask[attrib];
274      const unsigned interp = bld->interp[attrib];
275      unsigned chan;
276
277      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
278         if (mask & (1 << chan)) {
279            LLVMValueRef index;
280            LLVMValueRef dadx = coeff_bld->zero;
281            LLVMValueRef dady = coeff_bld->zero;
282            LLVMValueRef a = coeff_bld->zero;
283
284            index = lp_build_const_int32(gallivm, chan);
285            switch (interp) {
286            case LP_INTERP_PERSPECTIVE:
287               /* fall-through */
288
289            case LP_INTERP_LINEAR:
290               if (attrib == 0 && chan == 0) {
291                  dadx = coeff_bld->one;
292               }
293               else if (attrib == 0 && chan == 1) {
294                  dady = coeff_bld->one;
295               }
296               else {
297                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
298                                                    coeff_bld->type, bld->dadxaos[attrib],
299                                                    index);
300                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
301                                                    coeff_bld->type, bld->dadyaos[attrib],
302                                                    index);
303                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
304                                                 coeff_bld->type, bld->a0aos[attrib],
305                                                 index);
306               }
307               /*
308                * a = a0 + (x * dadx + y * dady)
309                */
310               dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
311               dady = LLVMBuildFMul(builder, dady, pixoffy, "");
312               a = LLVMBuildFAdd(builder, a, dadx, "");
313               a = LLVMBuildFAdd(builder, a, dady, "");
314
315               if (interp == LP_INTERP_PERSPECTIVE) {
316                  if (oow == NULL) {
317                     LLVMValueRef w = bld->attribs[0][3];
318                     assert(attrib != 0);
319                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
320                     oow = lp_build_rcp(coeff_bld, w);
321                  }
322                  a = lp_build_mul(coeff_bld, a, oow);
323               }
324               break;
325
326            case LP_INTERP_CONSTANT:
327            case LP_INTERP_FACING:
328               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
329                                              coeff_bld->type, bld->a0aos[attrib],
330                                              index);
331               break;
332
333            case LP_INTERP_POSITION:
334               assert(attrib > 0);
335               a = bld->attribs[0][chan];
336               break;
337
338            default:
339               assert(0);
340               break;
341            }
342
343            if ((attrib == 0) && (chan == 2)){
344               /* FIXME: Depth values can exceed 1.0, due to the fact that
345                * setup interpolation coefficients refer to (0,0) which causes
346                * precision loss. So we must clamp to 1.0 here to avoid artifacts
347                */
348               a = lp_build_min(coeff_bld, a, coeff_bld->one);
349            }
350            bld->attribs[attrib][chan] = a;
351         }
352      }
353   }
354}
355
/**
 * Initialize the bld->a, dadq fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 *
 * For every enabled channel of every attribute this stores:
 *  - bld->dadq[attrib][chan]: the per-pixel delta within a quad, and
 *  - bld->a[attrib][chan]: the start value of each quad (one quad per
 *    vector element), either as a plain value or, when
 *    bld->dynamic_offsets is set, as a pointer to an alloca holding it.
 *
 * @param a0_ptr    pointer to the a0 coefficients
 * @param dadx_ptr  pointer to the dadx coefficients
 * @param dady_ptr  pointer to the dady coefficients
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   /* Build constant vectors with the x/y offset of each pixel in a quad. */
   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }


   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      /* element index of this attribute's first channel in the arrays */
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * After this, a0aos is the attrib value at the top left corner of
       * the stamp (all four channels at once).
       */
      if (interp != LP_INTERP_CONSTANT &&
          interp != LP_INTERP_FACING) {
         LLVMValueRef axaos, ayaos;
         axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
                               dadxaos, "");
         ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
                               dadyaos, "");
         a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
         a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) this is:
       * {0, dadx, dady, dadx+dady,
       *  2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* this generates a CRAPLOAD of shuffles... */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               /* pos.x: starts at the block x and grows by 1 per pixel in x */
               a = lp_build_broadcast_scalar(coeff_bld, bld->x);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            }
            else if (attrib == 0 && chan == 1) {
               /* pos.y: starts at the block y and grows by 1 per pixel in y */
               a = lp_build_broadcast_scalar(coeff_bld, bld->y);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            }
            else {
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, a0aos, chan_index);
            }

            /* dadq[i] = dadx * xoffset[i] + dady * yoffset[i] */
            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads: a += 2 * dadq, so that element i of a holds
             * the value at the upper-left pixel of quad i.
             * Note that if we process 2 quads at once this isn't exactly
             * what we want: we need to access elements 0 and 2
             * respectively later in that case.
             */

            if (interp != LP_INTERP_CONSTANT &&
                interp != LP_INTERP_FACING) {
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
	    }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             */
            /*
             * NOTE(review): this disabled path reads bld->a[0][3] as a
             * value, but below it is stored as an alloca pointer when
             * bld->dynamic_offsets is set -- revisit before enabling.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               if (!bld->oow) {
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            if (bld->dynamic_offsets) {
               /*
                * Keep the vector in memory; attribs_update() then picks
                * the element for the current quad with a runtime index.
                */
               bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                      LLVMTypeOf(a), "");
               LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            }
            else {
               bld->a[attrib][chan] = a;
            }
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}
543
544
545/**
546 * Increment the shader input attribute values.
547 * This is called when we move from one quad to the next.
548 */
549static void
550attribs_update(struct lp_build_interp_soa_context *bld,
551               struct gallivm_state *gallivm,
552               int quad_start_index,
553               LLVMValueRef loop_iter,
554               int start,
555               int end)
556{
557   LLVMBuilderRef builder = gallivm->builder;
558   struct lp_build_context *coeff_bld = &bld->coeff_bld;
559   LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_start_index);
560   LLVMValueRef oow = NULL;
561   unsigned attrib;
562   unsigned chan;
563
564   assert(quad_start_index < 4);
565
566   for(attrib = start; attrib < end; ++attrib) {
567      const unsigned mask = bld->mask[attrib];
568      const unsigned interp = bld->interp[attrib];
569      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
570         if(mask & (1 << chan)) {
571            LLVMValueRef a;
572            if (interp == LP_INTERP_CONSTANT ||
573                interp == LP_INTERP_FACING) {
574               a = bld->a[attrib][chan];
575               if (bld->dynamic_offsets) {
576                  a = LLVMBuildLoad(builder, a, "");
577               }
578            }
579            else if (interp == LP_INTERP_POSITION) {
580               assert(attrib > 0);
581               a = bld->attribs[0][chan];
582            }
583            else {
584               LLVMValueRef dadq;
585
586               a = bld->a[attrib][chan];
587
588               /*
589                * Broadcast the attribute value for this quad into all elements
590                */
591
592               if (bld->dynamic_offsets) {
593                  /* stored as vector load as float */
594                  LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
595                                                            gallivm->context), 0);
596                  LLVMValueRef ptr;
597                  a = LLVMBuildBitCast(builder, a, ptr_type, "");
598                  ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
599                  a = LLVMBuildLoad(builder, ptr, "");
600                  a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
601               }
602               else {
603                  a = LLVMBuildShuffleVector(builder,
604                                             a, coeff_bld->undef, shuffle, "");
605               }
606
607               /*
608                * Get the derivatives.
609                */
610
611               dadq = bld->dadq[attrib][chan];
612
613#if PERSPECTIVE_DIVIDE_PER_QUAD
614               if (interp == LP_INTERP_PERSPECTIVE) {
615                  LLVMValueRef dwdq = bld->dadq[0][3];
616
617                  if (oow == NULL) {
618                     assert(bld->oow);
619                     oow = LLVMBuildShuffleVector(coeff_bld->builder,
620                                                  bld->oow, coeff_bld->undef,
621                                                  shuffle, "");
622                  }
623
624                  dadq = lp_build_sub(coeff_bld,
625                                      dadq,
626                                      lp_build_mul(coeff_bld, a, dwdq));
627                  dadq = lp_build_mul(coeff_bld, dadq, oow);
628               }
629#endif
630
631               /*
632                * Add the derivatives
633                */
634
635               a = lp_build_add(coeff_bld, a, dadq);
636
637#if !PERSPECTIVE_DIVIDE_PER_QUAD
638               if (interp == LP_INTERP_PERSPECTIVE) {
639                  if (oow == NULL) {
640                     LLVMValueRef w = bld->attribs[0][3];
641                     assert(attrib != 0);
642                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
643                     oow = lp_build_rcp(coeff_bld, w);
644                  }
645                  a = lp_build_mul(coeff_bld, a, oow);
646               }
647#endif
648
649               if (attrib == 0 && chan == 2) {
650                  /* FIXME: Depth values can exceed 1.0, due to the fact that
651                   * setup interpolation coefficients refer to (0,0) which causes
652                   * precision loss. So we must clamp to 1.0 here to avoid artifacts
653                   */
654                  a = lp_build_min(coeff_bld, a, coeff_bld->one);
655               }
656
657               attrib_name(a, attrib, chan, "");
658            }
659            bld->attribs[attrib][chan] = a;
660         }
661      }
662   }
663}
664
665
666/**
667 * Generate the position vectors.
668 *
669 * Parameter x0, y0 are the integer values with upper left coordinates.
670 */
671static void
672pos_init(struct lp_build_interp_soa_context *bld,
673         LLVMValueRef x0,
674         LLVMValueRef y0)
675{
676   LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
677   struct lp_build_context *coeff_bld = &bld->coeff_bld;
678
679   bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
680   bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
681}
682
683
684/**
685 * Initialize fragment shader input attribute info.
686 */
687void
688lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
689                         struct gallivm_state *gallivm,
690                         unsigned num_inputs,
691                         const struct lp_shader_input *inputs,
692                         LLVMBuilderRef builder,
693                         struct lp_type type,
694                         boolean dynamic_offsets,
695                         LLVMValueRef a0_ptr,
696                         LLVMValueRef dadx_ptr,
697                         LLVMValueRef dady_ptr,
698                         LLVMValueRef x0,
699                         LLVMValueRef y0)
700{
701   struct lp_type coeff_type;
702   struct lp_type setup_type;
703   unsigned attrib;
704   unsigned chan;
705
706   memset(bld, 0, sizeof *bld);
707
708   memset(&coeff_type, 0, sizeof coeff_type);
709   coeff_type.floating = TRUE;
710   coeff_type.sign = TRUE;
711   coeff_type.width = 32;
712   coeff_type.length = type.length;
713
714   memset(&setup_type, 0, sizeof setup_type);
715   setup_type.floating = TRUE;
716   setup_type.sign = TRUE;
717   setup_type.width = 32;
718   setup_type.length = TGSI_NUM_CHANNELS;
719
720
721   /* XXX: we don't support interpolating into any other types */
722   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
723
724   lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
725   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
726
727   /* For convenience */
728   bld->pos = bld->attribs[0];
729   bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
730
731   /* Position */
732   bld->mask[0] = TGSI_WRITEMASK_XYZW;
733   bld->interp[0] = LP_INTERP_LINEAR;
734
735   /* Inputs */
736   for (attrib = 0; attrib < num_inputs; ++attrib) {
737      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
738      bld->interp[1 + attrib] = inputs[attrib].interp;
739   }
740   bld->num_attribs = 1 + num_inputs;
741
742   /* Ensure all masked out input channels have a valid value */
743   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
744      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
745         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
746      }
747   }
748
749   pos_init(bld, x0, y0);
750
751   if (coeff_type.length > 4) {
752      bld->simple_interp = TRUE;
753      if (dynamic_offsets) {
754         /* XXX this should use a global static table */
755         unsigned i;
756         unsigned num_loops = 16 / type.length;
757         LLVMValueRef pixoffx, pixoffy, index;
758         LLVMValueRef ptr;
759
760         bld->dynamic_offsets = TRUE;
761         bld->xoffset_store = lp_build_array_alloca(gallivm,
762                                                    lp_build_vec_type(gallivm, type),
763                                                    lp_build_const_int32(gallivm, num_loops),
764                                                    "");
765         bld->yoffset_store = lp_build_array_alloca(gallivm,
766                                                    lp_build_vec_type(gallivm, type),
767                                                    lp_build_const_int32(gallivm, num_loops),
768                                                    "");
769         for (i = 0; i < num_loops; i++) {
770            index = lp_build_const_int32(gallivm, i);
771            calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
772            ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
773            LLVMBuildStore(builder, pixoffx, ptr);
774            ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
775            LLVMBuildStore(builder, pixoffy, ptr);
776         }
777      }
778      coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
779   }
780   else {
781      bld->simple_interp = FALSE;
782      if (dynamic_offsets) {
783         bld->dynamic_offsets = TRUE;
784      }
785      coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
786   }
787
788}
789
790
791/**
792 * Advance the position and inputs to the given quad within the block.
793 */
794void
795lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
796                                  struct gallivm_state *gallivm,
797                                  int quad_start_index)
798{
799   assert(quad_start_index < 4);
800
801   if (bld->simple_interp) {
802      attribs_update_simple(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
803   }
804   else {
805      attribs_update(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
806   }
807}
808
809void
810lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
811                               struct gallivm_state *gallivm,
812                               int quad_start_index)
813{
814   assert(quad_start_index < 4);
815
816   if (bld->simple_interp) {
817      attribs_update_simple(bld, gallivm, quad_start_index, NULL, 0, 1);
818   }
819   else {
820      attribs_update(bld, gallivm, quad_start_index, NULL, 0, 1);
821   }
822}
823
824void
825lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
826                                      struct gallivm_state *gallivm,
827                                      LLVMValueRef quad_start_index)
828{
829   if (bld->simple_interp) {
830      attribs_update_simple(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
831   }
832   else {
833      attribs_update(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
834   }
835}
836
837void
838lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
839                                   struct gallivm_state *gallivm,
840                                   LLVMValueRef quad_start_index)
841{
842   if (bld->simple_interp) {
843      attribs_update_simple(bld, gallivm, 0, quad_start_index, 0, 1);
844   }
845   else {
846      attribs_update(bld, gallivm, 0, quad_start_index, 0, 1);
847   }
848}
849
850