1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "main/glheader.h"
34#include "main/macros.h"
35#include "main/enums.h"
36
37#include "intel_batchbuffer.h"
38
39#include "brw_defines.h"
40#include "brw_context.h"
41#include "brw_eu.h"
42#include "brw_util.h"
43#include "brw_sf.h"
44
45
46/**
47 * Determine the vert_result corresponding to the given half of the given
48 * register.  half=0 means the first half of a register, half=1 means the
49 * second half.
50 */
51static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, GLuint reg,
52                                          int half)
53{
54   int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
55   return c->vue_map.slot_to_vert_result[vue_slot];
56}
57
58/**
59 * Determine the register corresponding to the given vert_result.
60 */
61static struct brw_reg get_vert_result(struct brw_sf_compile *c,
62                                      struct brw_reg vert,
63                                      GLuint vert_result)
64{
65   int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
66   assert (vue_slot >= c->urb_entry_read_offset);
67   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
68   GLuint sub = vue_slot % 2;
69
70   return brw_vec4_grf(vert.nr + off, sub * 4);
71}
72
73static bool
74have_attr(struct brw_sf_compile *c, GLuint attr)
75{
76   return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
77}
78
79/***********************************************************************
80 * Twoside lighting
81 */
82static void copy_bfc( struct brw_sf_compile *c,
83		      struct brw_reg vert )
84{
85   struct brw_compile *p = &c->func;
86   GLuint i;
87
88   for (i = 0; i < 2; i++) {
89      if (have_attr(c, VERT_RESULT_COL0+i) &&
90	  have_attr(c, VERT_RESULT_BFC0+i))
91	 brw_MOV(p,
92		 get_vert_result(c, vert, VERT_RESULT_COL0+i),
93		 get_vert_result(c, vert, VERT_RESULT_BFC0+i));
94   }
95}
96
97
98static void do_twoside_color( struct brw_sf_compile *c )
99{
100   struct brw_compile *p = &c->func;
101   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
102
103   /* Already done in clip program:
104    */
105   if (c->key.primitive == SF_UNFILLED_TRIS)
106      return;
107
108   /* XXX: What happens if BFC isn't present?  This could only happen
109    * for user-supplied vertex programs, as t_vp_build.c always does
110    * the right thing.
111    */
112   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
113       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
114      return;
115
116   /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
117    * to get all channels active inside the IF.  In the clipping code
118    * we run with NoMask, so it's not an option and we can use
119    * BRW_EXECUTE_1 for all comparisions.
120    */
121   brw_push_insn_state(p);
122   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
123   brw_IF(p, BRW_EXECUTE_4);
124   {
125      switch (c->nr_verts) {
126      case 3: copy_bfc(c, c->vert[2]);
127      case 2: copy_bfc(c, c->vert[1]);
128      case 1: copy_bfc(c, c->vert[0]);
129      }
130   }
131   brw_ENDIF(p);
132   brw_pop_insn_state(p);
133}
134
135
136
137/***********************************************************************
138 * Flat shading
139 */
140
141#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
142				BITFIELD64_BIT(VERT_RESULT_COL1))
143
144static void copy_colors( struct brw_sf_compile *c,
145		     struct brw_reg dst,
146		     struct brw_reg src)
147{
148   struct brw_compile *p = &c->func;
149   GLuint i;
150
151   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
152      if (have_attr(c,i))
153	 brw_MOV(p,
154		 get_vert_result(c, dst, i),
155		 get_vert_result(c, src, i));
156   }
157}
158
159
160
161/* Need to use a computed jump to copy flatshaded attributes as the
162 * vertices are ordered according to y-coordinate before reaching this
163 * point, so the PV could be anywhere.
164 */
165static void do_flatshade_triangle( struct brw_sf_compile *c )
166{
167   struct brw_compile *p = &c->func;
168   struct intel_context *intel = &p->brw->intel;
169   struct brw_reg ip = brw_ip_reg();
170   GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
171   GLuint jmpi = 1;
172
173   if (!nr)
174      return;
175
176   /* Already done in clip program:
177    */
178   if (c->key.primitive == SF_UNFILLED_TRIS)
179      return;
180
181   if (intel->gen == 5)
182       jmpi = 2;
183
184   brw_push_insn_state(p);
185
186   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
187   brw_JMPI(p, ip, ip, c->pv);
188
189   copy_colors(c, c->vert[1], c->vert[0]);
190   copy_colors(c, c->vert[2], c->vert[0]);
191   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
192
193   copy_colors(c, c->vert[0], c->vert[1]);
194   copy_colors(c, c->vert[2], c->vert[1]);
195   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
196
197   copy_colors(c, c->vert[0], c->vert[2]);
198   copy_colors(c, c->vert[1], c->vert[2]);
199
200   brw_pop_insn_state(p);
201}
202
203
204static void do_flatshade_line( struct brw_sf_compile *c )
205{
206   struct brw_compile *p = &c->func;
207   struct intel_context *intel = &p->brw->intel;
208   struct brw_reg ip = brw_ip_reg();
209   GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
210   GLuint jmpi = 1;
211
212   if (!nr)
213      return;
214
215   /* Already done in clip program:
216    */
217   if (c->key.primitive == SF_UNFILLED_TRIS)
218      return;
219
220   if (intel->gen == 5)
221       jmpi = 2;
222
223   brw_push_insn_state(p);
224
225   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
226   brw_JMPI(p, ip, ip, c->pv);
227   copy_colors(c, c->vert[1], c->vert[0]);
228
229   brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
230   copy_colors(c, c->vert[0], c->vert[1]);
231
232   brw_pop_insn_state(p);
233}
234
235
236
237/***********************************************************************
238 * Triangle setup.
239 */
240
241
242static void alloc_regs( struct brw_sf_compile *c )
243{
244   GLuint reg, i;
245
246   /* Values computed by fixed function unit:
247    */
248   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
249   c->det = brw_vec1_grf(1, 2);
250   c->dx0 = brw_vec1_grf(1, 3);
251   c->dx2 = brw_vec1_grf(1, 4);
252   c->dy0 = brw_vec1_grf(1, 5);
253   c->dy2 = brw_vec1_grf(1, 6);
254
255   /* z and 1/w passed in seperately:
256    */
257   c->z[0]     = brw_vec1_grf(2, 0);
258   c->inv_w[0] = brw_vec1_grf(2, 1);
259   c->z[1]     = brw_vec1_grf(2, 2);
260   c->inv_w[1] = brw_vec1_grf(2, 3);
261   c->z[2]     = brw_vec1_grf(2, 4);
262   c->inv_w[2] = brw_vec1_grf(2, 5);
263
264   /* The vertices:
265    */
266   reg = 3;
267   for (i = 0; i < c->nr_verts; i++) {
268      c->vert[i] = brw_vec8_grf(reg, 0);
269      reg += c->nr_attr_regs;
270   }
271
272   /* Temporaries, allocated after last vertex reg.
273    */
274   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
275   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
276   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
277   c->tmp = brw_vec8_grf(reg, 0);  reg++;
278
279   /* Note grf allocation:
280    */
281   c->prog_data.total_grf = reg;
282
283
284   /* Outputs of this program - interpolation coefficients for
285    * rasterization:
286    */
287   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
288   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
289   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
290}
291
292
293static void copy_z_inv_w( struct brw_sf_compile *c )
294{
295   struct brw_compile *p = &c->func;
296   GLuint i;
297
298   brw_push_insn_state(p);
299
300   /* Copy both scalars with a single MOV:
301    */
302   for (i = 0; i < c->nr_verts; i++)
303      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
304
305   brw_pop_insn_state(p);
306}
307
308
309static void invert_det( struct brw_sf_compile *c)
310{
311   /* Looks like we invert all 8 elements just to get 1/det in
312    * position 2 !?!
313    */
314   brw_math(&c->func,
315	    c->inv_det,
316	    BRW_MATH_FUNCTION_INV,
317	    0,
318	    c->det,
319	    BRW_MATH_DATA_SCALAR,
320	    BRW_MATH_PRECISION_FULL);
321
322}
323
324
325static bool
326calculate_masks(struct brw_sf_compile *c,
327	        GLuint reg,
328		GLushort *pc,
329		GLushort *pc_persp,
330		GLushort *pc_linear)
331{
332   bool is_last_attr = (reg == c->nr_setup_regs - 1);
333   GLbitfield64 persp_mask;
334   GLbitfield64 linear_mask;
335
336   if (c->key.do_flat_shading)
337      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
338                                    BITFIELD64_BIT(VERT_RESULT_COL0) |
339                                    BITFIELD64_BIT(VERT_RESULT_COL1));
340   else
341      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
342
343   if (c->key.do_flat_shading)
344      linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
345                                     BITFIELD64_BIT(VERT_RESULT_COL1));
346   else
347      linear_mask = c->key.attrs;
348
349   *pc_persp = 0;
350   *pc_linear = 0;
351   *pc = 0xf;
352
353   if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
354      *pc_persp = 0xf;
355
356   if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
357      *pc_linear = 0xf;
358
359   /* Maybe only processs one attribute on the final round:
360    */
361   if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
362      *pc |= 0xf0;
363
364      if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
365	 *pc_persp |= 0xf0;
366
367      if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
368	 *pc_linear |= 0xf0;
369   }
370
371   return is_last_attr;
372}
373
374/* Calculates the predicate control for which channels of a reg
375 * (containing 2 attrs) to do point sprite coordinate replacement on.
376 */
377static uint16_t
378calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
379{
380   int vert_result1, vert_result2;
381   uint16_t pc = 0;
382
383   vert_result1 = vert_reg_to_vert_result(c, reg, 0);
384   if (vert_result1 >= VERT_RESULT_TEX0 && vert_result1 <= VERT_RESULT_TEX7) {
385      if (c->key.point_sprite_coord_replace & (1 << (vert_result1 - VERT_RESULT_TEX0)))
386	 pc |= 0x0f;
387   }
388   if (vert_result1 == BRW_VERT_RESULT_PNTC)
389      pc |= 0x0f;
390
391   vert_result2 = vert_reg_to_vert_result(c, reg, 1);
392   if (vert_result2 >= VERT_RESULT_TEX0 && vert_result2 <= VERT_RESULT_TEX7) {
393      if (c->key.point_sprite_coord_replace & (1 << (vert_result2 -
394                                                     VERT_RESULT_TEX0)))
395         pc |= 0xf0;
396   }
397   if (vert_result2 == BRW_VERT_RESULT_PNTC)
398      pc |= 0xf0;
399
400   return pc;
401}
402
403
404
405void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
406{
407   struct brw_compile *p = &c->func;
408   GLuint i;
409
410   c->nr_verts = 3;
411
412   if (allocate)
413      alloc_regs(c);
414
415   invert_det(c);
416   copy_z_inv_w(c);
417
418   if (c->key.do_twoside_color)
419      do_twoside_color(c);
420
421   if (c->key.do_flat_shading)
422      do_flatshade_triangle(c);
423
424
425   for (i = 0; i < c->nr_setup_regs; i++)
426   {
427      /* Pair of incoming attributes:
428       */
429      struct brw_reg a0 = offset(c->vert[0], i);
430      struct brw_reg a1 = offset(c->vert[1], i);
431      struct brw_reg a2 = offset(c->vert[2], i);
432      GLushort pc, pc_persp, pc_linear;
433      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
434
435      if (pc_persp)
436      {
437	 brw_set_predicate_control_flag_value(p, pc_persp);
438	 brw_MUL(p, a0, a0, c->inv_w[0]);
439	 brw_MUL(p, a1, a1, c->inv_w[1]);
440	 brw_MUL(p, a2, a2, c->inv_w[2]);
441      }
442
443
444      /* Calculate coefficients for interpolated values:
445       */
446      if (pc_linear)
447      {
448	 brw_set_predicate_control_flag_value(p, pc_linear);
449
450	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
451	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
452
453	 /* calculate dA/dx
454	  */
455	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
456	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
457	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
458
459	 /* calculate dA/dy
460	  */
461	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
462	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
463	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
464      }
465
466      {
467	 brw_set_predicate_control_flag_value(p, pc);
468	 /* start point for interpolation
469	  */
470	 brw_MOV(p, c->m3C0, a0);
471
472	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
473	  * the send instruction:
474	  */
475	 brw_urb_WRITE(p,
476		       brw_null_reg(),
477		       0,
478		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
479		       0, 	/* allocate */
480		       1,	/* used */
481		       4, 	/* msg len */
482		       0,	/* response len */
483		       last,	/* eot */
484		       last, 	/* writes complete */
485		       i*4,	/* offset */
486		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
487      }
488   }
489}
490
491
492
493void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
494{
495   struct brw_compile *p = &c->func;
496   GLuint i;
497
498
499   c->nr_verts = 2;
500
501   if (allocate)
502      alloc_regs(c);
503
504   invert_det(c);
505   copy_z_inv_w(c);
506
507   if (c->key.do_flat_shading)
508      do_flatshade_line(c);
509
510   for (i = 0; i < c->nr_setup_regs; i++)
511   {
512      /* Pair of incoming attributes:
513       */
514      struct brw_reg a0 = offset(c->vert[0], i);
515      struct brw_reg a1 = offset(c->vert[1], i);
516      GLushort pc, pc_persp, pc_linear;
517      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
518
519      if (pc_persp)
520      {
521	 brw_set_predicate_control_flag_value(p, pc_persp);
522	 brw_MUL(p, a0, a0, c->inv_w[0]);
523	 brw_MUL(p, a1, a1, c->inv_w[1]);
524      }
525
526      /* Calculate coefficients for position, color:
527       */
528      if (pc_linear) {
529	 brw_set_predicate_control_flag_value(p, pc_linear);
530
531	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
532
533 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
534	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
535
536	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
537	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
538      }
539
540      {
541	 brw_set_predicate_control_flag_value(p, pc);
542
543	 /* start point for interpolation
544	  */
545	 brw_MOV(p, c->m3C0, a0);
546
547	 /* Copy m0..m3 to URB.
548	  */
549	 brw_urb_WRITE(p,
550		       brw_null_reg(),
551		       0,
552		       brw_vec8_grf(0, 0),
553		       0, 	/* allocate */
554		       1, 	/* used */
555		       4, 	/* msg len */
556		       0,	/* response len */
557		       last, 	/* eot */
558		       last, 	/* writes complete */
559		       i*4,	/* urb destination offset */
560		       BRW_URB_SWIZZLE_TRANSPOSE);
561      }
562   }
563}
564
565void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
566{
567   struct brw_compile *p = &c->func;
568   GLuint i;
569
570   c->nr_verts = 1;
571
572   if (allocate)
573      alloc_regs(c);
574
575   copy_z_inv_w(c);
576   for (i = 0; i < c->nr_setup_regs; i++)
577   {
578      struct brw_reg a0 = offset(c->vert[0], i);
579      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
580      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
581
582      pc_coord_replace = calculate_point_sprite_mask(c, i);
583      pc_persp &= ~pc_coord_replace;
584
585      if (pc_persp) {
586	 brw_set_predicate_control_flag_value(p, pc_persp);
587	 brw_MUL(p, a0, a0, c->inv_w[0]);
588      }
589
590      /* Point sprite coordinate replacement: A texcoord with this
591       * enabled gets replaced with the value (x, y, 0, 1) where x and
592       * y vary from 0 to 1 across the horizontal and vertical of the
593       * point.
594       */
595      if (pc_coord_replace) {
596	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
597	 /* Caculate 1.0/PointWidth */
598	 brw_math(&c->func,
599		  c->tmp,
600		  BRW_MATH_FUNCTION_INV,
601		  0,
602		  c->dx0,
603		  BRW_MATH_DATA_SCALAR,
604		  BRW_MATH_PRECISION_FULL);
605
606	 brw_set_access_mode(p, BRW_ALIGN_16);
607
608	 /* dA/dx, dA/dy */
609	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
610	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
611	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
612	 if (c->key.sprite_origin_lower_left) {
613	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
614	 } else {
615	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
616	 }
617
618	 /* attribute constant offset */
619	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
620	 if (c->key.sprite_origin_lower_left) {
621	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
622	 } else {
623	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
624	 }
625
626	 brw_set_access_mode(p, BRW_ALIGN_1);
627      }
628
629      if (pc & ~pc_coord_replace) {
630	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
631	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
632	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
633	 brw_MOV(p, c->m3C0, a0); /* constant value */
634      }
635
636
637      brw_set_predicate_control_flag_value(p, pc);
638      /* Copy m0..m3 to URB. */
639      brw_urb_WRITE(p,
640		    brw_null_reg(),
641		    0,
642		    brw_vec8_grf(0, 0),
643		    0, 	/* allocate */
644		    1,	/* used */
645		    4, 	/* msg len */
646		    0,	/* response len */
647		    last, 	/* eot */
648		    last, 	/* writes complete */
649		    i*4,	/* urb destination offset */
650		    BRW_URB_SWIZZLE_TRANSPOSE);
651   }
652}
653
654/* Points setup - several simplifications as all attributes are
655 * constant across the face of the point (point sprites excluded!)
656 */
657void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
658{
659   struct brw_compile *p = &c->func;
660   GLuint i;
661
662   c->nr_verts = 1;
663
664   if (allocate)
665      alloc_regs(c);
666
667   copy_z_inv_w(c);
668
669   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
670   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
671
672   for (i = 0; i < c->nr_setup_regs; i++)
673   {
674      struct brw_reg a0 = offset(c->vert[0], i);
675      GLushort pc, pc_persp, pc_linear;
676      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
677
678      if (pc_persp)
679      {
680	 /* This seems odd as the values are all constant, but the
681	  * fragment shader will be expecting it:
682	  */
683	 brw_set_predicate_control_flag_value(p, pc_persp);
684	 brw_MUL(p, a0, a0, c->inv_w[0]);
685      }
686
687
688      /* The delta values are always zero, just send the starting
689       * coordinate.  Again, this is to fit in with the interpolation
690       * code in the fragment shader.
691       */
692      {
693	 brw_set_predicate_control_flag_value(p, pc);
694
695	 brw_MOV(p, c->m3C0, a0); /* constant value */
696
697	 /* Copy m0..m3 to URB.
698	  */
699	 brw_urb_WRITE(p,
700		       brw_null_reg(),
701		       0,
702		       brw_vec8_grf(0, 0),
703		       0, 	/* allocate */
704		       1,	/* used */
705		       4, 	/* msg len */
706		       0,	/* response len */
707		       last, 	/* eot */
708		       last, 	/* writes complete */
709		       i*4,	/* urb destination offset */
710		       BRW_URB_SWIZZLE_TRANSPOSE);
711      }
712   }
713}
714
715void brw_emit_anyprim_setup( struct brw_sf_compile *c )
716{
717   struct brw_compile *p = &c->func;
718   struct brw_reg ip = brw_ip_reg();
719   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
720   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
721   struct brw_reg primmask;
722   int jmp;
723   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
724
725   GLuint saveflag;
726
727   c->nr_verts = 3;
728   alloc_regs(c);
729
730   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
731
732   brw_MOV(p, primmask, brw_imm_ud(1));
733   brw_SHL(p, primmask, primmask, payload_prim);
734
735   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
736   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
737					       (1<<_3DPRIM_TRISTRIP) |
738					       (1<<_3DPRIM_TRIFAN) |
739					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
740					       (1<<_3DPRIM_POLYGON) |
741					       (1<<_3DPRIM_RECTLIST) |
742					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
743   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
744   {
745      saveflag = p->flag_value;
746      brw_push_insn_state(p);
747      brw_emit_tri_setup( c, false );
748      brw_pop_insn_state(p);
749      p->flag_value = saveflag;
750      /* note - thread killed in subroutine, so must
751       * restore the flag which is changed when building
752       * the subroutine. fix #13240
753       */
754   }
755   brw_land_fwd_jump(p, jmp);
756
757   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
758   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
759					       (1<<_3DPRIM_LINESTRIP) |
760					       (1<<_3DPRIM_LINELOOP) |
761					       (1<<_3DPRIM_LINESTRIP_CONT) |
762					       (1<<_3DPRIM_LINESTRIP_BF) |
763					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
764   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
765   {
766      saveflag = p->flag_value;
767      brw_push_insn_state(p);
768      brw_emit_line_setup( c, false );
769      brw_pop_insn_state(p);
770      p->flag_value = saveflag;
771      /* note - thread killed in subroutine */
772   }
773   brw_land_fwd_jump(p, jmp);
774
775   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
776   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
777   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
778   {
779      saveflag = p->flag_value;
780      brw_push_insn_state(p);
781      brw_emit_point_sprite_setup( c, false );
782      brw_pop_insn_state(p);
783      p->flag_value = saveflag;
784   }
785   brw_land_fwd_jump(p, jmp);
786
787   brw_emit_point_setup( c, false );
788}
789
790
791
792
793