brw_eu_emit.c revision 77397ef96edbc17a698ae2a02ec4807b1059c036
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37#include "../glsl/ralloc.h"
38
39/***********************************************************************
40 * Internal helper for constructing instructions
41 */
42
43static void guess_execution_size(struct brw_compile *p,
44				 struct brw_instruction *insn,
45				 struct brw_reg reg)
46{
47   if (reg.width == BRW_WIDTH_8 && p->compressed)
48      insn->header.execution_size = BRW_EXECUTE_16;
49   else
50      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
51}
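/* The "definitions are compatible" note above relies on the BRW_WIDTH_* and
 * BRW_EXECUTE_* enums from brw_defines.h sharing the same encoding (e.g.
 * BRW_WIDTH_8 == BRW_EXECUTE_8 and BRW_WIDTH_16 == BRW_EXECUTE_16), which is
 * why a register width can be stored directly in the execution_size field.
 */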
52
53
54/**
55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56 * registers, implicitly moving the operand to a message register.
57 *
58 * On Sandybridge, this is no longer the case.  This function performs the
59 * explicit move; it should be called before emitting a SEND instruction.
60 */
61static void
62gen6_resolve_implied_move(struct brw_compile *p,
63			  struct brw_reg *src,
64			  GLuint msg_reg_nr)
65{
66   struct intel_context *intel = &p->brw->intel;
67   if (intel->gen < 6)
68      return;
69
70   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71      brw_push_insn_state(p);
72      brw_set_mask_control(p, BRW_MASK_DISABLE);
73      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
75	      retype(*src, BRW_REGISTER_TYPE_UD));
76      brw_pop_insn_state(p);
77   }
78   *src = brw_message_reg(msg_reg_nr);
79}
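/* Illustrative only: a minimal sketch of the pattern that SEND-emitting
 * helpers elsewhere in the driver follow.  The function name and the message
 * register number (2) are placeholders, not part of this file.
 */
#if 0
static void
example_resolve_payload(struct brw_compile *p, struct brw_reg payload)
{
   GLuint msg_reg_nr = 2;	/* placeholder message register */

   /* On gen6+ this copies 'payload' into m2 and rewrites it to point there;
    * on earlier generations it is a no-op and the SEND's implied move applies.
    */
   gen6_resolve_implied_move(p, &payload, msg_reg_nr);

   /* The SEND itself would now be emitted with brw_set_dest()/brw_set_src0()
    * on 'payload' plus one of the brw_set_*_message() helpers below.
    */
}
#endif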
80
81static void
82gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
83{
84   struct intel_context *intel = &p->brw->intel;
85   if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
86      reg->file = BRW_GENERAL_REGISTER_FILE;
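      /* Gen7 has no MRFs; message payloads are assembled in the top GRFs
       * instead, so m1 maps to g112 (hence the +111).  The choice of g112+ is
       * a convention of this compiler rather than a hardware requirement.
       */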
87      reg->nr += 111;
88   }
89}
90
91
92static void brw_set_dest(struct brw_compile *p,
93			 struct brw_instruction *insn,
94			 struct brw_reg dest)
95{
96   struct intel_context *intel = &p->brw->intel;
97
98   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
99       dest.file != BRW_MESSAGE_REGISTER_FILE)
100      assert(dest.nr < 128);
101
102   gen7_convert_mrf_to_grf(p, &dest);
103
104   insn->bits1.da1.dest_reg_file = dest.file;
105   insn->bits1.da1.dest_reg_type = dest.type;
106   insn->bits1.da1.dest_address_mode = dest.address_mode;
107
108   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
109      insn->bits1.da1.dest_reg_nr = dest.nr;
110
111      if (insn->header.access_mode == BRW_ALIGN_1) {
112	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
113	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
114	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
115	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
116      }
117      else {
118	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
119	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
120	 /* Even though it is ignored in align16 mode, the horizontal stride still needs to be set to '01'. */
121	 insn->bits1.da16.dest_horiz_stride = 1;
122      }
123   }
124   else {
125      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
126
127      /* These are different sizes in align1 vs align16:
128       */
129      if (insn->header.access_mode == BRW_ALIGN_1) {
130	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
131	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
132	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
133	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
134      }
135      else {
136	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
137	 /* Even though it is ignored in align16 mode, the horizontal stride still needs to be set to '01'. */
138	 insn->bits1.ia16.dest_horiz_stride = 1;
139      }
140   }
141
142   /* NEW: Set the execution size based on dest.width and
143    * insn->compression_control:
144    */
145   guess_execution_size(p, insn, dest);
146}
147
148extern int reg_type_size[];
149
150static void
151validate_reg(struct brw_instruction *insn, struct brw_reg reg)
152{
153   int hstride_for_reg[] = {0, 1, 2, 4};
154   int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
155   int width_for_reg[] = {1, 2, 4, 8, 16};
156   int execsize_for_reg[] = {1, 2, 4, 8, 16};
157   int width, hstride, vstride, execsize;
158
159   if (reg.file == BRW_IMMEDIATE_VALUE) {
160      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
161       * mean the destination has to be 128-bit aligned and the
162       * destination horiz stride has to be a word.
163       */
164      if (reg.type == BRW_REGISTER_TYPE_V) {
165	 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
166		reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
167      }
168
169      return;
170   }
171
172   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
173       reg.nr == BRW_ARF_NULL)
174      return;
175
176   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
177   hstride = hstride_for_reg[reg.hstride];
178
179   if (reg.vstride == 0xf) {
180      vstride = -1;
181   } else {
182      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
183      vstride = vstride_for_reg[reg.vstride];
184   }
185
186   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
187   width = width_for_reg[reg.width];
188
189   assert(insn->header.execution_size >= 0 &&
190	  insn->header.execution_size < Elements(execsize_for_reg));
191   execsize = execsize_for_reg[insn->header.execution_size];
192
193   /* Restrictions from 3.3.10: Register Region Restrictions. */
194   /* 3. */
195   assert(execsize >= width);
196
197   /* 4. */
198   if (execsize == width && hstride != 0) {
199      assert(vstride == -1 || vstride == width * hstride);
200   }
201
202   /* 5. */
203   if (execsize == width && hstride == 0) {
204      /* no restriction on vstride. */
205   }
206
207   /* 6. */
208   if (width == 1) {
209      assert(hstride == 0);
210   }
211
212   /* 7. */
213   if (execsize == 1 && width == 1) {
214      assert(hstride == 0);
215      assert(vstride == 0);
216   }
217
218   /* 8. */
219   if (vstride == 0 && hstride == 0) {
220      assert(width == 1);
221   }
222
223   /* 10. Check destination issues. */
224}
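/* Informative worked example of the restrictions above: a full GRF operand
 * such as brw_vec8_grf(2, 0) has the region <8;8,1> (vstride 8, width 8,
 * hstride 1).  With an execution size of 8 it satisfies rule 3 (8 >= 8) and
 * rule 4 (vstride == width * hstride == 8).  A scalar like brw_vec1_grf(2, 0)
 * is <0;1,0>: width 1 forces hstride 0 (rule 6), and vstride 0 with hstride 0
 * requires width 1 (rule 8).  Register numbers here are placeholders.
 */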
225
226static void brw_set_src0(struct brw_compile *p,
227			 struct brw_instruction *insn,
228			 struct brw_reg reg)
229{
230   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
231      assert(reg.nr < 128);
232
233   gen7_convert_mrf_to_grf(p, &reg);
234
235   validate_reg(insn, reg);
236
237   insn->bits1.da1.src0_reg_file = reg.file;
238   insn->bits1.da1.src0_reg_type = reg.type;
239   insn->bits2.da1.src0_abs = reg.abs;
240   insn->bits2.da1.src0_negate = reg.negate;
241   insn->bits2.da1.src0_address_mode = reg.address_mode;
242
243   if (reg.file == BRW_IMMEDIATE_VALUE) {
244      insn->bits3.ud = reg.dw1.ud;
245
246      /* Required to set some fields in src1 as well:
247       */
248      insn->bits1.da1.src1_reg_file = 0; /* arf */
249      insn->bits1.da1.src1_reg_type = reg.type;
250   }
251   else
252   {
253      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
254	 if (insn->header.access_mode == BRW_ALIGN_1) {
255	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
256	    insn->bits2.da1.src0_reg_nr = reg.nr;
257	 }
258	 else {
259	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
260	    insn->bits2.da16.src0_reg_nr = reg.nr;
261	 }
262      }
263      else {
264	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
265
266	 if (insn->header.access_mode == BRW_ALIGN_1) {
267	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
268	 }
269	 else {
270	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
271	 }
272      }
273
274      if (insn->header.access_mode == BRW_ALIGN_1) {
275	 if (reg.width == BRW_WIDTH_1 &&
276	     insn->header.execution_size == BRW_EXECUTE_1) {
277	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
278	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
279	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
280	 }
281	 else {
282	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
283	    insn->bits2.da1.src0_width = reg.width;
284	    insn->bits2.da1.src0_vert_stride = reg.vstride;
285	 }
286      }
287      else {
288	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
289	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
290	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
291	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
292
293	 /* This is an oddity of the fact that we're using the same register
294	  * descriptions in align_16 mode as in align_1:
295	  */
296	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
297	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
298	 else
299	    insn->bits2.da16.src0_vert_stride = reg.vstride;
300      }
301   }
302}
303
304
305void brw_set_src1(struct brw_compile *p,
306		  struct brw_instruction *insn,
307		  struct brw_reg reg)
308{
309   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
310
311   assert(reg.nr < 128);
312
313   gen7_convert_mrf_to_grf(p, &reg);
314
315   validate_reg(insn, reg);
316
317   insn->bits1.da1.src1_reg_file = reg.file;
318   insn->bits1.da1.src1_reg_type = reg.type;
319   insn->bits3.da1.src1_abs = reg.abs;
320   insn->bits3.da1.src1_negate = reg.negate;
321
322   /* Only src1 can be immediate in two-argument instructions.
323    */
324   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
325
326   if (reg.file == BRW_IMMEDIATE_VALUE) {
327      insn->bits3.ud = reg.dw1.ud;
328   }
329   else {
330      /* This is a hardware restriction, which may or may not be lifted
331       * in the future:
332       */
333      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
334      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
335
336      if (insn->header.access_mode == BRW_ALIGN_1) {
337	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
338	 insn->bits3.da1.src1_reg_nr = reg.nr;
339      }
340      else {
341	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
342	 insn->bits3.da16.src1_reg_nr = reg.nr;
343      }
344
345      if (insn->header.access_mode == BRW_ALIGN_1) {
346	 if (reg.width == BRW_WIDTH_1 &&
347	     insn->header.execution_size == BRW_EXECUTE_1) {
348	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
349	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
350	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
351	 }
352	 else {
353	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
354	    insn->bits3.da1.src1_width = reg.width;
355	    insn->bits3.da1.src1_vert_stride = reg.vstride;
356	 }
357      }
358      else {
359	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
360	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
361	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
362	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
363
364	 /* This is an oddity of the fact that we're using the same register
365	  * descriptions in align_16 mode as in align_1:
366	  */
367	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
368	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
369	 else
370	    insn->bits3.da16.src1_vert_stride = reg.vstride;
371      }
372   }
373}
374
375
376
377static void brw_set_math_message( struct brw_compile *p,
378				  struct brw_instruction *insn,
379				  GLuint msg_length,
380				  GLuint response_length,
381				  GLuint function,
382				  GLuint integer_type,
383				  GLboolean low_precision,
384				  GLboolean saturate,
385				  GLuint dataType )
386{
387   struct brw_context *brw = p->brw;
388   struct intel_context *intel = &brw->intel;
389   brw_set_src1(p, insn, brw_imm_d(0));
390
391   if (intel->gen == 5) {
392       insn->bits3.math_gen5.function = function;
393       insn->bits3.math_gen5.int_type = integer_type;
394       insn->bits3.math_gen5.precision = low_precision;
395       insn->bits3.math_gen5.saturate = saturate;
396       insn->bits3.math_gen5.data_type = dataType;
397       insn->bits3.math_gen5.snapshot = 0;
398       insn->bits3.math_gen5.header_present = 0;
399       insn->bits3.math_gen5.response_length = response_length;
400       insn->bits3.math_gen5.msg_length = msg_length;
401       insn->bits3.math_gen5.end_of_thread = 0;
402       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
403       insn->bits2.send_gen5.end_of_thread = 0;
404   } else {
405       insn->bits3.math.function = function;
406       insn->bits3.math.int_type = integer_type;
407       insn->bits3.math.precision = low_precision;
408       insn->bits3.math.saturate = saturate;
409       insn->bits3.math.data_type = dataType;
410       insn->bits3.math.response_length = response_length;
411       insn->bits3.math.msg_length = msg_length;
412       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
413       insn->bits3.math.end_of_thread = 0;
414   }
415}
416
417
418static void brw_set_ff_sync_message(struct brw_compile *p,
419				    struct brw_instruction *insn,
420				    GLboolean allocate,
421				    GLuint response_length,
422				    GLboolean end_of_thread)
423{
424	struct brw_context *brw = p->brw;
425	struct intel_context *intel = &brw->intel;
426	brw_set_src1(p, insn, brw_imm_d(0));
427
428	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
429	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
430	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
431	insn->bits3.urb_gen5.allocate = allocate;
432	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
433	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
434	insn->bits3.urb_gen5.header_present = 1;
435	insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
436	insn->bits3.urb_gen5.msg_length = 1;
437	insn->bits3.urb_gen5.end_of_thread = end_of_thread;
438	if (intel->gen >= 6) {
439	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
440	} else {
441	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
442	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
443	}
444}
445
446static void brw_set_urb_message( struct brw_compile *p,
447				 struct brw_instruction *insn,
448				 GLboolean allocate,
449				 GLboolean used,
450				 GLuint msg_length,
451				 GLuint response_length,
452				 GLboolean end_of_thread,
453				 GLboolean complete,
454				 GLuint offset,
455				 GLuint swizzle_control )
456{
457    struct brw_context *brw = p->brw;
458    struct intel_context *intel = &brw->intel;
459    brw_set_src1(p, insn, brw_imm_d(0));
460
461    if (intel->gen == 7) {
462        insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
463        insn->bits3.urb_gen7.offset = offset;
464        assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
465        insn->bits3.urb_gen7.swizzle_control = swizzle_control;
466        /* per_slot_offset = 0 makes it ignore offsets in message header */
467        insn->bits3.urb_gen7.per_slot_offset = 0;
468        insn->bits3.urb_gen7.complete = complete;
469        insn->bits3.urb_gen7.header_present = 1;
470        insn->bits3.urb_gen7.response_length = response_length;
471        insn->bits3.urb_gen7.msg_length = msg_length;
472        insn->bits3.urb_gen7.end_of_thread = end_of_thread;
473	insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
474    } else if (intel->gen >= 5) {
475        insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
476        insn->bits3.urb_gen5.offset = offset;
477        insn->bits3.urb_gen5.swizzle_control = swizzle_control;
478        insn->bits3.urb_gen5.allocate = allocate;
479        insn->bits3.urb_gen5.used = used;	/* ? */
480        insn->bits3.urb_gen5.complete = complete;
481        insn->bits3.urb_gen5.header_present = 1;
482        insn->bits3.urb_gen5.response_length = response_length;
483        insn->bits3.urb_gen5.msg_length = msg_length;
484        insn->bits3.urb_gen5.end_of_thread = end_of_thread;
485	if (intel->gen >= 6) {
486	   /* For SNB, the SFID bits moved to the condmod bits, and
487	    * EOT stayed in bits3 above.  Does the EOT bit setting
488	    * below on Ironlake even do anything?
489	    */
490	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
491	} else {
492	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
493	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
494	}
495    } else {
496        insn->bits3.urb.opcode = 0;	/* ? */
497        insn->bits3.urb.offset = offset;
498        insn->bits3.urb.swizzle_control = swizzle_control;
499        insn->bits3.urb.allocate = allocate;
500        insn->bits3.urb.used = used;	/* ? */
501        insn->bits3.urb.complete = complete;
502        insn->bits3.urb.response_length = response_length;
503        insn->bits3.urb.msg_length = msg_length;
504        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
505        insn->bits3.urb.end_of_thread = end_of_thread;
506    }
507}
508
509static void brw_set_dp_write_message( struct brw_compile *p,
510				      struct brw_instruction *insn,
511				      GLuint binding_table_index,
512				      GLuint msg_control,
513				      GLuint msg_type,
514				      GLuint msg_length,
515				      GLboolean header_present,
516				      GLuint pixel_scoreboard_clear,
517				      GLuint response_length,
518				      GLuint end_of_thread,
519				      GLuint send_commit_msg)
520{
521   struct brw_context *brw = p->brw;
522   struct intel_context *intel = &brw->intel;
523   brw_set_src1(p, insn, brw_imm_ud(0));
524
525   if (intel->gen >= 7) {
526       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
527       insn->bits3.gen7_dp.msg_control = msg_control;
528       insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
529       insn->bits3.gen7_dp.msg_type = msg_type;
530       insn->bits3.gen7_dp.header_present = header_present;
531       insn->bits3.gen7_dp.response_length = response_length;
532       insn->bits3.gen7_dp.msg_length = msg_length;
533       insn->bits3.gen7_dp.end_of_thread = end_of_thread;
534
535       /* We always use the render cache for write messages */
536       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
537   } else if (intel->gen == 6) {
538       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
539       insn->bits3.gen6_dp.msg_control = msg_control;
540       insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
541       insn->bits3.gen6_dp.msg_type = msg_type;
542       insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
543       insn->bits3.gen6_dp.header_present = header_present;
544       insn->bits3.gen6_dp.response_length = response_length;
545       insn->bits3.gen6_dp.msg_length = msg_length;
546       insn->bits3.gen6_dp.end_of_thread = end_of_thread;
547
548       /* We always use the render cache for write messages */
549       insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
550   } else if (intel->gen == 5) {
551       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
552       insn->bits3.dp_write_gen5.msg_control = msg_control;
553       insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
554       insn->bits3.dp_write_gen5.msg_type = msg_type;
555       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
556       insn->bits3.dp_write_gen5.header_present = header_present;
557       insn->bits3.dp_write_gen5.response_length = response_length;
558       insn->bits3.dp_write_gen5.msg_length = msg_length;
559       insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
560       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
561       insn->bits2.send_gen5.end_of_thread = end_of_thread;
562   } else {
563       insn->bits3.dp_write.binding_table_index = binding_table_index;
564       insn->bits3.dp_write.msg_control = msg_control;
565       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
566       insn->bits3.dp_write.msg_type = msg_type;
567       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
568       insn->bits3.dp_write.response_length = response_length;
569       insn->bits3.dp_write.msg_length = msg_length;
570       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
571       insn->bits3.dp_write.end_of_thread = end_of_thread;
572   }
573}
574
575static void
576brw_set_dp_read_message(struct brw_compile *p,
577			struct brw_instruction *insn,
578			GLuint binding_table_index,
579			GLuint msg_control,
580			GLuint msg_type,
581			GLuint target_cache,
582			GLuint msg_length,
583			GLuint response_length)
584{
585   struct brw_context *brw = p->brw;
586   struct intel_context *intel = &brw->intel;
587   brw_set_src1(p, insn, brw_imm_d(0));
588
589   if (intel->gen >= 6) {
590       uint32_t target_function;
591
592       if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
593	  target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
594       else
595	  target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
596
597       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
598       insn->bits3.gen6_dp.msg_control = msg_control;
599       insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
600       insn->bits3.gen6_dp.msg_type = msg_type;
601       insn->bits3.gen6_dp.send_commit_msg = 0;
602       insn->bits3.gen6_dp.header_present = 1;
603       insn->bits3.gen6_dp.response_length = response_length;
604       insn->bits3.gen6_dp.msg_length = msg_length;
605       insn->bits3.gen6_dp.end_of_thread = 0;
606       insn->header.destreg__conditionalmod = target_function;
607   } else if (intel->gen == 5) {
608       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
609       insn->bits3.dp_read_gen5.msg_control = msg_control;
610       insn->bits3.dp_read_gen5.msg_type = msg_type;
611       insn->bits3.dp_read_gen5.target_cache = target_cache;
612       insn->bits3.dp_read_gen5.header_present = 1;
613       insn->bits3.dp_read_gen5.response_length = response_length;
614       insn->bits3.dp_read_gen5.msg_length = msg_length;
615       insn->bits3.dp_read_gen5.pad1 = 0;
616       insn->bits3.dp_read_gen5.end_of_thread = 0;
617       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
618       insn->bits2.send_gen5.end_of_thread = 0;
619   } else if (intel->is_g4x) {
620       insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
621       insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
622       insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
623       insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
624       insn->bits3.dp_read_g4x.response_length = response_length;  /*16:19*/
625       insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
626       insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
627       insn->bits3.dp_read_g4x.pad1 = 0;
628       insn->bits3.dp_read_g4x.end_of_thread = 0;
629   } else {
630       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
631       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
632       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
633       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
634       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
635       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
636       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
637       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
638       insn->bits3.dp_read.end_of_thread = 0;  /*31*/
639   }
640}
641
642static void brw_set_sampler_message(struct brw_compile *p,
643                                    struct brw_instruction *insn,
644                                    GLuint binding_table_index,
645                                    GLuint sampler,
646                                    GLuint msg_type,
647                                    GLuint response_length,
648                                    GLuint msg_length,
649                                    GLboolean eot,
650                                    GLuint header_present,
651                                    GLuint simd_mode)
652{
653   struct brw_context *brw = p->brw;
654   struct intel_context *intel = &brw->intel;
655   assert(eot == 0);
656   brw_set_src1(p, insn, brw_imm_d(0));
657
658   if (intel->gen >= 7) {
659      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
660      insn->bits3.sampler_gen7.sampler = sampler;
661      insn->bits3.sampler_gen7.msg_type = msg_type;
662      insn->bits3.sampler_gen7.simd_mode = simd_mode;
663      insn->bits3.sampler_gen7.header_present = header_present;
664      insn->bits3.sampler_gen7.response_length = response_length;
665      insn->bits3.sampler_gen7.msg_length = msg_length;
666      insn->bits3.sampler_gen7.end_of_thread = eot;
667      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
668   } else if (intel->gen >= 5) {
669      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
670      insn->bits3.sampler_gen5.sampler = sampler;
671      insn->bits3.sampler_gen5.msg_type = msg_type;
672      insn->bits3.sampler_gen5.simd_mode = simd_mode;
673      insn->bits3.sampler_gen5.header_present = header_present;
674      insn->bits3.sampler_gen5.response_length = response_length;
675      insn->bits3.sampler_gen5.msg_length = msg_length;
676      insn->bits3.sampler_gen5.end_of_thread = eot;
677      if (intel->gen >= 6)
678	  insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
679      else {
680	  insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
681	  insn->bits2.send_gen5.end_of_thread = eot;
682      }
683   } else if (intel->is_g4x) {
684      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
685      insn->bits3.sampler_g4x.sampler = sampler;
686      insn->bits3.sampler_g4x.msg_type = msg_type;
687      insn->bits3.sampler_g4x.response_length = response_length;
688      insn->bits3.sampler_g4x.msg_length = msg_length;
689      insn->bits3.sampler_g4x.end_of_thread = eot;
690      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
691   } else {
692      insn->bits3.sampler.binding_table_index = binding_table_index;
693      insn->bits3.sampler.sampler = sampler;
694      insn->bits3.sampler.msg_type = msg_type;
695      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
696      insn->bits3.sampler.response_length = response_length;
697      insn->bits3.sampler.msg_length = msg_length;
698      insn->bits3.sampler.end_of_thread = eot;
699      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
700   }
701}
702
703
704
705static struct brw_instruction *next_insn( struct brw_compile *p,
706					  GLuint opcode )
707{
708   struct brw_instruction *insn;
709
710   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
711
712   insn = &p->store[p->nr_insn++];
713   memcpy(insn, p->current, sizeof(*insn));
714
715   /* Reset this one-shot flag:
716    */
717
718   if (p->current->header.destreg__conditionalmod) {
719      p->current->header.destreg__conditionalmod = 0;
720      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
721   }
722
723   insn->header.opcode = opcode;
724   return insn;
725}
726
727
728static struct brw_instruction *brw_alu1( struct brw_compile *p,
729					 GLuint opcode,
730					 struct brw_reg dest,
731					 struct brw_reg src )
732{
733   struct brw_instruction *insn = next_insn(p, opcode);
734   brw_set_dest(p, insn, dest);
735   brw_set_src0(p, insn, src);
736   return insn;
737}
738
739static struct brw_instruction *brw_alu2(struct brw_compile *p,
740					GLuint opcode,
741					struct brw_reg dest,
742					struct brw_reg src0,
743					struct brw_reg src1 )
744{
745   struct brw_instruction *insn = next_insn(p, opcode);
746   brw_set_dest(p, insn, dest);
747   brw_set_src0(p, insn, src0);
748   brw_set_src1(p, insn, src1);
749   return insn;
750}
751
752
753/***********************************************************************
754 * Convenience routines.
755 */
756#define ALU1(OP)					\
757struct brw_instruction *brw_##OP(struct brw_compile *p,	\
758	      struct brw_reg dest,			\
759	      struct brw_reg src0)   			\
760{							\
761   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
762}
763
764#define ALU2(OP)					\
765struct brw_instruction *brw_##OP(struct brw_compile *p,	\
766	      struct brw_reg dest,			\
767	      struct brw_reg src0,			\
768	      struct brw_reg src1)   			\
769{							\
770   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
771}
772
773/* Rounding operations (other than RNDD) require two instructions - the first
774 * stores a rounded value (possibly the wrong way) in the dest register, but
775 * also sets a per-channel "increment bit" in the flag register.  A predicated
776 * add of 1.0 fixes dest to contain the desired result.
777 */
778#define ROUND(OP)							      \
779void brw_##OP(struct brw_compile *p,					      \
780	      struct brw_reg dest,					      \
781	      struct brw_reg src)					      \
782{									      \
783   struct brw_instruction *rnd, *add;					      \
784   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
785   brw_set_dest(p, rnd, dest);						      \
786   brw_set_src0(p, rnd, src);						      \
787   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */  \
788									      \
789   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
790   add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
791}
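/* Informative sketch of the sequence ROUND() emits, e.g. for
 * brw_RNDE(p, dst, src):
 *
 *    RNDE  dst, src        cond-modifier 0x7 also sets a per-channel
 *                          "round increment" bit in the flag register
 *    (+f0) ADD dst, dst, 1.0
 *
 * Only the channels whose flag bit was set execute the predicated ADD, so
 * values that were initially rounded the wrong way receive the +1.0 fixup.
 */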
792
793
794ALU1(MOV)
795ALU2(SEL)
796ALU1(NOT)
797ALU2(AND)
798ALU2(OR)
799ALU2(XOR)
800ALU2(SHR)
801ALU2(SHL)
802ALU2(RSR)
803ALU2(RSL)
804ALU2(ASR)
805ALU1(FRC)
806ALU1(RNDD)
807ALU2(MAC)
808ALU2(MACH)
809ALU1(LZD)
810ALU2(DP4)
811ALU2(DPH)
812ALU2(DP3)
813ALU2(DP2)
814ALU2(LINE)
815ALU2(PLN)
816
817
818ROUND(RNDZ)
819ROUND(RNDE)
820
821
822struct brw_instruction *brw_ADD(struct brw_compile *p,
823				struct brw_reg dest,
824				struct brw_reg src0,
825				struct brw_reg src1)
826{
827   /* 6.2.2: add */
828   if (src0.type == BRW_REGISTER_TYPE_F ||
829       (src0.file == BRW_IMMEDIATE_VALUE &&
830	src0.type == BRW_REGISTER_TYPE_VF)) {
831      assert(src1.type != BRW_REGISTER_TYPE_UD);
832      assert(src1.type != BRW_REGISTER_TYPE_D);
833   }
834
835   if (src1.type == BRW_REGISTER_TYPE_F ||
836       (src1.file == BRW_IMMEDIATE_VALUE &&
837	src1.type == BRW_REGISTER_TYPE_VF)) {
838      assert(src0.type != BRW_REGISTER_TYPE_UD);
839      assert(src0.type != BRW_REGISTER_TYPE_D);
840   }
841
842   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
843}
844
845struct brw_instruction *brw_MUL(struct brw_compile *p,
846				struct brw_reg dest,
847				struct brw_reg src0,
848				struct brw_reg src1)
849{
850   /* 6.32.38: mul */
851   if (src0.type == BRW_REGISTER_TYPE_D ||
852       src0.type == BRW_REGISTER_TYPE_UD ||
853       src1.type == BRW_REGISTER_TYPE_D ||
854       src1.type == BRW_REGISTER_TYPE_UD) {
855      assert(dest.type != BRW_REGISTER_TYPE_F);
856   }
857
858   if (src0.type == BRW_REGISTER_TYPE_F ||
859       (src0.file == BRW_IMMEDIATE_VALUE &&
860	src0.type == BRW_REGISTER_TYPE_VF)) {
861      assert(src1.type != BRW_REGISTER_TYPE_UD);
862      assert(src1.type != BRW_REGISTER_TYPE_D);
863   }
864
865   if (src1.type == BRW_REGISTER_TYPE_F ||
866       (src1.file == BRW_IMMEDIATE_VALUE &&
867	src1.type == BRW_REGISTER_TYPE_VF)) {
868      assert(src0.type != BRW_REGISTER_TYPE_UD);
869      assert(src0.type != BRW_REGISTER_TYPE_D);
870   }
871
872   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
873	  src0.nr != BRW_ARF_ACCUMULATOR);
874   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
875	  src1.nr != BRW_ARF_ACCUMULATOR);
876
877   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
878}
879
880
881void brw_NOP(struct brw_compile *p)
882{
883   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
884   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
885   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
886   brw_set_src1(p, insn, brw_imm_ud(0x0));
887}
888
889
890
891
892
893/***********************************************************************
894 * Comparisons, if/else/endif
895 */
896
897struct brw_instruction *brw_JMPI(struct brw_compile *p,
898                                 struct brw_reg dest,
899                                 struct brw_reg src0,
900                                 struct brw_reg src1)
901{
902   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
903
904   insn->header.execution_size = 1;
905   insn->header.compression_control = BRW_COMPRESSION_NONE;
906   insn->header.mask_control = BRW_MASK_DISABLE;
907
908   p->current->header.predicate_control = BRW_PREDICATE_NONE;
909
910   return insn;
911}
912
913static void
914push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
915{
916   p->if_stack[p->if_stack_depth] = inst;
917
918   p->if_stack_depth++;
919   if (p->if_stack_array_size <= p->if_stack_depth) {
920      p->if_stack_array_size *= 2;
921      p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
922			     p->if_stack_array_size);
923   }
924}
925
926/* EU takes the value from the flag register and pushes it onto some
927 * sort of a stack (presumably merging with any flag value already on
928 * the stack).  Within an if block, the flags at the top of the stack
929 * control execution on each channel of the unit, eg. on each of the
930 * 16 pixel values in our wm programs.
931 *
932 * When the matching 'else' instruction is reached (presumably by
933 * countdown of the instruction count patched in by our ELSE/ENDIF
934 * functions), the relevent flags are inverted.
935 *
936 * When the matching 'endif' instruction is reached, the flags are
937 * popped off.  If the stack is now empty, normal execution resumes.
938 */
939struct brw_instruction *
940brw_IF(struct brw_compile *p, GLuint execute_size)
941{
942   struct intel_context *intel = &p->brw->intel;
943   struct brw_instruction *insn;
944
945   insn = next_insn(p, BRW_OPCODE_IF);
946
947   /* Override the defaults for this instruction:
948    */
949   if (intel->gen < 6) {
950      brw_set_dest(p, insn, brw_ip_reg());
951      brw_set_src0(p, insn, brw_ip_reg());
952      brw_set_src1(p, insn, brw_imm_d(0x0));
953   } else if (intel->gen == 6) {
954      brw_set_dest(p, insn, brw_imm_w(0));
955      insn->bits1.branch_gen6.jump_count = 0;
956      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
957      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
958   } else {
959      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
960      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
961      brw_set_src1(p, insn, brw_imm_ud(0));
962      insn->bits3.break_cont.jip = 0;
963      insn->bits3.break_cont.uip = 0;
964   }
965
966   insn->header.execution_size = execute_size;
967   insn->header.compression_control = BRW_COMPRESSION_NONE;
968   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
969   insn->header.mask_control = BRW_MASK_ENABLE;
970   if (!p->single_program_flow)
971       insn->header.thread_control = BRW_THREAD_SWITCH;
972
973   p->current->header.predicate_control = BRW_PREDICATE_NONE;
974
975   push_if_stack(p, insn);
976   return insn;
977}
978
979/* This function is only used for gen6-style IF instructions with an
980 * embedded comparison (conditional modifier).  It is not used on gen7.
981 */
982struct brw_instruction *
983gen6_IF(struct brw_compile *p, uint32_t conditional,
984	struct brw_reg src0, struct brw_reg src1)
985{
986   struct brw_instruction *insn;
987
988   insn = next_insn(p, BRW_OPCODE_IF);
989
990   brw_set_dest(p, insn, brw_imm_w(0));
991   insn->header.execution_size = BRW_EXECUTE_8;
992   insn->bits1.branch_gen6.jump_count = 0;
993   brw_set_src0(p, insn, src0);
994   brw_set_src1(p, insn, src1);
995
996   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
997   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
998   insn->header.destreg__conditionalmod = conditional;
999
1000   if (!p->single_program_flow)
1001       insn->header.thread_control = BRW_THREAD_SWITCH;
1002
1003   push_if_stack(p, insn);
1004   return insn;
1005}
1006
1007/**
1008 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1009 */
1010static void
1011convert_IF_ELSE_to_ADD(struct brw_compile *p,
1012		       struct brw_instruction *if_inst,
1013		       struct brw_instruction *else_inst)
1014{
1015   /* The next instruction (where the ENDIF would be, if it existed) */
1016   struct brw_instruction *next_inst = &p->store[p->nr_insn];
1017
1018   assert(p->single_program_flow);
1019   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1020   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1021   assert(if_inst->header.execution_size == BRW_EXECUTE_1);
1022
1023   /* Convert IF to an ADD instruction that moves the instruction pointer
1024    * to the first instruction of the ELSE block.  If there is no ELSE
1025    * block, point to where ENDIF would be.  Reverse the predicate.
1026    *
1027    * There's no need to execute an ENDIF since we don't need to do any
1028    * stack operations, and if we're currently executing, we just want to
1029    * continue normally.
1030    */
1031   if_inst->header.opcode = BRW_OPCODE_ADD;
1032   if_inst->header.predicate_inverse = 1;
1033
1034   if (else_inst != NULL) {
1035      /* Convert ELSE to an ADD instruction that points to where the ENDIF
1036       * would be.
1037       */
1038      else_inst->header.opcode = BRW_OPCODE_ADD;
1039
1040      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
1041      else_inst->bits3.ud = (next_inst - else_inst) * 16;
1042   } else {
1043      if_inst->bits3.ud = (next_inst - if_inst) * 16;
1044   }
1045}
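/* The "* 16" factors above turn instruction counts into byte offsets: each
 * native instruction is 128 bits (16 bytes), and in SPF mode the ADD advances
 * the instruction pointer register by a byte distance.
 */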
1046
1047/**
1048 * Patch IF and ELSE instructions with appropriate jump targets.
1049 */
1050static void
1051patch_IF_ELSE(struct brw_compile *p,
1052	      struct brw_instruction *if_inst,
1053	      struct brw_instruction *else_inst,
1054	      struct brw_instruction *endif_inst)
1055{
1056   struct intel_context *intel = &p->brw->intel;
1057
1058   assert(!p->single_program_flow);
1059   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
1060   assert(endif_inst != NULL);
1061   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
1062
1063   unsigned br = 1;
1064   /* The jump count is in units of 64-bit data chunks, so one 128-bit
1065    * instruction requires 2 chunks.
1066    */
1067   if (intel->gen >= 5)
1068      br = 2;
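   /* For example, on gen6 an ENDIF located three instructions after the IF
    * yields a jump_count of br * 3 == 6 64-bit chunks below.
    */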
1069
1070   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
1071   endif_inst->header.execution_size = if_inst->header.execution_size;
1072
1073   if (else_inst == NULL) {
1074      /* Patch IF -> ENDIF */
1075      if (intel->gen < 6) {
1076	 /* Turn it into an IFF, which means no mask stack operations for
1077	  * all-false and jumping past the ENDIF.
1078	  */
1079	 if_inst->header.opcode = BRW_OPCODE_IFF;
1080	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
1081	 if_inst->bits3.if_else.pop_count = 0;
1082	 if_inst->bits3.if_else.pad0 = 0;
1083      } else if (intel->gen == 6) {
1084	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1085	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
1086      } else {
1087	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1088	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
1089      }
1090   } else {
1091      else_inst->header.execution_size = if_inst->header.execution_size;
1092
1093      /* Patch IF -> ELSE */
1094      if (intel->gen < 6) {
1095	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
1096	 if_inst->bits3.if_else.pop_count = 0;
1097	 if_inst->bits3.if_else.pad0 = 0;
1098      } else if (intel->gen == 6) {
1099	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
1100      }
1101
1102      /* Patch ELSE -> ENDIF */
1103      if (intel->gen < 6) {
1104	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1105	  * matching ENDIF.
1106	  */
1107	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
1108	 else_inst->bits3.if_else.pop_count = 1;
1109	 else_inst->bits3.if_else.pad0 = 0;
1110      } else if (intel->gen == 6) {
1111	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1112	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
1113      } else {
1114	 /* The IF instruction's JIP should point just past the ELSE */
1115	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
1116	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1117	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
1118	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
1119      }
1120   }
1121}
1122
1123void
1124brw_ELSE(struct brw_compile *p)
1125{
1126   struct intel_context *intel = &p->brw->intel;
1127   struct brw_instruction *insn;
1128
1129   insn = next_insn(p, BRW_OPCODE_ELSE);
1130
1131   if (intel->gen < 6) {
1132      brw_set_dest(p, insn, brw_ip_reg());
1133      brw_set_src0(p, insn, brw_ip_reg());
1134      brw_set_src1(p, insn, brw_imm_d(0x0));
1135   } else if (intel->gen == 6) {
1136      brw_set_dest(p, insn, brw_imm_w(0));
1137      insn->bits1.branch_gen6.jump_count = 0;
1138      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1139      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1140   } else {
1141      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1142      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1143      brw_set_src1(p, insn, brw_imm_ud(0));
1144      insn->bits3.break_cont.jip = 0;
1145      insn->bits3.break_cont.uip = 0;
1146   }
1147
1148   insn->header.compression_control = BRW_COMPRESSION_NONE;
1149   insn->header.mask_control = BRW_MASK_ENABLE;
1150   if (!p->single_program_flow)
1151       insn->header.thread_control = BRW_THREAD_SWITCH;
1152
1153   push_if_stack(p, insn);
1154}
1155
1156void
1157brw_ENDIF(struct brw_compile *p)
1158{
1159   struct intel_context *intel = &p->brw->intel;
1160   struct brw_instruction *insn;
1161   struct brw_instruction *else_inst = NULL;
1162   struct brw_instruction *if_inst = NULL;
1163
1164   /* Pop the IF and (optional) ELSE instructions from the stack */
1165   p->if_stack_depth--;
1166   if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
1167      else_inst = p->if_stack[p->if_stack_depth];
1168      p->if_stack_depth--;
1169   }
1170   if_inst = p->if_stack[p->if_stack_depth];
1171
1172   if (p->single_program_flow) {
1173      /* ENDIF is useless; don't bother emitting it. */
1174      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1175      return;
1176   }
1177
1178   insn = next_insn(p, BRW_OPCODE_ENDIF);
1179
1180   if (intel->gen < 6) {
1181      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1182      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1183      brw_set_src1(p, insn, brw_imm_d(0x0));
1184   } else if (intel->gen == 6) {
1185      brw_set_dest(p, insn, brw_imm_w(0));
1186      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1187      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1188   } else {
1189      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1190      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1191      brw_set_src1(p, insn, brw_imm_ud(0));
1192   }
1193
1194   insn->header.compression_control = BRW_COMPRESSION_NONE;
1195   insn->header.mask_control = BRW_MASK_ENABLE;
1196   insn->header.thread_control = BRW_THREAD_SWITCH;
1197
1198   /* Also pop item off the stack in the endif instruction: */
1199   if (intel->gen < 6) {
1200      insn->bits3.if_else.jump_count = 0;
1201      insn->bits3.if_else.pop_count = 1;
1202      insn->bits3.if_else.pad0 = 0;
1203   } else if (intel->gen == 6) {
1204      insn->bits1.branch_gen6.jump_count = 2;
1205   } else {
1206      insn->bits3.break_cont.jip = 2;
1207   }
1208   patch_IF_ELSE(p, if_inst, else_inst, insn);
1209}
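/* Illustrative only: a minimal sketch of how the IF/ELSE/ENDIF helpers are
 * intended to be used together with a flag-writing compare.  The register
 * choices (g4 for the result) and the function name are placeholders; the
 * helpers come from brw_eu.h/brw_reg.h.
 */
#if 0
static void
example_if_else(struct brw_compile *p, struct brw_reg x)
{
   /* CMP with a null destination writes f0 and arms BRW_PREDICATE_NORMAL
    * on the compile state, so the IF below tests (x >= 0) per channel.
    */
   brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_F),
	   BRW_CONDITIONAL_GE, x, brw_imm_f(0.0f));

   brw_IF(p, BRW_EXECUTE_8);
   brw_MOV(p, brw_vec8_grf(4, 0), brw_imm_f(1.0f));
   brw_ELSE(p);
   brw_MOV(p, brw_vec8_grf(4, 0), brw_imm_f(-1.0f));
   brw_ENDIF(p);	/* patches the IF/ELSE jump targets emitted above */
}
#endif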
1210
1211struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
1212{
1213   struct intel_context *intel = &p->brw->intel;
1214   struct brw_instruction *insn;
1215
1216   insn = next_insn(p, BRW_OPCODE_BREAK);
1217   if (intel->gen >= 6) {
1218      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1219      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1220      brw_set_src1(p, insn, brw_imm_d(0x0));
1221   } else {
1222      brw_set_dest(p, insn, brw_ip_reg());
1223      brw_set_src0(p, insn, brw_ip_reg());
1224      brw_set_src1(p, insn, brw_imm_d(0x0));
1225      insn->bits3.if_else.pad0 = 0;
1226      insn->bits3.if_else.pop_count = pop_count;
1227   }
1228   insn->header.compression_control = BRW_COMPRESSION_NONE;
1229   insn->header.execution_size = BRW_EXECUTE_8;
1230
1231   return insn;
1232}
1233
1234struct brw_instruction *gen6_CONT(struct brw_compile *p,
1235				  struct brw_instruction *do_insn)
1236{
1237   struct brw_instruction *insn;
1238   int br = 2;
1239
1240   insn = next_insn(p, BRW_OPCODE_CONTINUE);
1243   brw_set_dest(p, insn, brw_ip_reg());
1244   brw_set_src0(p, insn, brw_ip_reg());
1245   brw_set_src1(p, insn, brw_imm_d(0x0));
1246
1247   insn->bits3.break_cont.uip = br * (do_insn - insn);
1248
1249   insn->header.compression_control = BRW_COMPRESSION_NONE;
1250   insn->header.execution_size = BRW_EXECUTE_8;
1251   return insn;
1252}
1253
1254struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1255{
1256   struct brw_instruction *insn;
1257   insn = next_insn(p, BRW_OPCODE_CONTINUE);
1258   brw_set_dest(p, insn, brw_ip_reg());
1259   brw_set_src0(p, insn, brw_ip_reg());
1260   brw_set_src1(p, insn, brw_imm_d(0x0));
1261   insn->header.compression_control = BRW_COMPRESSION_NONE;
1262   insn->header.execution_size = BRW_EXECUTE_8;
1263   /* insn->header.mask_control = BRW_MASK_DISABLE; */
1264   insn->bits3.if_else.pad0 = 0;
1265   insn->bits3.if_else.pop_count = pop_count;
1266   return insn;
1267}
1268
1269/* DO/WHILE loop:
1270 *
1271 * The DO/WHILE is just an unterminated loop -- break or continue are
1272 * used for control within the loop.  We have a few ways they can be
1273 * done.
1274 *
1275 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1276 * jip and no DO instruction.
1277 *
1278 * For non-uniform control flow pre-gen6, there's a DO instruction to
1279 * push the mask, and a WHILE to jump back, and BREAK to get out and
1280 * pop the mask.
1281 *
1282 * For gen6, there's no more mask stack, so no need for DO.  WHILE
1283 * just points back to the first instruction of the loop.
1284 */
1285struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
1286{
1287   struct intel_context *intel = &p->brw->intel;
1288
1289   if (intel->gen >= 6 || p->single_program_flow) {
1290      return &p->store[p->nr_insn];
1291   } else {
1292      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
1293
1294      /* Override the defaults for this instruction:
1295       */
1296      brw_set_dest(p, insn, brw_null_reg());
1297      brw_set_src0(p, insn, brw_null_reg());
1298      brw_set_src1(p, insn, brw_null_reg());
1299
1300      insn->header.compression_control = BRW_COMPRESSION_NONE;
1301      insn->header.execution_size = execute_size;
1302      insn->header.predicate_control = BRW_PREDICATE_NONE;
1303      /* insn->header.mask_control = BRW_MASK_ENABLE; */
1304      /* insn->header.mask_control = BRW_MASK_DISABLE; */
1305
1306      return insn;
1307   }
1308}
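/* Illustrative only: a rough sketch of the non-uniform loop idiom described
 * above, with placeholder registers and the usual brw_eu.h helpers assumed.
 */
#if 0
static void
example_loop(struct brw_compile *p, struct brw_reg counter)
{
   struct brw_instruction *do_insn = brw_DO(p, BRW_EXECUTE_8);

   /* ...loop body; brw_BREAK()/brw_CONT() handle early exits... */
   brw_ADD(p, counter, counter, brw_imm_f(-1.0f));

   /* Arm the predicate for the WHILE: keep looping while counter > 0. */
   brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_F),
	   BRW_CONDITIONAL_G, counter, brw_imm_f(0.0f));

   /* Jumps back toward the DO (or the loop top on gen6+) while any enabled
    * channel still passes the comparison above.
    */
   brw_WHILE(p, do_insn);
}
#endif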
1309
1310
1311
1312struct brw_instruction *brw_WHILE(struct brw_compile *p,
1313                                  struct brw_instruction *do_insn)
1314{
1315   struct intel_context *intel = &p->brw->intel;
1316   struct brw_instruction *insn;
1317   GLuint br = 1;
1318
1319   if (intel->gen >= 5)
1320      br = 2;
1321
1322   if (intel->gen >= 7) {
1323      insn = next_insn(p, BRW_OPCODE_WHILE);
1324
1325      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1326      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1327      brw_set_src1(p, insn, brw_imm_ud(0));
1328      insn->bits3.break_cont.jip = br * (do_insn - insn);
1329
1330      insn->header.execution_size = do_insn->header.execution_size;
1331      assert(insn->header.execution_size == BRW_EXECUTE_8);
1332   } else if (intel->gen == 6) {
1333      insn = next_insn(p, BRW_OPCODE_WHILE);
1334
1335      brw_set_dest(p, insn, brw_imm_w(0));
1336      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1337      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1338      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1339
1340      insn->header.execution_size = do_insn->header.execution_size;
1341      assert(insn->header.execution_size == BRW_EXECUTE_8);
1342   } else {
1343      if (p->single_program_flow) {
1344	 insn = next_insn(p, BRW_OPCODE_ADD);
1345
1346	 brw_set_dest(p, insn, brw_ip_reg());
1347	 brw_set_src0(p, insn, brw_ip_reg());
1348	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1349	 insn->header.execution_size = BRW_EXECUTE_1;
1350      } else {
1351	 insn = next_insn(p, BRW_OPCODE_WHILE);
1352
1353	 assert(do_insn->header.opcode == BRW_OPCODE_DO);
1354
1355	 brw_set_dest(p, insn, brw_ip_reg());
1356	 brw_set_src0(p, insn, brw_ip_reg());
1357	 brw_set_src1(p, insn, brw_imm_d(0));
1358
1359	 insn->header.execution_size = do_insn->header.execution_size;
1360	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1361	 insn->bits3.if_else.pop_count = 0;
1362	 insn->bits3.if_else.pad0 = 0;
1363      }
1364   }
1365   insn->header.compression_control = BRW_COMPRESSION_NONE;
1366   p->current->header.predicate_control = BRW_PREDICATE_NONE;
1367
1368   return insn;
1369}
1370
1371
1372/* FORWARD JUMPS:
1373 */
1374void brw_land_fwd_jump(struct brw_compile *p,
1375		       struct brw_instruction *jmp_insn)
1376{
1377   struct intel_context *intel = &p->brw->intel;
1378   struct brw_instruction *landing = &p->store[p->nr_insn];
1379   GLuint jmpi = 1;
1380
1381   if (intel->gen >= 5)
1382       jmpi = 2;
1383
1384   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1385   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1386
1387   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1388}
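/* Illustrative only: the usual way a predicated forward jump is emitted and
 * then resolved once the instructions to skip are known.  The helper name is
 * a placeholder; brw_JMPI() and brw_land_fwd_jump() are the real entry points.
 */
#if 0
static void
example_forward_jump(struct brw_compile *p)
{
   struct brw_instruction *jmp;

   /* Emit the jump with a zero immediate; it is patched below.  Any predicate
    * currently armed on the compile state applies to the JMPI.
    */
   jmp = brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));

   /* ...emit the instructions to be skipped when the jump fires... */

   /* Point the JMPI's immediate just past the last instruction emitted. */
   brw_land_fwd_jump(p, jmp);
}
#endif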
1389
1390
1391
1392/* To integrate with the above, it makes sense that the comparison
1393 * instruction should populate the flag register.  It might be simpler
1394 * just to use the flag reg for most WM tasks?
1395 */
1396void brw_CMP(struct brw_compile *p,
1397	     struct brw_reg dest,
1398	     GLuint conditional,
1399	     struct brw_reg src0,
1400	     struct brw_reg src1)
1401{
1402   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1403
1404   insn->header.destreg__conditionalmod = conditional;
1405   brw_set_dest(p, insn, dest);
1406   brw_set_src0(p, insn, src0);
1407   brw_set_src1(p, insn, src1);
1408
1409/*    guess_execution_size(insn, src0); */
1410
1411
1412   /* Make it so that future instructions will use the computed flag
1413    * value until brw_set_predicate_control_flag_value() is called
1414    * again.
1415    */
1416   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1417       dest.nr == 0) {
1418      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1419      p->flag_value = 0xff;
1420   }
1421}
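/* Illustrative only: once a CMP with a null destination has armed
 * BRW_PREDICATE_NORMAL as described above, following instructions are emitted
 * predicated until the state is changed again.  A conditional move might look
 * like this (placeholder registers; brw_set_predicate_control() is assumed to
 * come from brw_eu.h):
 */
#if 0
static void
example_conditional_mov(struct brw_compile *p, struct brw_reg dst,
			struct brw_reg a, struct brw_reg b)
{
   brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_F),
	   BRW_CONDITIONAL_L, a, b);
   brw_MOV(p, dst, a);	/* executes only in channels where a < b */
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);	/* disarm again */
}
#endif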
1422
1423/* Issue a 'wait' instruction on notification register n1; the host can
1424   program MMIO to wake up the thread. */
1425void brw_WAIT (struct brw_compile *p)
1426{
1427   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1428   struct brw_reg src = brw_notification_1_reg();
1429
1430   brw_set_dest(p, insn, src);
1431   brw_set_src0(p, insn, src);
1432   brw_set_src1(p, insn, brw_null_reg());
1433   insn->header.execution_size = 0; /* must */
1434   insn->header.predicate_control = 0;
1435   insn->header.compression_control = 0;
1436}
1437
1438
1439/***********************************************************************
1440 * Helpers for the various SEND message types:
1441 */
1442
1443/** Extended math function, float[8].
1444 */
1445void brw_math( struct brw_compile *p,
1446	       struct brw_reg dest,
1447	       GLuint function,
1448	       GLuint saturate,
1449	       GLuint msg_reg_nr,
1450	       struct brw_reg src,
1451	       GLuint data_type,
1452	       GLuint precision )
1453{
1454   struct intel_context *intel = &p->brw->intel;
1455
1456   if (intel->gen >= 6) {
1457      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1458
1459      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1460      assert(src.file == BRW_GENERAL_REGISTER_FILE);
1461
1462      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1463      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1464
1465      /* Source modifiers are ignored for extended math instructions. */
1466      assert(!src.negate);
1467      assert(!src.abs);
1468
1469      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1470	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1471	 assert(src.type == BRW_REGISTER_TYPE_F);
1472      }
1473
1474      /* Math is the same ISA format as other opcodes, except that CondModifier
1475       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1476       */
1477      insn->header.destreg__conditionalmod = function;
1478      insn->header.saturate = saturate;
1479
1480      brw_set_dest(p, insn, dest);
1481      brw_set_src0(p, insn, src);
1482      brw_set_src1(p, insn, brw_null_reg());
1483   } else {
1484      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1485      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1486      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1487      /* Example code doesn't set predicate_control for send
1488       * instructions.
1489       */
1490      insn->header.predicate_control = 0;
1491      insn->header.destreg__conditionalmod = msg_reg_nr;
1492
1493      brw_set_dest(p, insn, dest);
1494      brw_set_src0(p, insn, src);
1495      brw_set_math_message(p,
1496			   insn,
1497			   msg_length, response_length,
1498			   function,
1499			   BRW_MATH_INTEGER_UNSIGNED,
1500			   precision,
1501			   saturate,
1502			   data_type);
1503   }
1504}
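/* Illustrative only: a reciprocal computed through the helper above.  The
 * message register number and registers are placeholders; the BRW_MATH_*
 * enums are assumed to come from brw_defines.h.
 */
#if 0
static void
example_rcp(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
{
   brw_math(p, dst,
	    BRW_MATH_FUNCTION_INV,
	    BRW_MATH_SATURATE_NONE,
	    2,			/* msg_reg_nr; unused on gen6+ */
	    src,
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}
#endif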
1505
1506/** Extended math function, float[8].
1507 */
1508void brw_math2(struct brw_compile *p,
1509	       struct brw_reg dest,
1510	       GLuint function,
1511	       struct brw_reg src0,
1512	       struct brw_reg src1)
1513{
1514   struct intel_context *intel = &p->brw->intel;
1515   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1516
1517   assert(intel->gen >= 6);
1518   (void) intel;
1519
1520
1521   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1522   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1523   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1524
1525   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1526   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1527   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1528
1529   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1530       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1531      assert(src0.type == BRW_REGISTER_TYPE_F);
1532      assert(src1.type == BRW_REGISTER_TYPE_F);
1533   }
1534
1535   /* Source modifiers are ignored for extended math instructions. */
1536   assert(!src0.negate);
1537   assert(!src0.abs);
1538   assert(!src1.negate);
1539   assert(!src1.abs);
1540
1541   /* Math is the same ISA format as other opcodes, except that CondModifier
1542    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1543    */
1544   insn->header.destreg__conditionalmod = function;
1545
1546   brw_set_dest(p, insn, dest);
1547   brw_set_src0(p, insn, src0);
1548   brw_set_src1(p, insn, src1);
1549}
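
/* Illustrative only (operand registers are made up): on gen6+, two-source
 * math such as POW is emitted directly from the GRF, e.g.
 *
 *    brw_math2(p, dst, BRW_MATH_FUNCTION_POW, src0, src1);
 */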
1550
1551/**
1552 * Extended math function, float[16].
1553 * Prior to gen6 this takes two SEND instructions; gen6+ emits a single MATH.
1554 */
1555void brw_math_16( struct brw_compile *p,
1556		  struct brw_reg dest,
1557		  GLuint function,
1558		  GLuint saturate,
1559		  GLuint msg_reg_nr,
1560		  struct brw_reg src,
1561		  GLuint precision )
1562{
1563   struct intel_context *intel = &p->brw->intel;
1564   struct brw_instruction *insn;
1565   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1566   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1567
1568   if (intel->gen >= 6) {
1569      insn = next_insn(p, BRW_OPCODE_MATH);
1570
1571      /* Math is the same ISA format as other opcodes, except that CondModifier
1572       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1573       */
1574      insn->header.destreg__conditionalmod = function;
1575      insn->header.saturate = saturate;
1576
1577      /* Source modifiers are ignored for extended math instructions. */
1578      assert(!src.negate);
1579      assert(!src.abs);
1580
1581      brw_set_dest(p, insn, dest);
1582      brw_set_src0(p, insn, src);
1583      brw_set_src1(p, insn, brw_null_reg());
1584      return;
1585   }
1586
1587   /* First instruction:
1588    */
1589   brw_push_insn_state(p);
1590   brw_set_predicate_control_flag_value(p, 0xff);
1591   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1592
1593   insn = next_insn(p, BRW_OPCODE_SEND);
1594   insn->header.destreg__conditionalmod = msg_reg_nr;
1595
1596   brw_set_dest(p, insn, dest);
1597   brw_set_src0(p, insn, src);
1598   brw_set_math_message(p,
1599			insn,
1600			msg_length, response_length,
1601			function,
1602			BRW_MATH_INTEGER_UNSIGNED,
1603			precision,
1604			saturate,
1605			BRW_MATH_DATA_VECTOR);
1606
1607   /* Second instruction:
1608    */
1609   insn = next_insn(p, BRW_OPCODE_SEND);
1610   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1611   insn->header.destreg__conditionalmod = msg_reg_nr+1;
1612
1613   brw_set_dest(p, insn, offset(dest,1));
1614   brw_set_src0(p, insn, src);
1615   brw_set_math_message(p,
1616			insn,
1617			msg_length, response_length,
1618			function,
1619			BRW_MATH_INTEGER_UNSIGNED,
1620			precision,
1621			saturate,
1622			BRW_MATH_DATA_VECTOR);
1623
1624   brw_pop_insn_state(p);
1625}
1626
1627
1628/**
1629 * Write a block of OWORDs (half a GRF each) into the scratch buffer,
1630 * using a constant offset per channel.
1631 *
1632 * The offset must be aligned to oword size (16 bytes).  Used for
1633 * register spilling.
1634 */
1635void brw_oword_block_write_scratch(struct brw_compile *p,
1636				   struct brw_reg mrf,
1637				   int num_regs,
1638				   GLuint offset)
1639{
1640   struct intel_context *intel = &p->brw->intel;
1641   uint32_t msg_control, msg_type;
1642   int mlen;
1643
1644   if (intel->gen >= 6)
1645      offset /= 16;
1646
1647   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1648
1649   if (num_regs == 1) {
1650      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1651      mlen = 2;
1652   } else {
1653      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1654      mlen = 3;
1655   }
1656
1657   /* Set up the message header.  This is g0, with g0.2 filled with
1658    * the offset.  We don't want to leave our offset around in g0 or
1659    * it'll screw up texture samples, so set it up inside the message
1660    * reg.
1661    */
1662   {
1663      brw_push_insn_state(p);
1664      brw_set_mask_control(p, BRW_MASK_DISABLE);
1665      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1666
1667      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1668
1669      /* set message header global offset field (reg 0, element 2) */
1670      brw_MOV(p,
1671	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1672				  mrf.nr,
1673				  2), BRW_REGISTER_TYPE_UD),
1674	      brw_imm_ud(offset));
1675
1676      brw_pop_insn_state(p);
1677   }
1678
1679   {
1680      struct brw_reg dest;
1681      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1682      int send_commit_msg;
1683      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
1684					 BRW_REGISTER_TYPE_UW);
1685
1686      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1687	 insn->header.compression_control = BRW_COMPRESSION_NONE;
1688	 src_header = vec16(src_header);
1689      }
1690      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1691      insn->header.destreg__conditionalmod = mrf.nr;
1692
1693      /* Until gen6, writes followed by reads from the same location
1694       * are not guaranteed to be ordered unless write_commit is set.
1695       * If set, then a no-op write is issued to the destination
1696       * register to set a dependency, and a read from the destination
1697       * can be used to ensure the ordering.
1698       *
1699       * For gen6, only writes between different threads need ordering
1700       * protection.  Our use of DP writes is all about register
1701       * spilling within a thread.
1702       */
1703      if (intel->gen >= 6) {
1704	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1705	 send_commit_msg = 0;
1706      } else {
1707	 dest = src_header;
1708	 send_commit_msg = 1;
1709      }
1710
1711      brw_set_dest(p, insn, dest);
1712      if (intel->gen >= 6) {
1713	 brw_set_src0(p, insn, mrf);
1714      } else {
1715	 brw_set_src0(p, insn, brw_null_reg());
1716      }
1717
1718      if (intel->gen >= 6)
1719	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1720      else
1721	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1722
1723      brw_set_dp_write_message(p,
1724			       insn,
1725			       255, /* binding table index (255=stateless) */
1726			       msg_control,
1727			       msg_type,
1728			       mlen,
1729			       GL_TRUE, /* header_present */
1730			       0, /* pixel scoreboard */
1731			       send_commit_msg, /* response_length */
1732			       0, /* eot */
1733			       send_commit_msg);
1734   }
1735}
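
/* Illustrative only (MRF number and offset are made up): spilling one GRF to
 * scratch at byte offset 32 might look like
 *
 *    brw_oword_block_write_scratch(p, brw_message_reg(1), 1, 32);
 *
 * with the data to be written staged in the MRF(s) immediately after the
 * header MRF passed in.
 */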
1736
1737
1738/**
1739 * Read a block of owords (half a GRF each) from the scratch buffer
1740 * using a constant index per channel.
1741 *
1742 * Offset must be aligned to oword size (16 bytes).  Used for register
1743 * spilling.
1744 */
1745void
1746brw_oword_block_read_scratch(struct brw_compile *p,
1747			     struct brw_reg dest,
1748			     struct brw_reg mrf,
1749			     int num_regs,
1750			     GLuint offset)
1751{
1752   struct intel_context *intel = &p->brw->intel;
1753   uint32_t msg_control;
1754   int rlen;
1755
1756   if (intel->gen >= 6)
1757      offset /= 16;
1758
1759   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1760   dest = retype(dest, BRW_REGISTER_TYPE_UW);
1761
1762   if (num_regs == 1) {
1763      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1764      rlen = 1;
1765   } else {
1766      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1767      rlen = 2;
1768   }
1769
1770   {
1771      brw_push_insn_state(p);
1772      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1773      brw_set_mask_control(p, BRW_MASK_DISABLE);
1774
1775      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1776
1777      /* set message header global offset field (reg 0, element 2) */
1778      brw_MOV(p,
1779	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1780				  mrf.nr,
1781				  2), BRW_REGISTER_TYPE_UD),
1782	      brw_imm_ud(offset));
1783
1784      brw_pop_insn_state(p);
1785   }
1786
1787   {
1788      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1789
1790      assert(insn->header.predicate_control == 0);
1791      insn->header.compression_control = BRW_COMPRESSION_NONE;
1792      insn->header.destreg__conditionalmod = mrf.nr;
1793
1794      brw_set_dest(p, insn, dest);	/* dest was retyped to UW above */
1795      if (intel->gen >= 6) {
1796	 brw_set_src0(p, insn, mrf);
1797      } else {
1798	 brw_set_src0(p, insn, brw_null_reg());
1799      }
1800
1801      brw_set_dp_read_message(p,
1802			      insn,
1803			      255, /* binding table index (255=stateless) */
1804			      msg_control,
1805			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1806			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1807			      1, /* msg_length */
1808			      rlen);
1809   }
1810}
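
/* Illustrative only (values are made up): filling the spilled data back into
 * a GRF is the mirror image of the write above, e.g.
 *
 *    brw_oword_block_read_scratch(p, dst, brw_message_reg(1), 1, 32);
 */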
1811
1812/**
1813 * Read a float[4] vector from the data port Data Cache (const buffer).
1814 * Location (in buffer) should be a multiple of 16.
1815 * Used for fetching shader constants.
1816 */
1817void brw_oword_block_read(struct brw_compile *p,
1818			  struct brw_reg dest,
1819			  struct brw_reg mrf,
1820			  uint32_t offset,
1821			  uint32_t bind_table_index)
1822{
1823   struct intel_context *intel = &p->brw->intel;
1824
1825   /* On gen6+, the offset is expressed in OWords (16 bytes) rather than bytes. */
1826   if (intel->gen >= 6)
1827      offset /= 16;
1828
1829   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1830
1831   brw_push_insn_state(p);
1832   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1833   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1834   brw_set_mask_control(p, BRW_MASK_DISABLE);
1835
1836   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1837
1838   /* set message header global offset field (reg 0, element 2) */
1839   brw_MOV(p,
1840	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1841			       mrf.nr,
1842			       2), BRW_REGISTER_TYPE_UD),
1843	   brw_imm_ud(offset));
1844
1845   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1846   insn->header.destreg__conditionalmod = mrf.nr;
1847
1848   /* cast dest to a uword[8] vector */
1849   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1850
1851   brw_set_dest(p, insn, dest);
1852   if (intel->gen >= 6) {
1853      brw_set_src0(p, insn, mrf);
1854   } else {
1855      brw_set_src0(p, insn, brw_null_reg());
1856   }
1857
1858   brw_set_dp_read_message(p,
1859			   insn,
1860			   bind_table_index,
1861			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1862			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1863			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1864			   1, /* msg_length */
1865			   1); /* response_length (1 reg, 2 owords!) */
1866
1867   brw_pop_insn_state(p);
1868}
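
/* Illustrative only; SURF_INDEX is a hypothetical binding table slot for a
 * constant buffer.  Fetching the constant at byte offset 16 could be:
 *
 *    brw_oword_block_read(p, dst, brw_message_reg(1), 16, SURF_INDEX);
 */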
1869
1870/**
1871 * Read a set of dwords from the data port Data Cache (const buffer).
1872 *
1873 * The locations (in the buffer) are supplied as UD offsets in the register
1874 * following the provided mrf header reg.
1875 */
1876void brw_dword_scattered_read(struct brw_compile *p,
1877			      struct brw_reg dest,
1878			      struct brw_reg mrf,
1879			      uint32_t bind_table_index)
1880{
1881   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1882
1883   brw_push_insn_state(p);
1884   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1885   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1886   brw_set_mask_control(p, BRW_MASK_DISABLE);
1887   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1888   brw_pop_insn_state(p);
1889
1890   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1891   insn->header.destreg__conditionalmod = mrf.nr;
1892
1893   /* cast dest to a uword[8] vector */
1894   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1895
1896   brw_set_dest(p, insn, dest);
1897   brw_set_src0(p, insn, brw_null_reg());
1898
1899   brw_set_dp_read_message(p,
1900			   insn,
1901			   bind_table_index,
1902			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1903			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1904			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1905			   2, /* msg_length */
1906			   1); /* response_length */
1907}
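
/* Illustrative only; SURF_INDEX and the offsets register are hypothetical.
 * The caller loads per-channel dword offsets into the MRF after the header
 * before issuing the read:
 *
 *    brw_MOV(p, retype(brw_message_reg(2), BRW_REGISTER_TYPE_UD), offsets);
 *    brw_dword_scattered_read(p, dst, brw_message_reg(1), SURF_INDEX);
 */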
1908
1909
1910
1911/**
1912 * Read float[4] constant(s) from VS constant buffer.
1913 * For relative addressing, two float[4] constants will be read into 'dest'.
1914 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1915 */
1916void brw_dp_READ_4_vs(struct brw_compile *p,
1917                      struct brw_reg dest,
1918                      GLuint location,
1919                      GLuint bind_table_index)
1920{
1921   struct intel_context *intel = &p->brw->intel;
1922   struct brw_instruction *insn;
1923   GLuint msg_reg_nr = 1;
1924
1925   if (intel->gen >= 6)
1926      location /= 16;
1927
1928   /* Set up MRF[1] with the location/offset into the const buffer */
1929   brw_push_insn_state(p);
1930   brw_set_access_mode(p, BRW_ALIGN_1);
1931   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1932   brw_set_mask_control(p, BRW_MASK_DISABLE);
1933   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1934   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
1935		     BRW_REGISTER_TYPE_UD),
1936	   brw_imm_ud(location));
1937   brw_pop_insn_state(p);
1938
1939   insn = next_insn(p, BRW_OPCODE_SEND);
1940
1941   insn->header.predicate_control = BRW_PREDICATE_NONE;
1942   insn->header.compression_control = BRW_COMPRESSION_NONE;
1943   insn->header.destreg__conditionalmod = msg_reg_nr;
1944   insn->header.mask_control = BRW_MASK_DISABLE;
1945
1946   brw_set_dest(p, insn, dest);
1947   if (intel->gen >= 6) {
1948      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1949   } else {
1950      brw_set_src0(p, insn, brw_null_reg());
1951   }
1952
1953   brw_set_dp_read_message(p,
1954			   insn,
1955			   bind_table_index,
1956			   0,
1957			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1958			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1959			   1, /* msg_length */
1960			   1); /* response_length (1 Oword) */
1961}
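
/* Illustrative only; SURF_INDEX_VERT_CONST is a hypothetical binding table
 * slot.  Reading constant slot 3 (each slot is 16 bytes) of the VS constant
 * buffer might be:
 *
 *    brw_dp_READ_4_vs(p, dest, 3 * 16, SURF_INDEX_VERT_CONST);
 */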
1962
1963/**
1964 * Read a float[4] constant per vertex from VS constant buffer, with
1965 * relative addressing.
1966 */
1967void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1968			       struct brw_reg dest,
1969			       struct brw_reg addr_reg,
1970			       GLuint offset,
1971			       GLuint bind_table_index)
1972{
1973   struct intel_context *intel = &p->brw->intel;
1974   struct brw_reg src = brw_vec8_grf(0, 0);
1975   int msg_type;
1976
1977   /* Set up MRF[1] with the offset into the const buffer */
1978   brw_push_insn_state(p);
1979   brw_set_access_mode(p, BRW_ALIGN_1);
1980   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1981   brw_set_mask_control(p, BRW_MASK_DISABLE);
1982   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1983
1984   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1985    * fields ignored.
1986    */
1987   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
1988	   addr_reg, brw_imm_d(offset));
1989   brw_pop_insn_state(p);
1990
1991   gen6_resolve_implied_move(p, &src, 0);
1992   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1993
1994   insn->header.predicate_control = BRW_PREDICATE_NONE;
1995   insn->header.compression_control = BRW_COMPRESSION_NONE;
1996   insn->header.destreg__conditionalmod = 0;
1997   insn->header.mask_control = BRW_MASK_DISABLE;
1998
1999   brw_set_dest(p, insn, dest);
2000   brw_set_src0(p, insn, src);
2001
2002   if (intel->gen == 6)
2003      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2004   else if (intel->gen == 5 || intel->is_g4x)
2005      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2006   else
2007      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
2008
2009   brw_set_dp_read_message(p,
2010			   insn,
2011			   bind_table_index,
2012			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
2013			   msg_type,
2014			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2015			   2, /* msg_length */
2016			   1); /* response_length */
2017}
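
/* Illustrative only (names are made up): for relative addressing, addr_reg
 * holds the per-vertex offsets computed earlier and 'offset' is the constant
 * byte offset added on top:
 *
 *    brw_dp_READ_4_vs_relative(p, dest, addr_reg, reloc_offset, SURF_INDEX);
 */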
2018
2019
2020
2021void brw_fb_WRITE(struct brw_compile *p,
2022		  int dispatch_width,
2023                  GLuint msg_reg_nr,
2024                  struct brw_reg src0,
2025                  GLuint binding_table_index,
2026                  GLuint msg_length,
2027                  GLuint response_length,
2028                  GLboolean eot,
2029                  GLboolean header_present)
2030{
2031   struct intel_context *intel = &p->brw->intel;
2032   struct brw_instruction *insn;
2033   GLuint msg_control, msg_type;
2034   struct brw_reg dest;
2035
2036   if (dispatch_width == 16)
2037      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2038   else
2039      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2040
2041   if (intel->gen >= 6 && binding_table_index == 0) {
2042      insn = next_insn(p, BRW_OPCODE_SENDC);
2043   } else {
2044      insn = next_insn(p, BRW_OPCODE_SEND);
2045   }
2046   /* The execution mask is ignored for render target writes. */
2047   insn->header.predicate_control = 0;
2048   insn->header.compression_control = BRW_COMPRESSION_NONE;
2049
2050   if (intel->gen >= 6) {
2051       /* headerless version, just submit color payload */
2052       src0 = brw_message_reg(msg_reg_nr);
2053
2054       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2055   } else {
2056      insn->header.destreg__conditionalmod = msg_reg_nr;
2057
2058      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2059   }
2060
2061   if (dispatch_width == 16)
2062      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
2063   else
2064      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
2065
2066   brw_set_dest(p, insn, dest);
2067   brw_set_src0(p, insn, src0);
2068   brw_set_dp_write_message(p,
2069			    insn,
2070			    binding_table_index,
2071			    msg_control,
2072			    msg_type,
2073			    msg_length,
2074			    header_present,
2075			    1,	/* pixel scoreboard */
2076			    response_length,
2077			    eot,
2078			    0 /* send_commit_msg */);
2079}
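
/* Illustrative only (lengths are made up): a SIMD16 color write to render
 * target 0 that also terminates the thread might be emitted as
 *
 *    brw_fb_WRITE(p, 16, 2, src0, 0, 8, 0, GL_TRUE, GL_FALSE);
 *
 * i.e. msg_reg_nr 2, an 8-register color payload, no response, EOT set and
 * no header.
 */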
2080
2081
2082/**
2083 * Texture sample instruction.
2084 * Note: the msg_type plus msg_length values determine exactly what kind
2085 * of sampling operation is performed.  See volume 4, page 161 of docs.
2086 */
2087void brw_SAMPLE(struct brw_compile *p,
2088		struct brw_reg dest,
2089		GLuint msg_reg_nr,
2090		struct brw_reg src0,
2091		GLuint binding_table_index,
2092		GLuint sampler,
2093		GLuint writemask,
2094		GLuint msg_type,
2095		GLuint response_length,
2096		GLuint msg_length,
2097		GLboolean eot,
2098		GLuint header_present,
2099		GLuint simd_mode)
2100{
2101   struct intel_context *intel = &p->brw->intel;
2102   GLboolean need_stall = 0;
2103
2104   if (writemask == 0) {
2105      /*printf("%s: zero writemask??\n", __FUNCTION__); */
2106      return;
2107   }
2108
2109   /* Hardware doesn't do destination dependency checking on send
2110    * instructions properly.  Add a workaround which generates the
2111    * dependency by other means.  In practice it seems like this bug
2112    * only crops up for texture samples, and only where registers are
2113    * written by the send and then written again later without being
2114    * read in between.  Luckily for us, we already track that
2115    * information and use it to modify the writemask for the
2116    * instruction, so that is a guide for whether a workaround is
2117    * needed.
2118    */
2119   if (writemask != WRITEMASK_XYZW) {
2120      GLuint dst_offset = 0;
2121      GLuint i, newmask = 0, len = 0;
2122
2123      for (i = 0; i < 4; i++) {
2124	 if (writemask & (1<<i))
2125	    break;
2126	 dst_offset += 2;
2127      }
2128      for (; i < 4; i++) {
2129	 if (!(writemask & (1<<i)))
2130	    break;
2131	 newmask |= 1<<i;
2132	 len++;
2133      }
2134
2135      if (newmask != writemask) {
2136	 need_stall = 1;
2137         /* printf("need stall %x %x\n", newmask , writemask); */
2138      }
2139      else {
2140	 GLboolean dispatch_16 = GL_FALSE;
2141
2142	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
2143
2144	 guess_execution_size(p, p->current, dest);
2145	 if (p->current->header.execution_size == BRW_EXECUTE_16)
2146	    dispatch_16 = GL_TRUE;
2147
2148	 newmask = ~newmask & WRITEMASK_XYZW;
2149
2150	 brw_push_insn_state(p);
2151
2152	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2153	 brw_set_mask_control(p, BRW_MASK_DISABLE);
2154
2155	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
2156		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
2157  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
2158
2159	 brw_pop_insn_state(p);
2160
2161  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
2162	 dest = offset(dest, dst_offset);
2163
2164	 /* For 16-wide dispatch, masked channels are skipped in the
2165	  * response.  For 8-wide, masked channels still take up slots,
2166	  * and are just not written to.
2167	  */
2168	 if (dispatch_16)
2169	    response_length = len * 2;
2170      }
2171   }
2172
2173   {
2174      struct brw_instruction *insn;
2175
2176      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2177
2178      insn = next_insn(p, BRW_OPCODE_SEND);
2179      insn->header.predicate_control = 0; /* XXX */
2180      insn->header.compression_control = BRW_COMPRESSION_NONE;
2181      if (intel->gen < 6)
2182	  insn->header.destreg__conditionalmod = msg_reg_nr;
2183
2184      brw_set_dest(p, insn, dest);
2185      brw_set_src0(p, insn, src0);
2186      brw_set_sampler_message(p, insn,
2187			      binding_table_index,
2188			      sampler,
2189			      msg_type,
2190			      response_length,
2191			      msg_length,
2192			      eot,
2193			      header_present,
2194			      simd_mode);
2195   }
2196
2197   if (need_stall) {
2198      struct brw_reg reg = vec8(offset(dest, response_length-1));
2199
2200      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
2201       */
2202      brw_push_insn_state(p);
2203      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
2204      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
2205	      retype(reg, BRW_REGISTER_TYPE_UD));
2206      brw_pop_insn_state(p);
2207   }
2208
2209}
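
/* Illustrative only; mlen is a hypothetical payload length, and the message
 * type and SIMD mode constants shown are what a plain SIMD16 texture fetch
 * would be expected to use:
 *
 *    brw_SAMPLE(p, dest, 2, src0, 0, 0, WRITEMASK_XYZW,
 *               BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE, 8, mlen,
 *               GL_FALSE, 1, BRW_SAMPLER_SIMD_MODE_SIMD16);
 */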
2210
2211/* All these variables are pretty confusing - we might be better off
2212 * using bitmasks and macros for this, in the old style.  Or perhaps
2213 * just having the caller instantiate the fields in dword3 itself.
2214 */
2215void brw_urb_WRITE(struct brw_compile *p,
2216		   struct brw_reg dest,
2217		   GLuint msg_reg_nr,
2218		   struct brw_reg src0,
2219		   GLboolean allocate,
2220		   GLboolean used,
2221		   GLuint msg_length,
2222		   GLuint response_length,
2223		   GLboolean eot,
2224		   GLboolean writes_complete,
2225		   GLuint offset,
2226		   GLuint swizzle)
2227{
2228   struct intel_context *intel = &p->brw->intel;
2229   struct brw_instruction *insn;
2230
2231   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2232
2233   if (intel->gen == 7) {
2234      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2235      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2236		       BRW_REGISTER_TYPE_UD),
2237	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2238		brw_imm_ud(0xff00));
2239   }
2240
2241   insn = next_insn(p, BRW_OPCODE_SEND);
2242
2243   assert(msg_length < BRW_MAX_MRF);
2244
2245   brw_set_dest(p, insn, dest);
2246   brw_set_src0(p, insn, src0);
2247   brw_set_src1(p, insn, brw_imm_d(0));
2248
2249   if (intel->gen < 6)
2250      insn->header.destreg__conditionalmod = msg_reg_nr;
2251
2252   brw_set_urb_message(p,
2253		       insn,
2254		       allocate,
2255		       used,
2256		       msg_length,
2257		       response_length,
2258		       eot,
2259		       writes_complete,
2260		       offset,
2261		       swizzle);
2262}
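
/* Illustrative only (lengths and flags are made up): a final VS URB write of
 * the output vertex, ending the thread, might look like
 *
 *    brw_urb_WRITE(p, brw_null_reg(), 0, src0,
 *                  0, 1, msg_length, 0, 1, 1, 0,
 *                  BRW_URB_SWIZZLE_INTERLEAVE);
 *
 * where the scalar arguments are allocate, used, msg_length, response_length,
 * eot, writes_complete and offset, in that order.
 */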
2263
2264static int
2265brw_find_next_block_end(struct brw_compile *p, int start)
2266{
2267   int ip;
2268
2269   for (ip = start + 1; ip < p->nr_insn; ip++) {
2270      struct brw_instruction *insn = &p->store[ip];
2271
2272      switch (insn->header.opcode) {
2273      case BRW_OPCODE_ENDIF:
2274      case BRW_OPCODE_ELSE:
2275      case BRW_OPCODE_WHILE:
2276	 return ip;
2277      }
2278   }
2279   assert(!"not reached");
2280   return start + 1;
2281}
2282
2283/* There is no DO instruction on gen6, so to find the end of the loop
2284 * we have to see if the loop is jumping back before our start
2285 * instruction.
2286 */
2287static int
2288brw_find_loop_end(struct brw_compile *p, int start)
2289{
2290   struct intel_context *intel = &p->brw->intel;
2291   int ip;
2292   int br = 2;
2293
2294   for (ip = start + 1; ip < p->nr_insn; ip++) {
2295      struct brw_instruction *insn = &p->store[ip];
2296
2297      if (insn->header.opcode == BRW_OPCODE_WHILE) {
2298	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
2299				   : insn->bits3.break_cont.jip;
2300	 if (ip + jip / br < start)
2301	    return ip;
2302      }
2303   }
2304   assert(!"not reached");
2305   return start + 1;
2306}
2307
2308/* After program generation, go back and update the UIP and JIP of
2309 * BREAK and CONT instructions to their correct locations.
2310 */
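/* The factor of 2 ('br') used below and in brw_find_loop_end() comes from
 * jump offsets being counted in 64-bit (half instruction) units while each
 * instruction is 128 bits.  As an illustration, a BREAK whose enclosing
 * block ends 3 instructions later gets a JIP of 6.
 */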
2311void
2312brw_set_uip_jip(struct brw_compile *p)
2313{
2314   struct intel_context *intel = &p->brw->intel;
2315   int ip;
2316   int br = 2;
2317
2318   if (intel->gen < 6)
2319      return;
2320
2321   for (ip = 0; ip < p->nr_insn; ip++) {
2322      struct brw_instruction *insn = &p->store[ip];
2323
2324      switch (insn->header.opcode) {
2325      case BRW_OPCODE_BREAK:
2326	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2327	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2328	 insn->bits3.break_cont.uip =
2329	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
2330	 break;
2331      case BRW_OPCODE_CONTINUE:
2332	 /* JIP is set at CONTINUE emit time, since that's when we
2333	  * know where the start of the loop is.
2334	  */
2335	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
2336	 assert(insn->bits3.break_cont.uip != 0);
2337	 assert(insn->bits3.break_cont.jip != 0);
2338	 break;
2339      }
2340   }
2341}
2342
2343void brw_ff_sync(struct brw_compile *p,
2344		   struct brw_reg dest,
2345		   GLuint msg_reg_nr,
2346		   struct brw_reg src0,
2347		   GLboolean allocate,
2348		   GLuint response_length,
2349		   GLboolean eot)
2350{
2351   struct intel_context *intel = &p->brw->intel;
2352   struct brw_instruction *insn;
2353
2354   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2355
2356   insn = next_insn(p, BRW_OPCODE_SEND);
2357   brw_set_dest(p, insn, dest);
2358   brw_set_src0(p, insn, src0);
2359   brw_set_src1(p, insn, brw_imm_d(0));
2360
2361   if (intel->gen < 6)
2362       insn->header.destreg__conditionalmod = msg_reg_nr;
2363
2364   brw_set_ff_sync_message(p,
2365			   insn,
2366			   allocate,
2367			   response_length,
2368			   eot);
2369}
2370