brw_eu_emit.c revision 56ff30a9f97a1a7094432333906544d6138d6bf2
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59       dest.file != BRW_MESSAGE_REGISTER_FILE)
60      assert(dest.nr < 128);
61
62   insn->bits1.da1.dest_reg_file = dest.file;
63   insn->bits1.da1.dest_reg_type = dest.type;
64   insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67      insn->bits1.da1.dest_reg_nr = dest.nr;
68
69      if (insn->header.access_mode == BRW_ALIGN_1) {
70	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74      }
75      else {
76	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78      }
79   }
80   else {
81      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83      /* These are different sizes in align1 vs align16:
84       */
85      if (insn->header.access_mode == BRW_ALIGN_1) {
86	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90      }
91      else {
92	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93      }
94   }
95
96   /* NEW: Set the execution size based on dest.width and
97    * insn->compression_control:
98    */
99   guess_execution_size(insn, dest);
100}
101
102static void brw_set_src0( struct brw_instruction *insn,
103                          struct brw_reg reg )
104{
105   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
106      assert(reg.nr < 128);
107
108   insn->bits1.da1.src0_reg_file = reg.file;
109   insn->bits1.da1.src0_reg_type = reg.type;
110   insn->bits2.da1.src0_abs = reg.abs;
111   insn->bits2.da1.src0_negate = reg.negate;
112   insn->bits2.da1.src0_address_mode = reg.address_mode;
113
114   if (reg.file == BRW_IMMEDIATE_VALUE) {
115      insn->bits3.ud = reg.dw1.ud;
116
117      /* Required to set some fields in src1 as well:
118       */
119      insn->bits1.da1.src1_reg_file = 0; /* arf */
120      insn->bits1.da1.src1_reg_type = reg.type;
121   }
122   else
123   {
124      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
125	 if (insn->header.access_mode == BRW_ALIGN_1) {
126	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
127	    insn->bits2.da1.src0_reg_nr = reg.nr;
128	 }
129	 else {
130	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
131	    insn->bits2.da16.src0_reg_nr = reg.nr;
132	 }
133      }
134      else {
135	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
136
137	 if (insn->header.access_mode == BRW_ALIGN_1) {
138	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
139	 }
140	 else {
141	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
142	 }
143      }
144
145      if (insn->header.access_mode == BRW_ALIGN_1) {
146	 if (reg.width == BRW_WIDTH_1 &&
147	     insn->header.execution_size == BRW_EXECUTE_1) {
148	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
149	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
150	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
151	 }
152	 else {
153	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
154	    insn->bits2.da1.src0_width = reg.width;
155	    insn->bits2.da1.src0_vert_stride = reg.vstride;
156	 }
157      }
158      else {
159	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
160	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
161	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
162	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
163
164	 /* This is an oddity of the fact we're using the same
165	  * descriptions for registers in align_16 as align_1:
166	  */
167	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
168	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
169	 else
170	    insn->bits2.da16.src0_vert_stride = reg.vstride;
171      }
172   }
173}
174
175
176void brw_set_src1( struct brw_instruction *insn,
177                   struct brw_reg reg )
178{
179   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
180
181   assert(reg.nr < 128);
182
183   insn->bits1.da1.src1_reg_file = reg.file;
184   insn->bits1.da1.src1_reg_type = reg.type;
185   insn->bits3.da1.src1_abs = reg.abs;
186   insn->bits3.da1.src1_negate = reg.negate;
187
188   /* Only src1 can be immediate in two-argument instructions.
189    */
190   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
191
192   if (reg.file == BRW_IMMEDIATE_VALUE) {
193      insn->bits3.ud = reg.dw1.ud;
194   }
195   else {
196      /* This is a hardware restriction, which may or may not be lifted
197       * in the future:
198       */
199      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
200      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
201
202      if (insn->header.access_mode == BRW_ALIGN_1) {
203	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
204	 insn->bits3.da1.src1_reg_nr = reg.nr;
205      }
206      else {
207	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
208	 insn->bits3.da16.src1_reg_nr = reg.nr;
209      }
210
211      if (insn->header.access_mode == BRW_ALIGN_1) {
212	 if (reg.width == BRW_WIDTH_1 &&
213	     insn->header.execution_size == BRW_EXECUTE_1) {
214	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
215	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
216	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
217	 }
218	 else {
219	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
220	    insn->bits3.da1.src1_width = reg.width;
221	    insn->bits3.da1.src1_vert_stride = reg.vstride;
222	 }
223      }
224      else {
225	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
226	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
227	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
228	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
229
230	 /* This is an oddity of the fact we're using the same
231	  * descriptions for registers in align_16 as align_1:
232	  */
233	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
234	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
235	 else
236	    insn->bits3.da16.src1_vert_stride = reg.vstride;
237      }
238   }
239}
240
241
242
243static void brw_set_math_message( struct brw_context *brw,
244				  struct brw_instruction *insn,
245				  GLuint msg_length,
246				  GLuint response_length,
247				  GLuint function,
248				  GLuint integer_type,
249				  GLboolean low_precision,
250				  GLboolean saturate,
251				  GLuint dataType )
252{
253   struct intel_context *intel = &brw->intel;
254   brw_set_src1(insn, brw_imm_d(0));
255
256   if (intel->is_ironlake) {
257       insn->bits3.math_igdng.function = function;
258       insn->bits3.math_igdng.int_type = integer_type;
259       insn->bits3.math_igdng.precision = low_precision;
260       insn->bits3.math_igdng.saturate = saturate;
261       insn->bits3.math_igdng.data_type = dataType;
262       insn->bits3.math_igdng.snapshot = 0;
263       insn->bits3.math_igdng.header_present = 0;
264       insn->bits3.math_igdng.response_length = response_length;
265       insn->bits3.math_igdng.msg_length = msg_length;
266       insn->bits3.math_igdng.end_of_thread = 0;
267       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
268       insn->bits2.send_igdng.end_of_thread = 0;
269   } else {
270       insn->bits3.math.function = function;
271       insn->bits3.math.int_type = integer_type;
272       insn->bits3.math.precision = low_precision;
273       insn->bits3.math.saturate = saturate;
274       insn->bits3.math.data_type = dataType;
275       insn->bits3.math.response_length = response_length;
276       insn->bits3.math.msg_length = msg_length;
277       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
278       insn->bits3.math.end_of_thread = 0;
279   }
280}
281
282
283static void brw_set_ff_sync_message( struct brw_context *brw,
284				 struct brw_instruction *insn,
285				 GLboolean allocate,
286				 GLboolean used,
287				 GLuint msg_length,
288				 GLuint response_length,
289				 GLboolean end_of_thread,
290				 GLboolean complete,
291				 GLuint offset,
292				 GLuint swizzle_control )
293{
294	brw_set_src1(insn, brw_imm_d(0));
295
296	insn->bits3.urb_igdng.opcode = 1;
297	insn->bits3.urb_igdng.offset = offset;
298	insn->bits3.urb_igdng.swizzle_control = swizzle_control;
299	insn->bits3.urb_igdng.allocate = allocate;
300	insn->bits3.urb_igdng.used = used;
301	insn->bits3.urb_igdng.complete = complete;
302	insn->bits3.urb_igdng.header_present = 1;
303	insn->bits3.urb_igdng.response_length = response_length;
304	insn->bits3.urb_igdng.msg_length = msg_length;
305	insn->bits3.urb_igdng.end_of_thread = end_of_thread;
306	insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
307	insn->bits2.send_igdng.end_of_thread = end_of_thread;
308}
309
310static void brw_set_urb_message( struct brw_context *brw,
311				 struct brw_instruction *insn,
312				 GLboolean allocate,
313				 GLboolean used,
314				 GLuint msg_length,
315				 GLuint response_length,
316				 GLboolean end_of_thread,
317				 GLboolean complete,
318				 GLuint offset,
319				 GLuint swizzle_control )
320{
321    struct intel_context *intel = &brw->intel;
322    brw_set_src1(insn, brw_imm_d(0));
323
324    if (intel->is_ironlake || intel->gen >= 6) {
325        insn->bits3.urb_igdng.opcode = 0;	/* ? */
326        insn->bits3.urb_igdng.offset = offset;
327        insn->bits3.urb_igdng.swizzle_control = swizzle_control;
328        insn->bits3.urb_igdng.allocate = allocate;
329        insn->bits3.urb_igdng.used = used;	/* ? */
330        insn->bits3.urb_igdng.complete = complete;
331        insn->bits3.urb_igdng.header_present = 1;
332        insn->bits3.urb_igdng.response_length = response_length;
333        insn->bits3.urb_igdng.msg_length = msg_length;
334        insn->bits3.urb_igdng.end_of_thread = end_of_thread;
335	if (intel->gen >= 6) {
336	   /* For SNB, the SFID bits moved to the condmod bits, and
337	    * EOT stayed in bits3 above.  Does the EOT bit setting
338	    * below on Ironlake even do anything?
339	    */
340	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
341	} else {
342	   insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
343	   insn->bits2.send_igdng.end_of_thread = end_of_thread;
344	}
345    } else {
346        insn->bits3.urb.opcode = 0;	/* ? */
347        insn->bits3.urb.offset = offset;
348        insn->bits3.urb.swizzle_control = swizzle_control;
349        insn->bits3.urb.allocate = allocate;
350        insn->bits3.urb.used = used;	/* ? */
351        insn->bits3.urb.complete = complete;
352        insn->bits3.urb.response_length = response_length;
353        insn->bits3.urb.msg_length = msg_length;
354        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
355        insn->bits3.urb.end_of_thread = end_of_thread;
356    }
357}
358
359static void brw_set_dp_write_message( struct brw_context *brw,
360				      struct brw_instruction *insn,
361				      GLuint binding_table_index,
362				      GLuint msg_control,
363				      GLuint msg_type,
364				      GLuint msg_length,
365				      GLuint pixel_scoreboard_clear,
366				      GLuint response_length,
367				      GLuint end_of_thread )
368{
369   struct intel_context *intel = &brw->intel;
370   brw_set_src1(insn, brw_imm_d(0));
371
372   if (intel->is_ironlake) {
373       insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
374       insn->bits3.dp_write_igdng.msg_control = msg_control;
375       insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
376       insn->bits3.dp_write_igdng.msg_type = msg_type;
377       insn->bits3.dp_write_igdng.send_commit_msg = 0;
378       insn->bits3.dp_write_igdng.header_present = 1;
379       insn->bits3.dp_write_igdng.response_length = response_length;
380       insn->bits3.dp_write_igdng.msg_length = msg_length;
381       insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
382       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
383       insn->bits2.send_igdng.end_of_thread = end_of_thread;
384   } else {
385       insn->bits3.dp_write.binding_table_index = binding_table_index;
386       insn->bits3.dp_write.msg_control = msg_control;
387       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
388       insn->bits3.dp_write.msg_type = msg_type;
389       insn->bits3.dp_write.send_commit_msg = 0;
390       insn->bits3.dp_write.response_length = response_length;
391       insn->bits3.dp_write.msg_length = msg_length;
392       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
393       insn->bits3.dp_write.end_of_thread = end_of_thread;
394   }
395}
396
397static void brw_set_dp_read_message( struct brw_context *brw,
398				      struct brw_instruction *insn,
399				      GLuint binding_table_index,
400				      GLuint msg_control,
401				      GLuint msg_type,
402				      GLuint target_cache,
403				      GLuint msg_length,
404				      GLuint response_length,
405				      GLuint end_of_thread )
406{
407   struct intel_context *intel = &brw->intel;
408   brw_set_src1(insn, brw_imm_d(0));
409
410   if (intel->is_ironlake) {
411       insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
412       insn->bits3.dp_read_igdng.msg_control = msg_control;
413       insn->bits3.dp_read_igdng.msg_type = msg_type;
414       insn->bits3.dp_read_igdng.target_cache = target_cache;
415       insn->bits3.dp_read_igdng.header_present = 1;
416       insn->bits3.dp_read_igdng.response_length = response_length;
417       insn->bits3.dp_read_igdng.msg_length = msg_length;
418       insn->bits3.dp_read_igdng.pad1 = 0;
419       insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
420       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
421       insn->bits2.send_igdng.end_of_thread = end_of_thread;
422   } else {
423       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
424       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
425       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
426       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
427       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
428       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
429       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
430       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
431       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
432   }
433}
434
435static void brw_set_sampler_message(struct brw_context *brw,
436                                    struct brw_instruction *insn,
437                                    GLuint binding_table_index,
438                                    GLuint sampler,
439                                    GLuint msg_type,
440                                    GLuint response_length,
441                                    GLuint msg_length,
442                                    GLboolean eot,
443                                    GLuint header_present,
444                                    GLuint simd_mode)
445{
446   struct intel_context *intel = &brw->intel;
447   assert(eot == 0);
448   brw_set_src1(insn, brw_imm_d(0));
449
450   if (intel->is_ironlake) {
451      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
452      insn->bits3.sampler_igdng.sampler = sampler;
453      insn->bits3.sampler_igdng.msg_type = msg_type;
454      insn->bits3.sampler_igdng.simd_mode = simd_mode;
455      insn->bits3.sampler_igdng.header_present = header_present;
456      insn->bits3.sampler_igdng.response_length = response_length;
457      insn->bits3.sampler_igdng.msg_length = msg_length;
458      insn->bits3.sampler_igdng.end_of_thread = eot;
459      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
460      insn->bits2.send_igdng.end_of_thread = eot;
461   } else if (intel->is_g4x) {
462      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
463      insn->bits3.sampler_g4x.sampler = sampler;
464      insn->bits3.sampler_g4x.msg_type = msg_type;
465      insn->bits3.sampler_g4x.response_length = response_length;
466      insn->bits3.sampler_g4x.msg_length = msg_length;
467      insn->bits3.sampler_g4x.end_of_thread = eot;
468      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
469   } else {
470      insn->bits3.sampler.binding_table_index = binding_table_index;
471      insn->bits3.sampler.sampler = sampler;
472      insn->bits3.sampler.msg_type = msg_type;
473      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
474      insn->bits3.sampler.response_length = response_length;
475      insn->bits3.sampler.msg_length = msg_length;
476      insn->bits3.sampler.end_of_thread = eot;
477      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
478   }
479}
480
481
482
483static struct brw_instruction *next_insn( struct brw_compile *p,
484					  GLuint opcode )
485{
486   struct brw_instruction *insn;
487
488   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
489
490   insn = &p->store[p->nr_insn++];
491   memcpy(insn, p->current, sizeof(*insn));
492
493   /* Reset this one-shot flag:
494    */
495
496   if (p->current->header.destreg__conditionalmod) {
497      p->current->header.destreg__conditionalmod = 0;
498      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
499   }
500
501   insn->header.opcode = opcode;
502   return insn;
503}
504
505
506static struct brw_instruction *brw_alu1( struct brw_compile *p,
507					 GLuint opcode,
508					 struct brw_reg dest,
509					 struct brw_reg src )
510{
511   struct brw_instruction *insn = next_insn(p, opcode);
512   brw_set_dest(insn, dest);
513   brw_set_src0(insn, src);
514   return insn;
515}
516
517static struct brw_instruction *brw_alu2(struct brw_compile *p,
518					GLuint opcode,
519					struct brw_reg dest,
520					struct brw_reg src0,
521					struct brw_reg src1 )
522{
523   struct brw_instruction *insn = next_insn(p, opcode);
524   brw_set_dest(insn, dest);
525   brw_set_src0(insn, src0);
526   brw_set_src1(insn, src1);
527   return insn;
528}
529
530
531/***********************************************************************
532 * Convenience routines.
533 */
534#define ALU1(OP)					\
535struct brw_instruction *brw_##OP(struct brw_compile *p,	\
536	      struct brw_reg dest,			\
537	      struct brw_reg src0)   			\
538{							\
539   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
540}
541
542#define ALU2(OP)					\
543struct brw_instruction *brw_##OP(struct brw_compile *p,	\
544	      struct brw_reg dest,			\
545	      struct brw_reg src0,			\
546	      struct brw_reg src1)   			\
547{							\
548   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
549}
550
551
552ALU1(MOV)
553ALU2(SEL)
554ALU1(NOT)
555ALU2(AND)
556ALU2(OR)
557ALU2(XOR)
558ALU2(SHR)
559ALU2(SHL)
560ALU2(RSR)
561ALU2(RSL)
562ALU2(ASR)
563ALU2(ADD)
564ALU2(MUL)
565ALU1(FRC)
566ALU1(RNDD)
567ALU1(RNDZ)
568ALU2(MAC)
569ALU2(MACH)
570ALU1(LZD)
571ALU2(DP4)
572ALU2(DPH)
573ALU2(DP3)
574ALU2(DP2)
575ALU2(LINE)
576ALU2(PLN)
577
578
579
580void brw_NOP(struct brw_compile *p)
581{
582   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
583   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
584   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
585   brw_set_src1(insn, brw_imm_ud(0x0));
586}
587
588
589
590
591
592/***********************************************************************
593 * Comparisons, if/else/endif
594 */
595
596struct brw_instruction *brw_JMPI(struct brw_compile *p,
597                                 struct brw_reg dest,
598                                 struct brw_reg src0,
599                                 struct brw_reg src1)
600{
601   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
602
603   insn->header.execution_size = 1;
604   insn->header.compression_control = BRW_COMPRESSION_NONE;
605   insn->header.mask_control = BRW_MASK_DISABLE;
606
607   p->current->header.predicate_control = BRW_PREDICATE_NONE;
608
609   return insn;
610}
611
612/* EU takes the value from the flag register and pushes it onto some
613 * sort of a stack (presumably merging with any flag value already on
614 * the stack).  Within an if block, the flags at the top of the stack
615 * control execution on each channel of the unit, eg. on each of the
616 * 16 pixel values in our wm programs.
617 *
618 * When the matching 'else' instruction is reached (presumably by
619 * countdown of the instruction count patched in by our ELSE/ENDIF
620 * functions), the relevant flags are inverted.
621 *
622 * When the matching 'endif' instruction is reached, the flags are
623 * popped off.  If the stack is now empty, normal execution resumes.
624 *
625 * No attempt is made to deal with stack overflow (14 elements?).
626 */
627struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
628{
629   struct brw_instruction *insn;
630
631   if (p->single_program_flow) {
632      assert(execute_size == BRW_EXECUTE_1);
633
634      insn = next_insn(p, BRW_OPCODE_ADD);
635      insn->header.predicate_inverse = 1;
636   } else {
637      insn = next_insn(p, BRW_OPCODE_IF);
638   }
639
640   /* Override the defaults for this instruction:
641    */
642   brw_set_dest(insn, brw_ip_reg());
643   brw_set_src0(insn, brw_ip_reg());
644   brw_set_src1(insn, brw_imm_d(0x0));
645
646   insn->header.execution_size = execute_size;
647   insn->header.compression_control = BRW_COMPRESSION_NONE;
648   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
649   insn->header.mask_control = BRW_MASK_ENABLE;
650   if (!p->single_program_flow)
651       insn->header.thread_control = BRW_THREAD_SWITCH;
652
653   p->current->header.predicate_control = BRW_PREDICATE_NONE;
654
655   return insn;
656}
657
658
659struct brw_instruction *brw_ELSE(struct brw_compile *p,
660				 struct brw_instruction *if_insn)
661{
662   struct intel_context *intel = &p->brw->intel;
663   struct brw_instruction *insn;
664   GLuint br = 1;
665
666   if (intel->is_ironlake)
667      br = 2;
668
669   if (p->single_program_flow) {
670      insn = next_insn(p, BRW_OPCODE_ADD);
671   } else {
672      insn = next_insn(p, BRW_OPCODE_ELSE);
673   }
674
675   brw_set_dest(insn, brw_ip_reg());
676   brw_set_src0(insn, brw_ip_reg());
677   brw_set_src1(insn, brw_imm_d(0x0));
678
679   insn->header.compression_control = BRW_COMPRESSION_NONE;
680   insn->header.execution_size = if_insn->header.execution_size;
681   insn->header.mask_control = BRW_MASK_ENABLE;
682   if (!p->single_program_flow)
683       insn->header.thread_control = BRW_THREAD_SWITCH;
684
685   /* Patch the if instruction to point at this instruction.
686    */
687   if (p->single_program_flow) {
688      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
689
690      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
691   } else {
692      assert(if_insn->header.opcode == BRW_OPCODE_IF);
693
694      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
695      if_insn->bits3.if_else.pop_count = 0;
696      if_insn->bits3.if_else.pad0 = 0;
697   }
698
699   return insn;
700}
701
702void brw_ENDIF(struct brw_compile *p,
703	       struct brw_instruction *patch_insn)
704{
705   struct intel_context *intel = &p->brw->intel;
706   GLuint br = 1;
707
708   if (intel->is_ironlake)
709      br = 2;
710
711   if (p->single_program_flow) {
712      /* In single program flow mode, there's no need to execute an ENDIF,
713       * since we don't need to do any stack operations, and if we're executing
714       * currently, we want to just continue executing.
715       */
716      struct brw_instruction *next = &p->store[p->nr_insn];
717
718      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
719
720      patch_insn->bits3.ud = (next - patch_insn) * 16;
721   } else {
722      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
723
724      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
725      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
726      brw_set_src1(insn, brw_imm_d(0x0));
727
728      insn->header.compression_control = BRW_COMPRESSION_NONE;
729      insn->header.execution_size = patch_insn->header.execution_size;
730      insn->header.mask_control = BRW_MASK_ENABLE;
731      insn->header.thread_control = BRW_THREAD_SWITCH;
732
733      assert(patch_insn->bits3.if_else.jump_count == 0);
734
735      /* Patch the if or else instructions to point at this or the next
736       * instruction respectively.
737       */
738      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
739	 /* Automagically turn it into an IFF:
740	  */
741	 patch_insn->header.opcode = BRW_OPCODE_IFF;
742	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
743	 patch_insn->bits3.if_else.pop_count = 0;
744	 patch_insn->bits3.if_else.pad0 = 0;
745      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
746	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
747	 patch_insn->bits3.if_else.pop_count = 1;
748	 patch_insn->bits3.if_else.pad0 = 0;
749      } else {
750	 assert(0);
751      }
752
753      /* Also pop item off the stack in the endif instruction:
754       */
755      insn->bits3.if_else.jump_count = 0;
756      insn->bits3.if_else.pop_count = 1;
757      insn->bits3.if_else.pad0 = 0;
758   }
759}
760
761struct brw_instruction *brw_BREAK(struct brw_compile *p)
762{
763   struct brw_instruction *insn;
764   insn = next_insn(p, BRW_OPCODE_BREAK);
765   brw_set_dest(insn, brw_ip_reg());
766   brw_set_src0(insn, brw_ip_reg());
767   brw_set_src1(insn, brw_imm_d(0x0));
768   insn->header.compression_control = BRW_COMPRESSION_NONE;
769   insn->header.execution_size = BRW_EXECUTE_8;
770   /* insn->header.mask_control = BRW_MASK_DISABLE; */
771   insn->bits3.if_else.pad0 = 0;
772   return insn;
773}
774
775struct brw_instruction *brw_CONT(struct brw_compile *p)
776{
777   struct brw_instruction *insn;
778   insn = next_insn(p, BRW_OPCODE_CONTINUE);
779   brw_set_dest(insn, brw_ip_reg());
780   brw_set_src0(insn, brw_ip_reg());
781   brw_set_src1(insn, brw_imm_d(0x0));
782   insn->header.compression_control = BRW_COMPRESSION_NONE;
783   insn->header.execution_size = BRW_EXECUTE_8;
784   /* insn->header.mask_control = BRW_MASK_DISABLE; */
785   insn->bits3.if_else.pad0 = 0;
786   return insn;
787}
788
789/* DO/WHILE loop:
790 */
791struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
792{
793   if (p->single_program_flow) {
794      return &p->store[p->nr_insn];
795   } else {
796      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
797
798      /* Override the defaults for this instruction:
799       */
800      brw_set_dest(insn, brw_null_reg());
801      brw_set_src0(insn, brw_null_reg());
802      brw_set_src1(insn, brw_null_reg());
803
804      insn->header.compression_control = BRW_COMPRESSION_NONE;
805      insn->header.execution_size = execute_size;
806      insn->header.predicate_control = BRW_PREDICATE_NONE;
807      /* insn->header.mask_control = BRW_MASK_ENABLE; */
808      /* insn->header.mask_control = BRW_MASK_DISABLE; */
809
810      return insn;
811   }
812}
813
814
815
816struct brw_instruction *brw_WHILE(struct brw_compile *p,
817                                  struct brw_instruction *do_insn)
818{
819   struct intel_context *intel = &p->brw->intel;
820   struct brw_instruction *insn;
821   GLuint br = 1;
822
823   if (intel->is_ironlake)
824      br = 2;
825
826   if (p->single_program_flow)
827      insn = next_insn(p, BRW_OPCODE_ADD);
828   else
829      insn = next_insn(p, BRW_OPCODE_WHILE);
830
831   brw_set_dest(insn, brw_ip_reg());
832   brw_set_src0(insn, brw_ip_reg());
833   brw_set_src1(insn, brw_imm_d(0x0));
834
835   insn->header.compression_control = BRW_COMPRESSION_NONE;
836
837   if (p->single_program_flow) {
838      insn->header.execution_size = BRW_EXECUTE_1;
839
840      insn->bits3.d = (do_insn - insn) * 16;
841   } else {
842      insn->header.execution_size = do_insn->header.execution_size;
843
844      assert(do_insn->header.opcode == BRW_OPCODE_DO);
845      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
846      insn->bits3.if_else.pop_count = 0;
847      insn->bits3.if_else.pad0 = 0;
848   }
849
850/*    insn->header.mask_control = BRW_MASK_ENABLE; */
851
852   /* insn->header.mask_control = BRW_MASK_DISABLE; */
853   p->current->header.predicate_control = BRW_PREDICATE_NONE;
854   return insn;
855}
856
857
858/* FORWARD JUMPS:
859 */
860void brw_land_fwd_jump(struct brw_compile *p,
861		       struct brw_instruction *jmp_insn)
862{
863   struct intel_context *intel = &p->brw->intel;
864   struct brw_instruction *landing = &p->store[p->nr_insn];
865   GLuint jmpi = 1;
866
867   if (intel->is_ironlake)
868       jmpi = 2;
869
870   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
871   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
872
873   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
874}
875
876
877
878/* To integrate with the above, it makes sense that the comparison
879 * instruction should populate the flag register.  It might be simpler
880 * just to use the flag reg for most WM tasks?
881 */
882void brw_CMP(struct brw_compile *p,
883	     struct brw_reg dest,
884	     GLuint conditional,
885	     struct brw_reg src0,
886	     struct brw_reg src1)
887{
888   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
889
890   insn->header.destreg__conditionalmod = conditional;
891   brw_set_dest(insn, dest);
892   brw_set_src0(insn, src0);
893   brw_set_src1(insn, src1);
894
895/*    guess_execution_size(insn, src0); */
896
897
898   /* Make it so that future instructions will use the computed flag
899    * value until brw_set_predicate_control_flag_value() is called
900    * again.
901    */
902   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
903       dest.nr == 0) {
904      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
905      p->flag_value = 0xff;
906   }
907}
908
909
910
911/***********************************************************************
912 * Helpers for the various SEND message types:
913 */
914
915/** Extended math function, float[8].
916 */
917void brw_math( struct brw_compile *p,
918	       struct brw_reg dest,
919	       GLuint function,
920	       GLuint saturate,
921	       GLuint msg_reg_nr,
922	       struct brw_reg src,
923	       GLuint data_type,
924	       GLuint precision )
925{
926   struct intel_context *intel = &p->brw->intel;
927
928   if (intel->gen >= 6) {
929      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
930
931      /* Math is the same ISA format as other opcodes, except that CondModifier
932       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
933       */
934      insn->header.destreg__conditionalmod = function;
935
936      brw_set_dest(insn, dest);
937      brw_set_src0(insn, src);
938      brw_set_src1(insn, brw_null_reg());
939   } else {
940      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
941      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
942      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
943      /* Example code doesn't set predicate_control for send
944       * instructions.
945       */
946      insn->header.predicate_control = 0;
947      insn->header.destreg__conditionalmod = msg_reg_nr;
948
949      brw_set_dest(insn, dest);
950      brw_set_src0(insn, src);
951      brw_set_math_message(p->brw,
952			   insn,
953			   msg_length, response_length,
954			   function,
955			   BRW_MATH_INTEGER_UNSIGNED,
956			   precision,
957			   saturate,
958			   data_type);
959   }
960}
961
962/**
963 * Extended math function, float[16].
964 * Use 2 send instructions.
965 */
966void brw_math_16( struct brw_compile *p,
967		  struct brw_reg dest,
968		  GLuint function,
969		  GLuint saturate,
970		  GLuint msg_reg_nr,
971		  struct brw_reg src,
972		  GLuint precision )
973{
974   struct brw_instruction *insn;
975   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
976   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
977
978   /* First instruction:
979    */
980   brw_push_insn_state(p);
981   brw_set_predicate_control_flag_value(p, 0xff);
982   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
983
984   insn = next_insn(p, BRW_OPCODE_SEND);
985   insn->header.destreg__conditionalmod = msg_reg_nr;
986
987   brw_set_dest(insn, dest);
988   brw_set_src0(insn, src);
989   brw_set_math_message(p->brw,
990			insn,
991			msg_length, response_length,
992			function,
993			BRW_MATH_INTEGER_UNSIGNED,
994			precision,
995			saturate,
996			BRW_MATH_DATA_VECTOR);
997
998   /* Second instruction:
999    */
1000   insn = next_insn(p, BRW_OPCODE_SEND);
1001   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1002   insn->header.destreg__conditionalmod = msg_reg_nr+1;
1003
1004   brw_set_dest(insn, offset(dest,1));
1005   brw_set_src0(insn, src);
1006   brw_set_math_message(p->brw,
1007			insn,
1008			msg_length, response_length,
1009			function,
1010			BRW_MATH_INTEGER_UNSIGNED,
1011			precision,
1012			saturate,
1013			BRW_MATH_DATA_VECTOR);
1014
1015   brw_pop_insn_state(p);
1016}
1017
1018
1019/**
1020 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1021 * Scratch offset should be a multiple of 64.
1022 * Used for register spilling.
1023 */
1024void brw_dp_WRITE_16( struct brw_compile *p,
1025		      struct brw_reg src,
1026		      GLuint scratch_offset )
1027{
1028   GLuint msg_reg_nr = 1;
1029   {
1030      brw_push_insn_state(p);
1031      brw_set_mask_control(p, BRW_MASK_DISABLE);
1032      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1033
1034      /* set message header global offset field (reg 0, element 2) */
1035      brw_MOV(p,
1036	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1037	      brw_imm_d(scratch_offset));
1038
1039      brw_pop_insn_state(p);
1040   }
1041
1042   {
1043      GLuint msg_length = 3;
1044      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1045      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1046
1047      insn->header.predicate_control = 0; /* XXX */
1048      insn->header.compression_control = BRW_COMPRESSION_NONE;
1049      insn->header.destreg__conditionalmod = msg_reg_nr;
1050
1051      brw_set_dest(insn, dest);
1052      brw_set_src0(insn, src);
1053
1054      brw_set_dp_write_message(p->brw,
1055			       insn,
1056			       255, /* binding table index (255=stateless) */
1057			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1058			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1059			       msg_length,
1060			       0, /* pixel scoreboard */
1061			       0, /* response_length */
1062			       0); /* eot */
1063   }
1064}
1065
1066
1067/**
1068 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1069 * Scratch offset should be a multiple of 64.
1070 * Used for register spilling.
1071 */
1072void brw_dp_READ_16( struct brw_compile *p,
1073		      struct brw_reg dest,
1074		      GLuint scratch_offset )
1075{
1076   GLuint msg_reg_nr = 1;
1077   {
1078      brw_push_insn_state(p);
1079      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1080      brw_set_mask_control(p, BRW_MASK_DISABLE);
1081
1082      /* set message header global offset field (reg 0, element 2) */
1083      brw_MOV(p,
1084	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1085	      brw_imm_d(scratch_offset));
1086
1087      brw_pop_insn_state(p);
1088   }
1089
1090   {
1091      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1092
1093      insn->header.predicate_control = 0; /* XXX */
1094      insn->header.compression_control = BRW_COMPRESSION_NONE;
1095      insn->header.destreg__conditionalmod = msg_reg_nr;
1096
1097      brw_set_dest(insn, dest);	/* UW? */
1098      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1099
1100      brw_set_dp_read_message(p->brw,
1101			      insn,
1102			      255, /* binding table index (255=stateless) */
1103			      3,  /* msg_control (3 means 4 Owords) */
1104			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1105			      1, /* target cache (render/scratch) */
1106			      1, /* msg_length */
1107			      2, /* response_length */
1108			      0); /* eot */
1109   }
1110}
1111
1112
1113/**
1114 * Read a float[4] vector from the data port Data Cache (const buffer).
1115 * Location (in buffer) should be a multiple of 16.
1116 * Used for fetching shader constants.
1117 * If relAddr is true, we'll do an indirect fetch using the address register.
1118 */
1119void brw_dp_READ_4( struct brw_compile *p,
1120                    struct brw_reg dest,
1121                    GLboolean relAddr,
1122                    GLuint location,
1123                    GLuint bind_table_index )
1124{
1125   /* XXX: relAddr not implemented */
1126   GLuint msg_reg_nr = 1;
1127   {
1128      struct brw_reg b;
1129      brw_push_insn_state(p);
1130      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1131      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1132      brw_set_mask_control(p, BRW_MASK_DISABLE);
1133
1134   /* Setup MRF[1] with location/offset into const buffer */
1135      b = brw_message_reg(msg_reg_nr);
1136      b = retype(b, BRW_REGISTER_TYPE_UD);
1137      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1138       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1139       */
1140      brw_MOV(p, b, brw_imm_ud(location));
1141      brw_pop_insn_state(p);
1142   }
1143
1144   {
1145      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1146
1147      insn->header.predicate_control = BRW_PREDICATE_NONE;
1148      insn->header.compression_control = BRW_COMPRESSION_NONE;
1149      insn->header.destreg__conditionalmod = msg_reg_nr;
1150      insn->header.mask_control = BRW_MASK_DISABLE;
1151
1152      /* cast dest to a uword[8] vector */
1153      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1154
1155      brw_set_dest(insn, dest);
1156      brw_set_src0(insn, brw_null_reg());
1157
1158      brw_set_dp_read_message(p->brw,
1159			      insn,
1160			      bind_table_index,
1161			      0,  /* msg_control (0 means 1 Oword) */
1162			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1163			      0, /* source cache = data cache */
1164			      1, /* msg_length */
1165			      1, /* response_length (1 Oword) */
1166			      0); /* eot */
1167   }
1168}
1169
1170
1171/**
1172 * Read float[4] constant(s) from VS constant buffer.
1173 * For relative addressing, two float[4] constants will be read into 'dest'.
1174 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1175 */
1176void brw_dp_READ_4_vs(struct brw_compile *p,
1177                      struct brw_reg dest,
1178                      GLuint oword,
1179                      GLboolean relAddr,
1180                      struct brw_reg addrReg,
1181                      GLuint location,
1182                      GLuint bind_table_index)
1183{
1184   GLuint msg_reg_nr = 1;
1185
1186   assert(oword < 2);
1187   /*
1188   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1189          location, msg_reg_nr);
1190   */
1191
1192   /* Setup MRF[1] with location/offset into const buffer */
1193   {
1194      struct brw_reg b;
1195
1196      brw_push_insn_state(p);
1197      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1198      brw_set_mask_control(p, BRW_MASK_DISABLE);
1199      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1200      /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1201
1202      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1203       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1204       */
1205      b = brw_message_reg(msg_reg_nr);
1206      b = retype(b, BRW_REGISTER_TYPE_UD);
1207      /*b = get_element_ud(b, 2);*/
1208      if (relAddr) {
1209         brw_ADD(p, b, addrReg, brw_imm_ud(location));
1210      }
1211      else {
1212         brw_MOV(p, b, brw_imm_ud(location));
1213      }
1214
1215      brw_pop_insn_state(p);
1216   }
1217
1218   {
1219      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1220
1221      insn->header.predicate_control = BRW_PREDICATE_NONE;
1222      insn->header.compression_control = BRW_COMPRESSION_NONE;
1223      insn->header.destreg__conditionalmod = msg_reg_nr;
1224      insn->header.mask_control = BRW_MASK_DISABLE;
1225      /*insn->header.access_mode = BRW_ALIGN_16;*/
1226
1227      brw_set_dest(insn, dest);
1228      brw_set_src0(insn, brw_null_reg());
1229
1230      brw_set_dp_read_message(p->brw,
1231			      insn,
1232			      bind_table_index,
1233			      oword,  /* 0 = lower Oword, 1 = upper Oword */
1234			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1235			      0, /* source cache = data cache */
1236			      1, /* msg_length */
1237			      1, /* response_length (1 Oword) */
1238			      0); /* eot */
1239   }
1240}
1241
1242
1243
1244void brw_fb_WRITE(struct brw_compile *p,
1245                  struct brw_reg dest,
1246                  GLuint msg_reg_nr,
1247                  struct brw_reg src0,
1248                  GLuint binding_table_index,
1249                  GLuint msg_length,
1250                  GLuint response_length,
1251                  GLboolean eot)
1252{
1253   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1254
1255   insn->header.predicate_control = 0; /* XXX */
1256   insn->header.compression_control = BRW_COMPRESSION_NONE;
1257   insn->header.destreg__conditionalmod = msg_reg_nr;
1258
1259   brw_set_dest(insn, dest);
1260   brw_set_src0(insn, src0);
1261   brw_set_dp_write_message(p->brw,
1262			    insn,
1263			    binding_table_index,
1264			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1265			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1266			    msg_length,
1267			    1,	/* pixel scoreboard */
1268			    response_length,
1269			    eot);
1270}
1271
1272
1273/**
1274 * Texture sample instruction.
1275 * Note: the msg_type plus msg_length values determine exactly what kind
1276 * of sampling operation is performed.  See volume 4, page 161 of docs.
1277 */
1278void brw_SAMPLE(struct brw_compile *p,
1279		struct brw_reg dest,
1280		GLuint msg_reg_nr,
1281		struct brw_reg src0,
1282		GLuint binding_table_index,
1283		GLuint sampler,
1284		GLuint writemask,
1285		GLuint msg_type,
1286		GLuint response_length,
1287		GLuint msg_length,
1288		GLboolean eot,
1289		GLuint header_present,
1290		GLuint simd_mode)
1291{
1292   GLboolean need_stall = 0;
1293
1294   if (writemask == 0) {
1295      /*printf("%s: zero writemask??\n", __FUNCTION__); */
1296      return;
1297   }
1298
1299   /* Hardware doesn't do destination dependency checking on send
1300    * instructions properly.  Add a workaround which generates the
1301    * dependency by other means.  In practice it seems like this bug
1302    * only crops up for texture samples, and only where registers are
1303    * written by the send and then written again later without being
1304    * read in between.  Luckily for us, we already track that
1305    * information and use it to modify the writemask for the
1306    * instruction, so that is a guide for whether a workaround is
1307    * needed.
1308    */
1309   if (writemask != WRITEMASK_XYZW) {
1310      GLuint dst_offset = 0;
1311      GLuint i, newmask = 0, len = 0;
1312
1313      for (i = 0; i < 4; i++) {
1314	 if (writemask & (1<<i))
1315	    break;
1316	 dst_offset += 2;
1317      }
1318      for (; i < 4; i++) {
1319	 if (!(writemask & (1<<i)))
1320	    break;
1321	 newmask |= 1<<i;
1322	 len++;
1323      }
1324
1325      if (newmask != writemask) {
1326	 need_stall = 1;
1327         /* printf("need stall %x %x\n", newmask , writemask); */
1328      }
1329      else {
1330	 GLboolean dispatch_16 = GL_FALSE;
1331
1332	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1333
1334	 guess_execution_size(p->current, dest);
1335	 if (p->current->header.execution_size == BRW_EXECUTE_16)
1336	    dispatch_16 = GL_TRUE;
1337
1338	 newmask = ~newmask & WRITEMASK_XYZW;
1339
1340	 brw_push_insn_state(p);
1341
1342	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1343	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1344
1345	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1346  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1347
1348	 brw_pop_insn_state(p);
1349
1350  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1351	 dest = offset(dest, dst_offset);
1352
1353	 /* For 16-wide dispatch, masked channels are skipped in the
1354	  * response.  For 8-wide, masked channels still take up slots,
1355	  * and are just not written to.
1356	  */
1357	 if (dispatch_16)
1358	    response_length = len * 2;
1359      }
1360   }
1361
1362   {
1363      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1364
1365      insn->header.predicate_control = 0; /* XXX */
1366      insn->header.compression_control = BRW_COMPRESSION_NONE;
1367      insn->header.destreg__conditionalmod = msg_reg_nr;
1368
1369      brw_set_dest(insn, dest);
1370      brw_set_src0(insn, src0);
1371      brw_set_sampler_message(p->brw, insn,
1372			      binding_table_index,
1373			      sampler,
1374			      msg_type,
1375			      response_length,
1376			      msg_length,
1377			      eot,
1378			      header_present,
1379			      simd_mode);
1380   }
1381
1382   if (need_stall) {
1383      struct brw_reg reg = vec8(offset(dest, response_length-1));
1384
1385      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1386       */
1387      brw_push_insn_state(p);
1388      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1389      brw_MOV(p, reg, reg);
1390      brw_pop_insn_state(p);
1391   }
1392
1393}
1394
1395/* All these variables are pretty confusing - we might be better off
1396 * using bitmasks and macros for this, in the old style.  Or perhaps
1397 * just having the caller instantiate the fields in dword3 itself.
1398 */
1399void brw_urb_WRITE(struct brw_compile *p,
1400		   struct brw_reg dest,
1401		   GLuint msg_reg_nr,
1402		   struct brw_reg src0,
1403		   GLboolean allocate,
1404		   GLboolean used,
1405		   GLuint msg_length,
1406		   GLuint response_length,
1407		   GLboolean eot,
1408		   GLboolean writes_complete,
1409		   GLuint offset,
1410		   GLuint swizzle)
1411{
1412   struct intel_context *intel = &p->brw->intel;
1413   struct brw_instruction *insn;
1414
1415   /* Sandybridge doesn't have the implied move for SENDs,
1416    * and the first message register index comes from src0.
1417    */
1418   if (intel->gen >= 6) {
1419      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1420      src0 = brw_message_reg(msg_reg_nr);
1421   }
1422
1423   insn = next_insn(p, BRW_OPCODE_SEND);
1424
1425   assert(msg_length < BRW_MAX_MRF);
1426
1427   brw_set_dest(insn, dest);
1428   brw_set_src0(insn, src0);
1429   brw_set_src1(insn, brw_imm_d(0));
1430
1431   if (intel->gen < 6)
1432      insn->header.destreg__conditionalmod = msg_reg_nr;
1433
1434   brw_set_urb_message(p->brw,
1435		       insn,
1436		       allocate,
1437		       used,
1438		       msg_length,
1439		       response_length,
1440		       eot,
1441		       writes_complete,
1442		       offset,
1443		       swizzle);
1444}
1445
1446void brw_ff_sync(struct brw_compile *p,
1447		   struct brw_reg dest,
1448		   GLuint msg_reg_nr,
1449		   struct brw_reg src0,
1450		   GLboolean allocate,
1451		   GLboolean used,
1452		   GLuint msg_length,
1453		   GLuint response_length,
1454		   GLboolean eot,
1455		   GLboolean writes_complete,
1456		   GLuint offset,
1457		   GLuint swizzle)
1458{
1459   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1460
1461   assert(msg_length < 16);
1462
1463   brw_set_dest(insn, dest);
1464   brw_set_src0(insn, src0);
1465   brw_set_src1(insn, brw_imm_d(0));
1466
1467   insn->header.destreg__conditionalmod = msg_reg_nr;
1468
1469   brw_set_ff_sync_message(p->brw,
1470		       insn,
1471		       allocate,
1472		       used,
1473		       msg_length,
1474		       response_length,
1475		       eot,
1476		       writes_complete,
1477		       offset,
1478		       swizzle);
1479}
1480