brw_eu_emit.c revision 25024d948298a9f3f3210a0b91486f79a3917b0f
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59       dest.file != BRW_MESSAGE_REGISTER_FILE)
60      assert(dest.nr < 128);
61
62   insn->bits1.da1.dest_reg_file = dest.file;
63   insn->bits1.da1.dest_reg_type = dest.type;
64   insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67      insn->bits1.da1.dest_reg_nr = dest.nr;
68
69      if (insn->header.access_mode == BRW_ALIGN_1) {
70	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74      }
75      else {
76	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78      }
79   }
80   else {
81      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83      /* These are different sizes in align1 vs align16:
84       */
85      if (insn->header.access_mode == BRW_ALIGN_1) {
86	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90      }
91      else {
92	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93      }
94   }
95
96   /* NEW: Set the execution size based on dest.width and
97    * insn->compression_control:
98    */
99   guess_execution_size(insn, dest);
100}
101
102static void brw_set_src0( struct brw_instruction *insn,
103                          struct brw_reg reg )
104{
105   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
106
107   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
108      assert(reg.nr < 128);
109
110   insn->bits1.da1.src0_reg_file = reg.file;
111   insn->bits1.da1.src0_reg_type = reg.type;
112   insn->bits2.da1.src0_abs = reg.abs;
113   insn->bits2.da1.src0_negate = reg.negate;
114   insn->bits2.da1.src0_address_mode = reg.address_mode;
115
116   if (reg.file == BRW_IMMEDIATE_VALUE) {
117      insn->bits3.ud = reg.dw1.ud;
118
119      /* Required to set some fields in src1 as well:
120       */
121      insn->bits1.da1.src1_reg_file = 0; /* arf */
122      insn->bits1.da1.src1_reg_type = reg.type;
123   }
124   else
125   {
126      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
127	 if (insn->header.access_mode == BRW_ALIGN_1) {
128	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
129	    insn->bits2.da1.src0_reg_nr = reg.nr;
130	 }
131	 else {
132	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
133	    insn->bits2.da16.src0_reg_nr = reg.nr;
134	 }
135      }
136      else {
137	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
138
139	 if (insn->header.access_mode == BRW_ALIGN_1) {
140	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
141	 }
142	 else {
143	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
144	 }
145      }
146
147      if (insn->header.access_mode == BRW_ALIGN_1) {
148	 if (reg.width == BRW_WIDTH_1 &&
149	     insn->header.execution_size == BRW_EXECUTE_1) {
150	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
151	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
152	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
153	 }
154	 else {
155	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
156	    insn->bits2.da1.src0_width = reg.width;
157	    insn->bits2.da1.src0_vert_stride = reg.vstride;
158	 }
159      }
160      else {
161	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
162	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
163	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
164	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
165
166	 /* This is an oddity of the fact we're using the same
167	  * descriptions for registers in align_16 as align_1:
168	  */
169	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
170	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
171	 else
172	    insn->bits2.da16.src0_vert_stride = reg.vstride;
173      }
174   }
175}
176
177
178void brw_set_src1( struct brw_instruction *insn,
179                   struct brw_reg reg )
180{
181   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
182
183   assert(reg.nr < 128);
184
185   insn->bits1.da1.src1_reg_file = reg.file;
186   insn->bits1.da1.src1_reg_type = reg.type;
187   insn->bits3.da1.src1_abs = reg.abs;
188   insn->bits3.da1.src1_negate = reg.negate;
189
190   /* Only src1 can be immediate in two-argument instructions.
191    */
192   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
193
194   if (reg.file == BRW_IMMEDIATE_VALUE) {
195      insn->bits3.ud = reg.dw1.ud;
196   }
197   else {
198      /* This is a hardware restriction, which may or may not be lifted
199       * in the future:
200       */
201      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
202      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
203
204      if (insn->header.access_mode == BRW_ALIGN_1) {
205	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
206	 insn->bits3.da1.src1_reg_nr = reg.nr;
207      }
208      else {
209	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
210	 insn->bits3.da16.src1_reg_nr = reg.nr;
211      }
212
213      if (insn->header.access_mode == BRW_ALIGN_1) {
214	 if (reg.width == BRW_WIDTH_1 &&
215	     insn->header.execution_size == BRW_EXECUTE_1) {
216	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
217	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
218	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
219	 }
220	 else {
221	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
222	    insn->bits3.da1.src1_width = reg.width;
223	    insn->bits3.da1.src1_vert_stride = reg.vstride;
224	 }
225      }
226      else {
227	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
228	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
229	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
230	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
231
232	 /* This is an oddity of the fact we're using the same
233	  * descriptions for registers in align_16 as align_1:
234	  */
235	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
236	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
237	 else
238	    insn->bits3.da16.src1_vert_stride = reg.vstride;
239      }
240   }
241}
242
243
244
245static void brw_set_math_message( struct brw_context *brw,
246				  struct brw_instruction *insn,
247				  GLuint msg_length,
248				  GLuint response_length,
249				  GLuint function,
250				  GLuint integer_type,
251				  GLboolean low_precision,
252				  GLboolean saturate,
253				  GLuint dataType )
254{
255   struct intel_context *intel = &brw->intel;
256   brw_set_src1(insn, brw_imm_d(0));
257
258   if (intel->is_ironlake) {
259       insn->bits3.math_igdng.function = function;
260       insn->bits3.math_igdng.int_type = integer_type;
261       insn->bits3.math_igdng.precision = low_precision;
262       insn->bits3.math_igdng.saturate = saturate;
263       insn->bits3.math_igdng.data_type = dataType;
264       insn->bits3.math_igdng.snapshot = 0;
265       insn->bits3.math_igdng.header_present = 0;
266       insn->bits3.math_igdng.response_length = response_length;
267       insn->bits3.math_igdng.msg_length = msg_length;
268       insn->bits3.math_igdng.end_of_thread = 0;
269       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
270       insn->bits2.send_igdng.end_of_thread = 0;
271   } else {
272       insn->bits3.math.function = function;
273       insn->bits3.math.int_type = integer_type;
274       insn->bits3.math.precision = low_precision;
275       insn->bits3.math.saturate = saturate;
276       insn->bits3.math.data_type = dataType;
277       insn->bits3.math.response_length = response_length;
278       insn->bits3.math.msg_length = msg_length;
279       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
280       insn->bits3.math.end_of_thread = 0;
281   }
282}
283
284
285static void brw_set_ff_sync_message( struct brw_context *brw,
286				 struct brw_instruction *insn,
287				 GLboolean allocate,
288				 GLboolean used,
289				 GLuint msg_length,
290				 GLuint response_length,
291				 GLboolean end_of_thread,
292				 GLboolean complete,
293				 GLuint offset,
294				 GLuint swizzle_control )
295{
296	brw_set_src1(insn, brw_imm_d(0));
297
298	insn->bits3.urb_igdng.opcode = 1;
299	insn->bits3.urb_igdng.offset = offset;
300	insn->bits3.urb_igdng.swizzle_control = swizzle_control;
301	insn->bits3.urb_igdng.allocate = allocate;
302	insn->bits3.urb_igdng.used = used;
303	insn->bits3.urb_igdng.complete = complete;
304	insn->bits3.urb_igdng.header_present = 1;
305	insn->bits3.urb_igdng.response_length = response_length;
306	insn->bits3.urb_igdng.msg_length = msg_length;
307	insn->bits3.urb_igdng.end_of_thread = end_of_thread;
308	insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
309	insn->bits2.send_igdng.end_of_thread = end_of_thread;
310}
311
312static void brw_set_urb_message( struct brw_context *brw,
313				 struct brw_instruction *insn,
314				 GLboolean allocate,
315				 GLboolean used,
316				 GLuint msg_length,
317				 GLuint response_length,
318				 GLboolean end_of_thread,
319				 GLboolean complete,
320				 GLuint offset,
321				 GLuint swizzle_control )
322{
323    struct intel_context *intel = &brw->intel;
324    brw_set_src1(insn, brw_imm_d(0));
325
326    if (intel->is_ironlake) {
327        insn->bits3.urb_igdng.opcode = 0;	/* ? */
328        insn->bits3.urb_igdng.offset = offset;
329        insn->bits3.urb_igdng.swizzle_control = swizzle_control;
330        insn->bits3.urb_igdng.allocate = allocate;
331        insn->bits3.urb_igdng.used = used;	/* ? */
332        insn->bits3.urb_igdng.complete = complete;
333        insn->bits3.urb_igdng.header_present = 1;
334        insn->bits3.urb_igdng.response_length = response_length;
335        insn->bits3.urb_igdng.msg_length = msg_length;
336        insn->bits3.urb_igdng.end_of_thread = end_of_thread;
337        insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
338        insn->bits2.send_igdng.end_of_thread = end_of_thread;
339    } else {
340        insn->bits3.urb.opcode = 0;	/* ? */
341        insn->bits3.urb.offset = offset;
342        insn->bits3.urb.swizzle_control = swizzle_control;
343        insn->bits3.urb.allocate = allocate;
344        insn->bits3.urb.used = used;	/* ? */
345        insn->bits3.urb.complete = complete;
346        insn->bits3.urb.response_length = response_length;
347        insn->bits3.urb.msg_length = msg_length;
348        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
349        insn->bits3.urb.end_of_thread = end_of_thread;
350    }
351}
352
353static void brw_set_dp_write_message( struct brw_context *brw,
354				      struct brw_instruction *insn,
355				      GLuint binding_table_index,
356				      GLuint msg_control,
357				      GLuint msg_type,
358				      GLuint msg_length,
359				      GLuint pixel_scoreboard_clear,
360				      GLuint response_length,
361				      GLuint end_of_thread )
362{
363   struct intel_context *intel = &brw->intel;
364   brw_set_src1(insn, brw_imm_d(0));
365
366   if (intel->is_ironlake) {
367       insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
368       insn->bits3.dp_write_igdng.msg_control = msg_control;
369       insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
370       insn->bits3.dp_write_igdng.msg_type = msg_type;
371       insn->bits3.dp_write_igdng.send_commit_msg = 0;
372       insn->bits3.dp_write_igdng.header_present = 1;
373       insn->bits3.dp_write_igdng.response_length = response_length;
374       insn->bits3.dp_write_igdng.msg_length = msg_length;
375       insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
376       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
377       insn->bits2.send_igdng.end_of_thread = end_of_thread;
378   } else {
379       insn->bits3.dp_write.binding_table_index = binding_table_index;
380       insn->bits3.dp_write.msg_control = msg_control;
381       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
382       insn->bits3.dp_write.msg_type = msg_type;
383       insn->bits3.dp_write.send_commit_msg = 0;
384       insn->bits3.dp_write.response_length = response_length;
385       insn->bits3.dp_write.msg_length = msg_length;
386       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
387       insn->bits3.dp_write.end_of_thread = end_of_thread;
388   }
389}
390
391static void brw_set_dp_read_message( struct brw_context *brw,
392				      struct brw_instruction *insn,
393				      GLuint binding_table_index,
394				      GLuint msg_control,
395				      GLuint msg_type,
396				      GLuint target_cache,
397				      GLuint msg_length,
398				      GLuint response_length,
399				      GLuint end_of_thread )
400{
401   struct intel_context *intel = &brw->intel;
402   brw_set_src1(insn, brw_imm_d(0));
403
404   if (intel->is_ironlake) {
405       insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
406       insn->bits3.dp_read_igdng.msg_control = msg_control;
407       insn->bits3.dp_read_igdng.msg_type = msg_type;
408       insn->bits3.dp_read_igdng.target_cache = target_cache;
409       insn->bits3.dp_read_igdng.header_present = 1;
410       insn->bits3.dp_read_igdng.response_length = response_length;
411       insn->bits3.dp_read_igdng.msg_length = msg_length;
412       insn->bits3.dp_read_igdng.pad1 = 0;
413       insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
414       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
415       insn->bits2.send_igdng.end_of_thread = end_of_thread;
416   } else {
417       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
418       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
419       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
420       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
421       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
422       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
423       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
424       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
425       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
426   }
427}
428
429static void brw_set_sampler_message(struct brw_context *brw,
430                                    struct brw_instruction *insn,
431                                    GLuint binding_table_index,
432                                    GLuint sampler,
433                                    GLuint msg_type,
434                                    GLuint response_length,
435                                    GLuint msg_length,
436                                    GLboolean eot,
437                                    GLuint header_present,
438                                    GLuint simd_mode)
439{
440   struct intel_context *intel = &brw->intel;
441   assert(eot == 0);
442   brw_set_src1(insn, brw_imm_d(0));
443
444   if (intel->is_ironlake) {
445      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
446      insn->bits3.sampler_igdng.sampler = sampler;
447      insn->bits3.sampler_igdng.msg_type = msg_type;
448      insn->bits3.sampler_igdng.simd_mode = simd_mode;
449      insn->bits3.sampler_igdng.header_present = header_present;
450      insn->bits3.sampler_igdng.response_length = response_length;
451      insn->bits3.sampler_igdng.msg_length = msg_length;
452      insn->bits3.sampler_igdng.end_of_thread = eot;
453      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
454      insn->bits2.send_igdng.end_of_thread = eot;
455   } else if (intel->is_g4x) {
456      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
457      insn->bits3.sampler_g4x.sampler = sampler;
458      insn->bits3.sampler_g4x.msg_type = msg_type;
459      insn->bits3.sampler_g4x.response_length = response_length;
460      insn->bits3.sampler_g4x.msg_length = msg_length;
461      insn->bits3.sampler_g4x.end_of_thread = eot;
462      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
463   } else {
464      insn->bits3.sampler.binding_table_index = binding_table_index;
465      insn->bits3.sampler.sampler = sampler;
466      insn->bits3.sampler.msg_type = msg_type;
467      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
468      insn->bits3.sampler.response_length = response_length;
469      insn->bits3.sampler.msg_length = msg_length;
470      insn->bits3.sampler.end_of_thread = eot;
471      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
472   }
473}
474
475
476
477static struct brw_instruction *next_insn( struct brw_compile *p,
478					  GLuint opcode )
479{
480   struct brw_instruction *insn;
481
482   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
483
484   insn = &p->store[p->nr_insn++];
485   memcpy(insn, p->current, sizeof(*insn));
486
487   /* Reset this one-shot flag:
488    */
489
490   if (p->current->header.destreg__conditionalmod) {
491      p->current->header.destreg__conditionalmod = 0;
492      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
493   }
494
495   insn->header.opcode = opcode;
496   return insn;
497}
498
499
500static struct brw_instruction *brw_alu1( struct brw_compile *p,
501					 GLuint opcode,
502					 struct brw_reg dest,
503					 struct brw_reg src )
504{
505   struct brw_instruction *insn = next_insn(p, opcode);
506   brw_set_dest(insn, dest);
507   brw_set_src0(insn, src);
508   return insn;
509}
510
511static struct brw_instruction *brw_alu2(struct brw_compile *p,
512					GLuint opcode,
513					struct brw_reg dest,
514					struct brw_reg src0,
515					struct brw_reg src1 )
516{
517   struct brw_instruction *insn = next_insn(p, opcode);
518   brw_set_dest(insn, dest);
519   brw_set_src0(insn, src0);
520   brw_set_src1(insn, src1);
521   return insn;
522}
523
524
525/***********************************************************************
526 * Convenience routines.
527 */
/* ALU1(OP) defines the public emitter brw_<OP>(p, dest, src0) for a
 * one-source opcode; it forwards to brw_alu1() with BRW_OPCODE_<OP>.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* ALU2(OP) defines the public emitter brw_<OP>(p, dest, src0, src1) for
 * a two-source opcode; it forwards to brw_alu2() with BRW_OPCODE_<OP>.
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}


/* Instantiate one emitter per opcode. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
570
571
572
573
574void brw_NOP(struct brw_compile *p)
575{
576   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
577   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
578   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
579   brw_set_src1(insn, brw_imm_ud(0x0));
580}
581
582
583
584
585
586/***********************************************************************
587 * Comparisons, if/else/endif
588 */
589
590struct brw_instruction *brw_JMPI(struct brw_compile *p,
591                                 struct brw_reg dest,
592                                 struct brw_reg src0,
593                                 struct brw_reg src1)
594{
595   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
596
597   insn->header.execution_size = 1;
598   insn->header.compression_control = BRW_COMPRESSION_NONE;
599   insn->header.mask_control = BRW_MASK_DISABLE;
600
601   p->current->header.predicate_control = BRW_PREDICATE_NONE;
602
603   return insn;
604}
605
606/* EU takes the value from the flag register and pushes it onto some
607 * sort of a stack (presumably merging with any flag value already on
608 * the stack).  Within an if block, the flags at the top of the stack
609 * control execution on each channel of the unit, eg. on each of the
610 * 16 pixel values in our wm programs.
611 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
615 *
616 * When the matching 'endif' instruction is reached, the flags are
617 * popped off.  If the stack is now empty, normal execution resumes.
618 *
619 * No attempt is made to deal with stack overflow (14 elements?).
620 */
621struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
622{
623   struct brw_instruction *insn;
624
625   if (p->single_program_flow) {
626      assert(execute_size == BRW_EXECUTE_1);
627
628      insn = next_insn(p, BRW_OPCODE_ADD);
629      insn->header.predicate_inverse = 1;
630   } else {
631      insn = next_insn(p, BRW_OPCODE_IF);
632   }
633
634   /* Override the defaults for this instruction:
635    */
636   brw_set_dest(insn, brw_ip_reg());
637   brw_set_src0(insn, brw_ip_reg());
638   brw_set_src1(insn, brw_imm_d(0x0));
639
640   insn->header.execution_size = execute_size;
641   insn->header.compression_control = BRW_COMPRESSION_NONE;
642   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
643   insn->header.mask_control = BRW_MASK_ENABLE;
644   if (!p->single_program_flow)
645       insn->header.thread_control = BRW_THREAD_SWITCH;
646
647   p->current->header.predicate_control = BRW_PREDICATE_NONE;
648
649   return insn;
650}
651
652
653struct brw_instruction *brw_ELSE(struct brw_compile *p,
654				 struct brw_instruction *if_insn)
655{
656   struct intel_context *intel = &p->brw->intel;
657   struct brw_instruction *insn;
658   GLuint br = 1;
659
660   if (intel->is_ironlake)
661      br = 2;
662
663   if (p->single_program_flow) {
664      insn = next_insn(p, BRW_OPCODE_ADD);
665   } else {
666      insn = next_insn(p, BRW_OPCODE_ELSE);
667   }
668
669   brw_set_dest(insn, brw_ip_reg());
670   brw_set_src0(insn, brw_ip_reg());
671   brw_set_src1(insn, brw_imm_d(0x0));
672
673   insn->header.compression_control = BRW_COMPRESSION_NONE;
674   insn->header.execution_size = if_insn->header.execution_size;
675   insn->header.mask_control = BRW_MASK_ENABLE;
676   if (!p->single_program_flow)
677       insn->header.thread_control = BRW_THREAD_SWITCH;
678
679   /* Patch the if instruction to point at this instruction.
680    */
681   if (p->single_program_flow) {
682      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
683
684      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
685   } else {
686      assert(if_insn->header.opcode == BRW_OPCODE_IF);
687
688      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
689      if_insn->bits3.if_else.pop_count = 0;
690      if_insn->bits3.if_else.pad0 = 0;
691   }
692
693   return insn;
694}
695
696void brw_ENDIF(struct brw_compile *p,
697	       struct brw_instruction *patch_insn)
698{
699   struct intel_context *intel = &p->brw->intel;
700   GLuint br = 1;
701
702   if (intel->is_ironlake)
703      br = 2;
704
705   if (p->single_program_flow) {
706      /* In single program flow mode, there's no need to execute an ENDIF,
707       * since we don't need to do any stack operations, and if we're executing
708       * currently, we want to just continue executing.
709       */
710      struct brw_instruction *next = &p->store[p->nr_insn];
711
712      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
713
714      patch_insn->bits3.ud = (next - patch_insn) * 16;
715   } else {
716      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
717
718      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
719      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
720      brw_set_src1(insn, brw_imm_d(0x0));
721
722      insn->header.compression_control = BRW_COMPRESSION_NONE;
723      insn->header.execution_size = patch_insn->header.execution_size;
724      insn->header.mask_control = BRW_MASK_ENABLE;
725      insn->header.thread_control = BRW_THREAD_SWITCH;
726
727      assert(patch_insn->bits3.if_else.jump_count == 0);
728
729      /* Patch the if or else instructions to point at this or the next
730       * instruction respectively.
731       */
732      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
733	 /* Automagically turn it into an IFF:
734	  */
735	 patch_insn->header.opcode = BRW_OPCODE_IFF;
736	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
737	 patch_insn->bits3.if_else.pop_count = 0;
738	 patch_insn->bits3.if_else.pad0 = 0;
739      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
740	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
741	 patch_insn->bits3.if_else.pop_count = 1;
742	 patch_insn->bits3.if_else.pad0 = 0;
743      } else {
744	 assert(0);
745      }
746
747      /* Also pop item off the stack in the endif instruction:
748       */
749      insn->bits3.if_else.jump_count = 0;
750      insn->bits3.if_else.pop_count = 1;
751      insn->bits3.if_else.pad0 = 0;
752   }
753}
754
755struct brw_instruction *brw_BREAK(struct brw_compile *p)
756{
757   struct brw_instruction *insn;
758   insn = next_insn(p, BRW_OPCODE_BREAK);
759   brw_set_dest(insn, brw_ip_reg());
760   brw_set_src0(insn, brw_ip_reg());
761   brw_set_src1(insn, brw_imm_d(0x0));
762   insn->header.compression_control = BRW_COMPRESSION_NONE;
763   insn->header.execution_size = BRW_EXECUTE_8;
764   /* insn->header.mask_control = BRW_MASK_DISABLE; */
765   insn->bits3.if_else.pad0 = 0;
766   return insn;
767}
768
769struct brw_instruction *brw_CONT(struct brw_compile *p)
770{
771   struct brw_instruction *insn;
772   insn = next_insn(p, BRW_OPCODE_CONTINUE);
773   brw_set_dest(insn, brw_ip_reg());
774   brw_set_src0(insn, brw_ip_reg());
775   brw_set_src1(insn, brw_imm_d(0x0));
776   insn->header.compression_control = BRW_COMPRESSION_NONE;
777   insn->header.execution_size = BRW_EXECUTE_8;
778   /* insn->header.mask_control = BRW_MASK_DISABLE; */
779   insn->bits3.if_else.pad0 = 0;
780   return insn;
781}
782
783/* DO/WHILE loop:
784 */
785struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
786{
787   if (p->single_program_flow) {
788      return &p->store[p->nr_insn];
789   } else {
790      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
791
792      /* Override the defaults for this instruction:
793       */
794      brw_set_dest(insn, brw_null_reg());
795      brw_set_src0(insn, brw_null_reg());
796      brw_set_src1(insn, brw_null_reg());
797
798      insn->header.compression_control = BRW_COMPRESSION_NONE;
799      insn->header.execution_size = execute_size;
800      insn->header.predicate_control = BRW_PREDICATE_NONE;
801      /* insn->header.mask_control = BRW_MASK_ENABLE; */
802      /* insn->header.mask_control = BRW_MASK_DISABLE; */
803
804      return insn;
805   }
806}
807
808
809
810struct brw_instruction *brw_WHILE(struct brw_compile *p,
811                                  struct brw_instruction *do_insn)
812{
813   struct intel_context *intel = &p->brw->intel;
814   struct brw_instruction *insn;
815   GLuint br = 1;
816
817   if (intel->is_ironlake)
818      br = 2;
819
820   if (p->single_program_flow)
821      insn = next_insn(p, BRW_OPCODE_ADD);
822   else
823      insn = next_insn(p, BRW_OPCODE_WHILE);
824
825   brw_set_dest(insn, brw_ip_reg());
826   brw_set_src0(insn, brw_ip_reg());
827   brw_set_src1(insn, brw_imm_d(0x0));
828
829   insn->header.compression_control = BRW_COMPRESSION_NONE;
830
831   if (p->single_program_flow) {
832      insn->header.execution_size = BRW_EXECUTE_1;
833
834      insn->bits3.d = (do_insn - insn) * 16;
835   } else {
836      insn->header.execution_size = do_insn->header.execution_size;
837
838      assert(do_insn->header.opcode == BRW_OPCODE_DO);
839      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
840      insn->bits3.if_else.pop_count = 0;
841      insn->bits3.if_else.pad0 = 0;
842   }
843
844/*    insn->header.mask_control = BRW_MASK_ENABLE; */
845
846   /* insn->header.mask_control = BRW_MASK_DISABLE; */
847   p->current->header.predicate_control = BRW_PREDICATE_NONE;
848   return insn;
849}
850
851
852/* FORWARD JUMPS:
853 */
854void brw_land_fwd_jump(struct brw_compile *p,
855		       struct brw_instruction *jmp_insn)
856{
857   struct intel_context *intel = &p->brw->intel;
858   struct brw_instruction *landing = &p->store[p->nr_insn];
859   GLuint jmpi = 1;
860
861   if (intel->is_ironlake)
862       jmpi = 2;
863
864   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
865   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
866
867   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
868}
869
870
871
872/* To integrate with the above, it makes sense that the comparison
873 * instruction should populate the flag register.  It might be simpler
874 * just to use the flag reg for most WM tasks?
875 */
876void brw_CMP(struct brw_compile *p,
877	     struct brw_reg dest,
878	     GLuint conditional,
879	     struct brw_reg src0,
880	     struct brw_reg src1)
881{
882   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
883
884   insn->header.destreg__conditionalmod = conditional;
885   brw_set_dest(insn, dest);
886   brw_set_src0(insn, src0);
887   brw_set_src1(insn, src1);
888
889/*    guess_execution_size(insn, src0); */
890
891
892   /* Make it so that future instructions will use the computed flag
893    * value until brw_set_predicate_control_flag_value() is called
894    * again.
895    */
896   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
897       dest.nr == 0) {
898      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
899      p->flag_value = 0xff;
900   }
901}
902
903
904
905/***********************************************************************
906 * Helpers for the various SEND message types:
907 */
908
909/** Extended math function, float[8].
910 */
911void brw_math( struct brw_compile *p,
912	       struct brw_reg dest,
913	       GLuint function,
914	       GLuint saturate,
915	       GLuint msg_reg_nr,
916	       struct brw_reg src,
917	       GLuint data_type,
918	       GLuint precision )
919{
920   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
921   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
922   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
923
924   /* Example code doesn't set predicate_control for send
925    * instructions.
926    */
927   insn->header.predicate_control = 0;
928   insn->header.destreg__conditionalmod = msg_reg_nr;
929
930   brw_set_dest(insn, dest);
931   brw_set_src0(insn, src);
932   brw_set_math_message(p->brw,
933			insn,
934			msg_length, response_length,
935			function,
936			BRW_MATH_INTEGER_UNSIGNED,
937			precision,
938			saturate,
939			data_type);
940}
941
942/**
943 * Extended math function, float[16].
944 * Use 2 send instructions.
945 */
946void brw_math_16( struct brw_compile *p,
947		  struct brw_reg dest,
948		  GLuint function,
949		  GLuint saturate,
950		  GLuint msg_reg_nr,
951		  struct brw_reg src,
952		  GLuint precision )
953{
954   struct brw_instruction *insn;
955   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
956   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
957
958   /* First instruction:
959    */
960   brw_push_insn_state(p);
961   brw_set_predicate_control_flag_value(p, 0xff);
962   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
963
964   insn = next_insn(p, BRW_OPCODE_SEND);
965   insn->header.destreg__conditionalmod = msg_reg_nr;
966
967   brw_set_dest(insn, dest);
968   brw_set_src0(insn, src);
969   brw_set_math_message(p->brw,
970			insn,
971			msg_length, response_length,
972			function,
973			BRW_MATH_INTEGER_UNSIGNED,
974			precision,
975			saturate,
976			BRW_MATH_DATA_VECTOR);
977
978   /* Second instruction:
979    */
980   insn = next_insn(p, BRW_OPCODE_SEND);
981   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
982   insn->header.destreg__conditionalmod = msg_reg_nr+1;
983
984   brw_set_dest(insn, offset(dest,1));
985   brw_set_src0(insn, src);
986   brw_set_math_message(p->brw,
987			insn,
988			msg_length, response_length,
989			function,
990			BRW_MATH_INTEGER_UNSIGNED,
991			precision,
992			saturate,
993			BRW_MATH_DATA_VECTOR);
994
995   brw_pop_insn_state(p);
996}
997
998
999/**
1000 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1001 * Scratch offset should be a multiple of 64.
1002 * Used for register spilling.
1003 */
1004void brw_dp_WRITE_16( struct brw_compile *p,
1005		      struct brw_reg src,
1006		      GLuint scratch_offset )
1007{
1008   GLuint msg_reg_nr = 1;
1009   {
1010      brw_push_insn_state(p);
1011      brw_set_mask_control(p, BRW_MASK_DISABLE);
1012      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1013
1014      /* set message header global offset field (reg 0, element 2) */
1015      brw_MOV(p,
1016	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1017	      brw_imm_d(scratch_offset));
1018
1019      brw_pop_insn_state(p);
1020   }
1021
1022   {
1023      GLuint msg_length = 3;
1024      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1025      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1026
1027      insn->header.predicate_control = 0; /* XXX */
1028      insn->header.compression_control = BRW_COMPRESSION_NONE;
1029      insn->header.destreg__conditionalmod = msg_reg_nr;
1030
1031      brw_set_dest(insn, dest);
1032      brw_set_src0(insn, src);
1033
1034      brw_set_dp_write_message(p->brw,
1035			       insn,
1036			       255, /* binding table index (255=stateless) */
1037			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1038			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1039			       msg_length,
1040			       0, /* pixel scoreboard */
1041			       0, /* response_length */
1042			       0); /* eot */
1043   }
1044}
1045
1046
1047/**
1048 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1049 * Scratch offset should be a multiple of 64.
1050 * Used for register spilling.
1051 */
1052void brw_dp_READ_16( struct brw_compile *p,
1053		      struct brw_reg dest,
1054		      GLuint scratch_offset )
1055{
1056   GLuint msg_reg_nr = 1;
1057   {
1058      brw_push_insn_state(p);
1059      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1060      brw_set_mask_control(p, BRW_MASK_DISABLE);
1061
1062      /* set message header global offset field (reg 0, element 2) */
1063      brw_MOV(p,
1064	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1065	      brw_imm_d(scratch_offset));
1066
1067      brw_pop_insn_state(p);
1068   }
1069
1070   {
1071      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1072
1073      insn->header.predicate_control = 0; /* XXX */
1074      insn->header.compression_control = BRW_COMPRESSION_NONE;
1075      insn->header.destreg__conditionalmod = msg_reg_nr;
1076
1077      brw_set_dest(insn, dest);	/* UW? */
1078      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1079
1080      brw_set_dp_read_message(p->brw,
1081			      insn,
1082			      255, /* binding table index (255=stateless) */
1083			      3,  /* msg_control (3 means 4 Owords) */
1084			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1085			      1, /* target cache (render/scratch) */
1086			      1, /* msg_length */
1087			      2, /* response_length */
1088			      0); /* eot */
1089   }
1090}
1091
1092
1093/**
1094 * Read a float[4] vector from the data port Data Cache (const buffer).
1095 * Location (in buffer) should be a multiple of 16.
1096 * Used for fetching shader constants.
1097 * If relAddr is true, we'll do an indirect fetch using the address register.
1098 */
1099void brw_dp_READ_4( struct brw_compile *p,
1100                    struct brw_reg dest,
1101                    GLboolean relAddr,
1102                    GLuint location,
1103                    GLuint bind_table_index )
1104{
1105   /* XXX: relAddr not implemented */
1106   GLuint msg_reg_nr = 1;
1107   {
1108      struct brw_reg b;
1109      brw_push_insn_state(p);
1110      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1111      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1112      brw_set_mask_control(p, BRW_MASK_DISABLE);
1113
1114   /* Setup MRF[1] with location/offset into const buffer */
1115      b = brw_message_reg(msg_reg_nr);
1116      b = retype(b, BRW_REGISTER_TYPE_UD);
1117      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1118       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1119       */
1120      brw_MOV(p, b, brw_imm_ud(location));
1121      brw_pop_insn_state(p);
1122   }
1123
1124   {
1125      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1126
1127      insn->header.predicate_control = BRW_PREDICATE_NONE;
1128      insn->header.compression_control = BRW_COMPRESSION_NONE;
1129      insn->header.destreg__conditionalmod = msg_reg_nr;
1130      insn->header.mask_control = BRW_MASK_DISABLE;
1131
1132      /* cast dest to a uword[8] vector */
1133      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1134
1135      brw_set_dest(insn, dest);
1136      brw_set_src0(insn, brw_null_reg());
1137
1138      brw_set_dp_read_message(p->brw,
1139			      insn,
1140			      bind_table_index,
1141			      0,  /* msg_control (0 means 1 Oword) */
1142			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1143			      0, /* source cache = data cache */
1144			      1, /* msg_length */
1145			      1, /* response_length (1 Oword) */
1146			      0); /* eot */
1147   }
1148}
1149
1150
1151/**
1152 * Read float[4] constant(s) from VS constant buffer.
1153 * For relative addressing, two float[4] constants will be read into 'dest'.
1154 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1155 */
1156void brw_dp_READ_4_vs(struct brw_compile *p,
1157                      struct brw_reg dest,
1158                      GLuint oword,
1159                      GLboolean relAddr,
1160                      struct brw_reg addrReg,
1161                      GLuint location,
1162                      GLuint bind_table_index)
1163{
1164   GLuint msg_reg_nr = 1;
1165
1166   assert(oword < 2);
1167   /*
1168   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1169          location, msg_reg_nr);
1170   */
1171
1172   /* Setup MRF[1] with location/offset into const buffer */
1173   {
1174      struct brw_reg b;
1175
1176      brw_push_insn_state(p);
1177      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1178      brw_set_mask_control(p, BRW_MASK_DISABLE);
1179      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1180      /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1181
1182      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1183       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1184       */
1185      b = brw_message_reg(msg_reg_nr);
1186      b = retype(b, BRW_REGISTER_TYPE_UD);
1187      /*b = get_element_ud(b, 2);*/
1188      if (relAddr) {
1189         brw_ADD(p, b, addrReg, brw_imm_ud(location));
1190      }
1191      else {
1192         brw_MOV(p, b, brw_imm_ud(location));
1193      }
1194
1195      brw_pop_insn_state(p);
1196   }
1197
1198   {
1199      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1200
1201      insn->header.predicate_control = BRW_PREDICATE_NONE;
1202      insn->header.compression_control = BRW_COMPRESSION_NONE;
1203      insn->header.destreg__conditionalmod = msg_reg_nr;
1204      insn->header.mask_control = BRW_MASK_DISABLE;
1205      /*insn->header.access_mode = BRW_ALIGN_16;*/
1206
1207      brw_set_dest(insn, dest);
1208      brw_set_src0(insn, brw_null_reg());
1209
1210      brw_set_dp_read_message(p->brw,
1211			      insn,
1212			      bind_table_index,
1213			      oword,  /* 0 = lower Oword, 1 = upper Oword */
1214			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1215			      0, /* source cache = data cache */
1216			      1, /* msg_length */
1217			      1, /* response_length (1 Oword) */
1218			      0); /* eot */
1219   }
1220}
1221
1222
1223
1224void brw_fb_WRITE(struct brw_compile *p,
1225                  struct brw_reg dest,
1226                  GLuint msg_reg_nr,
1227                  struct brw_reg src0,
1228                  GLuint binding_table_index,
1229                  GLuint msg_length,
1230                  GLuint response_length,
1231                  GLboolean eot)
1232{
1233   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1234
1235   insn->header.predicate_control = 0; /* XXX */
1236   insn->header.compression_control = BRW_COMPRESSION_NONE;
1237   insn->header.destreg__conditionalmod = msg_reg_nr;
1238
1239   brw_set_dest(insn, dest);
1240   brw_set_src0(insn, src0);
1241   brw_set_dp_write_message(p->brw,
1242			    insn,
1243			    binding_table_index,
1244			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1245			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1246			    msg_length,
1247			    1,	/* pixel scoreboard */
1248			    response_length,
1249			    eot);
1250}
1251
1252
1253/**
1254 * Texture sample instruction.
1255 * Note: the msg_type plus msg_length values determine exactly what kind
1256 * of sampling operation is performed.  See volume 4, page 161 of docs.
1257 */
1258void brw_SAMPLE(struct brw_compile *p,
1259		struct brw_reg dest,
1260		GLuint msg_reg_nr,
1261		struct brw_reg src0,
1262		GLuint binding_table_index,
1263		GLuint sampler,
1264		GLuint writemask,
1265		GLuint msg_type,
1266		GLuint response_length,
1267		GLuint msg_length,
1268		GLboolean eot,
1269		GLuint header_present,
1270		GLuint simd_mode)
1271{
1272   GLboolean need_stall = 0;
1273
1274   if (writemask == 0) {
1275      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1276      return;
1277   }
1278
1279   /* Hardware doesn't do destination dependency checking on send
1280    * instructions properly.  Add a workaround which generates the
1281    * dependency by other means.  In practice it seems like this bug
1282    * only crops up for texture samples, and only where registers are
1283    * written by the send and then written again later without being
1284    * read in between.  Luckily for us, we already track that
1285    * information and use it to modify the writemask for the
1286    * instruction, so that is a guide for whether a workaround is
1287    * needed.
1288    */
1289   if (writemask != WRITEMASK_XYZW) {
1290      GLuint dst_offset = 0;
1291      GLuint i, newmask = 0, len = 0;
1292
1293      for (i = 0; i < 4; i++) {
1294	 if (writemask & (1<<i))
1295	    break;
1296	 dst_offset += 2;
1297      }
1298      for (; i < 4; i++) {
1299	 if (!(writemask & (1<<i)))
1300	    break;
1301	 newmask |= 1<<i;
1302	 len++;
1303      }
1304
1305      if (newmask != writemask) {
1306	 need_stall = 1;
1307         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1308      }
1309      else {
1310	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1311
1312	 newmask = ~newmask & WRITEMASK_XYZW;
1313
1314	 brw_push_insn_state(p);
1315
1316	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1317	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1318
1319	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1320  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1321
1322	 brw_pop_insn_state(p);
1323
1324  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1325	 dest = offset(dest, dst_offset);
1326	 response_length = len * 2;
1327      }
1328   }
1329
1330   {
1331      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1332
1333      insn->header.predicate_control = 0; /* XXX */
1334      insn->header.compression_control = BRW_COMPRESSION_NONE;
1335      insn->header.destreg__conditionalmod = msg_reg_nr;
1336
1337      brw_set_dest(insn, dest);
1338      brw_set_src0(insn, src0);
1339      brw_set_sampler_message(p->brw, insn,
1340			      binding_table_index,
1341			      sampler,
1342			      msg_type,
1343			      response_length,
1344			      msg_length,
1345			      eot,
1346			      header_present,
1347			      simd_mode);
1348   }
1349
1350   if (need_stall) {
1351      struct brw_reg reg = vec8(offset(dest, response_length-1));
1352
1353      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1354       */
1355      brw_push_insn_state(p);
1356      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1357      brw_MOV(p, reg, reg);
1358      brw_pop_insn_state(p);
1359   }
1360
1361}
1362
1363/* All these variables are pretty confusing - we might be better off
1364 * using bitmasks and macros for this, in the old style.  Or perhaps
1365 * just having the caller instantiate the fields in dword3 itself.
1366 */
1367void brw_urb_WRITE(struct brw_compile *p,
1368		   struct brw_reg dest,
1369		   GLuint msg_reg_nr,
1370		   struct brw_reg src0,
1371		   GLboolean allocate,
1372		   GLboolean used,
1373		   GLuint msg_length,
1374		   GLuint response_length,
1375		   GLboolean eot,
1376		   GLboolean writes_complete,
1377		   GLuint offset,
1378		   GLuint swizzle)
1379{
1380   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1381
1382   assert(msg_length < BRW_MAX_MRF);
1383
1384   brw_set_dest(insn, dest);
1385   brw_set_src0(insn, src0);
1386   brw_set_src1(insn, brw_imm_d(0));
1387
1388   insn->header.destreg__conditionalmod = msg_reg_nr;
1389
1390   brw_set_urb_message(p->brw,
1391		       insn,
1392		       allocate,
1393		       used,
1394		       msg_length,
1395		       response_length,
1396		       eot,
1397		       writes_complete,
1398		       offset,
1399		       swizzle);
1400}
1401
1402void brw_ff_sync(struct brw_compile *p,
1403		   struct brw_reg dest,
1404		   GLuint msg_reg_nr,
1405		   struct brw_reg src0,
1406		   GLboolean allocate,
1407		   GLboolean used,
1408		   GLuint msg_length,
1409		   GLuint response_length,
1410		   GLboolean eot,
1411		   GLboolean writes_complete,
1412		   GLuint offset,
1413		   GLuint swizzle)
1414{
1415   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1416
1417   assert(msg_length < 16);
1418
1419   brw_set_dest(insn, dest);
1420   brw_set_src0(insn, src0);
1421   brw_set_src1(insn, brw_imm_d(0));
1422
1423   insn->header.destreg__conditionalmod = msg_reg_nr;
1424
1425   brw_set_ff_sync_message(p->brw,
1426		       insn,
1427		       allocate,
1428		       used,
1429		       msg_length,
1430		       response_length,
1431		       eot,
1432		       writes_complete,
1433		       offset,
1434		       swizzle);
1435}
1436