brw_eu_emit.c revision 38c449409207c8948c1961a3132475bbd422f8f1
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59       dest.file != BRW_MESSAGE_REGISTER_FILE)
60      assert(dest.nr < 128);
61
62   insn->bits1.da1.dest_reg_file = dest.file;
63   insn->bits1.da1.dest_reg_type = dest.type;
64   insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67      insn->bits1.da1.dest_reg_nr = dest.nr;
68
69      if (insn->header.access_mode == BRW_ALIGN_1) {
70	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74      }
75      else {
76	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78      }
79   }
80   else {
81      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
82
83      /* These are different sizes in align1 vs align16:
84       */
85      if (insn->header.access_mode == BRW_ALIGN_1) {
86	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
87	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
88	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
89	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
90      }
91      else {
92	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
93      }
94   }
95
96   /* NEW: Set the execution size based on dest.width and
97    * insn->compression_control:
98    */
99   guess_execution_size(insn, dest);
100}
101
102static void brw_set_src0( struct brw_instruction *insn,
103                          struct brw_reg reg )
104{
105   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
106
107   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
108      assert(reg.nr < 128);
109
110   insn->bits1.da1.src0_reg_file = reg.file;
111   insn->bits1.da1.src0_reg_type = reg.type;
112   insn->bits2.da1.src0_abs = reg.abs;
113   insn->bits2.da1.src0_negate = reg.negate;
114   insn->bits2.da1.src0_address_mode = reg.address_mode;
115
116   if (reg.file == BRW_IMMEDIATE_VALUE) {
117      insn->bits3.ud = reg.dw1.ud;
118
119      /* Required to set some fields in src1 as well:
120       */
121      insn->bits1.da1.src1_reg_file = 0; /* arf */
122      insn->bits1.da1.src1_reg_type = reg.type;
123   }
124   else
125   {
126      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
127	 if (insn->header.access_mode == BRW_ALIGN_1) {
128	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
129	    insn->bits2.da1.src0_reg_nr = reg.nr;
130	 }
131	 else {
132	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
133	    insn->bits2.da16.src0_reg_nr = reg.nr;
134	 }
135      }
136      else {
137	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
138
139	 if (insn->header.access_mode == BRW_ALIGN_1) {
140	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
141	 }
142	 else {
143	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
144	 }
145      }
146
147      if (insn->header.access_mode == BRW_ALIGN_1) {
148	 if (reg.width == BRW_WIDTH_1 &&
149	     insn->header.execution_size == BRW_EXECUTE_1) {
150	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
151	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
152	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
153	 }
154	 else {
155	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
156	    insn->bits2.da1.src0_width = reg.width;
157	    insn->bits2.da1.src0_vert_stride = reg.vstride;
158	 }
159      }
160      else {
161	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
162	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
163	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
164	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
165
166	 /* This is an oddity of the fact we're using the same
167	  * descriptions for registers in align_16 as align_1:
168	  */
169	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
170	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
171	 else
172	    insn->bits2.da16.src0_vert_stride = reg.vstride;
173      }
174   }
175}
176
177
178void brw_set_src1( struct brw_instruction *insn,
179                   struct brw_reg reg )
180{
181   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
182
183   assert(reg.nr < 128);
184
185   insn->bits1.da1.src1_reg_file = reg.file;
186   insn->bits1.da1.src1_reg_type = reg.type;
187   insn->bits3.da1.src1_abs = reg.abs;
188   insn->bits3.da1.src1_negate = reg.negate;
189
190   /* Only src1 can be immediate in two-argument instructions.
191    */
192   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
193
194   if (reg.file == BRW_IMMEDIATE_VALUE) {
195      insn->bits3.ud = reg.dw1.ud;
196   }
197   else {
198      /* This is a hardware restriction, which may or may not be lifted
199       * in the future:
200       */
201      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
202      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
203
204      if (insn->header.access_mode == BRW_ALIGN_1) {
205	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
206	 insn->bits3.da1.src1_reg_nr = reg.nr;
207      }
208      else {
209	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
210	 insn->bits3.da16.src1_reg_nr = reg.nr;
211      }
212
213      if (insn->header.access_mode == BRW_ALIGN_1) {
214	 if (reg.width == BRW_WIDTH_1 &&
215	     insn->header.execution_size == BRW_EXECUTE_1) {
216	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
217	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
218	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
219	 }
220	 else {
221	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
222	    insn->bits3.da1.src1_width = reg.width;
223	    insn->bits3.da1.src1_vert_stride = reg.vstride;
224	 }
225      }
226      else {
227	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
228	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
229	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
230	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
231
232	 /* This is an oddity of the fact we're using the same
233	  * descriptions for registers in align_16 as align_1:
234	  */
235	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
236	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
237	 else
238	    insn->bits3.da16.src1_vert_stride = reg.vstride;
239      }
240   }
241}
242
243
244
245static void brw_set_math_message( struct brw_context *brw,
246				  struct brw_instruction *insn,
247				  GLuint msg_length,
248				  GLuint response_length,
249				  GLuint function,
250				  GLuint integer_type,
251				  GLboolean low_precision,
252				  GLboolean saturate,
253				  GLuint dataType )
254{
255   struct intel_context *intel = &brw->intel;
256   brw_set_src1(insn, brw_imm_d(0));
257
258   if (intel->is_ironlake) {
259       insn->bits3.math_igdng.function = function;
260       insn->bits3.math_igdng.int_type = integer_type;
261       insn->bits3.math_igdng.precision = low_precision;
262       insn->bits3.math_igdng.saturate = saturate;
263       insn->bits3.math_igdng.data_type = dataType;
264       insn->bits3.math_igdng.snapshot = 0;
265       insn->bits3.math_igdng.header_present = 0;
266       insn->bits3.math_igdng.response_length = response_length;
267       insn->bits3.math_igdng.msg_length = msg_length;
268       insn->bits3.math_igdng.end_of_thread = 0;
269       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
270       insn->bits2.send_igdng.end_of_thread = 0;
271   } else {
272       insn->bits3.math.function = function;
273       insn->bits3.math.int_type = integer_type;
274       insn->bits3.math.precision = low_precision;
275       insn->bits3.math.saturate = saturate;
276       insn->bits3.math.data_type = dataType;
277       insn->bits3.math.response_length = response_length;
278       insn->bits3.math.msg_length = msg_length;
279       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
280       insn->bits3.math.end_of_thread = 0;
281   }
282}
283
284
285static void brw_set_ff_sync_message( struct brw_context *brw,
286				 struct brw_instruction *insn,
287				 GLboolean allocate,
288				 GLboolean used,
289				 GLuint msg_length,
290				 GLuint response_length,
291				 GLboolean end_of_thread,
292				 GLboolean complete,
293				 GLuint offset,
294				 GLuint swizzle_control )
295{
296	brw_set_src1(insn, brw_imm_d(0));
297
298	insn->bits3.urb_igdng.opcode = 1;
299	insn->bits3.urb_igdng.offset = offset;
300	insn->bits3.urb_igdng.swizzle_control = swizzle_control;
301	insn->bits3.urb_igdng.allocate = allocate;
302	insn->bits3.urb_igdng.used = used;
303	insn->bits3.urb_igdng.complete = complete;
304	insn->bits3.urb_igdng.header_present = 1;
305	insn->bits3.urb_igdng.response_length = response_length;
306	insn->bits3.urb_igdng.msg_length = msg_length;
307	insn->bits3.urb_igdng.end_of_thread = end_of_thread;
308	insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
309	insn->bits2.send_igdng.end_of_thread = end_of_thread;
310}
311
312static void brw_set_urb_message( struct brw_context *brw,
313				 struct brw_instruction *insn,
314				 GLboolean allocate,
315				 GLboolean used,
316				 GLuint msg_length,
317				 GLuint response_length,
318				 GLboolean end_of_thread,
319				 GLboolean complete,
320				 GLuint offset,
321				 GLuint swizzle_control )
322{
323    struct intel_context *intel = &brw->intel;
324    brw_set_src1(insn, brw_imm_d(0));
325
326    if (intel->is_ironlake) {
327        insn->bits3.urb_igdng.opcode = 0;	/* ? */
328        insn->bits3.urb_igdng.offset = offset;
329        insn->bits3.urb_igdng.swizzle_control = swizzle_control;
330        insn->bits3.urb_igdng.allocate = allocate;
331        insn->bits3.urb_igdng.used = used;	/* ? */
332        insn->bits3.urb_igdng.complete = complete;
333        insn->bits3.urb_igdng.header_present = 1;
334        insn->bits3.urb_igdng.response_length = response_length;
335        insn->bits3.urb_igdng.msg_length = msg_length;
336        insn->bits3.urb_igdng.end_of_thread = end_of_thread;
337        insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
338        insn->bits2.send_igdng.end_of_thread = end_of_thread;
339    } else {
340        insn->bits3.urb.opcode = 0;	/* ? */
341        insn->bits3.urb.offset = offset;
342        insn->bits3.urb.swizzle_control = swizzle_control;
343        insn->bits3.urb.allocate = allocate;
344        insn->bits3.urb.used = used;	/* ? */
345        insn->bits3.urb.complete = complete;
346        insn->bits3.urb.response_length = response_length;
347        insn->bits3.urb.msg_length = msg_length;
348        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
349        insn->bits3.urb.end_of_thread = end_of_thread;
350    }
351}
352
353static void brw_set_dp_write_message( struct brw_context *brw,
354				      struct brw_instruction *insn,
355				      GLuint binding_table_index,
356				      GLuint msg_control,
357				      GLuint msg_type,
358				      GLuint msg_length,
359				      GLuint pixel_scoreboard_clear,
360				      GLuint response_length,
361				      GLuint end_of_thread )
362{
363   struct intel_context *intel = &brw->intel;
364   brw_set_src1(insn, brw_imm_d(0));
365
366   if (intel->is_ironlake) {
367       insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
368       insn->bits3.dp_write_igdng.msg_control = msg_control;
369       insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
370       insn->bits3.dp_write_igdng.msg_type = msg_type;
371       insn->bits3.dp_write_igdng.send_commit_msg = 0;
372       insn->bits3.dp_write_igdng.header_present = 1;
373       insn->bits3.dp_write_igdng.response_length = response_length;
374       insn->bits3.dp_write_igdng.msg_length = msg_length;
375       insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
376       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
377       insn->bits2.send_igdng.end_of_thread = end_of_thread;
378   } else {
379       insn->bits3.dp_write.binding_table_index = binding_table_index;
380       insn->bits3.dp_write.msg_control = msg_control;
381       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
382       insn->bits3.dp_write.msg_type = msg_type;
383       insn->bits3.dp_write.send_commit_msg = 0;
384       insn->bits3.dp_write.response_length = response_length;
385       insn->bits3.dp_write.msg_length = msg_length;
386       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
387       insn->bits3.dp_write.end_of_thread = end_of_thread;
388   }
389}
390
391static void brw_set_dp_read_message( struct brw_context *brw,
392				      struct brw_instruction *insn,
393				      GLuint binding_table_index,
394				      GLuint msg_control,
395				      GLuint msg_type,
396				      GLuint target_cache,
397				      GLuint msg_length,
398				      GLuint response_length,
399				      GLuint end_of_thread )
400{
401   struct intel_context *intel = &brw->intel;
402   brw_set_src1(insn, brw_imm_d(0));
403
404   if (intel->is_ironlake) {
405       insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
406       insn->bits3.dp_read_igdng.msg_control = msg_control;
407       insn->bits3.dp_read_igdng.msg_type = msg_type;
408       insn->bits3.dp_read_igdng.target_cache = target_cache;
409       insn->bits3.dp_read_igdng.header_present = 1;
410       insn->bits3.dp_read_igdng.response_length = response_length;
411       insn->bits3.dp_read_igdng.msg_length = msg_length;
412       insn->bits3.dp_read_igdng.pad1 = 0;
413       insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
414       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
415       insn->bits2.send_igdng.end_of_thread = end_of_thread;
416   } else {
417       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
418       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
419       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
420       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
421       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
422       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
423       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
424       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
425       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
426   }
427}
428
429static void brw_set_sampler_message(struct brw_context *brw,
430                                    struct brw_instruction *insn,
431                                    GLuint binding_table_index,
432                                    GLuint sampler,
433                                    GLuint msg_type,
434                                    GLuint response_length,
435                                    GLuint msg_length,
436                                    GLboolean eot,
437                                    GLuint header_present,
438                                    GLuint simd_mode)
439{
440   struct intel_context *intel = &brw->intel;
441   assert(eot == 0);
442   brw_set_src1(insn, brw_imm_d(0));
443
444   if (intel->is_ironlake) {
445      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
446      insn->bits3.sampler_igdng.sampler = sampler;
447      insn->bits3.sampler_igdng.msg_type = msg_type;
448      insn->bits3.sampler_igdng.simd_mode = simd_mode;
449      insn->bits3.sampler_igdng.header_present = header_present;
450      insn->bits3.sampler_igdng.response_length = response_length;
451      insn->bits3.sampler_igdng.msg_length = msg_length;
452      insn->bits3.sampler_igdng.end_of_thread = eot;
453      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
454      insn->bits2.send_igdng.end_of_thread = eot;
455   } else if (intel->is_g4x) {
456      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
457      insn->bits3.sampler_g4x.sampler = sampler;
458      insn->bits3.sampler_g4x.msg_type = msg_type;
459      insn->bits3.sampler_g4x.response_length = response_length;
460      insn->bits3.sampler_g4x.msg_length = msg_length;
461      insn->bits3.sampler_g4x.end_of_thread = eot;
462      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
463   } else {
464      insn->bits3.sampler.binding_table_index = binding_table_index;
465      insn->bits3.sampler.sampler = sampler;
466      insn->bits3.sampler.msg_type = msg_type;
467      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
468      insn->bits3.sampler.response_length = response_length;
469      insn->bits3.sampler.msg_length = msg_length;
470      insn->bits3.sampler.end_of_thread = eot;
471      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
472   }
473}
474
475
476
477static struct brw_instruction *next_insn( struct brw_compile *p,
478					  GLuint opcode )
479{
480   struct brw_instruction *insn;
481
482   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
483
484   insn = &p->store[p->nr_insn++];
485   memcpy(insn, p->current, sizeof(*insn));
486
487   /* Reset this one-shot flag:
488    */
489
490   if (p->current->header.destreg__conditionalmod) {
491      p->current->header.destreg__conditionalmod = 0;
492      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
493   }
494
495   insn->header.opcode = opcode;
496   return insn;
497}
498
499
500static struct brw_instruction *brw_alu1( struct brw_compile *p,
501					 GLuint opcode,
502					 struct brw_reg dest,
503					 struct brw_reg src )
504{
505   struct brw_instruction *insn = next_insn(p, opcode);
506   brw_set_dest(insn, dest);
507   brw_set_src0(insn, src);
508   return insn;
509}
510
511static struct brw_instruction *brw_alu2(struct brw_compile *p,
512					GLuint opcode,
513					struct brw_reg dest,
514					struct brw_reg src0,
515					struct brw_reg src1 )
516{
517   struct brw_instruction *insn = next_insn(p, opcode);
518   brw_set_dest(insn, dest);
519   brw_set_src0(insn, src0);
520   brw_set_src1(insn, src1);
521   return insn;
522}
523
524
525/***********************************************************************
526 * Convenience routines.
527 */
/* ALU1(OP) defines the public emitter brw_<OP>(p, dest, src0), which
 * forwards to brw_alu1() with opcode BRW_OPCODE_<OP> and returns the
 * emitted instruction.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* ALU2(OP) defines the public emitter brw_<OP>(p, dest, src0, src1),
 * which forwards to brw_alu2() with opcode BRW_OPCODE_<OP> and returns
 * the emitted instruction.
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}


/* Instantiate one emitter per opcode; the macro used fixes the number
 * of source operands the generated function takes.
 */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
570
571
572
573
574void brw_NOP(struct brw_compile *p)
575{
576   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
577   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
578   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
579   brw_set_src1(insn, brw_imm_ud(0x0));
580}
581
582
583
584
585
586/***********************************************************************
587 * Comparisons, if/else/endif
588 */
589
590struct brw_instruction *brw_JMPI(struct brw_compile *p,
591                                 struct brw_reg dest,
592                                 struct brw_reg src0,
593                                 struct brw_reg src1)
594{
595   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
596
597   insn->header.execution_size = 1;
598   insn->header.compression_control = BRW_COMPRESSION_NONE;
599   insn->header.mask_control = BRW_MASK_DISABLE;
600
601   p->current->header.predicate_control = BRW_PREDICATE_NONE;
602
603   return insn;
604}
605
606/* EU takes the value from the flag register and pushes it onto some
607 * sort of a stack (presumably merging with any flag value already on
608 * the stack).  Within an if block, the flags at the top of the stack
609 * control execution on each channel of the unit, eg. on each of the
610 * 16 pixel values in our wm programs.
611 *
612 * When the matching 'else' instruction is reached (presumably by
613 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
615 *
616 * When the matching 'endif' instruction is reached, the flags are
617 * popped off.  If the stack is now empty, normal execution resumes.
618 *
619 * No attempt is made to deal with stack overflow (14 elements?).
620 */
621struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
622{
623   struct brw_instruction *insn;
624
625   if (p->single_program_flow) {
626      assert(execute_size == BRW_EXECUTE_1);
627
628      insn = next_insn(p, BRW_OPCODE_ADD);
629      insn->header.predicate_inverse = 1;
630   } else {
631      insn = next_insn(p, BRW_OPCODE_IF);
632   }
633
634   /* Override the defaults for this instruction:
635    */
636   brw_set_dest(insn, brw_ip_reg());
637   brw_set_src0(insn, brw_ip_reg());
638   brw_set_src1(insn, brw_imm_d(0x0));
639
640   insn->header.execution_size = execute_size;
641   insn->header.compression_control = BRW_COMPRESSION_NONE;
642   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
643   insn->header.mask_control = BRW_MASK_ENABLE;
644   if (!p->single_program_flow)
645       insn->header.thread_control = BRW_THREAD_SWITCH;
646
647   p->current->header.predicate_control = BRW_PREDICATE_NONE;
648
649   return insn;
650}
651
652
653struct brw_instruction *brw_ELSE(struct brw_compile *p,
654				 struct brw_instruction *if_insn)
655{
656   struct intel_context *intel = &p->brw->intel;
657   struct brw_instruction *insn;
658   GLuint br = 1;
659
660   if (intel->is_ironlake)
661      br = 2;
662
663   if (p->single_program_flow) {
664      insn = next_insn(p, BRW_OPCODE_ADD);
665   } else {
666      insn = next_insn(p, BRW_OPCODE_ELSE);
667   }
668
669   brw_set_dest(insn, brw_ip_reg());
670   brw_set_src0(insn, brw_ip_reg());
671   brw_set_src1(insn, brw_imm_d(0x0));
672
673   insn->header.compression_control = BRW_COMPRESSION_NONE;
674   insn->header.execution_size = if_insn->header.execution_size;
675   insn->header.mask_control = BRW_MASK_ENABLE;
676   if (!p->single_program_flow)
677       insn->header.thread_control = BRW_THREAD_SWITCH;
678
679   /* Patch the if instruction to point at this instruction.
680    */
681   if (p->single_program_flow) {
682      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
683
684      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
685   } else {
686      assert(if_insn->header.opcode == BRW_OPCODE_IF);
687
688      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
689      if_insn->bits3.if_else.pop_count = 0;
690      if_insn->bits3.if_else.pad0 = 0;
691   }
692
693   return insn;
694}
695
696void brw_ENDIF(struct brw_compile *p,
697	       struct brw_instruction *patch_insn)
698{
699   struct intel_context *intel = &p->brw->intel;
700   GLuint br = 1;
701
702   if (intel->is_ironlake)
703      br = 2;
704
705   if (p->single_program_flow) {
706      /* In single program flow mode, there's no need to execute an ENDIF,
707       * since we don't need to do any stack operations, and if we're executing
708       * currently, we want to just continue executing.
709       */
710      struct brw_instruction *next = &p->store[p->nr_insn];
711
712      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
713
714      patch_insn->bits3.ud = (next - patch_insn) * 16;
715   } else {
716      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
717
718      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
719      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
720      brw_set_src1(insn, brw_imm_d(0x0));
721
722      insn->header.compression_control = BRW_COMPRESSION_NONE;
723      insn->header.execution_size = patch_insn->header.execution_size;
724      insn->header.mask_control = BRW_MASK_ENABLE;
725      insn->header.thread_control = BRW_THREAD_SWITCH;
726
727      assert(patch_insn->bits3.if_else.jump_count == 0);
728
729      /* Patch the if or else instructions to point at this or the next
730       * instruction respectively.
731       */
732      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
733	 /* Automagically turn it into an IFF:
734	  */
735	 patch_insn->header.opcode = BRW_OPCODE_IFF;
736	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
737	 patch_insn->bits3.if_else.pop_count = 0;
738	 patch_insn->bits3.if_else.pad0 = 0;
739      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
740	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
741	 patch_insn->bits3.if_else.pop_count = 1;
742	 patch_insn->bits3.if_else.pad0 = 0;
743      } else {
744	 assert(0);
745      }
746
747      /* Also pop item off the stack in the endif instruction:
748       */
749      insn->bits3.if_else.jump_count = 0;
750      insn->bits3.if_else.pop_count = 1;
751      insn->bits3.if_else.pad0 = 0;
752   }
753}
754
755struct brw_instruction *brw_BREAK(struct brw_compile *p)
756{
757   struct brw_instruction *insn;
758   insn = next_insn(p, BRW_OPCODE_BREAK);
759   brw_set_dest(insn, brw_ip_reg());
760   brw_set_src0(insn, brw_ip_reg());
761   brw_set_src1(insn, brw_imm_d(0x0));
762   insn->header.compression_control = BRW_COMPRESSION_NONE;
763   insn->header.execution_size = BRW_EXECUTE_8;
764   /* insn->header.mask_control = BRW_MASK_DISABLE; */
765   insn->bits3.if_else.pad0 = 0;
766   return insn;
767}
768
769struct brw_instruction *brw_CONT(struct brw_compile *p)
770{
771   struct brw_instruction *insn;
772   insn = next_insn(p, BRW_OPCODE_CONTINUE);
773   brw_set_dest(insn, brw_ip_reg());
774   brw_set_src0(insn, brw_ip_reg());
775   brw_set_src1(insn, brw_imm_d(0x0));
776   insn->header.compression_control = BRW_COMPRESSION_NONE;
777   insn->header.execution_size = BRW_EXECUTE_8;
778   /* insn->header.mask_control = BRW_MASK_DISABLE; */
779   insn->bits3.if_else.pad0 = 0;
780   return insn;
781}
782
783/* DO/WHILE loop:
784 */
785struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
786{
787   if (p->single_program_flow) {
788      return &p->store[p->nr_insn];
789   } else {
790      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
791
792      /* Override the defaults for this instruction:
793       */
794      brw_set_dest(insn, brw_null_reg());
795      brw_set_src0(insn, brw_null_reg());
796      brw_set_src1(insn, brw_null_reg());
797
798      insn->header.compression_control = BRW_COMPRESSION_NONE;
799      insn->header.execution_size = execute_size;
800      insn->header.predicate_control = BRW_PREDICATE_NONE;
801      /* insn->header.mask_control = BRW_MASK_ENABLE; */
802      /* insn->header.mask_control = BRW_MASK_DISABLE; */
803
804      return insn;
805   }
806}
807
808
809
810struct brw_instruction *brw_WHILE(struct brw_compile *p,
811                                  struct brw_instruction *do_insn)
812{
813   struct intel_context *intel = &p->brw->intel;
814   struct brw_instruction *insn;
815   GLuint br = 1;
816
817   if (intel->is_ironlake)
818      br = 2;
819
820   if (p->single_program_flow)
821      insn = next_insn(p, BRW_OPCODE_ADD);
822   else
823      insn = next_insn(p, BRW_OPCODE_WHILE);
824
825   brw_set_dest(insn, brw_ip_reg());
826   brw_set_src0(insn, brw_ip_reg());
827   brw_set_src1(insn, brw_imm_d(0x0));
828
829   insn->header.compression_control = BRW_COMPRESSION_NONE;
830
831   if (p->single_program_flow) {
832      insn->header.execution_size = BRW_EXECUTE_1;
833
834      insn->bits3.d = (do_insn - insn) * 16;
835   } else {
836      insn->header.execution_size = do_insn->header.execution_size;
837
838      assert(do_insn->header.opcode == BRW_OPCODE_DO);
839      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
840      insn->bits3.if_else.pop_count = 0;
841      insn->bits3.if_else.pad0 = 0;
842   }
843
844/*    insn->header.mask_control = BRW_MASK_ENABLE; */
845
846   /* insn->header.mask_control = BRW_MASK_DISABLE; */
847   p->current->header.predicate_control = BRW_PREDICATE_NONE;
848   return insn;
849}
850
851
852/* FORWARD JUMPS:
853 */
854void brw_land_fwd_jump(struct brw_compile *p,
855		       struct brw_instruction *jmp_insn)
856{
857   struct intel_context *intel = &p->brw->intel;
858   struct brw_instruction *landing = &p->store[p->nr_insn];
859   GLuint jmpi = 1;
860
861   if (intel->is_ironlake)
862       jmpi = 2;
863
864   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
865   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
866
867   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
868}
869
870
871
872/* To integrate with the above, it makes sense that the comparison
873 * instruction should populate the flag register.  It might be simpler
874 * just to use the flag reg for most WM tasks?
875 */
876void brw_CMP(struct brw_compile *p,
877	     struct brw_reg dest,
878	     GLuint conditional,
879	     struct brw_reg src0,
880	     struct brw_reg src1)
881{
882   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
883
884   insn->header.destreg__conditionalmod = conditional;
885   brw_set_dest(insn, dest);
886   brw_set_src0(insn, src0);
887   brw_set_src1(insn, src1);
888
889/*    guess_execution_size(insn, src0); */
890
891
892   /* Make it so that future instructions will use the computed flag
893    * value until brw_set_predicate_control_flag_value() is called
894    * again.
895    */
896   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
897       dest.nr == 0) {
898      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
899      p->flag_value = 0xff;
900   }
901}
902
903
904
905/***********************************************************************
906 * Helpers for the various SEND message types:
907 */
908
909/** Extended math function, float[8].
910 */
911void brw_math( struct brw_compile *p,
912	       struct brw_reg dest,
913	       GLuint function,
914	       GLuint saturate,
915	       GLuint msg_reg_nr,
916	       struct brw_reg src,
917	       GLuint data_type,
918	       GLuint precision )
919{
920   struct intel_context *intel = &p->brw->intel;
921
922   if (intel->gen >= 6) {
923      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
924
925      /* Math is the same ISA format as other opcodes, except that CondModifier
926       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
927       */
928      insn->header.destreg__conditionalmod = function;
929
930      brw_set_dest(insn, dest);
931      brw_set_src0(insn, src);
932      brw_set_src1(insn, brw_null_reg());
933   } else {
934      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
935      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
936      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
937      /* Example code doesn't set predicate_control for send
938       * instructions.
939       */
940      insn->header.predicate_control = 0;
941      insn->header.destreg__conditionalmod = msg_reg_nr;
942
943      brw_set_dest(insn, dest);
944      brw_set_src0(insn, src);
945      brw_set_math_message(p->brw,
946			   insn,
947			   msg_length, response_length,
948			   function,
949			   BRW_MATH_INTEGER_UNSIGNED,
950			   precision,
951			   saturate,
952			   data_type);
953   }
954}
955
956/**
957 * Extended math function, float[16].
958 * Use 2 send instructions.
959 */
960void brw_math_16( struct brw_compile *p,
961		  struct brw_reg dest,
962		  GLuint function,
963		  GLuint saturate,
964		  GLuint msg_reg_nr,
965		  struct brw_reg src,
966		  GLuint precision )
967{
968   struct brw_instruction *insn;
969   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
970   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
971
972   /* First instruction:
973    */
974   brw_push_insn_state(p);
975   brw_set_predicate_control_flag_value(p, 0xff);
976   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
977
978   insn = next_insn(p, BRW_OPCODE_SEND);
979   insn->header.destreg__conditionalmod = msg_reg_nr;
980
981   brw_set_dest(insn, dest);
982   brw_set_src0(insn, src);
983   brw_set_math_message(p->brw,
984			insn,
985			msg_length, response_length,
986			function,
987			BRW_MATH_INTEGER_UNSIGNED,
988			precision,
989			saturate,
990			BRW_MATH_DATA_VECTOR);
991
992   /* Second instruction:
993    */
994   insn = next_insn(p, BRW_OPCODE_SEND);
995   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
996   insn->header.destreg__conditionalmod = msg_reg_nr+1;
997
998   brw_set_dest(insn, offset(dest,1));
999   brw_set_src0(insn, src);
1000   brw_set_math_message(p->brw,
1001			insn,
1002			msg_length, response_length,
1003			function,
1004			BRW_MATH_INTEGER_UNSIGNED,
1005			precision,
1006			saturate,
1007			BRW_MATH_DATA_VECTOR);
1008
1009   brw_pop_insn_state(p);
1010}
1011
1012
1013/**
1014 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1015 * Scratch offset should be a multiple of 64.
1016 * Used for register spilling.
1017 */
1018void brw_dp_WRITE_16( struct brw_compile *p,
1019		      struct brw_reg src,
1020		      GLuint scratch_offset )
1021{
1022   GLuint msg_reg_nr = 1;
1023   {
1024      brw_push_insn_state(p);
1025      brw_set_mask_control(p, BRW_MASK_DISABLE);
1026      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1027
1028      /* set message header global offset field (reg 0, element 2) */
1029      brw_MOV(p,
1030	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1031	      brw_imm_d(scratch_offset));
1032
1033      brw_pop_insn_state(p);
1034   }
1035
1036   {
1037      GLuint msg_length = 3;
1038      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1039      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1040
1041      insn->header.predicate_control = 0; /* XXX */
1042      insn->header.compression_control = BRW_COMPRESSION_NONE;
1043      insn->header.destreg__conditionalmod = msg_reg_nr;
1044
1045      brw_set_dest(insn, dest);
1046      brw_set_src0(insn, src);
1047
1048      brw_set_dp_write_message(p->brw,
1049			       insn,
1050			       255, /* binding table index (255=stateless) */
1051			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1052			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1053			       msg_length,
1054			       0, /* pixel scoreboard */
1055			       0, /* response_length */
1056			       0); /* eot */
1057   }
1058}
1059
1060
1061/**
1062 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1063 * Scratch offset should be a multiple of 64.
1064 * Used for register spilling.
1065 */
1066void brw_dp_READ_16( struct brw_compile *p,
1067		      struct brw_reg dest,
1068		      GLuint scratch_offset )
1069{
1070   GLuint msg_reg_nr = 1;
1071   {
1072      brw_push_insn_state(p);
1073      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1074      brw_set_mask_control(p, BRW_MASK_DISABLE);
1075
1076      /* set message header global offset field (reg 0, element 2) */
1077      brw_MOV(p,
1078	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1079	      brw_imm_d(scratch_offset));
1080
1081      brw_pop_insn_state(p);
1082   }
1083
1084   {
1085      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1086
1087      insn->header.predicate_control = 0; /* XXX */
1088      insn->header.compression_control = BRW_COMPRESSION_NONE;
1089      insn->header.destreg__conditionalmod = msg_reg_nr;
1090
1091      brw_set_dest(insn, dest);	/* UW? */
1092      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1093
1094      brw_set_dp_read_message(p->brw,
1095			      insn,
1096			      255, /* binding table index (255=stateless) */
1097			      3,  /* msg_control (3 means 4 Owords) */
1098			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1099			      1, /* target cache (render/scratch) */
1100			      1, /* msg_length */
1101			      2, /* response_length */
1102			      0); /* eot */
1103   }
1104}
1105
1106
1107/**
1108 * Read a float[4] vector from the data port Data Cache (const buffer).
1109 * Location (in buffer) should be a multiple of 16.
1110 * Used for fetching shader constants.
1111 * If relAddr is true, we'll do an indirect fetch using the address register.
1112 */
1113void brw_dp_READ_4( struct brw_compile *p,
1114                    struct brw_reg dest,
1115                    GLboolean relAddr,
1116                    GLuint location,
1117                    GLuint bind_table_index )
1118{
1119   /* XXX: relAddr not implemented */
1120   GLuint msg_reg_nr = 1;
1121   {
1122      struct brw_reg b;
1123      brw_push_insn_state(p);
1124      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1125      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1126      brw_set_mask_control(p, BRW_MASK_DISABLE);
1127
1128   /* Setup MRF[1] with location/offset into const buffer */
1129      b = brw_message_reg(msg_reg_nr);
1130      b = retype(b, BRW_REGISTER_TYPE_UD);
1131      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1132       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1133       */
1134      brw_MOV(p, b, brw_imm_ud(location));
1135      brw_pop_insn_state(p);
1136   }
1137
1138   {
1139      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1140
1141      insn->header.predicate_control = BRW_PREDICATE_NONE;
1142      insn->header.compression_control = BRW_COMPRESSION_NONE;
1143      insn->header.destreg__conditionalmod = msg_reg_nr;
1144      insn->header.mask_control = BRW_MASK_DISABLE;
1145
1146      /* cast dest to a uword[8] vector */
1147      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1148
1149      brw_set_dest(insn, dest);
1150      brw_set_src0(insn, brw_null_reg());
1151
1152      brw_set_dp_read_message(p->brw,
1153			      insn,
1154			      bind_table_index,
1155			      0,  /* msg_control (0 means 1 Oword) */
1156			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1157			      0, /* source cache = data cache */
1158			      1, /* msg_length */
1159			      1, /* response_length (1 Oword) */
1160			      0); /* eot */
1161   }
1162}
1163
1164
1165/**
1166 * Read float[4] constant(s) from VS constant buffer.
1167 * For relative addressing, two float[4] constants will be read into 'dest'.
1168 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1169 */
1170void brw_dp_READ_4_vs(struct brw_compile *p,
1171                      struct brw_reg dest,
1172                      GLuint oword,
1173                      GLboolean relAddr,
1174                      struct brw_reg addrReg,
1175                      GLuint location,
1176                      GLuint bind_table_index)
1177{
1178   GLuint msg_reg_nr = 1;
1179
1180   assert(oword < 2);
1181   /*
1182   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1183          location, msg_reg_nr);
1184   */
1185
1186   /* Setup MRF[1] with location/offset into const buffer */
1187   {
1188      struct brw_reg b;
1189
1190      brw_push_insn_state(p);
1191      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1192      brw_set_mask_control(p, BRW_MASK_DISABLE);
1193      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1194      /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1195
1196      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1197       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1198       */
1199      b = brw_message_reg(msg_reg_nr);
1200      b = retype(b, BRW_REGISTER_TYPE_UD);
1201      /*b = get_element_ud(b, 2);*/
1202      if (relAddr) {
1203         brw_ADD(p, b, addrReg, brw_imm_ud(location));
1204      }
1205      else {
1206         brw_MOV(p, b, brw_imm_ud(location));
1207      }
1208
1209      brw_pop_insn_state(p);
1210   }
1211
1212   {
1213      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1214
1215      insn->header.predicate_control = BRW_PREDICATE_NONE;
1216      insn->header.compression_control = BRW_COMPRESSION_NONE;
1217      insn->header.destreg__conditionalmod = msg_reg_nr;
1218      insn->header.mask_control = BRW_MASK_DISABLE;
1219      /*insn->header.access_mode = BRW_ALIGN_16;*/
1220
1221      brw_set_dest(insn, dest);
1222      brw_set_src0(insn, brw_null_reg());
1223
1224      brw_set_dp_read_message(p->brw,
1225			      insn,
1226			      bind_table_index,
1227			      oword,  /* 0 = lower Oword, 1 = upper Oword */
1228			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1229			      0, /* source cache = data cache */
1230			      1, /* msg_length */
1231			      1, /* response_length (1 Oword) */
1232			      0); /* eot */
1233   }
1234}
1235
1236
1237
1238void brw_fb_WRITE(struct brw_compile *p,
1239                  struct brw_reg dest,
1240                  GLuint msg_reg_nr,
1241                  struct brw_reg src0,
1242                  GLuint binding_table_index,
1243                  GLuint msg_length,
1244                  GLuint response_length,
1245                  GLboolean eot)
1246{
1247   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1248
1249   insn->header.predicate_control = 0; /* XXX */
1250   insn->header.compression_control = BRW_COMPRESSION_NONE;
1251   insn->header.destreg__conditionalmod = msg_reg_nr;
1252
1253   brw_set_dest(insn, dest);
1254   brw_set_src0(insn, src0);
1255   brw_set_dp_write_message(p->brw,
1256			    insn,
1257			    binding_table_index,
1258			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1259			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1260			    msg_length,
1261			    1,	/* pixel scoreboard */
1262			    response_length,
1263			    eot);
1264}
1265
1266
1267/**
1268 * Texture sample instruction.
1269 * Note: the msg_type plus msg_length values determine exactly what kind
1270 * of sampling operation is performed.  See volume 4, page 161 of docs.
1271 */
1272void brw_SAMPLE(struct brw_compile *p,
1273		struct brw_reg dest,
1274		GLuint msg_reg_nr,
1275		struct brw_reg src0,
1276		GLuint binding_table_index,
1277		GLuint sampler,
1278		GLuint writemask,
1279		GLuint msg_type,
1280		GLuint response_length,
1281		GLuint msg_length,
1282		GLboolean eot,
1283		GLuint header_present,
1284		GLuint simd_mode)
1285{
1286   GLboolean need_stall = 0;
1287
1288   if (writemask == 0) {
1289      /*printf("%s: zero writemask??\n", __FUNCTION__); */
1290      return;
1291   }
1292
1293   /* Hardware doesn't do destination dependency checking on send
1294    * instructions properly.  Add a workaround which generates the
1295    * dependency by other means.  In practice it seems like this bug
1296    * only crops up for texture samples, and only where registers are
1297    * written by the send and then written again later without being
1298    * read in between.  Luckily for us, we already track that
1299    * information and use it to modify the writemask for the
1300    * instruction, so that is a guide for whether a workaround is
1301    * needed.
1302    */
1303   if (writemask != WRITEMASK_XYZW) {
1304      GLuint dst_offset = 0;
1305      GLuint i, newmask = 0, len = 0;
1306
1307      for (i = 0; i < 4; i++) {
1308	 if (writemask & (1<<i))
1309	    break;
1310	 dst_offset += 2;
1311      }
1312      for (; i < 4; i++) {
1313	 if (!(writemask & (1<<i)))
1314	    break;
1315	 newmask |= 1<<i;
1316	 len++;
1317      }
1318
1319      if (newmask != writemask) {
1320	 need_stall = 1;
1321         /* printf("need stall %x %x\n", newmask , writemask); */
1322      }
1323      else {
1324	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1325
1326	 newmask = ~newmask & WRITEMASK_XYZW;
1327
1328	 brw_push_insn_state(p);
1329
1330	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1331	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1332
1333	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1334  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1335
1336	 brw_pop_insn_state(p);
1337
1338  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1339	 dest = offset(dest, dst_offset);
1340	 response_length = len * 2;
1341      }
1342   }
1343
1344   {
1345      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1346
1347      insn->header.predicate_control = 0; /* XXX */
1348      insn->header.compression_control = BRW_COMPRESSION_NONE;
1349      insn->header.destreg__conditionalmod = msg_reg_nr;
1350
1351      brw_set_dest(insn, dest);
1352      brw_set_src0(insn, src0);
1353      brw_set_sampler_message(p->brw, insn,
1354			      binding_table_index,
1355			      sampler,
1356			      msg_type,
1357			      response_length,
1358			      msg_length,
1359			      eot,
1360			      header_present,
1361			      simd_mode);
1362   }
1363
1364   if (need_stall) {
1365      struct brw_reg reg = vec8(offset(dest, response_length-1));
1366
1367      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1368       */
1369      brw_push_insn_state(p);
1370      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1371      brw_MOV(p, reg, reg);
1372      brw_pop_insn_state(p);
1373   }
1374
1375}
1376
1377/* All these variables are pretty confusing - we might be better off
1378 * using bitmasks and macros for this, in the old style.  Or perhaps
1379 * just having the caller instantiate the fields in dword3 itself.
1380 */
1381void brw_urb_WRITE(struct brw_compile *p,
1382		   struct brw_reg dest,
1383		   GLuint msg_reg_nr,
1384		   struct brw_reg src0,
1385		   GLboolean allocate,
1386		   GLboolean used,
1387		   GLuint msg_length,
1388		   GLuint response_length,
1389		   GLboolean eot,
1390		   GLboolean writes_complete,
1391		   GLuint offset,
1392		   GLuint swizzle)
1393{
1394   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1395
1396   assert(msg_length < BRW_MAX_MRF);
1397
1398   brw_set_dest(insn, dest);
1399   brw_set_src0(insn, src0);
1400   brw_set_src1(insn, brw_imm_d(0));
1401
1402   insn->header.destreg__conditionalmod = msg_reg_nr;
1403
1404   brw_set_urb_message(p->brw,
1405		       insn,
1406		       allocate,
1407		       used,
1408		       msg_length,
1409		       response_length,
1410		       eot,
1411		       writes_complete,
1412		       offset,
1413		       swizzle);
1414}
1415
1416void brw_ff_sync(struct brw_compile *p,
1417		   struct brw_reg dest,
1418		   GLuint msg_reg_nr,
1419		   struct brw_reg src0,
1420		   GLboolean allocate,
1421		   GLboolean used,
1422		   GLuint msg_length,
1423		   GLuint response_length,
1424		   GLboolean eot,
1425		   GLboolean writes_complete,
1426		   GLuint offset,
1427		   GLuint swizzle)
1428{
1429   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1430
1431   assert(msg_length < 16);
1432
1433   brw_set_dest(insn, dest);
1434   brw_set_src0(insn, src0);
1435   brw_set_src1(insn, brw_imm_d(0));
1436
1437   insn->header.destreg__conditionalmod = msg_reg_nr;
1438
1439   brw_set_ff_sync_message(p->brw,
1440		       insn,
1441		       allocate,
1442		       used,
1443		       msg_length,
1444		       response_length,
1445		       eot,
1446		       writes_complete,
1447		       offset,
1448		       swizzle);
1449}
1450