brw_eu_emit.c revision 0002069fd5117b52f0ae2be0b7e3d8e839a3a61c
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59       dest.file != BRW_MESSAGE_REGISTER_FILE)
60      assert(dest.nr < 128);
61
62   insn->bits1.da1.dest_reg_file = dest.file;
63   insn->bits1.da1.dest_reg_type = dest.type;
64   insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67      insn->bits1.da1.dest_reg_nr = dest.nr;
68
69      if (insn->header.access_mode == BRW_ALIGN_1) {
70	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74      }
75      else {
76	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78	 /* even ignored in da16, still need to set as '01' */
79	 insn->bits1.da16.dest_horiz_stride = 1;
80      }
81   }
82   else {
83      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85      /* These are different sizes in align1 vs align16:
86       */
87      if (insn->header.access_mode == BRW_ALIGN_1) {
88	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92      }
93      else {
94	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95	 /* even ignored in da16, still need to set as '01' */
96	 insn->bits1.ia16.dest_horiz_stride = 1;
97      }
98   }
99
100   /* NEW: Set the execution size based on dest.width and
101    * insn->compression_control:
102    */
103   guess_execution_size(insn, dest);
104}
105
106extern int reg_type_size[];
107
108static void
109validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110{
111   int hstride_for_reg[] = {0, 1, 2, 4};
112   int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113   int width_for_reg[] = {1, 2, 4, 8, 16};
114   int execsize_for_reg[] = {1, 2, 4, 8, 16};
115   int width, hstride, vstride, execsize;
116
117   if (reg.file == BRW_IMMEDIATE_VALUE) {
118      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
119       * mean the destination has to be 128-bit aligned and the
120       * destination horiz stride has to be a word.
121       */
122      if (reg.type == BRW_REGISTER_TYPE_V) {
123	 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124		reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125      }
126
127      return;
128   }
129
130   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131       reg.file == BRW_ARF_NULL)
132      return;
133
134   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135   hstride = hstride_for_reg[reg.hstride];
136
137   if (reg.vstride == 0xf) {
138      vstride = -1;
139   } else {
140      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141      vstride = vstride_for_reg[reg.vstride];
142   }
143
144   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145   width = width_for_reg[reg.width];
146
147   assert(insn->header.execution_size >= 0 &&
148	  insn->header.execution_size < Elements(execsize_for_reg));
149   execsize = execsize_for_reg[insn->header.execution_size];
150
151   /* Restrictions from 3.3.10: Register Region Restrictions. */
152   /* 3. */
153   assert(execsize >= width);
154
155   /* 4. */
156   if (execsize == width && hstride != 0) {
157      assert(vstride == -1 || vstride == width * hstride);
158   }
159
160   /* 5. */
161   if (execsize == width && hstride == 0) {
162      /* no restriction on vstride. */
163   }
164
165   /* 6. */
166   if (width == 1) {
167      assert(hstride == 0);
168   }
169
170   /* 7. */
171   if (execsize == 1 && width == 1) {
172      assert(hstride == 0);
173      assert(vstride == 0);
174   }
175
176   /* 8. */
177   if (vstride == 0 && hstride == 0) {
178      assert(width == 1);
179   }
180
181   /* 10. Check destination issues. */
182}
183
184static void brw_set_src0( struct brw_instruction *insn,
185                          struct brw_reg reg )
186{
187   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
188      assert(reg.nr < 128);
189
190   validate_reg(insn, reg);
191
192   insn->bits1.da1.src0_reg_file = reg.file;
193   insn->bits1.da1.src0_reg_type = reg.type;
194   insn->bits2.da1.src0_abs = reg.abs;
195   insn->bits2.da1.src0_negate = reg.negate;
196   insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198   if (reg.file == BRW_IMMEDIATE_VALUE) {
199      insn->bits3.ud = reg.dw1.ud;
200
201      /* Required to set some fields in src1 as well:
202       */
203      insn->bits1.da1.src1_reg_file = 0; /* arf */
204      insn->bits1.da1.src1_reg_type = reg.type;
205   }
206   else
207   {
208      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209	 if (insn->header.access_mode == BRW_ALIGN_1) {
210	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
211	    insn->bits2.da1.src0_reg_nr = reg.nr;
212	 }
213	 else {
214	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215	    insn->bits2.da16.src0_reg_nr = reg.nr;
216	 }
217      }
218      else {
219	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221	 if (insn->header.access_mode == BRW_ALIGN_1) {
222	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223	 }
224	 else {
225	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226	 }
227      }
228
229      if (insn->header.access_mode == BRW_ALIGN_1) {
230	 if (reg.width == BRW_WIDTH_1 &&
231	     insn->header.execution_size == BRW_EXECUTE_1) {
232	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
234	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235	 }
236	 else {
237	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
238	    insn->bits2.da1.src0_width = reg.width;
239	    insn->bits2.da1.src0_vert_stride = reg.vstride;
240	 }
241      }
242      else {
243	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248	 /* This is an oddity of the fact we're using the same
249	  * descriptions for registers in align_16 as align_1:
250	  */
251	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253	 else
254	    insn->bits2.da16.src0_vert_stride = reg.vstride;
255      }
256   }
257}
258
259
260void brw_set_src1( struct brw_instruction *insn,
261                   struct brw_reg reg )
262{
263   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265   assert(reg.nr < 128);
266
267   validate_reg(insn, reg);
268
269   insn->bits1.da1.src1_reg_file = reg.file;
270   insn->bits1.da1.src1_reg_type = reg.type;
271   insn->bits3.da1.src1_abs = reg.abs;
272   insn->bits3.da1.src1_negate = reg.negate;
273
274   /* Only src1 can be immediate in two-argument instructions.
275    */
276   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278   if (reg.file == BRW_IMMEDIATE_VALUE) {
279      insn->bits3.ud = reg.dw1.ud;
280   }
281   else {
282      /* This is a hardware restriction, which may or may not be lifted
283       * in the future:
284       */
285      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288      if (insn->header.access_mode == BRW_ALIGN_1) {
289	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290	 insn->bits3.da1.src1_reg_nr = reg.nr;
291      }
292      else {
293	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294	 insn->bits3.da16.src1_reg_nr = reg.nr;
295      }
296
297      if (insn->header.access_mode == BRW_ALIGN_1) {
298	 if (reg.width == BRW_WIDTH_1 &&
299	     insn->header.execution_size == BRW_EXECUTE_1) {
300	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
302	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303	 }
304	 else {
305	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
306	    insn->bits3.da1.src1_width = reg.width;
307	    insn->bits3.da1.src1_vert_stride = reg.vstride;
308	 }
309      }
310      else {
311	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316	 /* This is an oddity of the fact we're using the same
317	  * descriptions for registers in align_16 as align_1:
318	  */
319	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321	 else
322	    insn->bits3.da16.src1_vert_stride = reg.vstride;
323      }
324   }
325}
326
327
328
329static void brw_set_math_message( struct brw_context *brw,
330				  struct brw_instruction *insn,
331				  GLuint msg_length,
332				  GLuint response_length,
333				  GLuint function,
334				  GLuint integer_type,
335				  GLboolean low_precision,
336				  GLboolean saturate,
337				  GLuint dataType )
338{
339   struct intel_context *intel = &brw->intel;
340   brw_set_src1(insn, brw_imm_d(0));
341
342   if (intel->gen == 5) {
343       insn->bits3.math_gen5.function = function;
344       insn->bits3.math_gen5.int_type = integer_type;
345       insn->bits3.math_gen5.precision = low_precision;
346       insn->bits3.math_gen5.saturate = saturate;
347       insn->bits3.math_gen5.data_type = dataType;
348       insn->bits3.math_gen5.snapshot = 0;
349       insn->bits3.math_gen5.header_present = 0;
350       insn->bits3.math_gen5.response_length = response_length;
351       insn->bits3.math_gen5.msg_length = msg_length;
352       insn->bits3.math_gen5.end_of_thread = 0;
353       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354       insn->bits2.send_gen5.end_of_thread = 0;
355   } else {
356       insn->bits3.math.function = function;
357       insn->bits3.math.int_type = integer_type;
358       insn->bits3.math.precision = low_precision;
359       insn->bits3.math.saturate = saturate;
360       insn->bits3.math.data_type = dataType;
361       insn->bits3.math.response_length = response_length;
362       insn->bits3.math.msg_length = msg_length;
363       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364       insn->bits3.math.end_of_thread = 0;
365   }
366}
367
368
369static void brw_set_ff_sync_message(struct brw_context *brw,
370				    struct brw_instruction *insn,
371				    GLboolean allocate,
372				    GLuint response_length,
373				    GLboolean end_of_thread)
374{
375	struct intel_context *intel = &brw->intel;
376	brw_set_src1(insn, brw_imm_d(0));
377
378	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381	insn->bits3.urb_gen5.allocate = allocate;
382	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384	insn->bits3.urb_gen5.header_present = 1;
385	insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386	insn->bits3.urb_gen5.msg_length = 1;
387	insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388	if (intel->gen >= 6) {
389	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390	} else {
391	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
393	}
394}
395
396static void brw_set_urb_message( struct brw_context *brw,
397				 struct brw_instruction *insn,
398				 GLboolean allocate,
399				 GLboolean used,
400				 GLuint msg_length,
401				 GLuint response_length,
402				 GLboolean end_of_thread,
403				 GLboolean complete,
404				 GLuint offset,
405				 GLuint swizzle_control )
406{
407    struct intel_context *intel = &brw->intel;
408    brw_set_src1(insn, brw_imm_d(0));
409
410    if (intel->gen >= 5) {
411        insn->bits3.urb_gen5.opcode = 0;	/* ? */
412        insn->bits3.urb_gen5.offset = offset;
413        insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414        insn->bits3.urb_gen5.allocate = allocate;
415        insn->bits3.urb_gen5.used = used;	/* ? */
416        insn->bits3.urb_gen5.complete = complete;
417        insn->bits3.urb_gen5.header_present = 1;
418        insn->bits3.urb_gen5.response_length = response_length;
419        insn->bits3.urb_gen5.msg_length = msg_length;
420        insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421	if (intel->gen >= 6) {
422	   /* For SNB, the SFID bits moved to the condmod bits, and
423	    * EOT stayed in bits3 above.  Does the EOT bit setting
424	    * below on Ironlake even do anything?
425	    */
426	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427	} else {
428	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
430	}
431    } else {
432        insn->bits3.urb.opcode = 0;	/* ? */
433        insn->bits3.urb.offset = offset;
434        insn->bits3.urb.swizzle_control = swizzle_control;
435        insn->bits3.urb.allocate = allocate;
436        insn->bits3.urb.used = used;	/* ? */
437        insn->bits3.urb.complete = complete;
438        insn->bits3.urb.response_length = response_length;
439        insn->bits3.urb.msg_length = msg_length;
440        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441        insn->bits3.urb.end_of_thread = end_of_thread;
442    }
443}
444
445static void brw_set_dp_write_message( struct brw_context *brw,
446				      struct brw_instruction *insn,
447				      GLuint binding_table_index,
448				      GLuint msg_control,
449				      GLuint msg_type,
450				      GLuint msg_length,
451				      GLuint pixel_scoreboard_clear,
452				      GLuint response_length,
453				      GLuint end_of_thread,
454				      GLuint send_commit_msg)
455{
456   struct intel_context *intel = &brw->intel;
457   brw_set_src1(insn, brw_imm_ud(0));
458
459   if (intel->gen >= 6) {
460       insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
461       insn->bits3.dp_render_cache.msg_control = msg_control;
462       insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
463       insn->bits3.dp_render_cache.msg_type = msg_type;
464       insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
465       insn->bits3.dp_render_cache.header_present = 0; /* XXX */
466       insn->bits3.dp_render_cache.response_length = response_length;
467       insn->bits3.dp_render_cache.msg_length = msg_length;
468       insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
469       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
470	/* XXX really need below? */
471       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
472       insn->bits2.send_gen5.end_of_thread = end_of_thread;
473   } else if (intel->gen == 5) {
474       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
475       insn->bits3.dp_write_gen5.msg_control = msg_control;
476       insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
477       insn->bits3.dp_write_gen5.msg_type = msg_type;
478       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
479       insn->bits3.dp_write_gen5.header_present = 1;
480       insn->bits3.dp_write_gen5.response_length = response_length;
481       insn->bits3.dp_write_gen5.msg_length = msg_length;
482       insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
483       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
484       insn->bits2.send_gen5.end_of_thread = end_of_thread;
485   } else {
486       insn->bits3.dp_write.binding_table_index = binding_table_index;
487       insn->bits3.dp_write.msg_control = msg_control;
488       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
489       insn->bits3.dp_write.msg_type = msg_type;
490       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
491       insn->bits3.dp_write.response_length = response_length;
492       insn->bits3.dp_write.msg_length = msg_length;
493       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
494       insn->bits3.dp_write.end_of_thread = end_of_thread;
495   }
496}
497
498static void brw_set_dp_read_message( struct brw_context *brw,
499				      struct brw_instruction *insn,
500				      GLuint binding_table_index,
501				      GLuint msg_control,
502				      GLuint msg_type,
503				      GLuint target_cache,
504				      GLuint msg_length,
505				      GLuint response_length,
506				      GLuint end_of_thread )
507{
508   struct intel_context *intel = &brw->intel;
509   brw_set_src1(insn, brw_imm_d(0));
510
511   if (intel->gen == 5) {
512       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
513       insn->bits3.dp_read_gen5.msg_control = msg_control;
514       insn->bits3.dp_read_gen5.msg_type = msg_type;
515       insn->bits3.dp_read_gen5.target_cache = target_cache;
516       insn->bits3.dp_read_gen5.header_present = 1;
517       insn->bits3.dp_read_gen5.response_length = response_length;
518       insn->bits3.dp_read_gen5.msg_length = msg_length;
519       insn->bits3.dp_read_gen5.pad1 = 0;
520       insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
521       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
522       insn->bits2.send_gen5.end_of_thread = end_of_thread;
523   } else {
524       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
525       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
526       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
527       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
528       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
529       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
530       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
531       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
532       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
533   }
534}
535
536static void brw_set_sampler_message(struct brw_context *brw,
537                                    struct brw_instruction *insn,
538                                    GLuint binding_table_index,
539                                    GLuint sampler,
540                                    GLuint msg_type,
541                                    GLuint response_length,
542                                    GLuint msg_length,
543                                    GLboolean eot,
544                                    GLuint header_present,
545                                    GLuint simd_mode)
546{
547   struct intel_context *intel = &brw->intel;
548   assert(eot == 0);
549   brw_set_src1(insn, brw_imm_d(0));
550
551   if (intel->gen == 5) {
552      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
553      insn->bits3.sampler_gen5.sampler = sampler;
554      insn->bits3.sampler_gen5.msg_type = msg_type;
555      insn->bits3.sampler_gen5.simd_mode = simd_mode;
556      insn->bits3.sampler_gen5.header_present = header_present;
557      insn->bits3.sampler_gen5.response_length = response_length;
558      insn->bits3.sampler_gen5.msg_length = msg_length;
559      insn->bits3.sampler_gen5.end_of_thread = eot;
560      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
561      insn->bits2.send_gen5.end_of_thread = eot;
562   } else if (intel->is_g4x) {
563      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
564      insn->bits3.sampler_g4x.sampler = sampler;
565      insn->bits3.sampler_g4x.msg_type = msg_type;
566      insn->bits3.sampler_g4x.response_length = response_length;
567      insn->bits3.sampler_g4x.msg_length = msg_length;
568      insn->bits3.sampler_g4x.end_of_thread = eot;
569      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
570   } else {
571      insn->bits3.sampler.binding_table_index = binding_table_index;
572      insn->bits3.sampler.sampler = sampler;
573      insn->bits3.sampler.msg_type = msg_type;
574      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
575      insn->bits3.sampler.response_length = response_length;
576      insn->bits3.sampler.msg_length = msg_length;
577      insn->bits3.sampler.end_of_thread = eot;
578      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
579   }
580}
581
582
583
584static struct brw_instruction *next_insn( struct brw_compile *p,
585					  GLuint opcode )
586{
587   struct brw_instruction *insn;
588
589   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
590
591   insn = &p->store[p->nr_insn++];
592   memcpy(insn, p->current, sizeof(*insn));
593
594   /* Reset this one-shot flag:
595    */
596
597   if (p->current->header.destreg__conditionalmod) {
598      p->current->header.destreg__conditionalmod = 0;
599      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
600   }
601
602   insn->header.opcode = opcode;
603   return insn;
604}
605
606
607static struct brw_instruction *brw_alu1( struct brw_compile *p,
608					 GLuint opcode,
609					 struct brw_reg dest,
610					 struct brw_reg src )
611{
612   struct brw_instruction *insn = next_insn(p, opcode);
613   brw_set_dest(insn, dest);
614   brw_set_src0(insn, src);
615   return insn;
616}
617
618static struct brw_instruction *brw_alu2(struct brw_compile *p,
619					GLuint opcode,
620					struct brw_reg dest,
621					struct brw_reg src0,
622					struct brw_reg src1 )
623{
624   struct brw_instruction *insn = next_insn(p, opcode);
625   brw_set_dest(insn, dest);
626   brw_set_src0(insn, src0);
627   brw_set_src1(insn, src1);
628   return insn;
629}
630
631
632/***********************************************************************
633 * Convenience routines.
634 */
635#define ALU1(OP)					\
636struct brw_instruction *brw_##OP(struct brw_compile *p,	\
637	      struct brw_reg dest,			\
638	      struct brw_reg src0)   			\
639{							\
640   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
641}
642
643#define ALU2(OP)					\
644struct brw_instruction *brw_##OP(struct brw_compile *p,	\
645	      struct brw_reg dest,			\
646	      struct brw_reg src0,			\
647	      struct brw_reg src1)   			\
648{							\
649   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
650}
651
652
653ALU1(MOV)
654ALU2(SEL)
655ALU1(NOT)
656ALU2(AND)
657ALU2(OR)
658ALU2(XOR)
659ALU2(SHR)
660ALU2(SHL)
661ALU2(RSR)
662ALU2(RSL)
663ALU2(ASR)
664ALU2(ADD)
665ALU2(MUL)
666ALU1(FRC)
667ALU1(RNDD)
668ALU1(RNDZ)
669ALU2(MAC)
670ALU2(MACH)
671ALU1(LZD)
672ALU2(DP4)
673ALU2(DPH)
674ALU2(DP3)
675ALU2(DP2)
676ALU2(LINE)
677ALU2(PLN)
678
679
680
681void brw_NOP(struct brw_compile *p)
682{
683   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
684   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
685   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
686   brw_set_src1(insn, brw_imm_ud(0x0));
687}
688
689
690
691
692
693/***********************************************************************
694 * Comparisons, if/else/endif
695 */
696
697struct brw_instruction *brw_JMPI(struct brw_compile *p,
698                                 struct brw_reg dest,
699                                 struct brw_reg src0,
700                                 struct brw_reg src1)
701{
702   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
703
704   insn->header.execution_size = 1;
705   insn->header.compression_control = BRW_COMPRESSION_NONE;
706   insn->header.mask_control = BRW_MASK_DISABLE;
707
708   p->current->header.predicate_control = BRW_PREDICATE_NONE;
709
710   return insn;
711}
712
713/* EU takes the value from the flag register and pushes it onto some
714 * sort of a stack (presumably merging with any flag value already on
715 * the stack).  Within an if block, the flags at the top of the stack
716 * control execution on each channel of the unit, eg. on each of the
717 * 16 pixel values in our wm programs.
718 *
719 * When the matching 'else' instruction is reached (presumably by
720 * countdown of the instruction count patched in by our ELSE/ENDIF
721 * functions), the relevant flags are inverted.
722 *
723 * When the matching 'endif' instruction is reached, the flags are
724 * popped off.  If the stack is now empty, normal execution resumes.
725 *
726 * No attempt is made to deal with stack overflow (14 elements?).
727 */
728struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
729{
730   struct brw_instruction *insn;
731
732   if (p->single_program_flow) {
733      assert(execute_size == BRW_EXECUTE_1);
734
735      insn = next_insn(p, BRW_OPCODE_ADD);
736      insn->header.predicate_inverse = 1;
737   } else {
738      insn = next_insn(p, BRW_OPCODE_IF);
739   }
740
741   /* Override the defaults for this instruction:
742    */
743   brw_set_dest(insn, brw_ip_reg());
744   brw_set_src0(insn, brw_ip_reg());
745   brw_set_src1(insn, brw_imm_d(0x0));
746
747   insn->header.execution_size = execute_size;
748   insn->header.compression_control = BRW_COMPRESSION_NONE;
749   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
750   insn->header.mask_control = BRW_MASK_ENABLE;
751   if (!p->single_program_flow)
752       insn->header.thread_control = BRW_THREAD_SWITCH;
753
754   p->current->header.predicate_control = BRW_PREDICATE_NONE;
755
756   return insn;
757}
758
759
760struct brw_instruction *brw_ELSE(struct brw_compile *p,
761				 struct brw_instruction *if_insn)
762{
763   struct intel_context *intel = &p->brw->intel;
764   struct brw_instruction *insn;
765   GLuint br = 1;
766
767   if (intel->gen == 5)
768      br = 2;
769
770   if (p->single_program_flow) {
771      insn = next_insn(p, BRW_OPCODE_ADD);
772   } else {
773      insn = next_insn(p, BRW_OPCODE_ELSE);
774   }
775
776   brw_set_dest(insn, brw_ip_reg());
777   brw_set_src0(insn, brw_ip_reg());
778   brw_set_src1(insn, brw_imm_d(0x0));
779
780   insn->header.compression_control = BRW_COMPRESSION_NONE;
781   insn->header.execution_size = if_insn->header.execution_size;
782   insn->header.mask_control = BRW_MASK_ENABLE;
783   if (!p->single_program_flow)
784       insn->header.thread_control = BRW_THREAD_SWITCH;
785
786   /* Patch the if instruction to point at this instruction.
787    */
788   if (p->single_program_flow) {
789      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
790
791      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
792   } else {
793      assert(if_insn->header.opcode == BRW_OPCODE_IF);
794
795      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
796      if_insn->bits3.if_else.pop_count = 0;
797      if_insn->bits3.if_else.pad0 = 0;
798   }
799
800   return insn;
801}
802
803void brw_ENDIF(struct brw_compile *p,
804	       struct brw_instruction *patch_insn)
805{
806   struct intel_context *intel = &p->brw->intel;
807   GLuint br = 1;
808
809   if (intel->gen == 5)
810      br = 2;
811
812   if (p->single_program_flow) {
813      /* In single program flow mode, there's no need to execute an ENDIF,
814       * since we don't need to do any stack operations, and if we're executing
815       * currently, we want to just continue executing.
816       */
817      struct brw_instruction *next = &p->store[p->nr_insn];
818
819      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
820
821      patch_insn->bits3.ud = (next - patch_insn) * 16;
822   } else {
823      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
824
825      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
826      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
827      brw_set_src1(insn, brw_imm_d(0x0));
828
829      insn->header.compression_control = BRW_COMPRESSION_NONE;
830      insn->header.execution_size = patch_insn->header.execution_size;
831      insn->header.mask_control = BRW_MASK_ENABLE;
832      insn->header.thread_control = BRW_THREAD_SWITCH;
833
834      assert(patch_insn->bits3.if_else.jump_count == 0);
835
836      /* Patch the if or else instructions to point at this or the next
837       * instruction respectively.
838       */
839      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
840	 /* Automagically turn it into an IFF:
841	  */
842	 patch_insn->header.opcode = BRW_OPCODE_IFF;
843	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
844	 patch_insn->bits3.if_else.pop_count = 0;
845	 patch_insn->bits3.if_else.pad0 = 0;
846      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
847	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
848	 patch_insn->bits3.if_else.pop_count = 1;
849	 patch_insn->bits3.if_else.pad0 = 0;
850      } else {
851	 assert(0);
852      }
853
854      /* Also pop item off the stack in the endif instruction:
855       */
856      insn->bits3.if_else.jump_count = 0;
857      insn->bits3.if_else.pop_count = 1;
858      insn->bits3.if_else.pad0 = 0;
859   }
860}
861
862struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
863{
864   struct brw_instruction *insn;
865   insn = next_insn(p, BRW_OPCODE_BREAK);
866   brw_set_dest(insn, brw_ip_reg());
867   brw_set_src0(insn, brw_ip_reg());
868   brw_set_src1(insn, brw_imm_d(0x0));
869   insn->header.compression_control = BRW_COMPRESSION_NONE;
870   insn->header.execution_size = BRW_EXECUTE_8;
871   /* insn->header.mask_control = BRW_MASK_DISABLE; */
872   insn->bits3.if_else.pad0 = 0;
873   insn->bits3.if_else.pop_count = pop_count;
874   return insn;
875}
876
877struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
878{
879   struct brw_instruction *insn;
880   insn = next_insn(p, BRW_OPCODE_CONTINUE);
881   brw_set_dest(insn, brw_ip_reg());
882   brw_set_src0(insn, brw_ip_reg());
883   brw_set_src1(insn, brw_imm_d(0x0));
884   insn->header.compression_control = BRW_COMPRESSION_NONE;
885   insn->header.execution_size = BRW_EXECUTE_8;
886   /* insn->header.mask_control = BRW_MASK_DISABLE; */
887   insn->bits3.if_else.pad0 = 0;
888   insn->bits3.if_else.pop_count = pop_count;
889   return insn;
890}
891
892/* DO/WHILE loop:
893 */
894struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
895{
896   if (p->single_program_flow) {
897      return &p->store[p->nr_insn];
898   } else {
899      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
900
901      /* Override the defaults for this instruction:
902       */
903      brw_set_dest(insn, brw_null_reg());
904      brw_set_src0(insn, brw_null_reg());
905      brw_set_src1(insn, brw_null_reg());
906
907      insn->header.compression_control = BRW_COMPRESSION_NONE;
908      insn->header.execution_size = execute_size;
909      insn->header.predicate_control = BRW_PREDICATE_NONE;
910      /* insn->header.mask_control = BRW_MASK_ENABLE; */
911      /* insn->header.mask_control = BRW_MASK_DISABLE; */
912
913      return insn;
914   }
915}
916
917
918
919struct brw_instruction *brw_WHILE(struct brw_compile *p,
920                                  struct brw_instruction *do_insn)
921{
922   struct intel_context *intel = &p->brw->intel;
923   struct brw_instruction *insn;
924   GLuint br = 1;
925
926   if (intel->gen == 5)
927      br = 2;
928
929   if (p->single_program_flow)
930      insn = next_insn(p, BRW_OPCODE_ADD);
931   else
932      insn = next_insn(p, BRW_OPCODE_WHILE);
933
934   brw_set_dest(insn, brw_ip_reg());
935   brw_set_src0(insn, brw_ip_reg());
936   brw_set_src1(insn, brw_imm_d(0x0));
937
938   insn->header.compression_control = BRW_COMPRESSION_NONE;
939
940   if (p->single_program_flow) {
941      insn->header.execution_size = BRW_EXECUTE_1;
942
943      insn->bits3.d = (do_insn - insn) * 16;
944   } else {
945      insn->header.execution_size = do_insn->header.execution_size;
946
947      assert(do_insn->header.opcode == BRW_OPCODE_DO);
948      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
949      insn->bits3.if_else.pop_count = 0;
950      insn->bits3.if_else.pad0 = 0;
951   }
952
953/*    insn->header.mask_control = BRW_MASK_ENABLE; */
954
955   /* insn->header.mask_control = BRW_MASK_DISABLE; */
956   p->current->header.predicate_control = BRW_PREDICATE_NONE;
957   return insn;
958}
959
960
961/* FORWARD JUMPS:
962 */
963void brw_land_fwd_jump(struct brw_compile *p,
964		       struct brw_instruction *jmp_insn)
965{
966   struct intel_context *intel = &p->brw->intel;
967   struct brw_instruction *landing = &p->store[p->nr_insn];
968   GLuint jmpi = 1;
969
970   if (intel->gen == 5)
971       jmpi = 2;
972
973   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
974   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
975
976   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
977}
978
979
980
981/* To integrate with the above, it makes sense that the comparison
982 * instruction should populate the flag register.  It might be simpler
983 * just to use the flag reg for most WM tasks?
984 */
985void brw_CMP(struct brw_compile *p,
986	     struct brw_reg dest,
987	     GLuint conditional,
988	     struct brw_reg src0,
989	     struct brw_reg src1)
990{
991   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
992
993   insn->header.destreg__conditionalmod = conditional;
994   brw_set_dest(insn, dest);
995   brw_set_src0(insn, src0);
996   brw_set_src1(insn, src1);
997
998/*    guess_execution_size(insn, src0); */
999
1000
1001   /* Make it so that future instructions will use the computed flag
1002    * value until brw_set_predicate_control_flag_value() is called
1003    * again.
1004    */
1005   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1006       dest.nr == 0) {
1007      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1008      p->flag_value = 0xff;
1009   }
1010}
1011
1012/* Issue 'wait' instruction for n1, host could program MMIO
1013   to wake up thread. */
1014void brw_WAIT (struct brw_compile *p)
1015{
1016   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1017   struct brw_reg src = brw_notification_1_reg();
1018
1019   brw_set_dest(insn, src);
1020   brw_set_src0(insn, src);
1021   brw_set_src1(insn, brw_null_reg());
1022   insn->header.execution_size = 0; /* must */
1023   insn->header.predicate_control = 0;
1024   insn->header.compression_control = 0;
1025}
1026
1027
1028/***********************************************************************
1029 * Helpers for the various SEND message types:
1030 */
1031
1032/** Extended math function, float[8].
1033 */
1034void brw_math( struct brw_compile *p,
1035	       struct brw_reg dest,
1036	       GLuint function,
1037	       GLuint saturate,
1038	       GLuint msg_reg_nr,
1039	       struct brw_reg src,
1040	       GLuint data_type,
1041	       GLuint precision )
1042{
1043   struct intel_context *intel = &p->brw->intel;
1044
1045   if (intel->gen >= 6) {
1046      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1047
1048      /* Math is the same ISA format as other opcodes, except that CondModifier
1049       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1050       */
1051      insn->header.destreg__conditionalmod = function;
1052
1053      brw_set_dest(insn, dest);
1054      brw_set_src0(insn, src);
1055      brw_set_src1(insn, brw_null_reg());
1056   } else {
1057      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1058      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1059      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1060      /* Example code doesn't set predicate_control for send
1061       * instructions.
1062       */
1063      insn->header.predicate_control = 0;
1064      insn->header.destreg__conditionalmod = msg_reg_nr;
1065
1066      brw_set_dest(insn, dest);
1067      brw_set_src0(insn, src);
1068      brw_set_math_message(p->brw,
1069			   insn,
1070			   msg_length, response_length,
1071			   function,
1072			   BRW_MATH_INTEGER_UNSIGNED,
1073			   precision,
1074			   saturate,
1075			   data_type);
1076   }
1077}
1078
1079/**
1080 * Extended math function, float[16].
1081 * Use 2 send instructions.
1082 */
1083void brw_math_16( struct brw_compile *p,
1084		  struct brw_reg dest,
1085		  GLuint function,
1086		  GLuint saturate,
1087		  GLuint msg_reg_nr,
1088		  struct brw_reg src,
1089		  GLuint precision )
1090{
1091   struct intel_context *intel = &p->brw->intel;
1092   struct brw_instruction *insn;
1093   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1094   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1095
1096   if (intel->gen >= 6) {
1097      insn = next_insn(p, BRW_OPCODE_MATH);
1098
1099      /* Math is the same ISA format as other opcodes, except that CondModifier
1100       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1101       */
1102      insn->header.destreg__conditionalmod = function;
1103
1104      brw_set_dest(insn, dest);
1105      brw_set_src0(insn, src);
1106      brw_set_src1(insn, brw_null_reg());
1107      return;
1108   }
1109
1110   /* First instruction:
1111    */
1112   brw_push_insn_state(p);
1113   brw_set_predicate_control_flag_value(p, 0xff);
1114   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1115
1116   insn = next_insn(p, BRW_OPCODE_SEND);
1117   insn->header.destreg__conditionalmod = msg_reg_nr;
1118
1119   brw_set_dest(insn, dest);
1120   brw_set_src0(insn, src);
1121   brw_set_math_message(p->brw,
1122			insn,
1123			msg_length, response_length,
1124			function,
1125			BRW_MATH_INTEGER_UNSIGNED,
1126			precision,
1127			saturate,
1128			BRW_MATH_DATA_VECTOR);
1129
1130   /* Second instruction:
1131    */
1132   insn = next_insn(p, BRW_OPCODE_SEND);
1133   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1134   insn->header.destreg__conditionalmod = msg_reg_nr+1;
1135
1136   brw_set_dest(insn, offset(dest,1));
1137   brw_set_src0(insn, src);
1138   brw_set_math_message(p->brw,
1139			insn,
1140			msg_length, response_length,
1141			function,
1142			BRW_MATH_INTEGER_UNSIGNED,
1143			precision,
1144			saturate,
1145			BRW_MATH_DATA_VECTOR);
1146
1147   brw_pop_insn_state(p);
1148}
1149
1150
1151/**
1152 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1153 * Scratch offset should be a multiple of 64.
1154 * Used for register spilling.
1155 */
1156void brw_dp_WRITE_16( struct brw_compile *p,
1157		      struct brw_reg src,
1158		      GLuint scratch_offset )
1159{
1160   struct intel_context *intel = &p->brw->intel;
1161   GLuint msg_reg_nr = 1;
1162   {
1163      brw_push_insn_state(p);
1164      brw_set_mask_control(p, BRW_MASK_DISABLE);
1165      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1166
1167      /* set message header global offset field (reg 0, element 2) */
1168      brw_MOV(p,
1169	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1170	      brw_imm_d(scratch_offset));
1171
1172      brw_pop_insn_state(p);
1173   }
1174
1175   {
1176      GLuint msg_length = 3;
1177      struct brw_reg dest;
1178      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1179      int send_commit_msg;
1180
1181      insn->header.predicate_control = 0; /* XXX */
1182      insn->header.compression_control = BRW_COMPRESSION_NONE;
1183      insn->header.destreg__conditionalmod = msg_reg_nr;
1184
1185      /* Until gen6, writes followed by reads from the same location
1186       * are not guaranteed to be ordered unless write_commit is set.
1187       * If set, then a no-op write is issued to the destination
1188       * register to set a dependency, and a read from the destination
1189       * can be used to ensure the ordering.
1190       *
1191       * For gen6, only writes between different threads need ordering
1192       * protection.  Our use of DP writes is all about register
1193       * spilling within a thread.
1194       */
1195      if (intel->gen >= 6) {
1196	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1197	 send_commit_msg = 0;
1198      } else {
1199	 dest = brw_uw16_grf(0, 0);
1200	 send_commit_msg = 1;
1201      }
1202
1203      brw_set_dest(insn, dest);
1204      brw_set_src0(insn, src);
1205
1206      brw_set_dp_write_message(p->brw,
1207			       insn,
1208			       255, /* binding table index (255=stateless) */
1209			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1210			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1211			       msg_length,
1212			       0, /* pixel scoreboard */
1213			       send_commit_msg, /* response_length */
1214			       0, /* eot */
1215			       send_commit_msg);
1216   }
1217}
1218
1219
1220/**
1221 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1222 * Scratch offset should be a multiple of 64.
1223 * Used for register spilling.
1224 */
1225void brw_dp_READ_16( struct brw_compile *p,
1226		      struct brw_reg dest,
1227		      GLuint scratch_offset )
1228{
1229   GLuint msg_reg_nr = 1;
1230   {
1231      brw_push_insn_state(p);
1232      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1233      brw_set_mask_control(p, BRW_MASK_DISABLE);
1234
1235      /* set message header global offset field (reg 0, element 2) */
1236      brw_MOV(p,
1237	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1238	      brw_imm_d(scratch_offset));
1239
1240      brw_pop_insn_state(p);
1241   }
1242
1243   {
1244      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1245
1246      insn->header.predicate_control = 0; /* XXX */
1247      insn->header.compression_control = BRW_COMPRESSION_NONE;
1248      insn->header.destreg__conditionalmod = msg_reg_nr;
1249
1250      brw_set_dest(insn, dest);	/* UW? */
1251      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1252
1253      brw_set_dp_read_message(p->brw,
1254			      insn,
1255			      255, /* binding table index (255=stateless) */
1256			      BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1257			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1258			      1, /* target cache (render/scratch) */
1259			      1, /* msg_length */
1260			      2, /* response_length */
1261			      0); /* eot */
1262   }
1263}
1264
1265
1266/**
1267 * Read a float[4] vector from the data port Data Cache (const buffer).
1268 * Location (in buffer) should be a multiple of 16.
1269 * Used for fetching shader constants.
1270 * If relAddr is true, we'll do an indirect fetch using the address register.
1271 */
1272void brw_dp_READ_4( struct brw_compile *p,
1273                    struct brw_reg dest,
1274                    GLboolean relAddr,
1275                    GLuint location,
1276                    GLuint bind_table_index )
1277{
1278   /* XXX: relAddr not implemented */
1279   GLuint msg_reg_nr = 1;
1280   {
1281      struct brw_reg b;
1282      brw_push_insn_state(p);
1283      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1284      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1285      brw_set_mask_control(p, BRW_MASK_DISABLE);
1286
1287   /* Setup MRF[1] with location/offset into const buffer */
1288      b = brw_message_reg(msg_reg_nr);
1289      b = retype(b, BRW_REGISTER_TYPE_UD);
1290      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1291       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1292       */
1293      brw_MOV(p, b, brw_imm_ud(location));
1294      brw_pop_insn_state(p);
1295   }
1296
1297   {
1298      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1299
1300      insn->header.predicate_control = BRW_PREDICATE_NONE;
1301      insn->header.compression_control = BRW_COMPRESSION_NONE;
1302      insn->header.destreg__conditionalmod = msg_reg_nr;
1303      insn->header.mask_control = BRW_MASK_DISABLE;
1304
1305      /* cast dest to a uword[8] vector */
1306      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1307
1308      brw_set_dest(insn, dest);
1309      brw_set_src0(insn, brw_null_reg());
1310
1311      brw_set_dp_read_message(p->brw,
1312			      insn,
1313			      bind_table_index,
1314			      0,  /* msg_control (0 means 1 Oword) */
1315			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1316			      0, /* source cache = data cache */
1317			      1, /* msg_length */
1318			      1, /* response_length (1 Oword) */
1319			      0); /* eot */
1320   }
1321}
1322
1323
1324/**
1325 * Read float[4] constant(s) from VS constant buffer.
1326 * For relative addressing, two float[4] constants will be read into 'dest'.
1327 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1328 */
1329void brw_dp_READ_4_vs(struct brw_compile *p,
1330                      struct brw_reg dest,
1331                      GLuint location,
1332                      GLuint bind_table_index)
1333{
1334   struct brw_instruction *insn;
1335   GLuint msg_reg_nr = 1;
1336   struct brw_reg b;
1337
1338   /*
1339   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1340          location, msg_reg_nr);
1341   */
1342
1343   /* Setup MRF[1] with location/offset into const buffer */
1344   brw_push_insn_state(p);
1345   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1346   brw_set_mask_control(p, BRW_MASK_DISABLE);
1347   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1348
1349   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1350    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1351    */
1352   b = brw_message_reg(msg_reg_nr);
1353   b = retype(b, BRW_REGISTER_TYPE_UD);
1354   /*b = get_element_ud(b, 2);*/
1355   brw_MOV(p, b, brw_imm_ud(location));
1356
1357   brw_pop_insn_state(p);
1358
1359   insn = next_insn(p, BRW_OPCODE_SEND);
1360
1361   insn->header.predicate_control = BRW_PREDICATE_NONE;
1362   insn->header.compression_control = BRW_COMPRESSION_NONE;
1363   insn->header.destreg__conditionalmod = msg_reg_nr;
1364   insn->header.mask_control = BRW_MASK_DISABLE;
1365
1366   brw_set_dest(insn, dest);
1367   brw_set_src0(insn, brw_null_reg());
1368
1369   brw_set_dp_read_message(p->brw,
1370			   insn,
1371			   bind_table_index,
1372			   0,
1373			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1374			   0, /* source cache = data cache */
1375			   1, /* msg_length */
1376			   1, /* response_length (1 Oword) */
1377			   0); /* eot */
1378}
1379
1380/**
1381 * Read a float[4] constant per vertex from VS constant buffer, with
1382 * relative addressing.
1383 */
1384void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1385			       struct brw_reg dest,
1386			       struct brw_reg addr_reg,
1387			       GLuint offset,
1388			       GLuint bind_table_index)
1389{
1390   struct intel_context *intel = &p->brw->intel;
1391   int msg_type;
1392
1393   /* Setup MRF[1] with offset into const buffer */
1394   brw_push_insn_state(p);
1395   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1396   brw_set_mask_control(p, BRW_MASK_DISABLE);
1397   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1398
1399   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1400    * fields ignored.
1401    */
1402   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1403	   addr_reg, brw_imm_d(offset));
1404   brw_pop_insn_state(p);
1405
1406   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1407
1408   insn->header.predicate_control = BRW_PREDICATE_NONE;
1409   insn->header.compression_control = BRW_COMPRESSION_NONE;
1410   insn->header.destreg__conditionalmod = 0;
1411   insn->header.mask_control = BRW_MASK_DISABLE;
1412
1413   brw_set_dest(insn, dest);
1414   brw_set_src0(insn, brw_vec8_grf(0, 0));
1415
1416   if (intel->gen == 6)
1417      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1418   else if (intel->gen == 5 || intel->is_g4x)
1419      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1420   else
1421      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1422
1423   brw_set_dp_read_message(p->brw,
1424			   insn,
1425			   bind_table_index,
1426			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1427			   msg_type,
1428			   0, /* source cache = data cache */
1429			   2, /* msg_length */
1430			   1, /* response_length */
1431			   0); /* eot */
1432}
1433
1434
1435
1436void brw_fb_WRITE(struct brw_compile *p,
1437		  int dispatch_width,
1438                  struct brw_reg dest,
1439                  GLuint msg_reg_nr,
1440                  struct brw_reg src0,
1441                  GLuint binding_table_index,
1442                  GLuint msg_length,
1443                  GLuint response_length,
1444                  GLboolean eot)
1445{
1446   struct intel_context *intel = &p->brw->intel;
1447   struct brw_instruction *insn;
1448   GLuint msg_control, msg_type;
1449
1450   insn = next_insn(p, BRW_OPCODE_SEND);
1451   insn->header.predicate_control = 0; /* XXX */
1452   insn->header.compression_control = BRW_COMPRESSION_NONE;
1453
1454   if (intel->gen >= 6) {
1455       /* headerless version, just submit color payload */
1456       src0 = brw_message_reg(msg_reg_nr);
1457
1458       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1459   } else {
1460      insn->header.destreg__conditionalmod = msg_reg_nr;
1461
1462      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1463   }
1464
1465   if (dispatch_width == 16)
1466      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1467   else
1468      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1469
1470   brw_set_dest(insn, dest);
1471   brw_set_src0(insn, src0);
1472   brw_set_dp_write_message(p->brw,
1473			    insn,
1474			    binding_table_index,
1475			    msg_control,
1476			    msg_type,
1477			    msg_length,
1478			    1,	/* pixel scoreboard */
1479			    response_length,
1480			    eot,
1481			    0 /* send_commit_msg */);
1482}
1483
1484
1485/**
1486 * Texture sample instruction.
1487 * Note: the msg_type plus msg_length values determine exactly what kind
1488 * of sampling operation is performed.  See volume 4, page 161 of docs.
1489 */
1490void brw_SAMPLE(struct brw_compile *p,
1491		struct brw_reg dest,
1492		GLuint msg_reg_nr,
1493		struct brw_reg src0,
1494		GLuint binding_table_index,
1495		GLuint sampler,
1496		GLuint writemask,
1497		GLuint msg_type,
1498		GLuint response_length,
1499		GLuint msg_length,
1500		GLboolean eot,
1501		GLuint header_present,
1502		GLuint simd_mode)
1503{
1504   GLboolean need_stall = 0;
1505
1506   if (writemask == 0) {
1507      /*printf("%s: zero writemask??\n", __FUNCTION__); */
1508      return;
1509   }
1510
1511   /* Hardware doesn't do destination dependency checking on send
1512    * instructions properly.  Add a workaround which generates the
1513    * dependency by other means.  In practice it seems like this bug
1514    * only crops up for texture samples, and only where registers are
1515    * written by the send and then written again later without being
1516    * read in between.  Luckily for us, we already track that
1517    * information and use it to modify the writemask for the
1518    * instruction, so that is a guide for whether a workaround is
1519    * needed.
1520    */
1521   if (writemask != WRITEMASK_XYZW) {
1522      GLuint dst_offset = 0;
1523      GLuint i, newmask = 0, len = 0;
1524
1525      for (i = 0; i < 4; i++) {
1526	 if (writemask & (1<<i))
1527	    break;
1528	 dst_offset += 2;
1529      }
1530      for (; i < 4; i++) {
1531	 if (!(writemask & (1<<i)))
1532	    break;
1533	 newmask |= 1<<i;
1534	 len++;
1535      }
1536
1537      if (newmask != writemask) {
1538	 need_stall = 1;
1539         /* printf("need stall %x %x\n", newmask , writemask); */
1540      }
1541      else {
1542	 GLboolean dispatch_16 = GL_FALSE;
1543
1544	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1545
1546	 guess_execution_size(p->current, dest);
1547	 if (p->current->header.execution_size == BRW_EXECUTE_16)
1548	    dispatch_16 = GL_TRUE;
1549
1550	 newmask = ~newmask & WRITEMASK_XYZW;
1551
1552	 brw_push_insn_state(p);
1553
1554	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1555	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1556
1557	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1558  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1559
1560	 brw_pop_insn_state(p);
1561
1562  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1563	 dest = offset(dest, dst_offset);
1564
1565	 /* For 16-wide dispatch, masked channels are skipped in the
1566	  * response.  For 8-wide, masked channels still take up slots,
1567	  * and are just not written to.
1568	  */
1569	 if (dispatch_16)
1570	    response_length = len * 2;
1571      }
1572   }
1573
1574   {
1575      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1576
1577      insn->header.predicate_control = 0; /* XXX */
1578      insn->header.compression_control = BRW_COMPRESSION_NONE;
1579      insn->header.destreg__conditionalmod = msg_reg_nr;
1580
1581      brw_set_dest(insn, dest);
1582      brw_set_src0(insn, src0);
1583      brw_set_sampler_message(p->brw, insn,
1584			      binding_table_index,
1585			      sampler,
1586			      msg_type,
1587			      response_length,
1588			      msg_length,
1589			      eot,
1590			      header_present,
1591			      simd_mode);
1592   }
1593
1594   if (need_stall) {
1595      struct brw_reg reg = vec8(offset(dest, response_length-1));
1596
1597      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1598       */
1599      brw_push_insn_state(p);
1600      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1601      brw_MOV(p, reg, reg);
1602      brw_pop_insn_state(p);
1603   }
1604
1605}
1606
1607/* All these variables are pretty confusing - we might be better off
1608 * using bitmasks and macros for this, in the old style.  Or perhaps
1609 * just having the caller instantiate the fields in dword3 itself.
1610 */
1611void brw_urb_WRITE(struct brw_compile *p,
1612		   struct brw_reg dest,
1613		   GLuint msg_reg_nr,
1614		   struct brw_reg src0,
1615		   GLboolean allocate,
1616		   GLboolean used,
1617		   GLuint msg_length,
1618		   GLuint response_length,
1619		   GLboolean eot,
1620		   GLboolean writes_complete,
1621		   GLuint offset,
1622		   GLuint swizzle)
1623{
1624   struct intel_context *intel = &p->brw->intel;
1625   struct brw_instruction *insn;
1626
1627   /* Sandybridge doesn't have the implied move for SENDs,
1628    * and the first message register index comes from src0.
1629    */
1630   if (intel->gen >= 6) {
1631      brw_push_insn_state(p);
1632      brw_set_mask_control( p, BRW_MASK_DISABLE );
1633      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1634      brw_pop_insn_state(p);
1635      src0 = brw_message_reg(msg_reg_nr);
1636   }
1637
1638   insn = next_insn(p, BRW_OPCODE_SEND);
1639
1640   assert(msg_length < BRW_MAX_MRF);
1641
1642   brw_set_dest(insn, dest);
1643   brw_set_src0(insn, src0);
1644   brw_set_src1(insn, brw_imm_d(0));
1645
1646   if (intel->gen < 6)
1647      insn->header.destreg__conditionalmod = msg_reg_nr;
1648
1649   brw_set_urb_message(p->brw,
1650		       insn,
1651		       allocate,
1652		       used,
1653		       msg_length,
1654		       response_length,
1655		       eot,
1656		       writes_complete,
1657		       offset,
1658		       swizzle);
1659}
1660
1661void brw_ff_sync(struct brw_compile *p,
1662		   struct brw_reg dest,
1663		   GLuint msg_reg_nr,
1664		   struct brw_reg src0,
1665		   GLboolean allocate,
1666		   GLuint response_length,
1667		   GLboolean eot)
1668{
1669   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1670
1671   brw_set_dest(insn, dest);
1672   brw_set_src0(insn, src0);
1673   brw_set_src1(insn, brw_imm_d(0));
1674
1675   insn->header.destreg__conditionalmod = msg_reg_nr;
1676
1677   brw_set_ff_sync_message(p->brw,
1678			   insn,
1679			   allocate,
1680			   response_length,
1681			   eot);
1682}
1683