brw_eu_emit.c revision ea909be58dda7e916cb9ce434ecb78597881ad33
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59       dest.file != BRW_MESSAGE_REGISTER_FILE)
60      assert(dest.nr < 128);
61
62   insn->bits1.da1.dest_reg_file = dest.file;
63   insn->bits1.da1.dest_reg_type = dest.type;
64   insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67      insn->bits1.da1.dest_reg_nr = dest.nr;
68
69      if (insn->header.access_mode == BRW_ALIGN_1) {
70	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74      }
75      else {
76	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78	 /* even though it's ignored in da16, this still has to be set to '01' */
79	 insn->bits1.da16.dest_horiz_stride = 1;
80      }
81   }
82   else {
83      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85      /* These are different sizes in align1 vs align16:
86       */
87      if (insn->header.access_mode == BRW_ALIGN_1) {
88	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92      }
93      else {
94	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95	 /* even though it's ignored in ia16, this still has to be set to '01' */
96	 insn->bits1.ia16.dest_horiz_stride = 1;
97      }
98   }
99
100   /* Set the execution size based on dest.width and
101    * insn->header.compression_control:
102    */
103   guess_execution_size(insn, dest);
104}
105
106extern int reg_type_size[];
107
108static void
109validate_reg(struct brw_instruction *insn, struct brw_reg reg)
110{
111   int hstride_for_reg[] = {0, 1, 2, 4};
112   int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
113   int width_for_reg[] = {1, 2, 4, 8, 16};
114   int execsize_for_reg[] = {1, 2, 4, 8, 16};
115   int width, hstride, vstride, execsize;
116
117   if (reg.file == BRW_IMMEDIATE_VALUE) {
118      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
119       * mean the destination has to be 128-bit aligned and the
120       * destination horiz stride has to be a word.
121       */
122      if (reg.type == BRW_REGISTER_TYPE_V) {
123	 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
124		reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
125      }
126
127      return;
128   }
129
130   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
131       reg.nr == BRW_ARF_NULL)
132      return;
133
134   assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
135   hstride = hstride_for_reg[reg.hstride];
136
137   if (reg.vstride == 0xf) {
138      vstride = -1;
139   } else {
140      assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
141      vstride = vstride_for_reg[reg.vstride];
142   }
143
144   assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
145   width = width_for_reg[reg.width];
146
147   assert(insn->header.execution_size >= 0 &&
148	  insn->header.execution_size < Elements(execsize_for_reg));
149   execsize = execsize_for_reg[insn->header.execution_size];
150
151   /* Restrictions from 3.3.10: Register Region Restrictions. */
152   /* 3. */
153   assert(execsize >= width);
154
155   /* 4. */
156   if (execsize == width && hstride != 0) {
157      assert(vstride == -1 || vstride == width * hstride);
158   }
159
160   /* 5. */
161   if (execsize == width && hstride == 0) {
162      /* no restriction on vstride. */
163   }
164
165   /* 6. */
166   if (width == 1) {
167      assert(hstride == 0);
168   }
169
170   /* 7. */
171   if (execsize == 1 && width == 1) {
172      assert(hstride == 0);
173      assert(vstride == 0);
174   }
175
176   /* 8. */
177   if (vstride == 0 && hstride == 0) {
178      assert(width == 1);
179   }
180
181   /* 10. Check destination issues. */
182}
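
/* Worked example (illustrative; not part of the original file): a region
 * such as g2.0<8;8,1>:f -- vstride 8, width 8, hstride 1, as produced by
 * brw_vec8_grf(2, 0) -- used at execution size 8 satisfies restriction 3
 * (execsize >= width) and restriction 4 (vstride == width * hstride), so
 * validate_reg() accepts it.
 */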
183
184static void brw_set_src0( struct brw_instruction *insn,
185                          struct brw_reg reg )
186{
187   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
188      assert(reg.nr < 128);
189
190   validate_reg(insn, reg);
191
192   insn->bits1.da1.src0_reg_file = reg.file;
193   insn->bits1.da1.src0_reg_type = reg.type;
194   insn->bits2.da1.src0_abs = reg.abs;
195   insn->bits2.da1.src0_negate = reg.negate;
196   insn->bits2.da1.src0_address_mode = reg.address_mode;
197
198   if (reg.file == BRW_IMMEDIATE_VALUE) {
199      insn->bits3.ud = reg.dw1.ud;
200
201      /* Required to set some fields in src1 as well:
202       */
203      insn->bits1.da1.src1_reg_file = 0; /* arf */
204      insn->bits1.da1.src1_reg_type = reg.type;
205   }
206   else
207   {
208      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
209	 if (insn->header.access_mode == BRW_ALIGN_1) {
210	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
211	    insn->bits2.da1.src0_reg_nr = reg.nr;
212	 }
213	 else {
214	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
215	    insn->bits2.da16.src0_reg_nr = reg.nr;
216	 }
217      }
218      else {
219	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
220
221	 if (insn->header.access_mode == BRW_ALIGN_1) {
222	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
223	 }
224	 else {
225	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
226	 }
227      }
228
229      if (insn->header.access_mode == BRW_ALIGN_1) {
230	 if (reg.width == BRW_WIDTH_1 &&
231	     insn->header.execution_size == BRW_EXECUTE_1) {
232	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
233	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
234	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
235	 }
236	 else {
237	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
238	    insn->bits2.da1.src0_width = reg.width;
239	    insn->bits2.da1.src0_vert_stride = reg.vstride;
240	 }
241      }
242      else {
243	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
244	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
245	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
246	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
247
248	 /* This is an oddity arising from the fact that we use the same
249	  * register descriptions in align_16 as in align_1:
250	  */
251	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
252	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
253	 else
254	    insn->bits2.da16.src0_vert_stride = reg.vstride;
255      }
256   }
257}
258
259
260void brw_set_src1( struct brw_instruction *insn,
261                   struct brw_reg reg )
262{
263   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
264
265   assert(reg.nr < 128);
266
267   validate_reg(insn, reg);
268
269   insn->bits1.da1.src1_reg_file = reg.file;
270   insn->bits1.da1.src1_reg_type = reg.type;
271   insn->bits3.da1.src1_abs = reg.abs;
272   insn->bits3.da1.src1_negate = reg.negate;
273
274   /* Only src1 can be immediate in two-argument instructions.
275    */
276   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
277
278   if (reg.file == BRW_IMMEDIATE_VALUE) {
279      insn->bits3.ud = reg.dw1.ud;
280   }
281   else {
282      /* This is a hardware restriction, which may or may not be lifted
283       * in the future:
284       */
285      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
286      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
287
288      if (insn->header.access_mode == BRW_ALIGN_1) {
289	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
290	 insn->bits3.da1.src1_reg_nr = reg.nr;
291      }
292      else {
293	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
294	 insn->bits3.da16.src1_reg_nr = reg.nr;
295      }
296
297      if (insn->header.access_mode == BRW_ALIGN_1) {
298	 if (reg.width == BRW_WIDTH_1 &&
299	     insn->header.execution_size == BRW_EXECUTE_1) {
300	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
301	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
302	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
303	 }
304	 else {
305	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
306	    insn->bits3.da1.src1_width = reg.width;
307	    insn->bits3.da1.src1_vert_stride = reg.vstride;
308	 }
309      }
310      else {
311	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
312	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
313	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
314	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
315
316	 /* This is an oddity arising from the fact that we use the same
317	  * register descriptions in align_16 as in align_1:
318	  */
319	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
320	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
321	 else
322	    insn->bits3.da16.src1_vert_stride = reg.vstride;
323      }
324   }
325}
326
327
328
329static void brw_set_math_message( struct brw_context *brw,
330				  struct brw_instruction *insn,
331				  GLuint msg_length,
332				  GLuint response_length,
333				  GLuint function,
334				  GLuint integer_type,
335				  GLboolean low_precision,
336				  GLboolean saturate,
337				  GLuint dataType )
338{
339   struct intel_context *intel = &brw->intel;
340   brw_set_src1(insn, brw_imm_d(0));
341
342   if (intel->gen == 5) {
343       insn->bits3.math_gen5.function = function;
344       insn->bits3.math_gen5.int_type = integer_type;
345       insn->bits3.math_gen5.precision = low_precision;
346       insn->bits3.math_gen5.saturate = saturate;
347       insn->bits3.math_gen5.data_type = dataType;
348       insn->bits3.math_gen5.snapshot = 0;
349       insn->bits3.math_gen5.header_present = 0;
350       insn->bits3.math_gen5.response_length = response_length;
351       insn->bits3.math_gen5.msg_length = msg_length;
352       insn->bits3.math_gen5.end_of_thread = 0;
353       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
354       insn->bits2.send_gen5.end_of_thread = 0;
355   } else {
356       insn->bits3.math.function = function;
357       insn->bits3.math.int_type = integer_type;
358       insn->bits3.math.precision = low_precision;
359       insn->bits3.math.saturate = saturate;
360       insn->bits3.math.data_type = dataType;
361       insn->bits3.math.response_length = response_length;
362       insn->bits3.math.msg_length = msg_length;
363       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
364       insn->bits3.math.end_of_thread = 0;
365   }
366}
367
368
369static void brw_set_ff_sync_message(struct brw_context *brw,
370				    struct brw_instruction *insn,
371				    GLboolean allocate,
372				    GLuint response_length,
373				    GLboolean end_of_thread)
374{
375	struct intel_context *intel = &brw->intel;
376	brw_set_src1(insn, brw_imm_d(0));
377
378	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
379	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
380	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
381	insn->bits3.urb_gen5.allocate = allocate;
382	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
383	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
384	insn->bits3.urb_gen5.header_present = 1;
385	insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
386	insn->bits3.urb_gen5.msg_length = 1;
387	insn->bits3.urb_gen5.end_of_thread = end_of_thread;
388	if (intel->gen >= 6) {
389	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
390	} else {
391	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
392	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
393	}
394}
395
396static void brw_set_urb_message( struct brw_context *brw,
397				 struct brw_instruction *insn,
398				 GLboolean allocate,
399				 GLboolean used,
400				 GLuint msg_length,
401				 GLuint response_length,
402				 GLboolean end_of_thread,
403				 GLboolean complete,
404				 GLuint offset,
405				 GLuint swizzle_control )
406{
407    struct intel_context *intel = &brw->intel;
408    brw_set_src1(insn, brw_imm_d(0));
409
410    if (intel->gen >= 5) {
411        insn->bits3.urb_gen5.opcode = 0;	/* ? */
412        insn->bits3.urb_gen5.offset = offset;
413        insn->bits3.urb_gen5.swizzle_control = swizzle_control;
414        insn->bits3.urb_gen5.allocate = allocate;
415        insn->bits3.urb_gen5.used = used;	/* ? */
416        insn->bits3.urb_gen5.complete = complete;
417        insn->bits3.urb_gen5.header_present = 1;
418        insn->bits3.urb_gen5.response_length = response_length;
419        insn->bits3.urb_gen5.msg_length = msg_length;
420        insn->bits3.urb_gen5.end_of_thread = end_of_thread;
421	if (intel->gen >= 6) {
422	   /* For SNB, the SFID bits moved to the condmod bits, and
423	    * EOT stayed in bits3 above.  Does the EOT bit setting
424	    * below on Ironlake even do anything?
425	    */
426	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
427	} else {
428	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
429	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
430	}
431    } else {
432        insn->bits3.urb.opcode = 0;	/* ? */
433        insn->bits3.urb.offset = offset;
434        insn->bits3.urb.swizzle_control = swizzle_control;
435        insn->bits3.urb.allocate = allocate;
436        insn->bits3.urb.used = used;	/* ? */
437        insn->bits3.urb.complete = complete;
438        insn->bits3.urb.response_length = response_length;
439        insn->bits3.urb.msg_length = msg_length;
440        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
441        insn->bits3.urb.end_of_thread = end_of_thread;
442    }
443}
444
445static void brw_set_dp_write_message( struct brw_context *brw,
446				      struct brw_instruction *insn,
447				      GLuint binding_table_index,
448				      GLuint msg_control,
449				      GLuint msg_type,
450				      GLuint msg_length,
451				      GLboolean header_present,
452				      GLuint pixel_scoreboard_clear,
453				      GLuint response_length,
454				      GLuint end_of_thread,
455				      GLuint send_commit_msg)
456{
457   struct intel_context *intel = &brw->intel;
458   brw_set_src1(insn, brw_imm_ud(0));
459
460   if (intel->gen >= 6) {
461       insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
462       insn->bits3.dp_render_cache.msg_control = msg_control;
463       insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
464       insn->bits3.dp_render_cache.msg_type = msg_type;
465       insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
466       insn->bits3.dp_render_cache.header_present = header_present;
467       insn->bits3.dp_render_cache.response_length = response_length;
468       insn->bits3.dp_render_cache.msg_length = msg_length;
469       insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
470       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
471	/* XXX really need below? */
472       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
473       insn->bits2.send_gen5.end_of_thread = end_of_thread;
474   } else if (intel->gen == 5) {
475       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
476       insn->bits3.dp_write_gen5.msg_control = msg_control;
477       insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
478       insn->bits3.dp_write_gen5.msg_type = msg_type;
479       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
480       insn->bits3.dp_write_gen5.header_present = header_present;
481       insn->bits3.dp_write_gen5.response_length = response_length;
482       insn->bits3.dp_write_gen5.msg_length = msg_length;
483       insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
484       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
485       insn->bits2.send_gen5.end_of_thread = end_of_thread;
486   } else {
487       insn->bits3.dp_write.binding_table_index = binding_table_index;
488       insn->bits3.dp_write.msg_control = msg_control;
489       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
490       insn->bits3.dp_write.msg_type = msg_type;
491       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
492       insn->bits3.dp_write.response_length = response_length;
493       insn->bits3.dp_write.msg_length = msg_length;
494       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
495       insn->bits3.dp_write.end_of_thread = end_of_thread;
496   }
497}
498
499static void brw_set_dp_read_message( struct brw_context *brw,
500				      struct brw_instruction *insn,
501				      GLuint binding_table_index,
502				      GLuint msg_control,
503				      GLuint msg_type,
504				      GLuint target_cache,
505				      GLuint msg_length,
506				      GLuint response_length,
507				      GLuint end_of_thread )
508{
509   struct intel_context *intel = &brw->intel;
510   brw_set_src1(insn, brw_imm_d(0));
511
512   if (intel->gen == 5) {
513       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
514       insn->bits3.dp_read_gen5.msg_control = msg_control;
515       insn->bits3.dp_read_gen5.msg_type = msg_type;
516       insn->bits3.dp_read_gen5.target_cache = target_cache;
517       insn->bits3.dp_read_gen5.header_present = 1;
518       insn->bits3.dp_read_gen5.response_length = response_length;
519       insn->bits3.dp_read_gen5.msg_length = msg_length;
520       insn->bits3.dp_read_gen5.pad1 = 0;
521       insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
522       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
523       insn->bits2.send_gen5.end_of_thread = end_of_thread;
524   } else {
525       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
526       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
527       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
528       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
529       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
530       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
531       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
532       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
533       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
534   }
535}
536
537static void brw_set_sampler_message(struct brw_context *brw,
538                                    struct brw_instruction *insn,
539                                    GLuint binding_table_index,
540                                    GLuint sampler,
541                                    GLuint msg_type,
542                                    GLuint response_length,
543                                    GLuint msg_length,
544                                    GLboolean eot,
545                                    GLuint header_present,
546                                    GLuint simd_mode)
547{
548   struct intel_context *intel = &brw->intel;
549   assert(eot == 0);
550   brw_set_src1(insn, brw_imm_d(0));
551
552   if (intel->gen >= 5) {
553      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
554      insn->bits3.sampler_gen5.sampler = sampler;
555      insn->bits3.sampler_gen5.msg_type = msg_type;
556      insn->bits3.sampler_gen5.simd_mode = simd_mode;
557      insn->bits3.sampler_gen5.header_present = header_present;
558      insn->bits3.sampler_gen5.response_length = response_length;
559      insn->bits3.sampler_gen5.msg_length = msg_length;
560      insn->bits3.sampler_gen5.end_of_thread = eot;
561      if (intel->gen >= 6)
562	  insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
563      else {
564	  insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
565	  insn->bits2.send_gen5.end_of_thread = eot;
566      }
567   } else if (intel->is_g4x) {
568      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
569      insn->bits3.sampler_g4x.sampler = sampler;
570      insn->bits3.sampler_g4x.msg_type = msg_type;
571      insn->bits3.sampler_g4x.response_length = response_length;
572      insn->bits3.sampler_g4x.msg_length = msg_length;
573      insn->bits3.sampler_g4x.end_of_thread = eot;
574      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
575   } else {
576      insn->bits3.sampler.binding_table_index = binding_table_index;
577      insn->bits3.sampler.sampler = sampler;
578      insn->bits3.sampler.msg_type = msg_type;
579      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
580      insn->bits3.sampler.response_length = response_length;
581      insn->bits3.sampler.msg_length = msg_length;
582      insn->bits3.sampler.end_of_thread = eot;
583      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
584   }
585}
586
587
588
589static struct brw_instruction *next_insn( struct brw_compile *p,
590					  GLuint opcode )
591{
592   struct brw_instruction *insn;
593
594   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
595
596   insn = &p->store[p->nr_insn++];
597   memcpy(insn, p->current, sizeof(*insn));
598
599   /* Reset this one-shot flag:
600    */
601
602   if (p->current->header.destreg__conditionalmod) {
603      p->current->header.destreg__conditionalmod = 0;
604      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
605   }
606
607   insn->header.opcode = opcode;
608   return insn;
609}
610
611
612static struct brw_instruction *brw_alu1( struct brw_compile *p,
613					 GLuint opcode,
614					 struct brw_reg dest,
615					 struct brw_reg src )
616{
617   struct brw_instruction *insn = next_insn(p, opcode);
618   brw_set_dest(insn, dest);
619   brw_set_src0(insn, src);
620   return insn;
621}
622
623static struct brw_instruction *brw_alu2(struct brw_compile *p,
624					GLuint opcode,
625					struct brw_reg dest,
626					struct brw_reg src0,
627					struct brw_reg src1 )
628{
629   struct brw_instruction *insn = next_insn(p, opcode);
630   brw_set_dest(insn, dest);
631   brw_set_src0(insn, src0);
632   brw_set_src1(insn, src1);
633   return insn;
634}
635
636
637/***********************************************************************
638 * Convenience routines.
639 */
640#define ALU1(OP)					\
641struct brw_instruction *brw_##OP(struct brw_compile *p,	\
642	      struct brw_reg dest,			\
643	      struct brw_reg src0)   			\
644{							\
645   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
646}
647
648#define ALU2(OP)					\
649struct brw_instruction *brw_##OP(struct brw_compile *p,	\
650	      struct brw_reg dest,			\
651	      struct brw_reg src0,			\
652	      struct brw_reg src1)   			\
653{							\
654   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
655}
656
657
658ALU1(MOV)
659ALU2(SEL)
660ALU1(NOT)
661ALU2(AND)
662ALU2(OR)
663ALU2(XOR)
664ALU2(SHR)
665ALU2(SHL)
666ALU2(RSR)
667ALU2(RSL)
668ALU2(ASR)
669ALU1(FRC)
670ALU1(RNDD)
671ALU1(RNDZ)
672ALU2(MAC)
673ALU2(MACH)
674ALU1(LZD)
675ALU2(DP4)
676ALU2(DPH)
677ALU2(DP3)
678ALU2(DP2)
679ALU2(LINE)
680ALU2(PLN)
681
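/* Illustrative note (not part of the original file): ALU1(MOV) above expands
 * to
 *
 *    struct brw_instruction *brw_MOV(struct brw_compile *p,
 *                                    struct brw_reg dest,
 *                                    struct brw_reg src0)
 *    {
 *       return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
 *    }
 *
 * i.e. each invocation defines one thin per-opcode wrapper around brw_alu1()
 * or brw_alu2().
 */
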
682struct brw_instruction *brw_ADD(struct brw_compile *p,
683				struct brw_reg dest,
684				struct brw_reg src0,
685				struct brw_reg src1)
686{
687   /* 6.2.2: add */
688   if (src0.type == BRW_REGISTER_TYPE_F ||
689       (src0.file == BRW_IMMEDIATE_VALUE &&
690	src0.type == BRW_REGISTER_TYPE_VF)) {
691      assert(src1.type != BRW_REGISTER_TYPE_UD);
692      assert(src1.type != BRW_REGISTER_TYPE_D);
693   }
694
695   if (src1.type == BRW_REGISTER_TYPE_F ||
696       (src1.file == BRW_IMMEDIATE_VALUE &&
697	src1.type == BRW_REGISTER_TYPE_VF)) {
698      assert(src0.type != BRW_REGISTER_TYPE_UD);
699      assert(src0.type != BRW_REGISTER_TYPE_D);
700   }
701
702   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
703}
704
705struct brw_instruction *brw_MUL(struct brw_compile *p,
706				struct brw_reg dest,
707				struct brw_reg src0,
708				struct brw_reg src1)
709{
710   /* 6.32.38: mul */
711   if (src0.type == BRW_REGISTER_TYPE_D ||
712       src0.type == BRW_REGISTER_TYPE_UD ||
713       src1.type == BRW_REGISTER_TYPE_D ||
714       src1.type == BRW_REGISTER_TYPE_UD) {
715      assert(dest.type != BRW_REGISTER_TYPE_F);
716   }
717
718   if (src0.type == BRW_REGISTER_TYPE_F ||
719       (src0.file == BRW_IMMEDIATE_VALUE &&
720	src0.type == BRW_REGISTER_TYPE_VF)) {
721      assert(src1.type != BRW_REGISTER_TYPE_UD);
722      assert(src1.type != BRW_REGISTER_TYPE_D);
723   }
724
725   if (src1.type == BRW_REGISTER_TYPE_F ||
726       (src1.file == BRW_IMMEDIATE_VALUE &&
727	src1.type == BRW_REGISTER_TYPE_VF)) {
728      assert(src0.type != BRW_REGISTER_TYPE_UD);
729      assert(src0.type != BRW_REGISTER_TYPE_D);
730   }
731
732   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
733	  src0.nr != BRW_ARF_ACCUMULATOR);
734   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
735	  src1.nr != BRW_ARF_ACCUMULATOR);
736
737   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
738}
739
740
741void brw_NOP(struct brw_compile *p)
742{
743   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
744   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
745   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
746   brw_set_src1(insn, brw_imm_ud(0x0));
747}
748
749
750
751
752
753/***********************************************************************
754 * Comparisons, if/else/endif
755 */
756
757struct brw_instruction *brw_JMPI(struct brw_compile *p,
758                                 struct brw_reg dest,
759                                 struct brw_reg src0,
760                                 struct brw_reg src1)
761{
762   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
763
764   insn->header.execution_size = 1;
765   insn->header.compression_control = BRW_COMPRESSION_NONE;
766   insn->header.mask_control = BRW_MASK_DISABLE;
767
768   p->current->header.predicate_control = BRW_PREDICATE_NONE;
769
770   return insn;
771}
772
773/* EU takes the value from the flag register and pushes it onto some
774 * sort of a stack (presumably merging with any flag value already on
775 * the stack).  Within an if block, the flags at the top of the stack
776  * control execution on each channel of the unit, e.g. on each of the
777 * 16 pixel values in our wm programs.
778 *
779 * When the matching 'else' instruction is reached (presumably by
780 * countdown of the instruction count patched in by our ELSE/ENDIF
781  * functions), the relevant flags are inverted.
782 *
783 * When the matching 'endif' instruction is reached, the flags are
784 * popped off.  If the stack is now empty, normal execution resumes.
785 *
786 * No attempt is made to deal with stack overflow (14 elements?).
787 */
788struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
789{
790   struct brw_instruction *insn;
791
792   if (p->single_program_flow) {
793      assert(execute_size == BRW_EXECUTE_1);
794
795      insn = next_insn(p, BRW_OPCODE_ADD);
796      insn->header.predicate_inverse = 1;
797   } else {
798      insn = next_insn(p, BRW_OPCODE_IF);
799   }
800
801   /* Override the defaults for this instruction:
802    */
803   brw_set_dest(insn, brw_ip_reg());
804   brw_set_src0(insn, brw_ip_reg());
805   brw_set_src1(insn, brw_imm_d(0x0));
806
807   insn->header.execution_size = execute_size;
808   insn->header.compression_control = BRW_COMPRESSION_NONE;
809   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
810   insn->header.mask_control = BRW_MASK_ENABLE;
811   if (!p->single_program_flow)
812       insn->header.thread_control = BRW_THREAD_SWITCH;
813
814   p->current->header.predicate_control = BRW_PREDICATE_NONE;
815
816   return insn;
817}
818
819
820struct brw_instruction *brw_ELSE(struct brw_compile *p,
821				 struct brw_instruction *if_insn)
822{
823   struct intel_context *intel = &p->brw->intel;
824   struct brw_instruction *insn;
825   GLuint br = 1;
826
827   /* The jump count is in units of 64-bit chunks, so one 128-bit
828      instruction counts as 2 chunks. */
829   if (intel->gen >= 5)
830      br = 2;
831
832   if (p->single_program_flow) {
833      insn = next_insn(p, BRW_OPCODE_ADD);
834   } else {
835      insn = next_insn(p, BRW_OPCODE_ELSE);
836   }
837
838   brw_set_dest(insn, brw_ip_reg());
839   brw_set_src0(insn, brw_ip_reg());
840   brw_set_src1(insn, brw_imm_d(0x0));
841
842   insn->header.compression_control = BRW_COMPRESSION_NONE;
843   insn->header.execution_size = if_insn->header.execution_size;
844   insn->header.mask_control = BRW_MASK_ENABLE;
845   if (!p->single_program_flow)
846       insn->header.thread_control = BRW_THREAD_SWITCH;
847
848   /* Patch the if instruction to point at this instruction.
849    */
850   if (p->single_program_flow) {
851      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
852
853      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
854   } else {
855      assert(if_insn->header.opcode == BRW_OPCODE_IF);
856
857      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
858      if_insn->bits3.if_else.pop_count = 0;
859      if_insn->bits3.if_else.pad0 = 0;
860   }
861
862   return insn;
863}
864
865void brw_ENDIF(struct brw_compile *p,
866	       struct brw_instruction *patch_insn)
867{
868   struct intel_context *intel = &p->brw->intel;
869   GLuint br = 1;
870
871   if (intel->gen >= 5)
872      br = 2;
873
874   if (p->single_program_flow) {
875      /* In single program flow mode, there's no need to execute an ENDIF,
876       * since we don't need to do any stack operations, and if we're executing
877       * currently, we want to just continue executing.
878       */
879      struct brw_instruction *next = &p->store[p->nr_insn];
880
881      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
882
883      patch_insn->bits3.ud = (next - patch_insn) * 16;
884   } else {
885      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
886
887      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
888      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
889      brw_set_src1(insn, brw_imm_d(0x0));
890
891      insn->header.compression_control = BRW_COMPRESSION_NONE;
892      insn->header.execution_size = patch_insn->header.execution_size;
893      insn->header.mask_control = BRW_MASK_ENABLE;
894      insn->header.thread_control = BRW_THREAD_SWITCH;
895
896      assert(patch_insn->bits3.if_else.jump_count == 0);
897
898      /* Patch the if or else instructions to point at this or the next
899       * instruction respectively.
900       */
901      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
902	 /* Automagically turn it into an IFF:
903	  */
904	 patch_insn->header.opcode = BRW_OPCODE_IFF;
905	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
906	 patch_insn->bits3.if_else.pop_count = 0;
907	 patch_insn->bits3.if_else.pad0 = 0;
908      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
909	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
910	 patch_insn->bits3.if_else.pop_count = 1;
911	 patch_insn->bits3.if_else.pad0 = 0;
912      } else {
913	 assert(0);
914      }
915
916      /* Also pop item off the stack in the endif instruction:
917       */
918      insn->bits3.if_else.jump_count = 0;
919      insn->bits3.if_else.pop_count = 1;
920      insn->bits3.if_else.pad0 = 0;
921   }
922}
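
/* Illustrative usage sketch, not part of the original file: emitting an
 * if/else/endif around two MOVs with the helpers above.  The destination
 * register (g4) and the execution size are hypothetical.
 */
#if 0
static void example_if_else(struct brw_compile *p)
{
   struct brw_instruction *if_insn, *else_insn;

   if_insn = brw_IF(p, BRW_EXECUTE_8);   /* predicated on the current flag */
   brw_MOV(p, brw_vec8_grf(4, 0), brw_imm_f(1.0));
   else_insn = brw_ELSE(p, if_insn);     /* patches if_insn's jump count */
   brw_MOV(p, brw_vec8_grf(4, 0), brw_imm_f(0.0));
   brw_ENDIF(p, else_insn);              /* patches else_insn, pops the stack */
}
#endif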
923
924struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
925{
926   struct brw_instruction *insn;
927   insn = next_insn(p, BRW_OPCODE_BREAK);
928   brw_set_dest(insn, brw_ip_reg());
929   brw_set_src0(insn, brw_ip_reg());
930   brw_set_src1(insn, brw_imm_d(0x0));
931   insn->header.compression_control = BRW_COMPRESSION_NONE;
932   insn->header.execution_size = BRW_EXECUTE_8;
933   /* insn->header.mask_control = BRW_MASK_DISABLE; */
934   insn->bits3.if_else.pad0 = 0;
935   insn->bits3.if_else.pop_count = pop_count;
936   return insn;
937}
938
939struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
940{
941   struct brw_instruction *insn;
942   insn = next_insn(p, BRW_OPCODE_CONTINUE);
943   brw_set_dest(insn, brw_ip_reg());
944   brw_set_src0(insn, brw_ip_reg());
945   brw_set_src1(insn, brw_imm_d(0x0));
946   insn->header.compression_control = BRW_COMPRESSION_NONE;
947   insn->header.execution_size = BRW_EXECUTE_8;
948   /* insn->header.mask_control = BRW_MASK_DISABLE; */
949   insn->bits3.if_else.pad0 = 0;
950   insn->bits3.if_else.pop_count = pop_count;
951   return insn;
952}
953
954/* DO/WHILE loop:
955 */
956struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
957{
958   if (p->single_program_flow) {
959      return &p->store[p->nr_insn];
960   } else {
961      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
962
963      /* Override the defaults for this instruction:
964       */
965      brw_set_dest(insn, brw_null_reg());
966      brw_set_src0(insn, brw_null_reg());
967      brw_set_src1(insn, brw_null_reg());
968
969      insn->header.compression_control = BRW_COMPRESSION_NONE;
970      insn->header.execution_size = execute_size;
971      insn->header.predicate_control = BRW_PREDICATE_NONE;
972      /* insn->header.mask_control = BRW_MASK_ENABLE; */
973      /* insn->header.mask_control = BRW_MASK_DISABLE; */
974
975      return insn;
976   }
977}
978
979
980
981struct brw_instruction *brw_WHILE(struct brw_compile *p,
982                                  struct brw_instruction *do_insn)
983{
984   struct intel_context *intel = &p->brw->intel;
985   struct brw_instruction *insn;
986   GLuint br = 1;
987
988   if (intel->gen >= 5)
989      br = 2;
990
991   if (p->single_program_flow)
992      insn = next_insn(p, BRW_OPCODE_ADD);
993   else
994      insn = next_insn(p, BRW_OPCODE_WHILE);
995
996   brw_set_dest(insn, brw_ip_reg());
997   brw_set_src0(insn, brw_ip_reg());
998   brw_set_src1(insn, brw_imm_d(0x0));
999
1000   insn->header.compression_control = BRW_COMPRESSION_NONE;
1001
1002   if (p->single_program_flow) {
1003      insn->header.execution_size = BRW_EXECUTE_1;
1004
1005      insn->bits3.d = (do_insn - insn) * 16;
1006   } else {
1007      insn->header.execution_size = do_insn->header.execution_size;
1008
1009      assert(do_insn->header.opcode == BRW_OPCODE_DO);
1010      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1011      insn->bits3.if_else.pop_count = 0;
1012      insn->bits3.if_else.pad0 = 0;
1013   }
1014
1015/*    insn->header.mask_control = BRW_MASK_ENABLE; */
1016
1017   /* insn->header.mask_control = BRW_MASK_DISABLE; */
1018   p->current->header.predicate_control = BRW_PREDICATE_NONE;
1019   return insn;
1020}
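
/* Illustrative usage sketch, not part of the original file: the skeleton of
 * a predicated loop.  The loop body is hypothetical; something (typically a
 * CMP into the flag register, see brw_CMP() further down) has to produce the
 * predicate the WHILE consumes.
 */
#if 0
static void example_loop(struct brw_compile *p)
{
   struct brw_instruction *do_insn;

   do_insn = brw_DO(p, BRW_EXECUTE_8);
   /* ... loop body, ending with a comparison that sets the flag ... */
   brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
   brw_WHILE(p, do_insn);   /* predicated backwards jump to do_insn */
}
#endif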
1021
1022
1023/* FORWARD JUMPS:
1024 */
1025void brw_land_fwd_jump(struct brw_compile *p,
1026		       struct brw_instruction *jmp_insn)
1027{
1028   struct intel_context *intel = &p->brw->intel;
1029   struct brw_instruction *landing = &p->store[p->nr_insn];
1030   GLuint jmpi = 1;
1031
1032   if (intel->gen >= 5)
1033       jmpi = 2;
1034
1035   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1036   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1037
1038   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1039}
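
/* Illustrative usage sketch, not part of the original file: a forward jump
 * over a block of instructions, patched once the landing point is known.
 * Any predicate on the JMPI itself comes from the default instruction state.
 */
#if 0
static void example_fwd_jump(struct brw_compile *p)
{
   struct brw_instruction *jmp;

   jmp = brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
   /* ... instructions skipped when the jump is taken ... */
   brw_land_fwd_jump(p, jmp);   /* patches jmp's immediate with the distance */
}
#endif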
1040
1041
1042
1043/* To integrate with the above, it makes sense that the comparison
1044 * instruction should populate the flag register.  It might be simpler
1045 * just to use the flag reg for most WM tasks?
1046 */
1047void brw_CMP(struct brw_compile *p,
1048	     struct brw_reg dest,
1049	     GLuint conditional,
1050	     struct brw_reg src0,
1051	     struct brw_reg src1)
1052{
1053   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
1054
1055   insn->header.destreg__conditionalmod = conditional;
1056   brw_set_dest(insn, dest);
1057   brw_set_src0(insn, src0);
1058   brw_set_src1(insn, src1);
1059
1060/*    guess_execution_size(insn, src0); */
1061
1062
1063   /* Make it so that future instructions will use the computed flag
1064    * value until brw_set_predicate_control_flag_value() is called
1065    * again.
1066    */
1067   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1068       dest.nr == 0) {
1069      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1070      p->flag_value = 0xff;
1071   }
1072}
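
/* Illustrative sketch, not part of the original file: compare into the flag
 * register (null destination) and then emit a predicated write.  dst, src0
 * and src1 are hypothetical.
 */
#if 0
static void example_cmp_select(struct brw_compile *p,
                               struct brw_reg dst,
                               struct brw_reg src0,
                               struct brw_reg src1)
{
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, src0, src1);
   /* brw_CMP() left predicate_control at BRW_PREDICATE_NORMAL, so this MOV
    * only writes the channels where src0 >= src1:
    */
   brw_MOV(p, dst, brw_imm_f(1.0));
}
#endif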
1073
1074/* Issue a 'wait' instruction on notification register n1; the host can
1075   program MMIO to wake up the thread. */
1076void brw_WAIT (struct brw_compile *p)
1077{
1078   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
1079   struct brw_reg src = brw_notification_1_reg();
1080
1081   brw_set_dest(insn, src);
1082   brw_set_src0(insn, src);
1083   brw_set_src1(insn, brw_null_reg());
1084   insn->header.execution_size = 0; /* must: wait uses an execution size of 1 */
1085   insn->header.predicate_control = 0;
1086   insn->header.compression_control = 0;
1087}
1088
1089
1090/***********************************************************************
1091 * Helpers for the various SEND message types:
1092 */
1093
1094/** Extended math function, float[8].
1095 */
1096void brw_math( struct brw_compile *p,
1097	       struct brw_reg dest,
1098	       GLuint function,
1099	       GLuint saturate,
1100	       GLuint msg_reg_nr,
1101	       struct brw_reg src,
1102	       GLuint data_type,
1103	       GLuint precision )
1104{
1105   struct intel_context *intel = &p->brw->intel;
1106
1107   if (intel->gen >= 6) {
1108      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1109
1110      /* Math is the same ISA format as other opcodes, except that CondModifier
1111       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1112       */
1113      insn->header.destreg__conditionalmod = function;
1114
1115      brw_set_dest(insn, dest);
1116      brw_set_src0(insn, src);
1117      brw_set_src1(insn, brw_null_reg());
1118   } else {
1119      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1120      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1121      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1122      /* Example code doesn't set predicate_control for send
1123       * instructions.
1124       */
1125      insn->header.predicate_control = 0;
1126      insn->header.destreg__conditionalmod = msg_reg_nr;
1127
1128      brw_set_dest(insn, dest);
1129      brw_set_src0(insn, src);
1130      brw_set_math_message(p->brw,
1131			   insn,
1132			   msg_length, response_length,
1133			   function,
1134			   BRW_MATH_INTEGER_UNSIGNED,
1135			   precision,
1136			   saturate,
1137			   data_type);
1138   }
1139}
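
/* Illustrative sketch, not part of the original file: a full-precision
 * reciprocal through the extended math unit.  The message register number
 * and the operands are hypothetical.
 */
#if 0
static void example_math_inv(struct brw_compile *p,
                             struct brw_reg dst,
                             struct brw_reg src)
{
   brw_math(p, dst,
            BRW_MATH_FUNCTION_INV,
            BRW_MATH_SATURATE_NONE,
            2,                       /* msg_reg_nr, ignored on gen6+ */
            src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
#endif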
1140
1141/** Extended math function, float[8].
1142 */
1143void brw_math2(struct brw_compile *p,
1144	       struct brw_reg dest,
1145	       GLuint function,
1146	       struct brw_reg src0,
1147	       struct brw_reg src1)
1148{
1149   struct intel_context *intel = &p->brw->intel;
1150   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
1151
1152   assert(intel->gen >= 6);
1153
1154   /* Math is the same ISA format as other opcodes, except that CondModifier
1155    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1156    */
1157   insn->header.destreg__conditionalmod = function;
1158
1159   brw_set_dest(insn, dest);
1160   brw_set_src0(insn, src0);
1161   brw_set_src1(insn, src1);
1162}
1163
1164/**
1165 * Extended math function, float[16].
1166 * Use 2 send instructions.
1167 */
1168void brw_math_16( struct brw_compile *p,
1169		  struct brw_reg dest,
1170		  GLuint function,
1171		  GLuint saturate,
1172		  GLuint msg_reg_nr,
1173		  struct brw_reg src,
1174		  GLuint precision )
1175{
1176   struct intel_context *intel = &p->brw->intel;
1177   struct brw_instruction *insn;
1178   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1179   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1180
1181   if (intel->gen >= 6) {
1182      insn = next_insn(p, BRW_OPCODE_MATH);
1183
1184      /* Math is the same ISA format as other opcodes, except that CondModifier
1185       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1186       */
1187      insn->header.destreg__conditionalmod = function;
1188
1189      brw_set_dest(insn, dest);
1190      brw_set_src0(insn, src);
1191      brw_set_src1(insn, brw_null_reg());
1192      return;
1193   }
1194
1195   /* First instruction:
1196    */
1197   brw_push_insn_state(p);
1198   brw_set_predicate_control_flag_value(p, 0xff);
1199   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1200
1201   insn = next_insn(p, BRW_OPCODE_SEND);
1202   insn->header.destreg__conditionalmod = msg_reg_nr;
1203
1204   brw_set_dest(insn, dest);
1205   brw_set_src0(insn, src);
1206   brw_set_math_message(p->brw,
1207			insn,
1208			msg_length, response_length,
1209			function,
1210			BRW_MATH_INTEGER_UNSIGNED,
1211			precision,
1212			saturate,
1213			BRW_MATH_DATA_VECTOR);
1214
1215   /* Second instruction:
1216    */
1217   insn = next_insn(p, BRW_OPCODE_SEND);
1218   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1219   insn->header.destreg__conditionalmod = msg_reg_nr+1;
1220
1221   brw_set_dest(insn, offset(dest,1));
1222   brw_set_src0(insn, src);
1223   brw_set_math_message(p->brw,
1224			insn,
1225			msg_length, response_length,
1226			function,
1227			BRW_MATH_INTEGER_UNSIGNED,
1228			precision,
1229			saturate,
1230			BRW_MATH_DATA_VECTOR);
1231
1232   brw_pop_insn_state(p);
1233}
1234
1235
1236/**
1237 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1238 * Scratch offset should be a multiple of 64.
1239 * Used for register spilling.
1240 */
1241void brw_dp_WRITE_16( struct brw_compile *p,
1242		      struct brw_reg src,
1243		      GLuint scratch_offset )
1244{
1245   struct intel_context *intel = &p->brw->intel;
1246   GLuint msg_reg_nr = 1;
1247   {
1248      brw_push_insn_state(p);
1249      brw_set_mask_control(p, BRW_MASK_DISABLE);
1250      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1251
1252      /* set message header global offset field (reg 0, element 2) */
1253      brw_MOV(p,
1254	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1255	      brw_imm_d(scratch_offset));
1256
1257      brw_pop_insn_state(p);
1258   }
1259
1260   {
1261      GLuint msg_length = 3;
1262      struct brw_reg dest;
1263      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1264      int send_commit_msg;
1265
1266      insn->header.predicate_control = 0; /* XXX */
1267      insn->header.compression_control = BRW_COMPRESSION_NONE;
1268      insn->header.destreg__conditionalmod = msg_reg_nr;
1269
1270      /* Until gen6, writes followed by reads from the same location
1271       * are not guaranteed to be ordered unless write_commit is set.
1272       * If set, then a no-op write is issued to the destination
1273       * register to set a dependency, and a read from the destination
1274       * can be used to ensure the ordering.
1275       *
1276       * For gen6, only writes between different threads need ordering
1277       * protection.  Our use of DP writes is all about register
1278       * spilling within a thread.
1279       */
1280      if (intel->gen >= 6) {
1281	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1282	 send_commit_msg = 0;
1283      } else {
1284	 dest = brw_uw16_grf(0, 0);
1285	 send_commit_msg = 1;
1286      }
1287
1288      brw_set_dest(insn, dest);
1289      brw_set_src0(insn, src);
1290
1291      brw_set_dp_write_message(p->brw,
1292			       insn,
1293			       255, /* binding table index (255=stateless) */
1294			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1295			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1296			       msg_length,
1297			       GL_TRUE, /* header_present */
1298			       0, /* pixel scoreboard */
1299			       send_commit_msg, /* response_length */
1300			       0, /* eot */
1301			       send_commit_msg);
1302   }
1303}
1304
1305
1306/**
1307 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1308 * Scratch offset should be a multiple of 64.
1309 * Used for register spilling.
1310 */
1311void brw_dp_READ_16( struct brw_compile *p,
1312		      struct brw_reg dest,
1313		      GLuint scratch_offset )
1314{
1315   GLuint msg_reg_nr = 1;
1316   {
1317      brw_push_insn_state(p);
1318      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1319      brw_set_mask_control(p, BRW_MASK_DISABLE);
1320
1321      /* set message header global offset field (reg 0, element 2) */
1322      brw_MOV(p,
1323	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1324	      brw_imm_d(scratch_offset));
1325
1326      brw_pop_insn_state(p);
1327   }
1328
1329   {
1330      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1331
1332      insn->header.predicate_control = 0; /* XXX */
1333      insn->header.compression_control = BRW_COMPRESSION_NONE;
1334      insn->header.destreg__conditionalmod = msg_reg_nr;
1335
1336      brw_set_dest(insn, dest);	/* UW? */
1337      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1338
1339      brw_set_dp_read_message(p->brw,
1340			      insn,
1341			      255, /* binding table index (255=stateless) */
1342			      BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1343			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1344			      1, /* target cache (render/scratch) */
1345			      1, /* msg_length */
1346			      2, /* response_length */
1347			      0); /* eot */
1348   }
1349}
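
/* Illustrative sketch, not part of the original file: spilling a register to
 * the stateless scratch buffer with brw_dp_WRITE_16() and restoring it with
 * brw_dp_READ_16().  'reg' and 'slot' are hypothetical; the byte offset must
 * be a multiple of 64, as noted above.
 */
#if 0
static void example_spill_unspill(struct brw_compile *p,
                                  struct brw_reg reg,
                                  GLuint slot)
{
   brw_dp_WRITE_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), slot * 64);
   /* ... the GRF backing 'reg' may now be reused ... */
   brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), slot * 64);
}
#endif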
1350
1351
1352/**
1353 * Read a float[4] vector from the data port Data Cache (const buffer).
1354 * Location (in buffer) should be a multiple of 16.
1355 * Used for fetching shader constants.
1356 * If relAddr is true, we'll do an indirect fetch using the address register.
1357 */
1358void brw_dp_READ_4( struct brw_compile *p,
1359                    struct brw_reg dest,
1360                    GLboolean relAddr,
1361                    GLuint location,
1362                    GLuint bind_table_index )
1363{
1364   /* XXX: relAddr not implemented */
1365   GLuint msg_reg_nr = 1;
1366   {
1367      struct brw_reg b;
1368      brw_push_insn_state(p);
1369      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1370      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1371      brw_set_mask_control(p, BRW_MASK_DISABLE);
1372
1373   /* Setup MRF[1] with location/offset into const buffer */
1374      b = brw_message_reg(msg_reg_nr);
1375      b = retype(b, BRW_REGISTER_TYPE_UD);
1376      /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1377       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1378       */
1379      brw_MOV(p, b, brw_imm_ud(location));
1380      brw_pop_insn_state(p);
1381   }
1382
1383   {
1384      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1385
1386      insn->header.predicate_control = BRW_PREDICATE_NONE;
1387      insn->header.compression_control = BRW_COMPRESSION_NONE;
1388      insn->header.destreg__conditionalmod = msg_reg_nr;
1389      insn->header.mask_control = BRW_MASK_DISABLE;
1390
1391      /* cast dest to a uword[8] vector */
1392      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1393
1394      brw_set_dest(insn, dest);
1395      brw_set_src0(insn, brw_null_reg());
1396
1397      brw_set_dp_read_message(p->brw,
1398			      insn,
1399			      bind_table_index,
1400			      0,  /* msg_control (0 means 1 Oword) */
1401			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1402			      0, /* source cache = data cache */
1403			      1, /* msg_length */
1404			      1, /* response_length (1 Oword) */
1405			      0); /* eot */
1406   }
1407}
1408
1409
1410/**
1411 * Read float[4] constant(s) from VS constant buffer.
1412 * For relative addressing, two float[4] constants will be read into 'dest'.
1413 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1414 */
1415void brw_dp_READ_4_vs(struct brw_compile *p,
1416                      struct brw_reg dest,
1417                      GLuint location,
1418                      GLuint bind_table_index)
1419{
1420   struct brw_instruction *insn;
1421   GLuint msg_reg_nr = 1;
1422   struct brw_reg b;
1423
1424   /*
1425   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1426          location, msg_reg_nr);
1427   */
1428
1429   /* Setup MRF[1] with location/offset into const buffer */
1430   brw_push_insn_state(p);
1431   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1432   brw_set_mask_control(p, BRW_MASK_DISABLE);
1433   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1434
1435   /* XXX I think we're setting all the dwords of MRF[1] to 'location',
1436    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1437    */
1438   b = brw_message_reg(msg_reg_nr);
1439   b = retype(b, BRW_REGISTER_TYPE_UD);
1440   /*b = get_element_ud(b, 2);*/
1441   brw_MOV(p, b, brw_imm_ud(location));
1442
1443   brw_pop_insn_state(p);
1444
1445   insn = next_insn(p, BRW_OPCODE_SEND);
1446
1447   insn->header.predicate_control = BRW_PREDICATE_NONE;
1448   insn->header.compression_control = BRW_COMPRESSION_NONE;
1449   insn->header.destreg__conditionalmod = msg_reg_nr;
1450   insn->header.mask_control = BRW_MASK_DISABLE;
1451
1452   brw_set_dest(insn, dest);
1453   brw_set_src0(insn, brw_null_reg());
1454
1455   brw_set_dp_read_message(p->brw,
1456			   insn,
1457			   bind_table_index,
1458			   0,
1459			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1460			   0, /* source cache = data cache */
1461			   1, /* msg_length */
1462			   1, /* response_length (1 Oword) */
1463			   0); /* eot */
1464}
1465
1466/**
1467 * Read a float[4] constant per vertex from VS constant buffer, with
1468 * relative addressing.
1469 */
1470void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1471			       struct brw_reg dest,
1472			       struct brw_reg addr_reg,
1473			       GLuint offset,
1474			       GLuint bind_table_index)
1475{
1476   struct intel_context *intel = &p->brw->intel;
1477   int msg_type;
1478
1479   /* Setup MRF[1] with offset into const buffer */
1480   brw_push_insn_state(p);
1481   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1482   brw_set_mask_control(p, BRW_MASK_DISABLE);
1483   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1484
1485   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1486    * fields ignored.
1487    */
1488   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1489	   addr_reg, brw_imm_d(offset));
1490   brw_pop_insn_state(p);
1491
1492   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1493
1494   insn->header.predicate_control = BRW_PREDICATE_NONE;
1495   insn->header.compression_control = BRW_COMPRESSION_NONE;
1496   insn->header.destreg__conditionalmod = 0;
1497   insn->header.mask_control = BRW_MASK_DISABLE;
1498
1499   brw_set_dest(insn, dest);
1500   brw_set_src0(insn, brw_vec8_grf(0, 0));
1501
1502   if (intel->gen == 6)
1503      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1504   else if (intel->gen == 5 || intel->is_g4x)
1505      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1506   else
1507      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1508
1509   brw_set_dp_read_message(p->brw,
1510			   insn,
1511			   bind_table_index,
1512			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1513			   msg_type,
1514			   0, /* source cache = data cache */
1515			   2, /* msg_length */
1516			   1, /* response_length */
1517			   0); /* eot */
1518}
1519
1520
1521
1522void brw_fb_WRITE(struct brw_compile *p,
1523		  int dispatch_width,
1524                  struct brw_reg dest,
1525                  GLuint msg_reg_nr,
1526                  struct brw_reg src0,
1527                  GLuint binding_table_index,
1528                  GLuint msg_length,
1529                  GLuint response_length,
1530                  GLboolean eot)
1531{
1532   struct intel_context *intel = &p->brw->intel;
1533   struct brw_instruction *insn;
1534   GLuint msg_control, msg_type;
1535   GLboolean header_present = GL_TRUE;
1536
1537   insn = next_insn(p, BRW_OPCODE_SEND);
1538   insn->header.predicate_control = 0; /* XXX */
1539   insn->header.compression_control = BRW_COMPRESSION_NONE;
1540
1541   if (intel->gen >= 6) {
1542      if (msg_length == 4)
1543	 header_present = GL_FALSE;
1544
1545       /* headerless version, just submit color payload */
1546       src0 = brw_message_reg(msg_reg_nr);
1547
1548       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1549   } else {
1550      insn->header.destreg__conditionalmod = msg_reg_nr;
1551
1552      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1553   }
1554
1555   if (dispatch_width == 16)
1556      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1557   else
1558      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1559
1560   brw_set_dest(insn, dest);
1561   brw_set_src0(insn, src0);
1562   brw_set_dp_write_message(p->brw,
1563			    insn,
1564			    binding_table_index,
1565			    msg_control,
1566			    msg_type,
1567			    msg_length,
1568			    header_present,
1569			    1,	/* pixel scoreboard */
1570			    response_length,
1571			    eot,
1572			    0 /* send_commit_msg */);
1573}
1574
1575
1576/**
1577 * Texture sample instruction.
1578 * Note: the msg_type plus msg_length values determine exactly what kind
1579 * of sampling operation is performed.  See volume 4, page 161 of docs.
1580 */
1581void brw_SAMPLE(struct brw_compile *p,
1582		struct brw_reg dest,
1583		GLuint msg_reg_nr,
1584		struct brw_reg src0,
1585		GLuint binding_table_index,
1586		GLuint sampler,
1587		GLuint writemask,
1588		GLuint msg_type,
1589		GLuint response_length,
1590		GLuint msg_length,
1591		GLboolean eot,
1592		GLuint header_present,
1593		GLuint simd_mode)
1594{
1595   struct intel_context *intel = &p->brw->intel;
1596   GLboolean need_stall = 0;
1597
1598   if (writemask == 0) {
1599      /*printf("%s: zero writemask??\n", __FUNCTION__); */
1600      return;
1601   }
1602
1603   /* Hardware doesn't do destination dependency checking on send
1604    * instructions properly.  Add a workaround which generates the
1605    * dependency by other means.  In practice it seems like this bug
1606    * only crops up for texture samples, and only where registers are
1607    * written by the send and then written again later without being
1608    * read in between.  Luckily for us, we already track that
1609    * information and use it to modify the writemask for the
1610    * instruction, so that is a guide for whether a workaround is
1611    * needed.
1612    */
1613   if (writemask != WRITEMASK_XYZW) {
1614      GLuint dst_offset = 0;
1615      GLuint i, newmask = 0, len = 0;
1616
1617      for (i = 0; i < 4; i++) {
1618	 if (writemask & (1<<i))
1619	    break;
1620	 dst_offset += 2;
1621      }
1622      for (; i < 4; i++) {
1623	 if (!(writemask & (1<<i)))
1624	    break;
1625	 newmask |= 1<<i;
1626	 len++;
1627      }
1628
1629      if (newmask != writemask) {
1630	 need_stall = 1;
1631         /* printf("need stall %x %x\n", newmask , writemask); */
1632      }
1633      else {
1634	 GLboolean dispatch_16 = GL_FALSE;
1635
1636	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1637
1638	 guess_execution_size(p->current, dest);
1639	 if (p->current->header.execution_size == BRW_EXECUTE_16)
1640	    dispatch_16 = GL_TRUE;
1641
1642	 newmask = ~newmask & WRITEMASK_XYZW;
1643
1644	 brw_push_insn_state(p);
1645
1646	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1647	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1648
1649	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1650  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1651
1652	 brw_pop_insn_state(p);
1653
1654  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1655	 dest = offset(dest, dst_offset);
1656
1657	 /* For 16-wide dispatch, masked channels are skipped in the
1658	  * response.  For 8-wide, masked channels still take up slots,
1659	  * and are just not written to.
1660	  */
1661	 if (dispatch_16)
1662	    response_length = len * 2;
1663      }
1664   }
1665
1666   {
1667      struct brw_instruction *insn;
1668
1669      /* Sandybridge doesn't have the implied move for SENDs,
1670       * and the first message register index comes from src0.
1671       */
1672      if (intel->gen >= 6) {
1673	  brw_push_insn_state(p);
1674	  brw_set_mask_control( p, BRW_MASK_DISABLE );
1675	  /* m1 contains header? */
1676	  brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1677	  brw_pop_insn_state(p);
1678	  src0 = brw_message_reg(msg_reg_nr);
1679      }
1680
1681      insn = next_insn(p, BRW_OPCODE_SEND);
1682      insn->header.predicate_control = 0; /* XXX */
1683      insn->header.compression_control = BRW_COMPRESSION_NONE;
1684      if (intel->gen < 6)
1685	  insn->header.destreg__conditionalmod = msg_reg_nr;
1686
1687      brw_set_dest(insn, dest);
1688      brw_set_src0(insn, src0);
1689      brw_set_sampler_message(p->brw, insn,
1690			      binding_table_index,
1691			      sampler,
1692			      msg_type,
1693			      response_length,
1694			      msg_length,
1695			      eot,
1696			      header_present,
1697			      simd_mode);
1698   }
1699
1700   if (need_stall) {
1701      struct brw_reg reg = vec8(offset(dest, response_length-1));
1702
1703      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1704       */
1705      brw_push_insn_state(p);
1706      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1707      brw_MOV(p, reg, reg);
1708      brw_pop_insn_state(p);
1709   }
1710
1711}
1712
1713/* All these variables are pretty confusing - we might be better off
1714 * using bitmasks and macros for this, in the old style.  Or perhaps
1715 * just having the caller instantiate the fields in dword3 itself.
1716 */
1717void brw_urb_WRITE(struct brw_compile *p,
1718		   struct brw_reg dest,
1719		   GLuint msg_reg_nr,
1720		   struct brw_reg src0,
1721		   GLboolean allocate,
1722		   GLboolean used,
1723		   GLuint msg_length,
1724		   GLuint response_length,
1725		   GLboolean eot,
1726		   GLboolean writes_complete,
1727		   GLuint offset,
1728		   GLuint swizzle)
1729{
1730   struct intel_context *intel = &p->brw->intel;
1731   struct brw_instruction *insn;
1732
1733   /* Sandybridge doesn't have the implied move for SENDs,
1734    * and the first message register index comes from src0.
1735    */
1736   if (intel->gen >= 6) {
1737      brw_push_insn_state(p);
1738      brw_set_mask_control( p, BRW_MASK_DISABLE );
1739      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1740      brw_pop_insn_state(p);
1741      src0 = brw_message_reg(msg_reg_nr);
1742   }
1743
1744   insn = next_insn(p, BRW_OPCODE_SEND);
1745
1746   assert(msg_length < BRW_MAX_MRF);
1747
1748   brw_set_dest(insn, dest);
1749   brw_set_src0(insn, src0);
1750   brw_set_src1(insn, brw_imm_d(0));
1751
1752   if (intel->gen < 6)
1753      insn->header.destreg__conditionalmod = msg_reg_nr;
1754
1755   brw_set_urb_message(p->brw,
1756		       insn,
1757		       allocate,
1758		       used,
1759		       msg_length,
1760		       response_length,
1761		       eot,
1762		       writes_complete,
1763		       offset,
1764		       swizzle);
1765}
1766
1767void brw_ff_sync(struct brw_compile *p,
1768		   struct brw_reg dest,
1769		   GLuint msg_reg_nr,
1770		   struct brw_reg src0,
1771		   GLboolean allocate,
1772		   GLuint response_length,
1773		   GLboolean eot)
1774{
1775   struct intel_context *intel = &p->brw->intel;
1776   struct brw_instruction *insn;
1777
1778   /* Sandybridge doesn't have the implied move for SENDs,
1779    * and the first message register index comes from src0.
1780    */
1781   if (intel->gen >= 6) {
1782      brw_push_insn_state(p);
1783      brw_set_mask_control( p, BRW_MASK_DISABLE );
1784      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1785      brw_pop_insn_state(p);
1786      src0 = brw_message_reg(msg_reg_nr);
1787   }
1788
1789   insn = next_insn(p, BRW_OPCODE_SEND);
1790   brw_set_dest(insn, dest);
1791   brw_set_src0(insn, src0);
1792   brw_set_src1(insn, brw_imm_d(0));
1793
1794   if (intel->gen < 6)
1795       insn->header.destreg__conditionalmod = msg_reg_nr;
1796
1797   brw_set_ff_sync_message(p->brw,
1798			   insn,
1799			   allocate,
1800			   response_length,
1801			   eot);
1802}
1803