brw_eu_emit.c revision e6ec500e19f455237828f4f3955f888ad0b56382
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
59       dest.file != BRW_MESSAGE_REGISTER_FILE)
60      assert(dest.nr < 128);
61
62   insn->bits1.da1.dest_reg_file = dest.file;
63   insn->bits1.da1.dest_reg_type = dest.type;
64   insn->bits1.da1.dest_address_mode = dest.address_mode;
65
66   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
67      insn->bits1.da1.dest_reg_nr = dest.nr;
68
69      if (insn->header.access_mode == BRW_ALIGN_1) {
70	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
71	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
72	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
73	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
74      }
75      else {
76	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
77	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
78	 /* even ignored in da16, still need to set as '01' */
79	 insn->bits1.da16.dest_horiz_stride = 1;
80      }
81   }
82   else {
83      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
84
85      /* These are different sizes in align1 vs align16:
86       */
87      if (insn->header.access_mode == BRW_ALIGN_1) {
88	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
89	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
90	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
91	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
92      }
93      else {
94	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
95	 /* even ignored in da16, still need to set as '01' */
96	 insn->bits1.ia16.dest_horiz_stride = 1;
97      }
98   }
99
100   /* NEW: Set the execution size based on dest.width and
101    * insn->compression_control:
102    */
103   guess_execution_size(insn, dest);
104}
105
106static void brw_set_src0( struct brw_instruction *insn,
107                          struct brw_reg reg )
108{
109   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
110      assert(reg.nr < 128);
111
112   insn->bits1.da1.src0_reg_file = reg.file;
113   insn->bits1.da1.src0_reg_type = reg.type;
114   insn->bits2.da1.src0_abs = reg.abs;
115   insn->bits2.da1.src0_negate = reg.negate;
116   insn->bits2.da1.src0_address_mode = reg.address_mode;
117
118   if (reg.file == BRW_IMMEDIATE_VALUE) {
119      insn->bits3.ud = reg.dw1.ud;
120
121      /* Required to set some fields in src1 as well:
122       */
123      insn->bits1.da1.src1_reg_file = 0; /* arf */
124      insn->bits1.da1.src1_reg_type = reg.type;
125   }
126   else
127   {
128      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
129	 if (insn->header.access_mode == BRW_ALIGN_1) {
130	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
131	    insn->bits2.da1.src0_reg_nr = reg.nr;
132	 }
133	 else {
134	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
135	    insn->bits2.da16.src0_reg_nr = reg.nr;
136	 }
137      }
138      else {
139	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
140
141	 if (insn->header.access_mode == BRW_ALIGN_1) {
142	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
143	 }
144	 else {
145	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
146	 }
147      }
148
149      if (insn->header.access_mode == BRW_ALIGN_1) {
150	 if (reg.width == BRW_WIDTH_1 &&
151	     insn->header.execution_size == BRW_EXECUTE_1) {
152	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
153	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
154	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
155	 }
156	 else {
157	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
158	    insn->bits2.da1.src0_width = reg.width;
159	    insn->bits2.da1.src0_vert_stride = reg.vstride;
160	 }
161      }
162      else {
163	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
164	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
165	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
166	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
167
168	 /* This is an oddity of the fact we're using the same
169	  * descriptions for registers in align_16 as align_1:
170	  */
171	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
172	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
173	 else
174	    insn->bits2.da16.src0_vert_stride = reg.vstride;
175      }
176   }
177}
178
179
180void brw_set_src1( struct brw_instruction *insn,
181                   struct brw_reg reg )
182{
183   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
184
185   assert(reg.nr < 128);
186
187   insn->bits1.da1.src1_reg_file = reg.file;
188   insn->bits1.da1.src1_reg_type = reg.type;
189   insn->bits3.da1.src1_abs = reg.abs;
190   insn->bits3.da1.src1_negate = reg.negate;
191
192   /* Only src1 can be immediate in two-argument instructions.
193    */
194   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
195
196   if (reg.file == BRW_IMMEDIATE_VALUE) {
197      insn->bits3.ud = reg.dw1.ud;
198   }
199   else {
200      /* This is a hardware restriction, which may or may not be lifted
201       * in the future:
202       */
203      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
204      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
205
206      if (insn->header.access_mode == BRW_ALIGN_1) {
207	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
208	 insn->bits3.da1.src1_reg_nr = reg.nr;
209      }
210      else {
211	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
212	 insn->bits3.da16.src1_reg_nr = reg.nr;
213      }
214
215      if (insn->header.access_mode == BRW_ALIGN_1) {
216	 if (reg.width == BRW_WIDTH_1 &&
217	     insn->header.execution_size == BRW_EXECUTE_1) {
218	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
219	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
220	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
221	 }
222	 else {
223	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
224	    insn->bits3.da1.src1_width = reg.width;
225	    insn->bits3.da1.src1_vert_stride = reg.vstride;
226	 }
227      }
228      else {
229	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
230	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
231	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
232	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
233
234	 /* This is an oddity of the fact we're using the same
235	  * descriptions for registers in align_16 as align_1:
236	  */
237	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
238	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
239	 else
240	    insn->bits3.da16.src1_vert_stride = reg.vstride;
241      }
242   }
243}
244
245
246
247static void brw_set_math_message( struct brw_context *brw,
248				  struct brw_instruction *insn,
249				  GLuint msg_length,
250				  GLuint response_length,
251				  GLuint function,
252				  GLuint integer_type,
253				  GLboolean low_precision,
254				  GLboolean saturate,
255				  GLuint dataType )
256{
257   struct intel_context *intel = &brw->intel;
258   brw_set_src1(insn, brw_imm_d(0));
259
260   if (intel->gen == 5) {
261       insn->bits3.math_gen5.function = function;
262       insn->bits3.math_gen5.int_type = integer_type;
263       insn->bits3.math_gen5.precision = low_precision;
264       insn->bits3.math_gen5.saturate = saturate;
265       insn->bits3.math_gen5.data_type = dataType;
266       insn->bits3.math_gen5.snapshot = 0;
267       insn->bits3.math_gen5.header_present = 0;
268       insn->bits3.math_gen5.response_length = response_length;
269       insn->bits3.math_gen5.msg_length = msg_length;
270       insn->bits3.math_gen5.end_of_thread = 0;
271       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
272       insn->bits2.send_gen5.end_of_thread = 0;
273   } else {
274       insn->bits3.math.function = function;
275       insn->bits3.math.int_type = integer_type;
276       insn->bits3.math.precision = low_precision;
277       insn->bits3.math.saturate = saturate;
278       insn->bits3.math.data_type = dataType;
279       insn->bits3.math.response_length = response_length;
280       insn->bits3.math.msg_length = msg_length;
281       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
282       insn->bits3.math.end_of_thread = 0;
283   }
284}
285
286
287static void brw_set_ff_sync_message(struct brw_context *brw,
288				    struct brw_instruction *insn,
289				    GLboolean allocate,
290				    GLuint response_length,
291				    GLboolean end_of_thread)
292{
293	struct intel_context *intel = &brw->intel;
294	brw_set_src1(insn, brw_imm_d(0));
295
296	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
297	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
298	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
299	insn->bits3.urb_gen5.allocate = allocate;
300	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
301	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
302	insn->bits3.urb_gen5.header_present = 1;
303	insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
304	insn->bits3.urb_gen5.msg_length = 1;
305	insn->bits3.urb_gen5.end_of_thread = end_of_thread;
306	if (intel->gen >= 6) {
307	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
308	} else {
309	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
310	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
311	}
312}
313
314static void brw_set_urb_message( struct brw_context *brw,
315				 struct brw_instruction *insn,
316				 GLboolean allocate,
317				 GLboolean used,
318				 GLuint msg_length,
319				 GLuint response_length,
320				 GLboolean end_of_thread,
321				 GLboolean complete,
322				 GLuint offset,
323				 GLuint swizzle_control )
324{
325    struct intel_context *intel = &brw->intel;
326    brw_set_src1(insn, brw_imm_d(0));
327
328    if (intel->gen >= 5) {
329        insn->bits3.urb_gen5.opcode = 0;	/* ? */
330        insn->bits3.urb_gen5.offset = offset;
331        insn->bits3.urb_gen5.swizzle_control = swizzle_control;
332        insn->bits3.urb_gen5.allocate = allocate;
333        insn->bits3.urb_gen5.used = used;	/* ? */
334        insn->bits3.urb_gen5.complete = complete;
335        insn->bits3.urb_gen5.header_present = 1;
336        insn->bits3.urb_gen5.response_length = response_length;
337        insn->bits3.urb_gen5.msg_length = msg_length;
338        insn->bits3.urb_gen5.end_of_thread = end_of_thread;
339	if (intel->gen >= 6) {
340	   /* For SNB, the SFID bits moved to the condmod bits, and
341	    * EOT stayed in bits3 above.  Does the EOT bit setting
342	    * below on Ironlake even do anything?
343	    */
344	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
345	} else {
346	   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
347	   insn->bits2.send_gen5.end_of_thread = end_of_thread;
348	}
349    } else {
350        insn->bits3.urb.opcode = 0;	/* ? */
351        insn->bits3.urb.offset = offset;
352        insn->bits3.urb.swizzle_control = swizzle_control;
353        insn->bits3.urb.allocate = allocate;
354        insn->bits3.urb.used = used;	/* ? */
355        insn->bits3.urb.complete = complete;
356        insn->bits3.urb.response_length = response_length;
357        insn->bits3.urb.msg_length = msg_length;
358        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
359        insn->bits3.urb.end_of_thread = end_of_thread;
360    }
361}
362
363static void brw_set_dp_write_message( struct brw_context *brw,
364				      struct brw_instruction *insn,
365				      GLuint binding_table_index,
366				      GLuint msg_control,
367				      GLuint msg_type,
368				      GLuint msg_length,
369				      GLuint pixel_scoreboard_clear,
370				      GLuint response_length,
371				      GLuint end_of_thread,
372				      GLuint send_commit_msg)
373{
374   struct intel_context *intel = &brw->intel;
375   brw_set_src1(insn, brw_imm_ud(0));
376
377   if (intel->gen >= 6) {
378       insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
379       insn->bits3.dp_render_cache.msg_control = msg_control;
380       insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
381       insn->bits3.dp_render_cache.msg_type = msg_type;
382       insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
383       insn->bits3.dp_render_cache.header_present = 0; /* XXX */
384       insn->bits3.dp_render_cache.response_length = response_length;
385       insn->bits3.dp_render_cache.msg_length = msg_length;
386       insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
387       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
388	/* XXX really need below? */
389       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
390       insn->bits2.send_gen5.end_of_thread = end_of_thread;
391   } else if (intel->gen == 5) {
392       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
393       insn->bits3.dp_write_gen5.msg_control = msg_control;
394       insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
395       insn->bits3.dp_write_gen5.msg_type = msg_type;
396       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
397       insn->bits3.dp_write_gen5.header_present = 1;
398       insn->bits3.dp_write_gen5.response_length = response_length;
399       insn->bits3.dp_write_gen5.msg_length = msg_length;
400       insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
401       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
402       insn->bits2.send_gen5.end_of_thread = end_of_thread;
403   } else {
404       insn->bits3.dp_write.binding_table_index = binding_table_index;
405       insn->bits3.dp_write.msg_control = msg_control;
406       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
407       insn->bits3.dp_write.msg_type = msg_type;
408       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
409       insn->bits3.dp_write.response_length = response_length;
410       insn->bits3.dp_write.msg_length = msg_length;
411       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
412       insn->bits3.dp_write.end_of_thread = end_of_thread;
413   }
414}
415
416static void brw_set_dp_read_message( struct brw_context *brw,
417				      struct brw_instruction *insn,
418				      GLuint binding_table_index,
419				      GLuint msg_control,
420				      GLuint msg_type,
421				      GLuint target_cache,
422				      GLuint msg_length,
423				      GLuint response_length,
424				      GLuint end_of_thread )
425{
426   struct intel_context *intel = &brw->intel;
427   brw_set_src1(insn, brw_imm_d(0));
428
429   if (intel->gen == 5) {
430       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
431       insn->bits3.dp_read_gen5.msg_control = msg_control;
432       insn->bits3.dp_read_gen5.msg_type = msg_type;
433       insn->bits3.dp_read_gen5.target_cache = target_cache;
434       insn->bits3.dp_read_gen5.header_present = 1;
435       insn->bits3.dp_read_gen5.response_length = response_length;
436       insn->bits3.dp_read_gen5.msg_length = msg_length;
437       insn->bits3.dp_read_gen5.pad1 = 0;
438       insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
439       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
440       insn->bits2.send_gen5.end_of_thread = end_of_thread;
441   } else {
442       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
443       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
444       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
445       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
446       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
447       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
448       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
449       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
450       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
451   }
452}
453
454static void brw_set_sampler_message(struct brw_context *brw,
455                                    struct brw_instruction *insn,
456                                    GLuint binding_table_index,
457                                    GLuint sampler,
458                                    GLuint msg_type,
459                                    GLuint response_length,
460                                    GLuint msg_length,
461                                    GLboolean eot,
462                                    GLuint header_present,
463                                    GLuint simd_mode)
464{
465   struct intel_context *intel = &brw->intel;
466   assert(eot == 0);
467   brw_set_src1(insn, brw_imm_d(0));
468
469   if (intel->gen == 5) {
470      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
471      insn->bits3.sampler_gen5.sampler = sampler;
472      insn->bits3.sampler_gen5.msg_type = msg_type;
473      insn->bits3.sampler_gen5.simd_mode = simd_mode;
474      insn->bits3.sampler_gen5.header_present = header_present;
475      insn->bits3.sampler_gen5.response_length = response_length;
476      insn->bits3.sampler_gen5.msg_length = msg_length;
477      insn->bits3.sampler_gen5.end_of_thread = eot;
478      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
479      insn->bits2.send_gen5.end_of_thread = eot;
480   } else if (intel->is_g4x) {
481      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
482      insn->bits3.sampler_g4x.sampler = sampler;
483      insn->bits3.sampler_g4x.msg_type = msg_type;
484      insn->bits3.sampler_g4x.response_length = response_length;
485      insn->bits3.sampler_g4x.msg_length = msg_length;
486      insn->bits3.sampler_g4x.end_of_thread = eot;
487      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
488   } else {
489      insn->bits3.sampler.binding_table_index = binding_table_index;
490      insn->bits3.sampler.sampler = sampler;
491      insn->bits3.sampler.msg_type = msg_type;
492      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
493      insn->bits3.sampler.response_length = response_length;
494      insn->bits3.sampler.msg_length = msg_length;
495      insn->bits3.sampler.end_of_thread = eot;
496      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
497   }
498}
499
500
501
502static struct brw_instruction *next_insn( struct brw_compile *p,
503					  GLuint opcode )
504{
505   struct brw_instruction *insn;
506
507   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
508
509   insn = &p->store[p->nr_insn++];
510   memcpy(insn, p->current, sizeof(*insn));
511
512   /* Reset this one-shot flag:
513    */
514
515   if (p->current->header.destreg__conditionalmod) {
516      p->current->header.destreg__conditionalmod = 0;
517      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
518   }
519
520   insn->header.opcode = opcode;
521   return insn;
522}
523
524
525static struct brw_instruction *brw_alu1( struct brw_compile *p,
526					 GLuint opcode,
527					 struct brw_reg dest,
528					 struct brw_reg src )
529{
530   struct brw_instruction *insn = next_insn(p, opcode);
531   brw_set_dest(insn, dest);
532   brw_set_src0(insn, src);
533   return insn;
534}
535
536static struct brw_instruction *brw_alu2(struct brw_compile *p,
537					GLuint opcode,
538					struct brw_reg dest,
539					struct brw_reg src0,
540					struct brw_reg src1 )
541{
542   struct brw_instruction *insn = next_insn(p, opcode);
543   brw_set_dest(insn, dest);
544   brw_set_src0(insn, src0);
545   brw_set_src1(insn, src1);
546   return insn;
547}
548
549
550/***********************************************************************
551 * Convenience routines.
552 */
553#define ALU1(OP)					\
554struct brw_instruction *brw_##OP(struct brw_compile *p,	\
555	      struct brw_reg dest,			\
556	      struct brw_reg src0)   			\
557{							\
558   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
559}
560
561#define ALU2(OP)					\
562struct brw_instruction *brw_##OP(struct brw_compile *p,	\
563	      struct brw_reg dest,			\
564	      struct brw_reg src0,			\
565	      struct brw_reg src1)   			\
566{							\
567   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
568}
569
570
571ALU1(MOV)
572ALU2(SEL)
573ALU1(NOT)
574ALU2(AND)
575ALU2(OR)
576ALU2(XOR)
577ALU2(SHR)
578ALU2(SHL)
579ALU2(RSR)
580ALU2(RSL)
581ALU2(ASR)
582ALU2(ADD)
583ALU2(MUL)
584ALU1(FRC)
585ALU1(RNDD)
586ALU1(RNDZ)
587ALU2(MAC)
588ALU2(MACH)
589ALU1(LZD)
590ALU2(DP4)
591ALU2(DPH)
592ALU2(DP3)
593ALU2(DP2)
594ALU2(LINE)
595ALU2(PLN)
596
597
598
599void brw_NOP(struct brw_compile *p)
600{
601   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
602   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
603   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
604   brw_set_src1(insn, brw_imm_ud(0x0));
605}
606
607
608
609
610
611/***********************************************************************
612 * Comparisons, if/else/endif
613 */
614
615struct brw_instruction *brw_JMPI(struct brw_compile *p,
616                                 struct brw_reg dest,
617                                 struct brw_reg src0,
618                                 struct brw_reg src1)
619{
620   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
621
622   insn->header.execution_size = 1;
623   insn->header.compression_control = BRW_COMPRESSION_NONE;
624   insn->header.mask_control = BRW_MASK_DISABLE;
625
626   p->current->header.predicate_control = BRW_PREDICATE_NONE;
627
628   return insn;
629}
630
631/* EU takes the value from the flag register and pushes it onto some
632 * sort of a stack (presumably merging with any flag value already on
633 * the stack).  Within an if block, the flags at the top of the stack
634 * control execution on each channel of the unit, eg. on each of the
635 * 16 pixel values in our wm programs.
636 *
637 * When the matching 'else' instruction is reached (presumably by
638 * countdown of the instruction count patched in by our ELSE/ENDIF
639 * functions), the relevant flags are inverted.
640 *
641 * When the matching 'endif' instruction is reached, the flags are
642 * popped off.  If the stack is now empty, normal execution resumes.
643 *
644 * No attempt is made to deal with stack overflow (14 elements?).
645 */
646struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
647{
648   struct brw_instruction *insn;
649
650   if (p->single_program_flow) {
651      assert(execute_size == BRW_EXECUTE_1);
652
653      insn = next_insn(p, BRW_OPCODE_ADD);
654      insn->header.predicate_inverse = 1;
655   } else {
656      insn = next_insn(p, BRW_OPCODE_IF);
657   }
658
659   /* Override the defaults for this instruction:
660    */
661   brw_set_dest(insn, brw_ip_reg());
662   brw_set_src0(insn, brw_ip_reg());
663   brw_set_src1(insn, brw_imm_d(0x0));
664
665   insn->header.execution_size = execute_size;
666   insn->header.compression_control = BRW_COMPRESSION_NONE;
667   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
668   insn->header.mask_control = BRW_MASK_ENABLE;
669   if (!p->single_program_flow)
670       insn->header.thread_control = BRW_THREAD_SWITCH;
671
672   p->current->header.predicate_control = BRW_PREDICATE_NONE;
673
674   return insn;
675}
676
677
678struct brw_instruction *brw_ELSE(struct brw_compile *p,
679				 struct brw_instruction *if_insn)
680{
681   struct intel_context *intel = &p->brw->intel;
682   struct brw_instruction *insn;
683   GLuint br = 1;
684
685   if (intel->gen == 5)
686      br = 2;
687
688   if (p->single_program_flow) {
689      insn = next_insn(p, BRW_OPCODE_ADD);
690   } else {
691      insn = next_insn(p, BRW_OPCODE_ELSE);
692   }
693
694   brw_set_dest(insn, brw_ip_reg());
695   brw_set_src0(insn, brw_ip_reg());
696   brw_set_src1(insn, brw_imm_d(0x0));
697
698   insn->header.compression_control = BRW_COMPRESSION_NONE;
699   insn->header.execution_size = if_insn->header.execution_size;
700   insn->header.mask_control = BRW_MASK_ENABLE;
701   if (!p->single_program_flow)
702       insn->header.thread_control = BRW_THREAD_SWITCH;
703
704   /* Patch the if instruction to point at this instruction.
705    */
706   if (p->single_program_flow) {
707      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
708
709      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
710   } else {
711      assert(if_insn->header.opcode == BRW_OPCODE_IF);
712
713      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
714      if_insn->bits3.if_else.pop_count = 0;
715      if_insn->bits3.if_else.pad0 = 0;
716   }
717
718   return insn;
719}
720
721void brw_ENDIF(struct brw_compile *p,
722	       struct brw_instruction *patch_insn)
723{
724   struct intel_context *intel = &p->brw->intel;
725   GLuint br = 1;
726
727   if (intel->gen == 5)
728      br = 2;
729
730   if (p->single_program_flow) {
731      /* In single program flow mode, there's no need to execute an ENDIF,
732       * since we don't need to do any stack operations, and if we're executing
733       * currently, we want to just continue executing.
734       */
735      struct brw_instruction *next = &p->store[p->nr_insn];
736
737      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
738
739      patch_insn->bits3.ud = (next - patch_insn) * 16;
740   } else {
741      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
742
743      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
744      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
745      brw_set_src1(insn, brw_imm_d(0x0));
746
747      insn->header.compression_control = BRW_COMPRESSION_NONE;
748      insn->header.execution_size = patch_insn->header.execution_size;
749      insn->header.mask_control = BRW_MASK_ENABLE;
750      insn->header.thread_control = BRW_THREAD_SWITCH;
751
752      assert(patch_insn->bits3.if_else.jump_count == 0);
753
754      /* Patch the if or else instructions to point at this or the next
755       * instruction respectively.
756       */
757      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
758	 /* Automagically turn it into an IFF:
759	  */
760	 patch_insn->header.opcode = BRW_OPCODE_IFF;
761	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
762	 patch_insn->bits3.if_else.pop_count = 0;
763	 patch_insn->bits3.if_else.pad0 = 0;
764      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
765	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
766	 patch_insn->bits3.if_else.pop_count = 1;
767	 patch_insn->bits3.if_else.pad0 = 0;
768      } else {
769	 assert(0);
770      }
771
772      /* Also pop item off the stack in the endif instruction:
773       */
774      insn->bits3.if_else.jump_count = 0;
775      insn->bits3.if_else.pop_count = 1;
776      insn->bits3.if_else.pad0 = 0;
777   }
778}
779
780struct brw_instruction *brw_BREAK(struct brw_compile *p)
781{
782   struct brw_instruction *insn;
783   insn = next_insn(p, BRW_OPCODE_BREAK);
784   brw_set_dest(insn, brw_ip_reg());
785   brw_set_src0(insn, brw_ip_reg());
786   brw_set_src1(insn, brw_imm_d(0x0));
787   insn->header.compression_control = BRW_COMPRESSION_NONE;
788   insn->header.execution_size = BRW_EXECUTE_8;
789   /* insn->header.mask_control = BRW_MASK_DISABLE; */
790   insn->bits3.if_else.pad0 = 0;
791   return insn;
792}
793
794struct brw_instruction *brw_CONT(struct brw_compile *p)
795{
796   struct brw_instruction *insn;
797   insn = next_insn(p, BRW_OPCODE_CONTINUE);
798   brw_set_dest(insn, brw_ip_reg());
799   brw_set_src0(insn, brw_ip_reg());
800   brw_set_src1(insn, brw_imm_d(0x0));
801   insn->header.compression_control = BRW_COMPRESSION_NONE;
802   insn->header.execution_size = BRW_EXECUTE_8;
803   /* insn->header.mask_control = BRW_MASK_DISABLE; */
804   insn->bits3.if_else.pad0 = 0;
805   return insn;
806}
807
808/* DO/WHILE loop:
809 */
810struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
811{
812   if (p->single_program_flow) {
813      return &p->store[p->nr_insn];
814   } else {
815      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
816
817      /* Override the defaults for this instruction:
818       */
819      brw_set_dest(insn, brw_null_reg());
820      brw_set_src0(insn, brw_null_reg());
821      brw_set_src1(insn, brw_null_reg());
822
823      insn->header.compression_control = BRW_COMPRESSION_NONE;
824      insn->header.execution_size = execute_size;
825      insn->header.predicate_control = BRW_PREDICATE_NONE;
826      /* insn->header.mask_control = BRW_MASK_ENABLE; */
827      /* insn->header.mask_control = BRW_MASK_DISABLE; */
828
829      return insn;
830   }
831}
832
833
834
835struct brw_instruction *brw_WHILE(struct brw_compile *p,
836                                  struct brw_instruction *do_insn)
837{
838   struct intel_context *intel = &p->brw->intel;
839   struct brw_instruction *insn;
840   GLuint br = 1;
841
842   if (intel->gen == 5)
843      br = 2;
844
845   if (p->single_program_flow)
846      insn = next_insn(p, BRW_OPCODE_ADD);
847   else
848      insn = next_insn(p, BRW_OPCODE_WHILE);
849
850   brw_set_dest(insn, brw_ip_reg());
851   brw_set_src0(insn, brw_ip_reg());
852   brw_set_src1(insn, brw_imm_d(0x0));
853
854   insn->header.compression_control = BRW_COMPRESSION_NONE;
855
856   if (p->single_program_flow) {
857      insn->header.execution_size = BRW_EXECUTE_1;
858
859      insn->bits3.d = (do_insn - insn) * 16;
860   } else {
861      insn->header.execution_size = do_insn->header.execution_size;
862
863      assert(do_insn->header.opcode == BRW_OPCODE_DO);
864      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
865      insn->bits3.if_else.pop_count = 0;
866      insn->bits3.if_else.pad0 = 0;
867   }
868
869/*    insn->header.mask_control = BRW_MASK_ENABLE; */
870
871   /* insn->header.mask_control = BRW_MASK_DISABLE; */
872   p->current->header.predicate_control = BRW_PREDICATE_NONE;
873   return insn;
874}
875
876
877/* FORWARD JUMPS:
878 */
879void brw_land_fwd_jump(struct brw_compile *p,
880		       struct brw_instruction *jmp_insn)
881{
882   struct intel_context *intel = &p->brw->intel;
883   struct brw_instruction *landing = &p->store[p->nr_insn];
884   GLuint jmpi = 1;
885
886   if (intel->gen == 5)
887       jmpi = 2;
888
889   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
890   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
891
892   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
893}
894
895
896
897/* To integrate with the above, it makes sense that the comparison
898 * instruction should populate the flag register.  It might be simpler
899 * just to use the flag reg for most WM tasks?
900 */
901void brw_CMP(struct brw_compile *p,
902	     struct brw_reg dest,
903	     GLuint conditional,
904	     struct brw_reg src0,
905	     struct brw_reg src1)
906{
907   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
908
909   insn->header.destreg__conditionalmod = conditional;
910   brw_set_dest(insn, dest);
911   brw_set_src0(insn, src0);
912   brw_set_src1(insn, src1);
913
914/*    guess_execution_size(insn, src0); */
915
916
917   /* Make it so that future instructions will use the computed flag
918    * value until brw_set_predicate_control_flag_value() is called
919    * again.
920    */
921   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
922       dest.nr == 0) {
923      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
924      p->flag_value = 0xff;
925   }
926}
927
928/* Issue 'wait' instruction for n1, host could program MMIO
929   to wake up thread. */
930void brw_WAIT (struct brw_compile *p)
931{
932   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
933   struct brw_reg src = brw_notification_1_reg();
934
935   brw_set_dest(insn, src);
936   brw_set_src0(insn, src);
937   brw_set_src1(insn, brw_null_reg());
938   insn->header.execution_size = 0; /* must */
939   insn->header.predicate_control = 0;
940   insn->header.compression_control = 0;
941}
942
943
944/***********************************************************************
945 * Helpers for the various SEND message types:
946 */
947
948/** Extended math function, float[8].
949 */
950void brw_math( struct brw_compile *p,
951	       struct brw_reg dest,
952	       GLuint function,
953	       GLuint saturate,
954	       GLuint msg_reg_nr,
955	       struct brw_reg src,
956	       GLuint data_type,
957	       GLuint precision )
958{
959   struct intel_context *intel = &p->brw->intel;
960
961   if (intel->gen >= 6) {
962      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
963
964      /* Math is the same ISA format as other opcodes, except that CondModifier
965       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
966       */
967      insn->header.destreg__conditionalmod = function;
968
969      brw_set_dest(insn, dest);
970      brw_set_src0(insn, src);
971      brw_set_src1(insn, brw_null_reg());
972   } else {
973      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
974      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
975      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
976      /* Example code doesn't set predicate_control for send
977       * instructions.
978       */
979      insn->header.predicate_control = 0;
980      insn->header.destreg__conditionalmod = msg_reg_nr;
981
982      brw_set_dest(insn, dest);
983      brw_set_src0(insn, src);
984      brw_set_math_message(p->brw,
985			   insn,
986			   msg_length, response_length,
987			   function,
988			   BRW_MATH_INTEGER_UNSIGNED,
989			   precision,
990			   saturate,
991			   data_type);
992   }
993}
994
995/**
996 * Extended math function, float[16].
997 * Use 2 send instructions.
998 */
999void brw_math_16( struct brw_compile *p,
1000		  struct brw_reg dest,
1001		  GLuint function,
1002		  GLuint saturate,
1003		  GLuint msg_reg_nr,
1004		  struct brw_reg src,
1005		  GLuint precision )
1006{
1007   struct intel_context *intel = &p->brw->intel;
1008   struct brw_instruction *insn;
1009   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
1010   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
1011
1012   if (intel->gen >= 6) {
1013      insn = next_insn(p, BRW_OPCODE_MATH);
1014
1015      /* Math is the same ISA format as other opcodes, except that CondModifier
1016       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1017       */
1018      insn->header.destreg__conditionalmod = function;
1019
1020      brw_set_dest(insn, dest);
1021      brw_set_src0(insn, src);
1022      brw_set_src1(insn, brw_null_reg());
1023      return;
1024   }
1025
1026   /* First instruction:
1027    */
1028   brw_push_insn_state(p);
1029   brw_set_predicate_control_flag_value(p, 0xff);
1030   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1031
1032   insn = next_insn(p, BRW_OPCODE_SEND);
1033   insn->header.destreg__conditionalmod = msg_reg_nr;
1034
1035   brw_set_dest(insn, dest);
1036   brw_set_src0(insn, src);
1037   brw_set_math_message(p->brw,
1038			insn,
1039			msg_length, response_length,
1040			function,
1041			BRW_MATH_INTEGER_UNSIGNED,
1042			precision,
1043			saturate,
1044			BRW_MATH_DATA_VECTOR);
1045
1046   /* Second instruction:
1047    */
1048   insn = next_insn(p, BRW_OPCODE_SEND);
1049   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1050   insn->header.destreg__conditionalmod = msg_reg_nr+1;
1051
1052   brw_set_dest(insn, offset(dest,1));
1053   brw_set_src0(insn, src);
1054   brw_set_math_message(p->brw,
1055			insn,
1056			msg_length, response_length,
1057			function,
1058			BRW_MATH_INTEGER_UNSIGNED,
1059			precision,
1060			saturate,
1061			BRW_MATH_DATA_VECTOR);
1062
1063   brw_pop_insn_state(p);
1064}
1065
1066
1067/**
1068 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
1069 * Scratch offset should be a multiple of 64.
1070 * Used for register spilling.
1071 */
1072void brw_dp_WRITE_16( struct brw_compile *p,
1073		      struct brw_reg src,
1074		      GLuint scratch_offset )
1075{
1076   struct intel_context *intel = &p->brw->intel;
1077   GLuint msg_reg_nr = 1;
1078   {
1079      brw_push_insn_state(p);
1080      brw_set_mask_control(p, BRW_MASK_DISABLE);
1081      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1082
1083      /* set message header global offset field (reg 0, element 2) */
1084      brw_MOV(p,
1085	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1086	      brw_imm_d(scratch_offset));
1087
1088      brw_pop_insn_state(p);
1089   }
1090
1091   {
1092      GLuint msg_length = 3;
1093      struct brw_reg dest;
1094      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1095      int send_commit_msg;
1096
1097      insn->header.predicate_control = 0; /* XXX */
1098      insn->header.compression_control = BRW_COMPRESSION_NONE;
1099      insn->header.destreg__conditionalmod = msg_reg_nr;
1100
1101      /* Until gen6, writes followed by reads from the same location
1102       * are not guaranteed to be ordered unless write_commit is set.
1103       * If set, then a no-op write is issued to the destination
1104       * register to set a dependency, and a read from the destination
1105       * can be used to ensure the ordering.
1106       *
1107       * For gen6, only writes between different threads need ordering
1108       * protection.  Our use of DP writes is all about register
1109       * spilling within a thread.
1110       */
1111      if (intel->gen >= 6) {
1112	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1113	 send_commit_msg = 0;
1114      } else {
1115	 dest = brw_uw16_grf(0, 0);
1116	 send_commit_msg = 1;
1117      }
1118
1119      brw_set_dest(insn, dest);
1120      brw_set_src0(insn, src);
1121
1122      brw_set_dp_write_message(p->brw,
1123			       insn,
1124			       255, /* binding table index (255=stateless) */
1125			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
1126			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
1127			       msg_length,
1128			       0, /* pixel scoreboard */
1129			       send_commit_msg, /* response_length */
1130			       0, /* eot */
1131			       send_commit_msg);
1132   }
1133}
1134
1135
1136/**
1137 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
1138 * Scratch offset should be a multiple of 64.
1139 * Used for register spilling.
1140 */
1141void brw_dp_READ_16( struct brw_compile *p,
1142		      struct brw_reg dest,
1143		      GLuint scratch_offset )
1144{
1145   GLuint msg_reg_nr = 1;
1146   {
1147      brw_push_insn_state(p);
1148      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1149      brw_set_mask_control(p, BRW_MASK_DISABLE);
1150
1151      /* set message header global offset field (reg 0, element 2) */
1152      brw_MOV(p,
1153	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
1154	      brw_imm_d(scratch_offset));
1155
1156      brw_pop_insn_state(p);
1157   }
1158
1159   {
1160      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1161
1162      insn->header.predicate_control = 0; /* XXX */
1163      insn->header.compression_control = BRW_COMPRESSION_NONE;
1164      insn->header.destreg__conditionalmod = msg_reg_nr;
1165
1166      brw_set_dest(insn, dest);	/* UW? */
1167      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1168
1169      brw_set_dp_read_message(p->brw,
1170			      insn,
1171			      255, /* binding table index (255=stateless) */
1172			      BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
1173			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1174			      1, /* target cache (render/scratch) */
1175			      1, /* msg_length */
1176			      2, /* response_length */
1177			      0); /* eot */
1178   }
1179}
1180
1181
1182/**
1183 * Read a float[4] vector from the data port Data Cache (const buffer).
1184 * Location (in buffer) should be a multiple of 16.
1185 * Used for fetching shader constants.
1186 * If relAddr is true, we'll do an indirect fetch using the address register.
1187 */
1188void brw_dp_READ_4( struct brw_compile *p,
1189                    struct brw_reg dest,
1190                    GLboolean relAddr,
1191                    GLuint location,
1192                    GLuint bind_table_index )
1193{
1194   /* XXX: relAddr not implemented */
1195   GLuint msg_reg_nr = 1;
1196   {
1197      struct brw_reg b;
1198      brw_push_insn_state(p);
1199      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1200      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1201      brw_set_mask_control(p, BRW_MASK_DISABLE);
1202
1203   /* Setup MRF[1] with location/offset into const buffer */
1204      b = brw_message_reg(msg_reg_nr);
1205      b = retype(b, BRW_REGISTER_TYPE_UD);
1206      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1207       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1208       */
1209      brw_MOV(p, b, brw_imm_ud(location));
1210      brw_pop_insn_state(p);
1211   }
1212
1213   {
1214      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1215
1216      insn->header.predicate_control = BRW_PREDICATE_NONE;
1217      insn->header.compression_control = BRW_COMPRESSION_NONE;
1218      insn->header.destreg__conditionalmod = msg_reg_nr;
1219      insn->header.mask_control = BRW_MASK_DISABLE;
1220
1221      /* cast dest to a uword[8] vector */
1222      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
1223
1224      brw_set_dest(insn, dest);
1225      brw_set_src0(insn, brw_null_reg());
1226
1227      brw_set_dp_read_message(p->brw,
1228			      insn,
1229			      bind_table_index,
1230			      0,  /* msg_control (0 means 1 Oword) */
1231			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1232			      0, /* source cache = data cache */
1233			      1, /* msg_length */
1234			      1, /* response_length (1 Oword) */
1235			      0); /* eot */
1236   }
1237}
1238
1239
1240/**
1241 * Read float[4] constant(s) from VS constant buffer.
1242 * For relative addressing, two float[4] constants will be read into 'dest'.
1243 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1244 */
1245void brw_dp_READ_4_vs(struct brw_compile *p,
1246                      struct brw_reg dest,
1247                      GLuint location,
1248                      GLuint bind_table_index)
1249{
1250   struct brw_instruction *insn;
1251   GLuint msg_reg_nr = 1;
1252   struct brw_reg b;
1253
1254   /*
1255   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1256          location, msg_reg_nr);
1257   */
1258
1259   /* Setup MRF[1] with location/offset into const buffer */
1260   brw_push_insn_state(p);
1261   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1262   brw_set_mask_control(p, BRW_MASK_DISABLE);
1263   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1264
1265   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1266    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1267    */
1268   b = brw_message_reg(msg_reg_nr);
1269   b = retype(b, BRW_REGISTER_TYPE_UD);
1270   /*b = get_element_ud(b, 2);*/
1271   brw_MOV(p, b, brw_imm_ud(location));
1272
1273   brw_pop_insn_state(p);
1274
1275   insn = next_insn(p, BRW_OPCODE_SEND);
1276
1277   insn->header.predicate_control = BRW_PREDICATE_NONE;
1278   insn->header.compression_control = BRW_COMPRESSION_NONE;
1279   insn->header.destreg__conditionalmod = msg_reg_nr;
1280   insn->header.mask_control = BRW_MASK_DISABLE;
1281
1282   brw_set_dest(insn, dest);
1283   brw_set_src0(insn, brw_null_reg());
1284
1285   brw_set_dp_read_message(p->brw,
1286			   insn,
1287			   bind_table_index,
1288			   0,
1289			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1290			   0, /* source cache = data cache */
1291			   1, /* msg_length */
1292			   1, /* response_length (1 Oword) */
1293			   0); /* eot */
1294}
1295
1296/**
1297 * Read a float[4] constant per vertex from VS constant buffer, with
1298 * relative addressing.
1299 */
1300void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1301			       struct brw_reg dest,
1302			       struct brw_reg addr_reg,
1303			       GLuint offset,
1304			       GLuint bind_table_index)
1305{
1306   struct intel_context *intel = &p->brw->intel;
1307   int msg_type;
1308
1309   /* Setup MRF[1] with offset into const buffer */
1310   brw_push_insn_state(p);
1311   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1312   brw_set_mask_control(p, BRW_MASK_DISABLE);
1313   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1314
1315   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
1316    * fields ignored.
1317    */
1318   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
1319	   addr_reg, brw_imm_d(offset));
1320   brw_pop_insn_state(p);
1321
1322   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1323
1324   insn->header.predicate_control = BRW_PREDICATE_NONE;
1325   insn->header.compression_control = BRW_COMPRESSION_NONE;
1326   insn->header.destreg__conditionalmod = 0;
1327   insn->header.mask_control = BRW_MASK_DISABLE;
1328
1329   brw_set_dest(insn, dest);
1330   brw_set_src0(insn, brw_vec8_grf(0, 0));
1331
1332   if (intel->gen == 6)
1333      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1334   else if (intel->gen == 5 || intel->is_g4x)
1335      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1336   else
1337      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1338
1339   brw_set_dp_read_message(p->brw,
1340			   insn,
1341			   bind_table_index,
1342			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1343			   msg_type,
1344			   0, /* source cache = data cache */
1345			   2, /* msg_length */
1346			   1, /* response_length */
1347			   0); /* eot */
1348}
1349
1350
1351
1352void brw_fb_WRITE(struct brw_compile *p,
1353		  int dispatch_width,
1354                  struct brw_reg dest,
1355                  GLuint msg_reg_nr,
1356                  struct brw_reg src0,
1357                  GLuint binding_table_index,
1358                  GLuint msg_length,
1359                  GLuint response_length,
1360                  GLboolean eot)
1361{
1362   struct intel_context *intel = &p->brw->intel;
1363   struct brw_instruction *insn;
1364   GLuint msg_control, msg_type;
1365
1366   insn = next_insn(p, BRW_OPCODE_SEND);
1367   insn->header.predicate_control = 0; /* XXX */
1368   insn->header.compression_control = BRW_COMPRESSION_NONE;
1369
1370   if (intel->gen >= 6) {
1371       /* headerless version, just submit color payload */
1372       src0 = brw_message_reg(msg_reg_nr);
1373
1374       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
1375   } else {
1376      insn->header.destreg__conditionalmod = msg_reg_nr;
1377
1378      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1379   }
1380
1381   if (dispatch_width == 16)
1382      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1383   else
1384      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1385
1386   brw_set_dest(insn, dest);
1387   brw_set_src0(insn, src0);
1388   brw_set_dp_write_message(p->brw,
1389			    insn,
1390			    binding_table_index,
1391			    msg_control,
1392			    msg_type,
1393			    msg_length,
1394			    1,	/* pixel scoreboard */
1395			    response_length,
1396			    eot,
1397			    0 /* send_commit_msg */);
1398}
1399
1400
1401/**
1402 * Texture sample instruction.
1403 * Note: the msg_type plus msg_length values determine exactly what kind
1404 * of sampling operation is performed.  See volume 4, page 161 of docs.
1405 */
1406void brw_SAMPLE(struct brw_compile *p,
1407		struct brw_reg dest,
1408		GLuint msg_reg_nr,
1409		struct brw_reg src0,
1410		GLuint binding_table_index,
1411		GLuint sampler,
1412		GLuint writemask,
1413		GLuint msg_type,
1414		GLuint response_length,
1415		GLuint msg_length,
1416		GLboolean eot,
1417		GLuint header_present,
1418		GLuint simd_mode)
1419{
1420   GLboolean need_stall = 0;
1421
1422   if (writemask == 0) {
1423      /*printf("%s: zero writemask??\n", __FUNCTION__); */
1424      return;
1425   }
1426
1427   /* Hardware doesn't do destination dependency checking on send
1428    * instructions properly.  Add a workaround which generates the
1429    * dependency by other means.  In practice it seems like this bug
1430    * only crops up for texture samples, and only where registers are
1431    * written by the send and then written again later without being
1432    * read in between.  Luckily for us, we already track that
1433    * information and use it to modify the writemask for the
1434    * instruction, so that is a guide for whether a workaround is
1435    * needed.
1436    */
1437   if (writemask != WRITEMASK_XYZW) {
1438      GLuint dst_offset = 0;
1439      GLuint i, newmask = 0, len = 0;
1440
1441      for (i = 0; i < 4; i++) {
1442	 if (writemask & (1<<i))
1443	    break;
1444	 dst_offset += 2;
1445      }
1446      for (; i < 4; i++) {
1447	 if (!(writemask & (1<<i)))
1448	    break;
1449	 newmask |= 1<<i;
1450	 len++;
1451      }
1452
1453      if (newmask != writemask) {
1454	 need_stall = 1;
1455         /* printf("need stall %x %x\n", newmask , writemask); */
1456      }
1457      else {
1458	 GLboolean dispatch_16 = GL_FALSE;
1459
1460	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1461
1462	 guess_execution_size(p->current, dest);
1463	 if (p->current->header.execution_size == BRW_EXECUTE_16)
1464	    dispatch_16 = GL_TRUE;
1465
1466	 newmask = ~newmask & WRITEMASK_XYZW;
1467
1468	 brw_push_insn_state(p);
1469
1470	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1471	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1472
1473	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1474  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1475
1476	 brw_pop_insn_state(p);
1477
1478  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1479	 dest = offset(dest, dst_offset);
1480
1481	 /* For 16-wide dispatch, masked channels are skipped in the
1482	  * response.  For 8-wide, masked channels still take up slots,
1483	  * and are just not written to.
1484	  */
1485	 if (dispatch_16)
1486	    response_length = len * 2;
1487      }
1488   }
1489
1490   {
1491      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1492
1493      insn->header.predicate_control = 0; /* XXX */
1494      insn->header.compression_control = BRW_COMPRESSION_NONE;
1495      insn->header.destreg__conditionalmod = msg_reg_nr;
1496
1497      brw_set_dest(insn, dest);
1498      brw_set_src0(insn, src0);
1499      brw_set_sampler_message(p->brw, insn,
1500			      binding_table_index,
1501			      sampler,
1502			      msg_type,
1503			      response_length,
1504			      msg_length,
1505			      eot,
1506			      header_present,
1507			      simd_mode);
1508   }
1509
1510   if (need_stall) {
1511      struct brw_reg reg = vec8(offset(dest, response_length-1));
1512
1513      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1514       */
1515      brw_push_insn_state(p);
1516      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1517      brw_MOV(p, reg, reg);
1518      brw_pop_insn_state(p);
1519   }
1520
1521}
1522
1523/* All these variables are pretty confusing - we might be better off
1524 * using bitmasks and macros for this, in the old style.  Or perhaps
1525 * just having the caller instantiate the fields in dword3 itself.
1526 */
1527void brw_urb_WRITE(struct brw_compile *p,
1528		   struct brw_reg dest,
1529		   GLuint msg_reg_nr,
1530		   struct brw_reg src0,
1531		   GLboolean allocate,
1532		   GLboolean used,
1533		   GLuint msg_length,
1534		   GLuint response_length,
1535		   GLboolean eot,
1536		   GLboolean writes_complete,
1537		   GLuint offset,
1538		   GLuint swizzle)
1539{
1540   struct intel_context *intel = &p->brw->intel;
1541   struct brw_instruction *insn;
1542
1543   /* Sandybridge doesn't have the implied move for SENDs,
1544    * and the first message register index comes from src0.
1545    */
1546   if (intel->gen >= 6) {
1547      brw_push_insn_state(p);
1548      brw_set_mask_control( p, BRW_MASK_DISABLE );
1549      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
1550      brw_pop_insn_state(p);
1551      src0 = brw_message_reg(msg_reg_nr);
1552   }
1553
1554   insn = next_insn(p, BRW_OPCODE_SEND);
1555
1556   assert(msg_length < BRW_MAX_MRF);
1557
1558   brw_set_dest(insn, dest);
1559   brw_set_src0(insn, src0);
1560   brw_set_src1(insn, brw_imm_d(0));
1561
1562   if (intel->gen < 6)
1563      insn->header.destreg__conditionalmod = msg_reg_nr;
1564
1565   brw_set_urb_message(p->brw,
1566		       insn,
1567		       allocate,
1568		       used,
1569		       msg_length,
1570		       response_length,
1571		       eot,
1572		       writes_complete,
1573		       offset,
1574		       swizzle);
1575}
1576
1577void brw_ff_sync(struct brw_compile *p,
1578		   struct brw_reg dest,
1579		   GLuint msg_reg_nr,
1580		   struct brw_reg src0,
1581		   GLboolean allocate,
1582		   GLuint response_length,
1583		   GLboolean eot)
1584{
1585   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1586
1587   brw_set_dest(insn, dest);
1588   brw_set_src0(insn, src0);
1589   brw_set_src1(insn, brw_imm_d(0));
1590
1591   insn->header.destreg__conditionalmod = msg_reg_nr;
1592
1593   brw_set_ff_sync_message(p->brw,
1594			   insn,
1595			   allocate,
1596			   response_length,
1597			   eot);
1598}
1599