brw_eu_emit.c revision 9f344b3e7d6e23674dd4747faec253f103563b36
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   insn->bits1.da1.dest_reg_file = dest.file;
59   insn->bits1.da1.dest_reg_type = dest.type;
60   insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63      insn->bits1.da1.dest_reg_nr = dest.nr;
64
65      if (insn->header.access_mode == BRW_ALIGN_1) {
66	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67	 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68      }
69      else {
70	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72      }
73   }
74   else {
75      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77      /* These are different sizes in align1 vs align16:
78       */
79      if (insn->header.access_mode == BRW_ALIGN_1) {
80	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81	 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82      }
83      else {
84	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85      }
86   }
87
88   /* NEW: Set the execution size based on dest.width and
89    * insn->compression_control:
90    */
91   guess_execution_size(insn, dest);
92}
93
94static void brw_set_src0( struct brw_instruction *insn,
95		      struct brw_reg reg )
96{
97   insn->bits1.da1.src0_reg_file = reg.file;
98   insn->bits1.da1.src0_reg_type = reg.type;
99   insn->bits2.da1.src0_abs = reg.abs;
100   insn->bits2.da1.src0_negate = reg.negate;
101   insn->bits2.da1.src0_address_mode = reg.address_mode;
102
103   if (reg.file == BRW_IMMEDIATE_VALUE) {
104      insn->bits3.ud = reg.dw1.ud;
105
106      /* Required to set some fields in src1 as well:
107       */
108      insn->bits1.da1.src1_reg_file = 0; /* arf */
109      insn->bits1.da1.src1_reg_type = reg.type;
110   }
111   else
112   {
113      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
114	 if (insn->header.access_mode == BRW_ALIGN_1) {
115	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
116	    insn->bits2.da1.src0_reg_nr = reg.nr;
117	 }
118	 else {
119	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
120	    insn->bits2.da16.src0_reg_nr = reg.nr;
121	 }
122      }
123      else {
124	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
125
126	 if (insn->header.access_mode == BRW_ALIGN_1) {
127	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
128	 }
129	 else {
130	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
131	 }
132      }
133
134      if (insn->header.access_mode == BRW_ALIGN_1) {
135	 if (reg.width == BRW_WIDTH_1 &&
136	     insn->header.execution_size == BRW_EXECUTE_1) {
137	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
138	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
139	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
140	 }
141	 else {
142	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
143	    insn->bits2.da1.src0_width = reg.width;
144	    insn->bits2.da1.src0_vert_stride = reg.vstride;
145	 }
146      }
147      else {
148	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
149	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
150	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
151	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
152
153	 /* This is an oddity of the fact we're using the same
154	  * descriptions for registers in align_16 as align_1:
155	  */
156	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
157	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
158	 else
159	    insn->bits2.da16.src0_vert_stride = reg.vstride;
160      }
161   }
162}
163
164
165static void brw_set_src1( struct brw_instruction *insn,
166			  struct brw_reg reg )
167{
168   insn->bits1.da1.src1_reg_file = reg.file;
169   insn->bits1.da1.src1_reg_type = reg.type;
170   insn->bits3.da1.src1_abs = reg.abs;
171   insn->bits3.da1.src1_negate = reg.negate;
172
173   /* Only src1 can be immediate in two-argument instructions.
174    */
175   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
176
177   if (reg.file == BRW_IMMEDIATE_VALUE) {
178      insn->bits3.ud = reg.dw1.ud;
179   }
180   else {
181      /* This is a hardware restriction, which may or may not be lifted
182       * in the future:
183       */
184      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
185      assert (reg.file == BRW_GENERAL_REGISTER_FILE);
186
187      if (insn->header.access_mode == BRW_ALIGN_1) {
188	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
189	 insn->bits3.da1.src1_reg_nr = reg.nr;
190      }
191      else {
192	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
193	 insn->bits3.da16.src1_reg_nr = reg.nr;
194      }
195
196      if (insn->header.access_mode == BRW_ALIGN_1) {
197	 if (reg.width == BRW_WIDTH_1 &&
198	     insn->header.execution_size == BRW_EXECUTE_1) {
199	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
200	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
201	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
202	 }
203	 else {
204	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
205	    insn->bits3.da1.src1_width = reg.width;
206	    insn->bits3.da1.src1_vert_stride = reg.vstride;
207	 }
208      }
209      else {
210	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
211	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
212	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
213	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
214
215	 /* This is an oddity of the fact we're using the same
216	  * descriptions for registers in align_16 as align_1:
217	  */
218	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
219	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
220	 else
221	    insn->bits3.da16.src1_vert_stride = reg.vstride;
222      }
223   }
224}
225
226
227
228static void brw_set_math_message( struct brw_instruction *insn,
229				  GLuint msg_length,
230				  GLuint response_length,
231				  GLuint function,
232				  GLuint integer_type,
233				  GLboolean low_precision,
234				  GLboolean saturate,
235				  GLuint dataType )
236{
237   brw_set_src1(insn, brw_imm_d(0));
238
239   insn->bits3.math.function = function;
240   insn->bits3.math.int_type = integer_type;
241   insn->bits3.math.precision = low_precision;
242   insn->bits3.math.saturate = saturate;
243   insn->bits3.math.data_type = dataType;
244   insn->bits3.math.response_length = response_length;
245   insn->bits3.math.msg_length = msg_length;
246   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
247   insn->bits3.math.end_of_thread = 0;
248}
249
250static void brw_set_urb_message( struct brw_instruction *insn,
251				 GLboolean allocate,
252				 GLboolean used,
253				 GLuint msg_length,
254				 GLuint response_length,
255				 GLboolean end_of_thread,
256				 GLboolean complete,
257				 GLuint offset,
258				 GLuint swizzle_control )
259{
260   brw_set_src1(insn, brw_imm_d(0));
261
262   insn->bits3.urb.opcode = 0;	/* ? */
263   insn->bits3.urb.offset = offset;
264   insn->bits3.urb.swizzle_control = swizzle_control;
265   insn->bits3.urb.allocate = allocate;
266   insn->bits3.urb.used = used;	/* ? */
267   insn->bits3.urb.complete = complete;
268   insn->bits3.urb.response_length = response_length;
269   insn->bits3.urb.msg_length = msg_length;
270   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
271   insn->bits3.urb.end_of_thread = end_of_thread;
272}
273
274static void brw_set_dp_write_message( struct brw_instruction *insn,
275				      GLuint binding_table_index,
276				      GLuint msg_control,
277				      GLuint msg_type,
278				      GLuint msg_length,
279				      GLuint pixel_scoreboard_clear,
280				      GLuint response_length,
281				      GLuint end_of_thread )
282{
283   brw_set_src1(insn, brw_imm_d(0));
284
285   insn->bits3.dp_write.binding_table_index = binding_table_index;
286   insn->bits3.dp_write.msg_control = msg_control;
287   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
288   insn->bits3.dp_write.msg_type = msg_type;
289   insn->bits3.dp_write.send_commit_msg = 0;
290   insn->bits3.dp_write.response_length = response_length;
291   insn->bits3.dp_write.msg_length = msg_length;
292   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
293   insn->bits3.urb.end_of_thread = end_of_thread;
294}
295
296static void brw_set_dp_read_message( struct brw_instruction *insn,
297				      GLuint binding_table_index,
298				      GLuint msg_control,
299				      GLuint msg_type,
300				      GLuint target_cache,
301				      GLuint msg_length,
302				      GLuint response_length,
303				      GLuint end_of_thread )
304{
305   brw_set_src1(insn, brw_imm_d(0));
306
307   insn->bits3.dp_read.binding_table_index = binding_table_index;
308   insn->bits3.dp_read.msg_control = msg_control;
309   insn->bits3.dp_read.msg_type = msg_type;
310   insn->bits3.dp_read.target_cache = target_cache;
311   insn->bits3.dp_read.response_length = response_length;
312   insn->bits3.dp_read.msg_length = msg_length;
313   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
314   insn->bits3.dp_read.end_of_thread = end_of_thread;
315}
316
317static void brw_set_sampler_message( struct brw_instruction *insn,
318				     GLuint binding_table_index,
319				     GLuint sampler,
320				     GLuint msg_type,
321				     GLuint response_length,
322				     GLuint msg_length,
323				     GLboolean eot)
324{
325   brw_set_src1(insn, brw_imm_d(0));
326
327   insn->bits3.sampler.binding_table_index = binding_table_index;
328   insn->bits3.sampler.sampler = sampler;
329   insn->bits3.sampler.msg_type = msg_type;
330   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
331   insn->bits3.sampler.response_length = response_length;
332   insn->bits3.sampler.msg_length = msg_length;
333   insn->bits3.sampler.end_of_thread = eot;
334   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
335}
336
337
338
339static struct brw_instruction *next_insn( struct brw_compile *p,
340					  GLuint opcode )
341{
342   struct brw_instruction *insn;
343
344   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
345
346   insn = &p->store[p->nr_insn++];
347   memcpy(insn, p->current, sizeof(*insn));
348
349   /* Reset this one-shot flag:
350    */
351
352   if (p->current->header.destreg__conditonalmod) {
353      p->current->header.destreg__conditonalmod = 0;
354      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
355   }
356
357   insn->header.opcode = opcode;
358   return insn;
359}
360
361
362static struct brw_instruction *brw_alu1( struct brw_compile *p,
363					 GLuint opcode,
364					 struct brw_reg dest,
365					 struct brw_reg src )
366{
367   struct brw_instruction *insn = next_insn(p, opcode);
368   brw_set_dest(insn, dest);
369   brw_set_src0(insn, src);
370   return insn;
371}
372
373static struct brw_instruction *brw_alu2(struct brw_compile *p,
374					GLuint opcode,
375					struct brw_reg dest,
376					struct brw_reg src0,
377					struct brw_reg src1 )
378{
379   struct brw_instruction *insn = next_insn(p, opcode);
380   brw_set_dest(insn, dest);
381   brw_set_src0(insn, src0);
382   brw_set_src1(insn, src1);
383   return insn;
384}
385
386
387/***********************************************************************
388 * Convenience routines.
389 */
390#define ALU1(OP)					\
391struct brw_instruction *brw_##OP(struct brw_compile *p,			\
392	      struct brw_reg dest,			\
393	      struct brw_reg src0)   			\
394{							\
395   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
396}
397
398#define ALU2(OP)					\
399struct brw_instruction *brw_##OP(struct brw_compile *p,			\
400	      struct brw_reg dest,			\
401	      struct brw_reg src0,			\
402	      struct brw_reg src1)   			\
403{							\
404   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
405}
406
407
408ALU1(MOV)
409ALU2(SEL)
410ALU1(NOT)
411ALU2(AND)
412ALU2(OR)
413ALU2(XOR)
414ALU2(SHR)
415ALU2(SHL)
416ALU2(RSR)
417ALU2(RSL)
418ALU2(ASR)
419ALU2(ADD)
420ALU2(MUL)
421ALU1(FRC)
422ALU1(RNDD)
423ALU2(MAC)
424ALU2(MACH)
425ALU1(LZD)
426ALU2(DP4)
427ALU2(DPH)
428ALU2(DP3)
429ALU2(DP2)
430ALU2(LINE)
431
432
433
434
435void brw_NOP(struct brw_compile *p)
436{
437   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
438   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
439   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
440   brw_set_src1(insn, brw_imm_ud(0x0));
441}
442
443
444
445
446
447/***********************************************************************
448 * Comparisons, if/else/endif
449 */
450
451struct brw_instruction *brw_JMPI(struct brw_compile *p,
452	      struct brw_reg dest,
453	      struct brw_reg src0,
454	      struct brw_reg src1)
455{
456   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
457
458   p->current->header.predicate_control = BRW_PREDICATE_NONE;
459
460   return insn;
461}
462
463
464/* EU takes the value from the flag register and pushes it onto some
465 * sort of a stack (presumably merging with any flag value already on
466 * the stack).  Within an if block, the flags at the top of the stack
467 * control execution on each channel of the unit, eg. on each of the
468 * 16 pixel values in our wm programs.
469 *
470 * When the matching 'else' instruction is reached (presumably by
471 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
473 *
474 * When the matching 'endif' instruction is reached, the flags are
475 * popped off.  If the stack is now empty, normal execution resumes.
476 *
477 * No attempt is made to deal with stack overflow (14 elements?).
478 */
479struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
480{
481   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF);
482
483   /* Override the defaults for this instruction:
484    */
485   brw_set_dest(insn, brw_ip_reg());
486   brw_set_src0(insn, brw_ip_reg());
487   brw_set_src1(insn, brw_imm_d(0x0));
488
489   insn->header.execution_size = execute_size;
490   insn->header.compression_control = BRW_COMPRESSION_NONE;
491   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
492   insn->header.mask_control = BRW_MASK_ENABLE;
493
494   p->current->header.predicate_control = BRW_PREDICATE_NONE;
495
496   return insn;
497}
498
499
500struct brw_instruction *brw_ELSE(struct brw_compile *p,
501				 struct brw_instruction *if_insn)
502{
503   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE);
504
505   brw_set_dest(insn, brw_ip_reg());
506   brw_set_src0(insn, brw_ip_reg());
507   brw_set_src1(insn, brw_imm_d(0x0));
508
509   insn->header.compression_control = BRW_COMPRESSION_NONE;
510   insn->header.execution_size = if_insn->header.execution_size;
511   insn->header.mask_control = BRW_MASK_ENABLE;
512
513   /* Patch the if instruction to point at this instruction.
514    */
515   assert(if_insn->header.opcode == BRW_OPCODE_IF);
516
517   if_insn->bits3.if_else.jump_count = insn - if_insn;
518   if_insn->bits3.if_else.pop_count = 1;
519   if_insn->bits3.if_else.pad0 = 0;
520
521   return insn;
522}
523
524void brw_ENDIF(struct brw_compile *p,
525	       struct brw_instruction *patch_insn)
526{
527   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
528
529   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
530   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
531   brw_set_src1(insn, brw_imm_d(0x0));
532
533   insn->header.compression_control = BRW_COMPRESSION_NONE;
534   insn->header.execution_size = patch_insn->header.execution_size;
535   insn->header.mask_control = BRW_MASK_ENABLE;
536
537   assert(patch_insn->bits3.if_else.jump_count == 0);
538
539   /* Patch the if or else instructions to point at this or the next
540    * instruction respectively.
541    */
542   if (patch_insn->header.opcode == BRW_OPCODE_IF) {
543      /* Automagically turn it into an IFF:
544       */
545      patch_insn->header.opcode = BRW_OPCODE_IFF;
546      patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
547      patch_insn->bits3.if_else.pop_count = 0;
548      patch_insn->bits3.if_else.pad0 = 0;
549
550   }
551   else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
552      patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
553      patch_insn->bits3.if_else.pop_count = 1;
554      patch_insn->bits3.if_else.pad0 = 0;
555   }
556   else {
557      assert(0);
558   }
559
560   /* Also pop item off the stack in the endif instruction:
561    */
562   insn->bits3.if_else.jump_count = 0;
563   insn->bits3.if_else.pop_count = 1;
564   insn->bits3.if_else.pad0 = 0;
565}
566
567/* DO/WHILE loop:
568 */
569struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
570{
571   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
572
573   /* Override the defaults for this instruction:
574    */
575   brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
576   brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
577   brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
578
579   insn->header.compression_control = BRW_COMPRESSION_NONE;
580   insn->header.execution_size = execute_size;
581/*    insn->header.mask_control = BRW_MASK_ENABLE; */
582
583   return insn;
584}
585
586
587
588void brw_WHILE(struct brw_compile *p,
589	       struct brw_instruction *do_insn)
590{
591   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE);
592
593   brw_set_dest(insn, brw_ip_reg());
594   brw_set_src0(insn, brw_ip_reg());
595   brw_set_src1(insn, brw_imm_d(0x0));
596
597   insn->header.compression_control = BRW_COMPRESSION_NONE;
598   insn->header.execution_size = do_insn->header.execution_size;
599
600   assert(do_insn->header.opcode == BRW_OPCODE_DO);
601   insn->bits3.if_else.jump_count = do_insn - insn;
602   insn->bits3.if_else.pop_count = 0;
603   insn->bits3.if_else.pad0 = 0;
604
605/*    insn->header.mask_control = BRW_MASK_ENABLE; */
606
607   p->current->header.predicate_control = BRW_PREDICATE_NONE;
608}
609
610
611/* FORWARD JUMPS:
612 */
613void brw_land_fwd_jump(struct brw_compile *p,
614		       struct brw_instruction *jmp_insn)
615{
616   struct brw_instruction *landing = &p->store[p->nr_insn];
617
618   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
619   assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
620
621   jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
622}
623
624
625
626/* To integrate with the above, it makes sense that the comparison
627 * instruction should populate the flag register.  It might be simpler
628 * just to use the flag reg for most WM tasks?
629 */
630void brw_CMP(struct brw_compile *p,
631	     struct brw_reg dest,
632	     GLuint conditional,
633	     struct brw_reg src0,
634	     struct brw_reg src1)
635{
636   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
637
638   insn->header.destreg__conditonalmod = conditional;
639   brw_set_dest(insn, dest);
640   brw_set_src0(insn, src0);
641   brw_set_src1(insn, src1);
642
643/*    guess_execution_size(insn, src0); */
644
645
646   /* Make it so that future instructions will use the computed flag
647    * value until brw_set_predicate_control_flag_value() is called
648    * again.
649    */
650   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
651       dest.nr == 0) {
652      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
653      p->flag_value = 0xff;
654   }
655}
656
657
658
659/***********************************************************************
660 * Helpers for the various SEND message types:
661 */
662
663/* Invert 8 values
664 */
665void brw_math( struct brw_compile *p,
666	       struct brw_reg dest,
667	       GLuint function,
668	       GLuint saturate,
669	       GLuint msg_reg_nr,
670	       struct brw_reg src,
671	       GLuint data_type,
672	       GLuint precision )
673{
674   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
675   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
676   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
677
678   /* Example code doesn't set predicate_control for send
679    * instructions.
680    */
681   insn->header.predicate_control = 0;
682   insn->header.destreg__conditonalmod = msg_reg_nr;
683
684   brw_set_dest(insn, dest);
685   brw_set_src0(insn, src);
686   brw_set_math_message(insn,
687			msg_length, response_length,
688			function,
689			BRW_MATH_INTEGER_UNSIGNED,
690			precision,
691			saturate,
692			data_type);
693}
694
695/* Use 2 send instructions to invert 16 elements
696 */
697void brw_math_16( struct brw_compile *p,
698		  struct brw_reg dest,
699		  GLuint function,
700		  GLuint saturate,
701		  GLuint msg_reg_nr,
702		  struct brw_reg src,
703		  GLuint precision )
704{
705   struct brw_instruction *insn;
706   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
707   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
708
709   /* First instruction:
710    */
711   brw_push_insn_state(p);
712   brw_set_predicate_control_flag_value(p, 0xff);
713   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
714
715   insn = next_insn(p, BRW_OPCODE_SEND);
716   insn->header.destreg__conditonalmod = msg_reg_nr;
717
718   brw_set_dest(insn, dest);
719   brw_set_src0(insn, src);
720   brw_set_math_message(insn,
721			msg_length, response_length,
722			function,
723			BRW_MATH_INTEGER_UNSIGNED,
724			precision,
725			saturate,
726			BRW_MATH_DATA_VECTOR);
727
728   /* Second instruction:
729    */
730   insn = next_insn(p, BRW_OPCODE_SEND);
731   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
732   insn->header.destreg__conditonalmod = msg_reg_nr+1;
733
734   brw_set_dest(insn, offset(dest,1));
735   brw_set_src0(insn, src);
736   brw_set_math_message(insn,
737			msg_length, response_length,
738			function,
739			BRW_MATH_INTEGER_UNSIGNED,
740			precision,
741			saturate,
742			BRW_MATH_DATA_VECTOR);
743
744   brw_pop_insn_state(p);
745}
746
747
748
749
750void brw_dp_WRITE_16( struct brw_compile *p,
751		      struct brw_reg src,
752		      GLuint msg_reg_nr,
753		      GLuint scratch_offset )
754{
755   {
756      brw_push_insn_state(p);
757      brw_set_mask_control(p, BRW_MASK_DISABLE);
758      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
759
760      brw_MOV(p,
761	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
762	      brw_imm_d(scratch_offset));
763
764      brw_pop_insn_state(p);
765   }
766
767   {
768      GLuint msg_length = 3;
769      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
770      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
771
772      insn->header.predicate_control = 0; /* XXX */
773      insn->header.compression_control = BRW_COMPRESSION_NONE;
774      insn->header.destreg__conditonalmod = msg_reg_nr;
775
776      brw_set_dest(insn, dest);
777      brw_set_src0(insn, src);
778
779      brw_set_dp_write_message(insn,
780			       255, /* bti */
781			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
782			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
783			       msg_length,
784			       0, /* pixel scoreboard */
785			       0, /* response_length */
786			       0); /* eot */
787   }
788
789}
790
791
792void brw_dp_READ_16( struct brw_compile *p,
793		      struct brw_reg dest,
794		      GLuint msg_reg_nr,
795		      GLuint scratch_offset )
796{
797   {
798      brw_push_insn_state(p);
799      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
800      brw_set_mask_control(p, BRW_MASK_DISABLE);
801
802      brw_MOV(p,
803	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
804	      brw_imm_d(scratch_offset));
805
806      brw_pop_insn_state(p);
807   }
808
809   {
810      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
811
812      insn->header.predicate_control = 0; /* XXX */
813      insn->header.compression_control = BRW_COMPRESSION_NONE;
814      insn->header.destreg__conditonalmod = msg_reg_nr;
815
816      brw_set_dest(insn, dest);	/* UW? */
817      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
818
819      brw_set_dp_read_message(insn,
820			      255, /* bti */
821			      3,  /* msg_control */
822			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
823			      1, /* target cache */
824			      1, /* msg_length */
825			      2, /* response_length */
826			      0); /* eot */
827   }
828}
829
830
831void brw_fb_WRITE(struct brw_compile *p,
832		   struct brw_reg dest,
833		   GLuint msg_reg_nr,
834		   struct brw_reg src0,
835		   GLuint binding_table_index,
836		   GLuint msg_length,
837		   GLuint response_length,
838		   GLboolean eot)
839{
840   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
841
842   insn->header.predicate_control = 0; /* XXX */
843   insn->header.compression_control = BRW_COMPRESSION_NONE;
844   insn->header.destreg__conditonalmod = msg_reg_nr;
845
846   brw_set_dest(insn, dest);
847   brw_set_src0(insn, src0);
848   brw_set_dp_write_message(insn,
849			    binding_table_index,
850			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
851			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
852			    msg_length,
853			    1,	/* pixel scoreboard */
854			    response_length,
855			    eot);
856}
857
858
859
860void brw_SAMPLE(struct brw_compile *p,
861		struct brw_reg dest,
862		GLuint msg_reg_nr,
863		struct brw_reg src0,
864		GLuint binding_table_index,
865		GLuint sampler,
866		GLuint writemask,
867		GLuint msg_type,
868		GLuint response_length,
869		GLuint msg_length,
870		GLboolean eot)
871{
872   GLboolean need_stall = 0;
873
874   if(writemask == 0) {
875/*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
876      return;
877   }
878
879   /* Hardware doesn't do destination dependency checking on send
880    * instructions properly.  Add a workaround which generates the
881    * dependency by other means.  In practice it seems like this bug
882    * only crops up for texture samples, and only where registers are
883    * written by the send and then written again later without being
884    * read in between.  Luckily for us, we already track that
885    * information and use it to modify the writemask for the
886    * instruction, so that is a guide for whether a workaround is
887    * needed.
888    */
889   if (writemask != WRITEMASK_XYZW) {
890      GLuint dst_offset = 0;
891      GLuint i, newmask = 0, len = 0;
892
893      for (i = 0; i < 4; i++) {
894	 if (writemask & (1<<i))
895	    break;
896	 dst_offset += 2;
897      }
898      for (; i < 4; i++) {
899	 if (!(writemask & (1<<i)))
900	    break;
901	 newmask |= 1<<i;
902	 len++;
903      }
904
905      if (newmask != writemask) {
906	 need_stall = 1;
907/* 	 _mesa_printf("need stall %x %x\n", newmask , writemask); */
908      }
909      else {
910	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
911
912	 newmask = ~newmask & WRITEMASK_XYZW;
913
914	 brw_push_insn_state(p);
915
916	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
917	 brw_set_mask_control(p, BRW_MASK_DISABLE);
918
919	 brw_MOV(p, m1, brw_vec8_grf(0,0));
920  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
921
922	 brw_pop_insn_state(p);
923
924  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
925	 dest = offset(dest, dst_offset);
926	 response_length = len * 2;
927      }
928   }
929
930   {
931      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
932
933      insn->header.predicate_control = 0; /* XXX */
934      insn->header.compression_control = BRW_COMPRESSION_NONE;
935      insn->header.destreg__conditonalmod = msg_reg_nr;
936
937      brw_set_dest(insn, dest);
938      brw_set_src0(insn, src0);
939      brw_set_sampler_message(insn,
940			      binding_table_index,
941			      sampler,
942			      msg_type,
943			      response_length,
944			      msg_length,
945			      eot);
946   }
947
948   if (need_stall)
949   {
950      struct brw_reg reg = vec8(offset(dest, response_length-1));
951
952      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
953       */
954      brw_push_insn_state(p);
955      brw_set_compression_control(p, GL_FALSE);
956      brw_MOV(p, reg, reg);
957      brw_pop_insn_state(p);
958   }
959
960}
961
962/* All these variables are pretty confusing - we might be better off
963 * using bitmasks and macros for this, in the old style.  Or perhaps
964 * just having the caller instantiate the fields in dword3 itself.
965 */
966void brw_urb_WRITE(struct brw_compile *p,
967		   struct brw_reg dest,
968		   GLuint msg_reg_nr,
969		   struct brw_reg src0,
970		   GLboolean allocate,
971		   GLboolean used,
972		   GLuint msg_length,
973		   GLuint response_length,
974		   GLboolean eot,
975		   GLboolean writes_complete,
976		   GLuint offset,
977		   GLuint swizzle)
978{
979   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
980
981   assert(msg_length < 16);
982
983   brw_set_dest(insn, dest);
984   brw_set_src0(insn, src0);
985   brw_set_src1(insn, brw_imm_d(0));
986
987   insn->header.destreg__conditonalmod = msg_reg_nr;
988
989   brw_set_urb_message(insn,
990		       allocate,
991		       used,
992		       msg_length,
993		       response_length,
994		       eot,
995		       writes_complete,
996		       offset,
997		       swizzle);
998}
999
1000