brw_eu_emit.c revision 92c075eeb7c330ea420400d1c2bae57356b19f03
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   insn->bits1.da1.dest_reg_file = dest.file;
59   insn->bits1.da1.dest_reg_type = dest.type;
60   insn->bits1.da1.dest_address_mode = dest.address_mode;
61
62   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
63      insn->bits1.da1.dest_reg_nr = dest.nr;
64
65      if (insn->header.access_mode == BRW_ALIGN_1) {
66	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
67	 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
68      }
69      else {
70	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
71	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
72      }
73   }
74   else {
75      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
76
77      /* These are different sizes in align1 vs align16:
78       */
79      if (insn->header.access_mode == BRW_ALIGN_1) {
80	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
81	 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
82      }
83      else {
84	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
85      }
86   }
87
88   /* NEW: Set the execution size based on dest.width and
89    * insn->compression_control:
90    */
91   guess_execution_size(insn, dest);
92}
93
94static void brw_set_src0( struct brw_instruction *insn,
95		      struct brw_reg reg )
96{
97   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
98
99   insn->bits1.da1.src0_reg_file = reg.file;
100   insn->bits1.da1.src0_reg_type = reg.type;
101   insn->bits2.da1.src0_abs = reg.abs;
102   insn->bits2.da1.src0_negate = reg.negate;
103   insn->bits2.da1.src0_address_mode = reg.address_mode;
104
105   if (reg.file == BRW_IMMEDIATE_VALUE) {
106      insn->bits3.ud = reg.dw1.ud;
107
108      /* Required to set some fields in src1 as well:
109       */
110      insn->bits1.da1.src1_reg_file = 0; /* arf */
111      insn->bits1.da1.src1_reg_type = reg.type;
112   }
113   else
114   {
115      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
116	 if (insn->header.access_mode == BRW_ALIGN_1) {
117	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
118	    insn->bits2.da1.src0_reg_nr = reg.nr;
119	 }
120	 else {
121	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
122	    insn->bits2.da16.src0_reg_nr = reg.nr;
123	 }
124      }
125      else {
126	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
127
128	 if (insn->header.access_mode == BRW_ALIGN_1) {
129	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
130	 }
131	 else {
132	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
133	 }
134      }
135
136      if (insn->header.access_mode == BRW_ALIGN_1) {
137	 if (reg.width == BRW_WIDTH_1 &&
138	     insn->header.execution_size == BRW_EXECUTE_1) {
139	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
140	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
141	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
142	 }
143	 else {
144	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
145	    insn->bits2.da1.src0_width = reg.width;
146	    insn->bits2.da1.src0_vert_stride = reg.vstride;
147	 }
148      }
149      else {
150	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
151	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
152	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
153	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
154
155	 /* This is an oddity of the fact we're using the same
156	  * descriptions for registers in align_16 as align_1:
157	  */
158	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
159	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
160	 else
161	    insn->bits2.da16.src0_vert_stride = reg.vstride;
162      }
163   }
164}
165
166
167void brw_set_src1( struct brw_instruction *insn,
168			  struct brw_reg reg )
169{
170   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
171
172   insn->bits1.da1.src1_reg_file = reg.file;
173   insn->bits1.da1.src1_reg_type = reg.type;
174   insn->bits3.da1.src1_abs = reg.abs;
175   insn->bits3.da1.src1_negate = reg.negate;
176
177   /* Only src1 can be immediate in two-argument instructions.
178    */
179   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
180
181   if (reg.file == BRW_IMMEDIATE_VALUE) {
182      insn->bits3.ud = reg.dw1.ud;
183   }
184   else {
185      /* This is a hardware restriction, which may or may not be lifted
186       * in the future:
187       */
188      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
189      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
190
191      if (insn->header.access_mode == BRW_ALIGN_1) {
192	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
193	 insn->bits3.da1.src1_reg_nr = reg.nr;
194      }
195      else {
196	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
197	 insn->bits3.da16.src1_reg_nr = reg.nr;
198      }
199
200      if (insn->header.access_mode == BRW_ALIGN_1) {
201	 if (reg.width == BRW_WIDTH_1 &&
202	     insn->header.execution_size == BRW_EXECUTE_1) {
203	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
204	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
205	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
206	 }
207	 else {
208	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
209	    insn->bits3.da1.src1_width = reg.width;
210	    insn->bits3.da1.src1_vert_stride = reg.vstride;
211	 }
212      }
213      else {
214	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
215	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
216	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
217	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
218
219	 /* This is an oddity of the fact we're using the same
220	  * descriptions for registers in align_16 as align_1:
221	  */
222	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
223	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
224	 else
225	    insn->bits3.da16.src1_vert_stride = reg.vstride;
226      }
227   }
228}
229
230
231
232static void brw_set_math_message( struct brw_instruction *insn,
233				  GLuint msg_length,
234				  GLuint response_length,
235				  GLuint function,
236				  GLuint integer_type,
237				  GLboolean low_precision,
238				  GLboolean saturate,
239				  GLuint dataType )
240{
241   brw_set_src1(insn, brw_imm_d(0));
242
243   insn->bits3.math.function = function;
244   insn->bits3.math.int_type = integer_type;
245   insn->bits3.math.precision = low_precision;
246   insn->bits3.math.saturate = saturate;
247   insn->bits3.math.data_type = dataType;
248   insn->bits3.math.response_length = response_length;
249   insn->bits3.math.msg_length = msg_length;
250   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
251   insn->bits3.math.end_of_thread = 0;
252}
253
254static void brw_set_urb_message( struct brw_instruction *insn,
255				 GLboolean allocate,
256				 GLboolean used,
257				 GLuint msg_length,
258				 GLuint response_length,
259				 GLboolean end_of_thread,
260				 GLboolean complete,
261				 GLuint offset,
262				 GLuint swizzle_control )
263{
264   brw_set_src1(insn, brw_imm_d(0));
265
266   insn->bits3.urb.opcode = 0;	/* ? */
267   insn->bits3.urb.offset = offset;
268   insn->bits3.urb.swizzle_control = swizzle_control;
269   insn->bits3.urb.allocate = allocate;
270   insn->bits3.urb.used = used;	/* ? */
271   insn->bits3.urb.complete = complete;
272   insn->bits3.urb.response_length = response_length;
273   insn->bits3.urb.msg_length = msg_length;
274   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
275   insn->bits3.urb.end_of_thread = end_of_thread;
276}
277
278static void brw_set_dp_write_message( struct brw_instruction *insn,
279				      GLuint binding_table_index,
280				      GLuint msg_control,
281				      GLuint msg_type,
282				      GLuint msg_length,
283				      GLuint pixel_scoreboard_clear,
284				      GLuint response_length,
285				      GLuint end_of_thread )
286{
287   brw_set_src1(insn, brw_imm_d(0));
288
289   insn->bits3.dp_write.binding_table_index = binding_table_index;
290   insn->bits3.dp_write.msg_control = msg_control;
291   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
292   insn->bits3.dp_write.msg_type = msg_type;
293   insn->bits3.dp_write.send_commit_msg = 0;
294   insn->bits3.dp_write.response_length = response_length;
295   insn->bits3.dp_write.msg_length = msg_length;
296   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
297   insn->bits3.urb.end_of_thread = end_of_thread;
298}
299
300static void brw_set_dp_read_message( struct brw_instruction *insn,
301				      GLuint binding_table_index,
302				      GLuint msg_control,
303				      GLuint msg_type,
304				      GLuint target_cache,
305				      GLuint msg_length,
306				      GLuint response_length,
307				      GLuint end_of_thread )
308{
309   brw_set_src1(insn, brw_imm_d(0));
310
311   insn->bits3.dp_read.binding_table_index = binding_table_index;
312   insn->bits3.dp_read.msg_control = msg_control;
313   insn->bits3.dp_read.msg_type = msg_type;
314   insn->bits3.dp_read.target_cache = target_cache;
315   insn->bits3.dp_read.response_length = response_length;
316   insn->bits3.dp_read.msg_length = msg_length;
317   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
318   insn->bits3.dp_read.end_of_thread = end_of_thread;
319}
320
321static void brw_set_sampler_message(struct brw_context *brw,
322                 struct brw_instruction *insn,
323				     GLuint binding_table_index,
324				     GLuint sampler,
325				     GLuint msg_type,
326				     GLuint response_length,
327				     GLuint msg_length,
328				     GLboolean eot)
329{
330   brw_set_src1(insn, brw_imm_d(0));
331
332   if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) {
333      insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index;
334      insn->bits3.sampler_gm45_g4x.sampler = sampler;
335      insn->bits3.sampler_gm45_g4x.msg_type = msg_type;
336      insn->bits3.sampler_gm45_g4x.response_length = response_length;
337      insn->bits3.sampler_gm45_g4x.msg_length = msg_length;
338      insn->bits3.sampler_gm45_g4x.end_of_thread = eot;
339      insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
340   } else {
341      insn->bits3.sampler.binding_table_index = binding_table_index;
342      insn->bits3.sampler.sampler = sampler;
343      insn->bits3.sampler.msg_type = msg_type;
344      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
345      insn->bits3.sampler.response_length = response_length;
346      insn->bits3.sampler.msg_length = msg_length;
347      insn->bits3.sampler.end_of_thread = eot;
348      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
349   }
350}
351
352
353
354static struct brw_instruction *next_insn( struct brw_compile *p,
355					  GLuint opcode )
356{
357   struct brw_instruction *insn;
358
359   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
360
361   insn = &p->store[p->nr_insn++];
362   memcpy(insn, p->current, sizeof(*insn));
363
364   /* Reset this one-shot flag:
365    */
366
367   if (p->current->header.destreg__conditonalmod) {
368      p->current->header.destreg__conditonalmod = 0;
369      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
370   }
371
372   insn->header.opcode = opcode;
373   return insn;
374}
375
376
377static struct brw_instruction *brw_alu1( struct brw_compile *p,
378					 GLuint opcode,
379					 struct brw_reg dest,
380					 struct brw_reg src )
381{
382   struct brw_instruction *insn = next_insn(p, opcode);
383   brw_set_dest(insn, dest);
384   brw_set_src0(insn, src);
385   return insn;
386}
387
388static struct brw_instruction *brw_alu2(struct brw_compile *p,
389					GLuint opcode,
390					struct brw_reg dest,
391					struct brw_reg src0,
392					struct brw_reg src1 )
393{
394   struct brw_instruction *insn = next_insn(p, opcode);
395   brw_set_dest(insn, dest);
396   brw_set_src0(insn, src0);
397   brw_set_src1(insn, src1);
398   return insn;
399}
400
401
402/***********************************************************************
403 * Convenience routines.
404 */
405#define ALU1(OP)					\
406struct brw_instruction *brw_##OP(struct brw_compile *p,			\
407	      struct brw_reg dest,			\
408	      struct brw_reg src0)   			\
409{							\
410   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
411}
412
413#define ALU2(OP)					\
414struct brw_instruction *brw_##OP(struct brw_compile *p,			\
415	      struct brw_reg dest,			\
416	      struct brw_reg src0,			\
417	      struct brw_reg src1)   			\
418{							\
419   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
420}
421
422
423ALU1(MOV)
424ALU2(SEL)
425ALU1(NOT)
426ALU2(AND)
427ALU2(OR)
428ALU2(XOR)
429ALU2(SHR)
430ALU2(SHL)
431ALU2(RSR)
432ALU2(RSL)
433ALU2(ASR)
434ALU2(ADD)
435ALU2(MUL)
436ALU1(FRC)
437ALU1(RNDD)
438ALU2(MAC)
439ALU2(MACH)
440ALU1(LZD)
441ALU2(DP4)
442ALU2(DPH)
443ALU2(DP3)
444ALU2(DP2)
445ALU2(LINE)
446
447
448
449
450void brw_NOP(struct brw_compile *p)
451{
452   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
453   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
454   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
455   brw_set_src1(insn, brw_imm_ud(0x0));
456}
457
458
459
460
461
462/***********************************************************************
463 * Comparisons, if/else/endif
464 */
465
466struct brw_instruction *brw_JMPI(struct brw_compile *p,
467	      struct brw_reg dest,
468	      struct brw_reg src0,
469	      struct brw_reg src1)
470{
471   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
472
473   p->current->header.predicate_control = BRW_PREDICATE_NONE;
474
475   return insn;
476}
477
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 */
493struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
494{
495   struct brw_instruction *insn;
496
497   if (p->single_program_flow) {
498      assert(execute_size == BRW_EXECUTE_1);
499
500      insn = next_insn(p, BRW_OPCODE_ADD);
501      insn->header.predicate_inverse = 1;
502   } else {
503      insn = next_insn(p, BRW_OPCODE_IF);
504   }
505
506   /* Override the defaults for this instruction:
507    */
508   brw_set_dest(insn, brw_ip_reg());
509   brw_set_src0(insn, brw_ip_reg());
510   brw_set_src1(insn, brw_imm_d(0x0));
511
512   insn->header.execution_size = execute_size;
513   insn->header.compression_control = BRW_COMPRESSION_NONE;
514   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
515   insn->header.mask_control = BRW_MASK_ENABLE;
516
517   p->current->header.predicate_control = BRW_PREDICATE_NONE;
518
519   return insn;
520}
521
522
523struct brw_instruction *brw_ELSE(struct brw_compile *p,
524				 struct brw_instruction *if_insn)
525{
526   struct brw_instruction *insn;
527
528   if (p->single_program_flow) {
529      insn = next_insn(p, BRW_OPCODE_ADD);
530   } else {
531      insn = next_insn(p, BRW_OPCODE_ELSE);
532   }
533
534   brw_set_dest(insn, brw_ip_reg());
535   brw_set_src0(insn, brw_ip_reg());
536   brw_set_src1(insn, brw_imm_d(0x0));
537
538   insn->header.compression_control = BRW_COMPRESSION_NONE;
539   insn->header.execution_size = if_insn->header.execution_size;
540   insn->header.mask_control = BRW_MASK_ENABLE;
541
542   /* Patch the if instruction to point at this instruction.
543    */
544   if (p->single_program_flow) {
545      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
546
547      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
548   } else {
549      assert(if_insn->header.opcode == BRW_OPCODE_IF);
550
551      if_insn->bits3.if_else.jump_count = insn - if_insn;
552      if_insn->bits3.if_else.pop_count = 1;
553      if_insn->bits3.if_else.pad0 = 0;
554   }
555
556   return insn;
557}
558
559void brw_ENDIF(struct brw_compile *p,
560	       struct brw_instruction *patch_insn)
561{
562   if (p->single_program_flow) {
563      /* In single program flow mode, there's no need to execute an ENDIF,
564       * since we don't need to do any stack operations, and if we're executing
565       * currently, we want to just continue executing.
566       */
567      struct brw_instruction *next = &p->store[p->nr_insn];
568
569      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
570
571      patch_insn->bits3.ud = (next - patch_insn) * 16;
572   } else {
573      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
574
575      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
576      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
577      brw_set_src1(insn, brw_imm_d(0x0));
578
579      insn->header.compression_control = BRW_COMPRESSION_NONE;
580      insn->header.execution_size = patch_insn->header.execution_size;
581      insn->header.mask_control = BRW_MASK_ENABLE;
582
583      assert(patch_insn->bits3.if_else.jump_count == 0);
584
585      /* Patch the if or else instructions to point at this or the next
586       * instruction respectively.
587       */
588      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
589	 /* Automagically turn it into an IFF:
590	  */
591	 patch_insn->header.opcode = BRW_OPCODE_IFF;
592	 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
593	 patch_insn->bits3.if_else.pop_count = 0;
594	 patch_insn->bits3.if_else.pad0 = 0;
595      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
596	 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
597	 patch_insn->bits3.if_else.pop_count = 1;
598	 patch_insn->bits3.if_else.pad0 = 0;
599      } else {
600	 assert(0);
601      }
602
603      /* Also pop item off the stack in the endif instruction:
604       */
605      insn->bits3.if_else.jump_count = 0;
606      insn->bits3.if_else.pop_count = 1;
607      insn->bits3.if_else.pad0 = 0;
608   }
609}
610
611struct brw_instruction *brw_BREAK(struct brw_compile *p)
612{
613   struct brw_instruction *insn;
614   insn = next_insn(p, BRW_OPCODE_BREAK);
615   brw_set_dest(insn, brw_ip_reg());
616   brw_set_src0(insn, brw_ip_reg());
617   brw_set_src1(insn, brw_imm_d(0x0));
618   insn->header.compression_control = BRW_COMPRESSION_NONE;
619   insn->header.execution_size = BRW_EXECUTE_8;
620   insn->header.mask_control = BRW_MASK_DISABLE;
621   insn->bits3.if_else.pad0 = 0;
622   return insn;
623}
624
625struct brw_instruction *brw_CONT(struct brw_compile *p)
626{
627   struct brw_instruction *insn;
628   insn = next_insn(p, BRW_OPCODE_CONTINUE);
629   brw_set_dest(insn, brw_ip_reg());
630   brw_set_src0(insn, brw_ip_reg());
631   brw_set_src1(insn, brw_imm_d(0x0));
632   insn->header.compression_control = BRW_COMPRESSION_NONE;
633   insn->header.execution_size = BRW_EXECUTE_8;
634   insn->header.mask_control = BRW_MASK_DISABLE;
635   insn->bits3.if_else.pad0 = 0;
636   return insn;
637}
638
639/* DO/WHILE loop:
640 */
641struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
642{
643   if (p->single_program_flow) {
644      return &p->store[p->nr_insn];
645   } else {
646      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
647
648      /* Override the defaults for this instruction:
649       */
650      brw_set_dest(insn, brw_null_reg());
651      brw_set_src0(insn, brw_null_reg());
652      brw_set_src1(insn, brw_null_reg());
653
654      insn->header.compression_control = BRW_COMPRESSION_NONE;
655      insn->header.execution_size = execute_size;
656      insn->header.predicate_control = BRW_PREDICATE_NONE;
657      /* insn->header.mask_control = BRW_MASK_ENABLE; */
658      insn->header.mask_control = BRW_MASK_DISABLE;
659
660      return insn;
661   }
662}
663
664
665
666struct brw_instruction *brw_WHILE(struct brw_compile *p,
667	       struct brw_instruction *do_insn)
668{
669   struct brw_instruction *insn;
670
671   if (p->single_program_flow)
672      insn = next_insn(p, BRW_OPCODE_ADD);
673   else
674      insn = next_insn(p, BRW_OPCODE_WHILE);
675
676   brw_set_dest(insn, brw_ip_reg());
677   brw_set_src0(insn, brw_ip_reg());
678   brw_set_src1(insn, brw_imm_d(0x0));
679
680   insn->header.compression_control = BRW_COMPRESSION_NONE;
681
682   if (p->single_program_flow) {
683      insn->header.execution_size = BRW_EXECUTE_1;
684
685      insn->bits3.d = (do_insn - insn) * 16;
686   } else {
687      insn->header.execution_size = do_insn->header.execution_size;
688
689      assert(do_insn->header.opcode == BRW_OPCODE_DO);
690      insn->bits3.if_else.jump_count = do_insn - insn + 1;
691      insn->bits3.if_else.pop_count = 0;
692      insn->bits3.if_else.pad0 = 0;
693   }
694
695/*    insn->header.mask_control = BRW_MASK_ENABLE; */
696
697   insn->header.mask_control = BRW_MASK_DISABLE;
698   p->current->header.predicate_control = BRW_PREDICATE_NONE;
699   return insn;
700}
701
702
703/* FORWARD JUMPS:
704 */
705void brw_land_fwd_jump(struct brw_compile *p,
706		       struct brw_instruction *jmp_insn)
707{
708   struct brw_instruction *landing = &p->store[p->nr_insn];
709
710   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
711   assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
712
713   jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
714}
715
716
717
718/* To integrate with the above, it makes sense that the comparison
719 * instruction should populate the flag register.  It might be simpler
720 * just to use the flag reg for most WM tasks?
721 */
722void brw_CMP(struct brw_compile *p,
723	     struct brw_reg dest,
724	     GLuint conditional,
725	     struct brw_reg src0,
726	     struct brw_reg src1)
727{
728   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
729
730   insn->header.destreg__conditonalmod = conditional;
731   brw_set_dest(insn, dest);
732   brw_set_src0(insn, src0);
733   brw_set_src1(insn, src1);
734
735/*    guess_execution_size(insn, src0); */
736
737
738   /* Make it so that future instructions will use the computed flag
739    * value until brw_set_predicate_control_flag_value() is called
740    * again.
741    */
742   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
743       dest.nr == 0) {
744      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
745      p->flag_value = 0xff;
746   }
747}
748
749
750
751/***********************************************************************
752 * Helpers for the various SEND message types:
753 */
754
755/* Invert 8 values
756 */
757void brw_math( struct brw_compile *p,
758	       struct brw_reg dest,
759	       GLuint function,
760	       GLuint saturate,
761	       GLuint msg_reg_nr,
762	       struct brw_reg src,
763	       GLuint data_type,
764	       GLuint precision )
765{
766   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
767   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
768   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
769
770   /* Example code doesn't set predicate_control for send
771    * instructions.
772    */
773   insn->header.predicate_control = 0;
774   insn->header.destreg__conditonalmod = msg_reg_nr;
775
776   brw_set_dest(insn, dest);
777   brw_set_src0(insn, src);
778   brw_set_math_message(insn,
779			msg_length, response_length,
780			function,
781			BRW_MATH_INTEGER_UNSIGNED,
782			precision,
783			saturate,
784			data_type);
785}
786
787/* Use 2 send instructions to invert 16 elements
788 */
789void brw_math_16( struct brw_compile *p,
790		  struct brw_reg dest,
791		  GLuint function,
792		  GLuint saturate,
793		  GLuint msg_reg_nr,
794		  struct brw_reg src,
795		  GLuint precision )
796{
797   struct brw_instruction *insn;
798   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
799   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
800
801   /* First instruction:
802    */
803   brw_push_insn_state(p);
804   brw_set_predicate_control_flag_value(p, 0xff);
805   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
806
807   insn = next_insn(p, BRW_OPCODE_SEND);
808   insn->header.destreg__conditonalmod = msg_reg_nr;
809
810   brw_set_dest(insn, dest);
811   brw_set_src0(insn, src);
812   brw_set_math_message(insn,
813			msg_length, response_length,
814			function,
815			BRW_MATH_INTEGER_UNSIGNED,
816			precision,
817			saturate,
818			BRW_MATH_DATA_VECTOR);
819
820   /* Second instruction:
821    */
822   insn = next_insn(p, BRW_OPCODE_SEND);
823   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
824   insn->header.destreg__conditonalmod = msg_reg_nr+1;
825
826   brw_set_dest(insn, offset(dest,1));
827   brw_set_src0(insn, src);
828   brw_set_math_message(insn,
829			msg_length, response_length,
830			function,
831			BRW_MATH_INTEGER_UNSIGNED,
832			precision,
833			saturate,
834			BRW_MATH_DATA_VECTOR);
835
836   brw_pop_insn_state(p);
837}
838
839
840
841
842void brw_dp_WRITE_16( struct brw_compile *p,
843		      struct brw_reg src,
844		      GLuint msg_reg_nr,
845		      GLuint scratch_offset )
846{
847   {
848      brw_push_insn_state(p);
849      brw_set_mask_control(p, BRW_MASK_DISABLE);
850      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
851
852      brw_MOV(p,
853	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
854	      brw_imm_d(scratch_offset));
855
856      brw_pop_insn_state(p);
857   }
858
859   {
860      GLuint msg_length = 3;
861      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
862      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
863
864      insn->header.predicate_control = 0; /* XXX */
865      insn->header.compression_control = BRW_COMPRESSION_NONE;
866      insn->header.destreg__conditonalmod = msg_reg_nr;
867
868      brw_set_dest(insn, dest);
869      brw_set_src0(insn, src);
870
871      brw_set_dp_write_message(insn,
872			       255, /* bti */
873			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
874			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
875			       msg_length,
876			       0, /* pixel scoreboard */
877			       0, /* response_length */
878			       0); /* eot */
879   }
880
881}
882
883
884void brw_dp_READ_16( struct brw_compile *p,
885		      struct brw_reg dest,
886		      GLuint msg_reg_nr,
887		      GLuint scratch_offset )
888{
889   {
890      brw_push_insn_state(p);
891      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
892      brw_set_mask_control(p, BRW_MASK_DISABLE);
893
894      brw_MOV(p,
895	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
896	      brw_imm_d(scratch_offset));
897
898      brw_pop_insn_state(p);
899   }
900
901   {
902      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
903
904      insn->header.predicate_control = 0; /* XXX */
905      insn->header.compression_control = BRW_COMPRESSION_NONE;
906      insn->header.destreg__conditonalmod = msg_reg_nr;
907
908      brw_set_dest(insn, dest);	/* UW? */
909      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
910
911      brw_set_dp_read_message(insn,
912			      255, /* bti */
913			      3,  /* msg_control */
914			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
915			      1, /* target cache */
916			      1, /* msg_length */
917			      2, /* response_length */
918			      0); /* eot */
919   }
920}
921
922
923void brw_fb_WRITE(struct brw_compile *p,
924		   struct brw_reg dest,
925		   GLuint msg_reg_nr,
926		   struct brw_reg src0,
927		   GLuint binding_table_index,
928		   GLuint msg_length,
929		   GLuint response_length,
930		   GLboolean eot)
931{
932   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
933
934   insn->header.predicate_control = 0; /* XXX */
935   insn->header.compression_control = BRW_COMPRESSION_NONE;
936   insn->header.destreg__conditonalmod = msg_reg_nr;
937
938   brw_set_dest(insn, dest);
939   brw_set_src0(insn, src0);
940   brw_set_dp_write_message(insn,
941			    binding_table_index,
942			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
943			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
944			    msg_length,
945			    1,	/* pixel scoreboard */
946			    response_length,
947			    eot);
948}
949
950
951
952void brw_SAMPLE(struct brw_compile *p,
953		struct brw_reg dest,
954		GLuint msg_reg_nr,
955		struct brw_reg src0,
956		GLuint binding_table_index,
957		GLuint sampler,
958		GLuint writemask,
959		GLuint msg_type,
960		GLuint response_length,
961		GLuint msg_length,
962		GLboolean eot)
963{
964   GLboolean need_stall = 0;
965
966   if(writemask == 0) {
967/*       _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
968      return;
969   }
970
971   /* Hardware doesn't do destination dependency checking on send
972    * instructions properly.  Add a workaround which generates the
973    * dependency by other means.  In practice it seems like this bug
974    * only crops up for texture samples, and only where registers are
975    * written by the send and then written again later without being
976    * read in between.  Luckily for us, we already track that
977    * information and use it to modify the writemask for the
978    * instruction, so that is a guide for whether a workaround is
979    * needed.
980    */
981   if (writemask != WRITEMASK_XYZW) {
982      GLuint dst_offset = 0;
983      GLuint i, newmask = 0, len = 0;
984
985      for (i = 0; i < 4; i++) {
986	 if (writemask & (1<<i))
987	    break;
988	 dst_offset += 2;
989      }
990      for (; i < 4; i++) {
991	 if (!(writemask & (1<<i)))
992	    break;
993	 newmask |= 1<<i;
994	 len++;
995      }
996
997      if (newmask != writemask) {
998	 need_stall = 1;
999/* 	 _mesa_printf("need stall %x %x\n", newmask , writemask); */
1000      }
1001      else {
1002	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1003
1004	 newmask = ~newmask & WRITEMASK_XYZW;
1005
1006	 brw_push_insn_state(p);
1007
1008	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1009	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1010
1011	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1012  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1013
1014	 brw_pop_insn_state(p);
1015
1016  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1017	 dest = offset(dest, dst_offset);
1018	 response_length = len * 2;
1019      }
1020   }
1021
1022   {
1023      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1024
1025      insn->header.predicate_control = 0; /* XXX */
1026      insn->header.compression_control = BRW_COMPRESSION_NONE;
1027      insn->header.destreg__conditonalmod = msg_reg_nr;
1028
1029      brw_set_dest(insn, dest);
1030      brw_set_src0(insn, src0);
1031      brw_set_sampler_message(p->brw, insn,
1032			      binding_table_index,
1033			      sampler,
1034			      msg_type,
1035			      response_length,
1036			      msg_length,
1037			      eot);
1038   }
1039
1040   if (need_stall)
1041   {
1042      struct brw_reg reg = vec8(offset(dest, response_length-1));
1043
1044      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1045       */
1046      brw_push_insn_state(p);
1047      brw_set_compression_control(p, GL_FALSE);
1048      brw_MOV(p, reg, reg);
1049      brw_pop_insn_state(p);
1050   }
1051
1052}
1053
1054/* All these variables are pretty confusing - we might be better off
1055 * using bitmasks and macros for this, in the old style.  Or perhaps
1056 * just having the caller instantiate the fields in dword3 itself.
1057 */
1058void brw_urb_WRITE(struct brw_compile *p,
1059		   struct brw_reg dest,
1060		   GLuint msg_reg_nr,
1061		   struct brw_reg src0,
1062		   GLboolean allocate,
1063		   GLboolean used,
1064		   GLuint msg_length,
1065		   GLuint response_length,
1066		   GLboolean eot,
1067		   GLboolean writes_complete,
1068		   GLuint offset,
1069		   GLuint swizzle)
1070{
1071   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1072
1073   assert(msg_length < 16);
1074
1075   brw_set_dest(insn, dest);
1076   brw_set_src0(insn, src0);
1077   brw_set_src1(insn, brw_imm_d(0));
1078
1079   insn->header.destreg__conditonalmod = msg_reg_nr;
1080
1081   brw_set_urb_message(insn,
1082		       allocate,
1083		       used,
1084		       msg_length,
1085		       response_length,
1086		       eot,
1087		       writes_complete,
1088		       offset,
1089		       swizzle);
1090}
1091
1092