brw_eu_emit.c revision 1b6ae2e004b7a7a76508e0da3c45eb0d851ed10c
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keith@tungstengraphics.com>
30  */
31
32
33#include "brw_context.h"
34#include "brw_defines.h"
35#include "brw_eu.h"
36
37
38
39
40/***********************************************************************
41 * Internal helper for constructing instructions
42 */
43
44static void guess_execution_size( struct brw_instruction *insn,
45				  struct brw_reg reg )
46{
47   if (reg.width == BRW_WIDTH_8 &&
48       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
49      insn->header.execution_size = BRW_EXECUTE_16;
50   else
51      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
52}
53
54
55static void brw_set_dest( struct brw_instruction *insn,
56			  struct brw_reg dest )
57{
58   if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
59      assert(dest.nr < 128);
60
61   insn->bits1.da1.dest_reg_file = dest.file;
62   insn->bits1.da1.dest_reg_type = dest.type;
63   insn->bits1.da1.dest_address_mode = dest.address_mode;
64
65   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
66      insn->bits1.da1.dest_reg_nr = dest.nr;
67
68      if (insn->header.access_mode == BRW_ALIGN_1) {
69	 insn->bits1.da1.dest_subreg_nr = dest.subnr;
70	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
71	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
72	 insn->bits1.da1.dest_horiz_stride = dest.hstride;
73      }
74      else {
75	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
76	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
77      }
78   }
79   else {
80      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
81
82      /* These are different sizes in align1 vs align16:
83       */
84      if (insn->header.access_mode == BRW_ALIGN_1) {
85	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
86	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
87	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
88	 insn->bits1.ia1.dest_horiz_stride = dest.hstride;
89      }
90      else {
91	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
92      }
93   }
94
95   /* NEW: Set the execution size based on dest.width and
96    * insn->compression_control:
97    */
98   guess_execution_size(insn, dest);
99}
100
101static void brw_set_src0( struct brw_instruction *insn,
102                          struct brw_reg reg )
103{
104   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
105
106   if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
107      assert(reg.nr < 128);
108
109   insn->bits1.da1.src0_reg_file = reg.file;
110   insn->bits1.da1.src0_reg_type = reg.type;
111   insn->bits2.da1.src0_abs = reg.abs;
112   insn->bits2.da1.src0_negate = reg.negate;
113   insn->bits2.da1.src0_address_mode = reg.address_mode;
114
115   if (reg.file == BRW_IMMEDIATE_VALUE) {
116      insn->bits3.ud = reg.dw1.ud;
117
118      /* Required to set some fields in src1 as well:
119       */
120      insn->bits1.da1.src1_reg_file = 0; /* arf */
121      insn->bits1.da1.src1_reg_type = reg.type;
122   }
123   else
124   {
125      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
126	 if (insn->header.access_mode == BRW_ALIGN_1) {
127	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
128	    insn->bits2.da1.src0_reg_nr = reg.nr;
129	 }
130	 else {
131	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
132	    insn->bits2.da16.src0_reg_nr = reg.nr;
133	 }
134      }
135      else {
136	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
137
138	 if (insn->header.access_mode == BRW_ALIGN_1) {
139	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
140	 }
141	 else {
142	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
143	 }
144      }
145
146      if (insn->header.access_mode == BRW_ALIGN_1) {
147	 if (reg.width == BRW_WIDTH_1 &&
148	     insn->header.execution_size == BRW_EXECUTE_1) {
149	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
150	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
151	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
152	 }
153	 else {
154	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
155	    insn->bits2.da1.src0_width = reg.width;
156	    insn->bits2.da1.src0_vert_stride = reg.vstride;
157	 }
158      }
159      else {
160	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
161	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
162	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
163	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
164
165	 /* This is an oddity of the fact we're using the same
166	  * descriptions for registers in align_16 as align_1:
167	  */
168	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
169	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
170	 else
171	    insn->bits2.da16.src0_vert_stride = reg.vstride;
172      }
173   }
174}
175
176
177void brw_set_src1( struct brw_instruction *insn,
178                   struct brw_reg reg )
179{
180   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
181
182   assert(reg.nr < 128);
183
184   insn->bits1.da1.src1_reg_file = reg.file;
185   insn->bits1.da1.src1_reg_type = reg.type;
186   insn->bits3.da1.src1_abs = reg.abs;
187   insn->bits3.da1.src1_negate = reg.negate;
188
189   /* Only src1 can be immediate in two-argument instructions.
190    */
191   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
192
193   if (reg.file == BRW_IMMEDIATE_VALUE) {
194      insn->bits3.ud = reg.dw1.ud;
195   }
196   else {
197      /* This is a hardware restriction, which may or may not be lifted
198       * in the future:
199       */
200      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
201      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
202
203      if (insn->header.access_mode == BRW_ALIGN_1) {
204	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
205	 insn->bits3.da1.src1_reg_nr = reg.nr;
206      }
207      else {
208	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
209	 insn->bits3.da16.src1_reg_nr = reg.nr;
210      }
211
212      if (insn->header.access_mode == BRW_ALIGN_1) {
213	 if (reg.width == BRW_WIDTH_1 &&
214	     insn->header.execution_size == BRW_EXECUTE_1) {
215	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
216	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
217	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
218	 }
219	 else {
220	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
221	    insn->bits3.da1.src1_width = reg.width;
222	    insn->bits3.da1.src1_vert_stride = reg.vstride;
223	 }
224      }
225      else {
226	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
227	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
228	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
229	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
230
231	 /* This is an oddity of the fact we're using the same
232	  * descriptions for registers in align_16 as align_1:
233	  */
234	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
235	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
236	 else
237	    insn->bits3.da16.src1_vert_stride = reg.vstride;
238      }
239   }
240}
241
242
243
244static void brw_set_math_message( struct brw_instruction *insn,
245				  GLuint msg_length,
246				  GLuint response_length,
247				  GLuint function,
248				  GLuint integer_type,
249				  GLboolean low_precision,
250				  GLboolean saturate,
251				  GLuint dataType )
252{
253   brw_set_src1(insn, brw_imm_d(0));
254
255   insn->bits3.math.function = function;
256   insn->bits3.math.int_type = integer_type;
257   insn->bits3.math.precision = low_precision;
258   insn->bits3.math.saturate = saturate;
259   insn->bits3.math.data_type = dataType;
260   insn->bits3.math.response_length = response_length;
261   insn->bits3.math.msg_length = msg_length;
262   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
263   insn->bits3.math.end_of_thread = 0;
264}
265
266static void brw_set_urb_message( struct brw_instruction *insn,
267				 GLboolean allocate,
268				 GLboolean used,
269				 GLuint msg_length,
270				 GLuint response_length,
271				 GLboolean end_of_thread,
272				 GLboolean complete,
273				 GLuint offset,
274				 GLuint swizzle_control )
275{
276   brw_set_src1(insn, brw_imm_d(0));
277
278   insn->bits3.urb.opcode = 0;	/* ? */
279   insn->bits3.urb.offset = offset;
280   insn->bits3.urb.swizzle_control = swizzle_control;
281   insn->bits3.urb.allocate = allocate;
282   insn->bits3.urb.used = used;	/* ? */
283   insn->bits3.urb.complete = complete;
284   insn->bits3.urb.response_length = response_length;
285   insn->bits3.urb.msg_length = msg_length;
286   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
287   insn->bits3.urb.end_of_thread = end_of_thread;
288}
289
290static void brw_set_dp_write_message( struct brw_instruction *insn,
291				      GLuint binding_table_index,
292				      GLuint msg_control,
293				      GLuint msg_type,
294				      GLuint msg_length,
295				      GLuint pixel_scoreboard_clear,
296				      GLuint response_length,
297				      GLuint end_of_thread )
298{
299   brw_set_src1(insn, brw_imm_d(0));
300
301   insn->bits3.dp_write.binding_table_index = binding_table_index;
302   insn->bits3.dp_write.msg_control = msg_control;
303   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
304   insn->bits3.dp_write.msg_type = msg_type;
305   insn->bits3.dp_write.send_commit_msg = 0;
306   insn->bits3.dp_write.response_length = response_length;
307   insn->bits3.dp_write.msg_length = msg_length;
308   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
309   insn->bits3.urb.end_of_thread = end_of_thread;
310}
311
312static void brw_set_dp_read_message( struct brw_instruction *insn,
313				      GLuint binding_table_index,
314				      GLuint msg_control,
315				      GLuint msg_type,
316				      GLuint target_cache,
317				      GLuint msg_length,
318				      GLuint response_length,
319				      GLuint end_of_thread )
320{
321   brw_set_src1(insn, brw_imm_d(0));
322
323   insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
324   insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
325   insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
326   insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
327   insn->bits3.dp_read.response_length = response_length;  /*16:19*/
328   insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
329   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
330   insn->bits3.dp_read.pad1 = 0;  /*28:30*/
331   insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
332}
333
334static void brw_set_sampler_message(struct brw_context *brw,
335                                    struct brw_instruction *insn,
336                                    GLuint binding_table_index,
337                                    GLuint sampler,
338                                    GLuint msg_type,
339                                    GLuint response_length,
340                                    GLuint msg_length,
341                                    GLboolean eot)
342{
343   brw_set_src1(insn, brw_imm_d(0));
344
345   if (BRW_IS_G4X(brw)) {
346      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
347      insn->bits3.sampler_g4x.sampler = sampler;
348      insn->bits3.sampler_g4x.msg_type = msg_type;
349      insn->bits3.sampler_g4x.response_length = response_length;
350      insn->bits3.sampler_g4x.msg_length = msg_length;
351      insn->bits3.sampler_g4x.end_of_thread = eot;
352      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
353   } else {
354      insn->bits3.sampler.binding_table_index = binding_table_index;
355      insn->bits3.sampler.sampler = sampler;
356      insn->bits3.sampler.msg_type = msg_type;
357      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
358      insn->bits3.sampler.response_length = response_length;
359      insn->bits3.sampler.msg_length = msg_length;
360      insn->bits3.sampler.end_of_thread = eot;
361      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
362   }
363}
364
365
366
367static struct brw_instruction *next_insn( struct brw_compile *p,
368					  GLuint opcode )
369{
370   struct brw_instruction *insn;
371
372   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
373
374   insn = &p->store[p->nr_insn++];
375   memcpy(insn, p->current, sizeof(*insn));
376
377   /* Reset this one-shot flag:
378    */
379
380   if (p->current->header.destreg__conditonalmod) {
381      p->current->header.destreg__conditonalmod = 0;
382      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
383   }
384
385   insn->header.opcode = opcode;
386   return insn;
387}
388
389
390static struct brw_instruction *brw_alu1( struct brw_compile *p,
391					 GLuint opcode,
392					 struct brw_reg dest,
393					 struct brw_reg src )
394{
395   struct brw_instruction *insn = next_insn(p, opcode);
396   brw_set_dest(insn, dest);
397   brw_set_src0(insn, src);
398   return insn;
399}
400
401static struct brw_instruction *brw_alu2(struct brw_compile *p,
402					GLuint opcode,
403					struct brw_reg dest,
404					struct brw_reg src0,
405					struct brw_reg src1 )
406{
407   struct brw_instruction *insn = next_insn(p, opcode);
408   brw_set_dest(insn, dest);
409   brw_set_src0(insn, src0);
410   brw_set_src1(insn, src1);
411   return insn;
412}
413
414
415/***********************************************************************
416 * Convenience routines.
417 */
418#define ALU1(OP)					\
419struct brw_instruction *brw_##OP(struct brw_compile *p,	\
420	      struct brw_reg dest,			\
421	      struct brw_reg src0)   			\
422{							\
423   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
424}
425
426#define ALU2(OP)					\
427struct brw_instruction *brw_##OP(struct brw_compile *p,	\
428	      struct brw_reg dest,			\
429	      struct brw_reg src0,			\
430	      struct brw_reg src1)   			\
431{							\
432   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
433}
434
435
436ALU1(MOV)
437ALU2(SEL)
438ALU1(NOT)
439ALU2(AND)
440ALU2(OR)
441ALU2(XOR)
442ALU2(SHR)
443ALU2(SHL)
444ALU2(RSR)
445ALU2(RSL)
446ALU2(ASR)
447ALU2(ADD)
448ALU2(MUL)
449ALU1(FRC)
450ALU1(RNDD)
451ALU1(RNDZ)
452ALU2(MAC)
453ALU2(MACH)
454ALU1(LZD)
455ALU2(DP4)
456ALU2(DPH)
457ALU2(DP3)
458ALU2(DP2)
459ALU2(LINE)
460
461
462
463
464void brw_NOP(struct brw_compile *p)
465{
466   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
467   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
468   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
469   brw_set_src1(insn, brw_imm_ud(0x0));
470}
471
472
473
474
475
476/***********************************************************************
477 * Comparisons, if/else/endif
478 */
479
480struct brw_instruction *brw_JMPI(struct brw_compile *p,
481                                 struct brw_reg dest,
482                                 struct brw_reg src0,
483                                 struct brw_reg src1)
484{
485   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
486
487   p->current->header.predicate_control = BRW_PREDICATE_NONE;
488
489   return insn;
490}
491
492/* EU takes the value from the flag register and pushes it onto some
493 * sort of a stack (presumably merging with any flag value already on
494 * the stack).  Within an if block, the flags at the top of the stack
495 * control execution on each channel of the unit, eg. on each of the
496 * 16 pixel values in our wm programs.
497 *
498 * When the matching 'else' instruction is reached (presumably by
499 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
501 *
502 * When the matching 'endif' instruction is reached, the flags are
503 * popped off.  If the stack is now empty, normal execution resumes.
504 *
505 * No attempt is made to deal with stack overflow (14 elements?).
506 */
507struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
508{
509   struct brw_instruction *insn;
510
511   if (p->single_program_flow) {
512      assert(execute_size == BRW_EXECUTE_1);
513
514      insn = next_insn(p, BRW_OPCODE_ADD);
515      insn->header.predicate_inverse = 1;
516   } else {
517      insn = next_insn(p, BRW_OPCODE_IF);
518   }
519
520   /* Override the defaults for this instruction:
521    */
522   brw_set_dest(insn, brw_ip_reg());
523   brw_set_src0(insn, brw_ip_reg());
524   brw_set_src1(insn, brw_imm_d(0x0));
525
526   insn->header.execution_size = execute_size;
527   insn->header.compression_control = BRW_COMPRESSION_NONE;
528   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
529   insn->header.mask_control = BRW_MASK_ENABLE;
530   if (!p->single_program_flow)
531       insn->header.thread_control = BRW_THREAD_SWITCH;
532
533   p->current->header.predicate_control = BRW_PREDICATE_NONE;
534
535   return insn;
536}
537
538
539struct brw_instruction *brw_ELSE(struct brw_compile *p,
540				 struct brw_instruction *if_insn)
541{
542   struct brw_instruction *insn;
543
544   if (p->single_program_flow) {
545      insn = next_insn(p, BRW_OPCODE_ADD);
546   } else {
547      insn = next_insn(p, BRW_OPCODE_ELSE);
548   }
549
550   brw_set_dest(insn, brw_ip_reg());
551   brw_set_src0(insn, brw_ip_reg());
552   brw_set_src1(insn, brw_imm_d(0x0));
553
554   insn->header.compression_control = BRW_COMPRESSION_NONE;
555   insn->header.execution_size = if_insn->header.execution_size;
556   insn->header.mask_control = BRW_MASK_ENABLE;
557   if (!p->single_program_flow)
558       insn->header.thread_control = BRW_THREAD_SWITCH;
559
560   /* Patch the if instruction to point at this instruction.
561    */
562   if (p->single_program_flow) {
563      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
564
565      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
566   } else {
567      assert(if_insn->header.opcode == BRW_OPCODE_IF);
568
569      if_insn->bits3.if_else.jump_count = insn - if_insn;
570      if_insn->bits3.if_else.pop_count = 1;
571      if_insn->bits3.if_else.pad0 = 0;
572   }
573
574   return insn;
575}
576
577void brw_ENDIF(struct brw_compile *p,
578	       struct brw_instruction *patch_insn)
579{
580   if (p->single_program_flow) {
581      /* In single program flow mode, there's no need to execute an ENDIF,
582       * since we don't need to do any stack operations, and if we're executing
583       * currently, we want to just continue executing.
584       */
585      struct brw_instruction *next = &p->store[p->nr_insn];
586
587      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
588
589      patch_insn->bits3.ud = (next - patch_insn) * 16;
590   } else {
591      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
592
593      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
594      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
595      brw_set_src1(insn, brw_imm_d(0x0));
596
597      insn->header.compression_control = BRW_COMPRESSION_NONE;
598      insn->header.execution_size = patch_insn->header.execution_size;
599      insn->header.mask_control = BRW_MASK_ENABLE;
600      insn->header.thread_control = BRW_THREAD_SWITCH;
601
602      assert(patch_insn->bits3.if_else.jump_count == 0);
603
604      /* Patch the if or else instructions to point at this or the next
605       * instruction respectively.
606       */
607      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
608	 /* Automagically turn it into an IFF:
609	  */
610	 patch_insn->header.opcode = BRW_OPCODE_IFF;
611	 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
612	 patch_insn->bits3.if_else.pop_count = 0;
613	 patch_insn->bits3.if_else.pad0 = 0;
614      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
615	 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
616	 patch_insn->bits3.if_else.pop_count = 1;
617	 patch_insn->bits3.if_else.pad0 = 0;
618      } else {
619	 assert(0);
620      }
621
622      /* Also pop item off the stack in the endif instruction:
623       */
624      insn->bits3.if_else.jump_count = 0;
625      insn->bits3.if_else.pop_count = 1;
626      insn->bits3.if_else.pad0 = 0;
627   }
628}
629
630struct brw_instruction *brw_BREAK(struct brw_compile *p)
631{
632   struct brw_instruction *insn;
633   insn = next_insn(p, BRW_OPCODE_BREAK);
634   brw_set_dest(insn, brw_ip_reg());
635   brw_set_src0(insn, brw_ip_reg());
636   brw_set_src1(insn, brw_imm_d(0x0));
637   insn->header.compression_control = BRW_COMPRESSION_NONE;
638   insn->header.execution_size = BRW_EXECUTE_8;
639   /* insn->header.mask_control = BRW_MASK_DISABLE; */
640   insn->bits3.if_else.pad0 = 0;
641   return insn;
642}
643
644struct brw_instruction *brw_CONT(struct brw_compile *p)
645{
646   struct brw_instruction *insn;
647   insn = next_insn(p, BRW_OPCODE_CONTINUE);
648   brw_set_dest(insn, brw_ip_reg());
649   brw_set_src0(insn, brw_ip_reg());
650   brw_set_src1(insn, brw_imm_d(0x0));
651   insn->header.compression_control = BRW_COMPRESSION_NONE;
652   insn->header.execution_size = BRW_EXECUTE_8;
653   /* insn->header.mask_control = BRW_MASK_DISABLE; */
654   insn->bits3.if_else.pad0 = 0;
655   return insn;
656}
657
658/* DO/WHILE loop:
659 */
660struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
661{
662   if (p->single_program_flow) {
663      return &p->store[p->nr_insn];
664   } else {
665      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
666
667      /* Override the defaults for this instruction:
668       */
669      brw_set_dest(insn, brw_null_reg());
670      brw_set_src0(insn, brw_null_reg());
671      brw_set_src1(insn, brw_null_reg());
672
673      insn->header.compression_control = BRW_COMPRESSION_NONE;
674      insn->header.execution_size = execute_size;
675      insn->header.predicate_control = BRW_PREDICATE_NONE;
676      /* insn->header.mask_control = BRW_MASK_ENABLE; */
677      /* insn->header.mask_control = BRW_MASK_DISABLE; */
678
679      return insn;
680   }
681}
682
683
684
685struct brw_instruction *brw_WHILE(struct brw_compile *p,
686                                  struct brw_instruction *do_insn)
687{
688   struct brw_instruction *insn;
689
690   if (p->single_program_flow)
691      insn = next_insn(p, BRW_OPCODE_ADD);
692   else
693      insn = next_insn(p, BRW_OPCODE_WHILE);
694
695   brw_set_dest(insn, brw_ip_reg());
696   brw_set_src0(insn, brw_ip_reg());
697   brw_set_src1(insn, brw_imm_d(0x0));
698
699   insn->header.compression_control = BRW_COMPRESSION_NONE;
700
701   if (p->single_program_flow) {
702      insn->header.execution_size = BRW_EXECUTE_1;
703
704      insn->bits3.d = (do_insn - insn) * 16;
705   } else {
706      insn->header.execution_size = do_insn->header.execution_size;
707
708      assert(do_insn->header.opcode == BRW_OPCODE_DO);
709      insn->bits3.if_else.jump_count = do_insn - insn + 1;
710      insn->bits3.if_else.pop_count = 0;
711      insn->bits3.if_else.pad0 = 0;
712   }
713
714/*    insn->header.mask_control = BRW_MASK_ENABLE; */
715
716   /* insn->header.mask_control = BRW_MASK_DISABLE; */
717   p->current->header.predicate_control = BRW_PREDICATE_NONE;
718   return insn;
719}
720
721
722/* FORWARD JUMPS:
723 */
724void brw_land_fwd_jump(struct brw_compile *p,
725		       struct brw_instruction *jmp_insn)
726{
727   struct brw_instruction *landing = &p->store[p->nr_insn];
728
729   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
730   assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
731
732   jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
733}
734
735
736
737/* To integrate with the above, it makes sense that the comparison
738 * instruction should populate the flag register.  It might be simpler
739 * just to use the flag reg for most WM tasks?
740 */
741void brw_CMP(struct brw_compile *p,
742	     struct brw_reg dest,
743	     GLuint conditional,
744	     struct brw_reg src0,
745	     struct brw_reg src1)
746{
747   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
748
749   insn->header.destreg__conditonalmod = conditional;
750   brw_set_dest(insn, dest);
751   brw_set_src0(insn, src0);
752   brw_set_src1(insn, src1);
753
754/*    guess_execution_size(insn, src0); */
755
756
757   /* Make it so that future instructions will use the computed flag
758    * value until brw_set_predicate_control_flag_value() is called
759    * again.
760    */
761   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
762       dest.nr == 0) {
763      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
764      p->flag_value = 0xff;
765   }
766}
767
768
769
770/***********************************************************************
771 * Helpers for the various SEND message types:
772 */
773
774/** Extended math function, float[8].
775 */
776void brw_math( struct brw_compile *p,
777	       struct brw_reg dest,
778	       GLuint function,
779	       GLuint saturate,
780	       GLuint msg_reg_nr,
781	       struct brw_reg src,
782	       GLuint data_type,
783	       GLuint precision )
784{
785   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
786   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
787   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
788
789   /* Example code doesn't set predicate_control for send
790    * instructions.
791    */
792   insn->header.predicate_control = 0;
793   insn->header.destreg__conditonalmod = msg_reg_nr;
794
795   brw_set_dest(insn, dest);
796   brw_set_src0(insn, src);
797   brw_set_math_message(insn,
798			msg_length, response_length,
799			function,
800			BRW_MATH_INTEGER_UNSIGNED,
801			precision,
802			saturate,
803			data_type);
804}
805
806/**
807 * Extended math function, float[16].
808 * Use 2 send instructions.
809 */
810void brw_math_16( struct brw_compile *p,
811		  struct brw_reg dest,
812		  GLuint function,
813		  GLuint saturate,
814		  GLuint msg_reg_nr,
815		  struct brw_reg src,
816		  GLuint precision )
817{
818   struct brw_instruction *insn;
819   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
820   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
821
822   /* First instruction:
823    */
824   brw_push_insn_state(p);
825   brw_set_predicate_control_flag_value(p, 0xff);
826   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
827
828   insn = next_insn(p, BRW_OPCODE_SEND);
829   insn->header.destreg__conditonalmod = msg_reg_nr;
830
831   brw_set_dest(insn, dest);
832   brw_set_src0(insn, src);
833   brw_set_math_message(insn,
834			msg_length, response_length,
835			function,
836			BRW_MATH_INTEGER_UNSIGNED,
837			precision,
838			saturate,
839			BRW_MATH_DATA_VECTOR);
840
841   /* Second instruction:
842    */
843   insn = next_insn(p, BRW_OPCODE_SEND);
844   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
845   insn->header.destreg__conditonalmod = msg_reg_nr+1;
846
847   brw_set_dest(insn, offset(dest,1));
848   brw_set_src0(insn, src);
849   brw_set_math_message(insn,
850			msg_length, response_length,
851			function,
852			BRW_MATH_INTEGER_UNSIGNED,
853			precision,
854			saturate,
855			BRW_MATH_DATA_VECTOR);
856
857   brw_pop_insn_state(p);
858}
859
860
861/**
862 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
863 * Scratch offset should be a multiple of 64.
864 * Used for register spilling.
865 */
866void brw_dp_WRITE_16( struct brw_compile *p,
867		      struct brw_reg src,
868		      GLuint scratch_offset )
869{
870   GLuint msg_reg_nr = 1;
871   {
872      brw_push_insn_state(p);
873      brw_set_mask_control(p, BRW_MASK_DISABLE);
874      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
875
876      /* set message header global offset field (reg 0, element 2) */
877      brw_MOV(p,
878	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
879	      brw_imm_d(scratch_offset));
880
881      brw_pop_insn_state(p);
882   }
883
884   {
885      GLuint msg_length = 3;
886      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
887      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
888
889      insn->header.predicate_control = 0; /* XXX */
890      insn->header.compression_control = BRW_COMPRESSION_NONE;
891      insn->header.destreg__conditonalmod = msg_reg_nr;
892
893      brw_set_dest(insn, dest);
894      brw_set_src0(insn, src);
895
896      brw_set_dp_write_message(insn,
897			       255, /* binding table index (255=stateless) */
898			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
899			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
900			       msg_length,
901			       0, /* pixel scoreboard */
902			       0, /* response_length */
903			       0); /* eot */
904   }
905}
906
907
908/**
909 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
910 * Scratch offset should be a multiple of 64.
911 * Used for register spilling.
912 */
913void brw_dp_READ_16( struct brw_compile *p,
914		      struct brw_reg dest,
915		      GLuint scratch_offset )
916{
917   GLuint msg_reg_nr = 1;
918   {
919      brw_push_insn_state(p);
920      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
921      brw_set_mask_control(p, BRW_MASK_DISABLE);
922
923      /* set message header global offset field (reg 0, element 2) */
924      brw_MOV(p,
925	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
926	      brw_imm_d(scratch_offset));
927
928      brw_pop_insn_state(p);
929   }
930
931   {
932      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
933
934      insn->header.predicate_control = 0; /* XXX */
935      insn->header.compression_control = BRW_COMPRESSION_NONE;
936      insn->header.destreg__conditonalmod = msg_reg_nr;
937
938      brw_set_dest(insn, dest);	/* UW? */
939      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
940
941      brw_set_dp_read_message(insn,
942			      255, /* binding table index (255=stateless) */
943			      3,  /* msg_control (3 means 4 Owords) */
944			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
945			      1, /* target cache (render/scratch) */
946			      1, /* msg_length */
947			      2, /* response_length */
948			      0); /* eot */
949   }
950}
951
952
953/**
954 * Read a float[4] vector from the data port Data Cache (const buffer).
955 * Location (in buffer) should be a multiple of 16.
956 * Used for fetching shader constants.
957 * If relAddr is true, we'll do an indirect fetch using the address register.
958 */
959void brw_dp_READ_4( struct brw_compile *p,
960                    struct brw_reg dest,
961                    GLboolean relAddr,
962                    GLuint location,
963                    GLuint bind_table_index )
964{
965   /* XXX: relAddr not implemented */
966   GLuint msg_reg_nr = 1;
967   {
968      struct brw_reg b;
969      brw_push_insn_state(p);
970      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
971      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
972      brw_set_mask_control(p, BRW_MASK_DISABLE);
973
974   /* Setup MRF[1] with location/offset into const buffer */
975      b = brw_message_reg(msg_reg_nr);
976      b = retype(b, BRW_REGISTER_TYPE_UD);
977      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
978       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
979       */
980      brw_MOV(p, b, brw_imm_ud(location));
981      brw_pop_insn_state(p);
982   }
983
984   {
985      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
986
987      insn->header.predicate_control = BRW_PREDICATE_NONE;
988      insn->header.compression_control = BRW_COMPRESSION_NONE;
989      insn->header.destreg__conditonalmod = msg_reg_nr;
990      insn->header.mask_control = BRW_MASK_DISABLE;
991
992      /* cast dest to a uword[8] vector */
993      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
994
995      brw_set_dest(insn, dest);
996      brw_set_src0(insn, brw_null_reg());
997
998      brw_set_dp_read_message(insn,
999			      bind_table_index,
1000			      0,  /* msg_control (0 means 1 Oword) */
1001			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1002			      0, /* source cache = data cache */
1003			      1, /* msg_length */
1004			      1, /* response_length (1 Oword) */
1005			      0); /* eot */
1006   }
1007}
1008
1009
1010/**
1011 * Read float[4] constant(s) from VS constant buffer.
1012 * For relative addressing, two float[4] constants will be read into 'dest'.
1013 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1014 */
1015void brw_dp_READ_4_vs(struct brw_compile *p,
1016                      struct brw_reg dest,
1017                      GLuint oword,
1018                      GLboolean relAddr,
1019                      struct brw_reg addrReg,
1020                      GLuint location,
1021                      GLuint bind_table_index)
1022{
1023   GLuint msg_reg_nr = 1;
1024
1025   assert(oword < 2);
1026   /*
1027   printf("vs const read msg, location %u, msg_reg_nr %d\n",
1028          location, msg_reg_nr);
1029   */
1030
1031   /* Setup MRF[1] with location/offset into const buffer */
1032   {
1033      struct brw_reg b;
1034
1035      brw_push_insn_state(p);
1036      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1037      brw_set_mask_control(p, BRW_MASK_DISABLE);
1038      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1039      /*brw_set_access_mode(p, BRW_ALIGN_16);*/
1040
1041      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
1042       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
1043       */
1044      b = brw_message_reg(msg_reg_nr);
1045      b = retype(b, BRW_REGISTER_TYPE_UD);
1046      /*b = get_element_ud(b, 2);*/
1047      if (relAddr) {
1048         brw_ADD(p, b, addrReg, brw_imm_ud(location));
1049      }
1050      else {
1051         brw_MOV(p, b, brw_imm_ud(location));
1052      }
1053
1054      brw_pop_insn_state(p);
1055   }
1056
1057   {
1058      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1059
1060      insn->header.predicate_control = BRW_PREDICATE_NONE;
1061      insn->header.compression_control = BRW_COMPRESSION_NONE;
1062      insn->header.destreg__conditonalmod = msg_reg_nr;
1063      insn->header.mask_control = BRW_MASK_DISABLE;
1064      /*insn->header.access_mode = BRW_ALIGN_16;*/
1065
1066      brw_set_dest(insn, dest);
1067      brw_set_src0(insn, brw_null_reg());
1068
1069      brw_set_dp_read_message(insn,
1070			      bind_table_index,
1071			      oword,  /* 0 = lower Oword, 1 = upper Oword */
1072			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1073			      0, /* source cache = data cache */
1074			      1, /* msg_length */
1075			      1, /* response_length (1 Oword) */
1076			      0); /* eot */
1077   }
1078}
1079
1080
1081
1082void brw_fb_WRITE(struct brw_compile *p,
1083                  struct brw_reg dest,
1084                  GLuint msg_reg_nr,
1085                  struct brw_reg src0,
1086                  GLuint binding_table_index,
1087                  GLuint msg_length,
1088                  GLuint response_length,
1089                  GLboolean eot)
1090{
1091   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1092
1093   insn->header.predicate_control = 0; /* XXX */
1094   insn->header.compression_control = BRW_COMPRESSION_NONE;
1095   insn->header.destreg__conditonalmod = msg_reg_nr;
1096
1097   brw_set_dest(insn, dest);
1098   brw_set_src0(insn, src0);
1099   brw_set_dp_write_message(insn,
1100			    binding_table_index,
1101			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
1102			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1103			    msg_length,
1104			    1,	/* pixel scoreboard */
1105			    response_length,
1106			    eot);
1107}
1108
1109
1110/**
1111 * Texture sample instruction.
1112 * Note: the msg_type plus msg_length values determine exactly what kind
1113 * of sampling operation is performed.  See volume 4, page 161 of docs.
1114 */
1115void brw_SAMPLE(struct brw_compile *p,
1116		struct brw_reg dest,
1117		GLuint msg_reg_nr,
1118		struct brw_reg src0,
1119		GLuint binding_table_index,
1120		GLuint sampler,
1121		GLuint writemask,
1122		GLuint msg_type,
1123		GLuint response_length,
1124		GLuint msg_length,
1125		GLboolean eot)
1126{
1127   GLboolean need_stall = 0;
1128
1129   if (writemask == 0) {
1130      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
1131      return;
1132   }
1133
1134   /* Hardware doesn't do destination dependency checking on send
1135    * instructions properly.  Add a workaround which generates the
1136    * dependency by other means.  In practice it seems like this bug
1137    * only crops up for texture samples, and only where registers are
1138    * written by the send and then written again later without being
1139    * read in between.  Luckily for us, we already track that
1140    * information and use it to modify the writemask for the
1141    * instruction, so that is a guide for whether a workaround is
1142    * needed.
1143    */
1144   if (writemask != WRITEMASK_XYZW) {
1145      GLuint dst_offset = 0;
1146      GLuint i, newmask = 0, len = 0;
1147
1148      for (i = 0; i < 4; i++) {
1149	 if (writemask & (1<<i))
1150	    break;
1151	 dst_offset += 2;
1152      }
1153      for (; i < 4; i++) {
1154	 if (!(writemask & (1<<i)))
1155	    break;
1156	 newmask |= 1<<i;
1157	 len++;
1158      }
1159
1160      if (newmask != writemask) {
1161	 need_stall = 1;
1162         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
1163      }
1164      else {
1165	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1166
1167	 newmask = ~newmask & WRITEMASK_XYZW;
1168
1169	 brw_push_insn_state(p);
1170
1171	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1172	 brw_set_mask_control(p, BRW_MASK_DISABLE);
1173
1174	 brw_MOV(p, m1, brw_vec8_grf(0,0));
1175  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
1176
1177	 brw_pop_insn_state(p);
1178
1179  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
1180	 dest = offset(dest, dst_offset);
1181	 response_length = len * 2;
1182      }
1183   }
1184
1185   {
1186      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1187
1188      insn->header.predicate_control = 0; /* XXX */
1189      insn->header.compression_control = BRW_COMPRESSION_NONE;
1190      insn->header.destreg__conditonalmod = msg_reg_nr;
1191
1192      brw_set_dest(insn, dest);
1193      brw_set_src0(insn, src0);
1194      brw_set_sampler_message(p->brw, insn,
1195			      binding_table_index,
1196			      sampler,
1197			      msg_type,
1198			      response_length,
1199			      msg_length,
1200			      eot);
1201   }
1202
1203   if (need_stall) {
1204      struct brw_reg reg = vec8(offset(dest, response_length-1));
1205
1206      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
1207       */
1208      brw_push_insn_state(p);
1209      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1210      brw_MOV(p, reg, reg);
1211      brw_pop_insn_state(p);
1212   }
1213
1214}
1215
1216/* All these variables are pretty confusing - we might be better off
1217 * using bitmasks and macros for this, in the old style.  Or perhaps
1218 * just having the caller instantiate the fields in dword3 itself.
1219 */
1220void brw_urb_WRITE(struct brw_compile *p,
1221		   struct brw_reg dest,
1222		   GLuint msg_reg_nr,
1223		   struct brw_reg src0,
1224		   GLboolean allocate,
1225		   GLboolean used,
1226		   GLuint msg_length,
1227		   GLuint response_length,
1228		   GLboolean eot,
1229		   GLboolean writes_complete,
1230		   GLuint offset,
1231		   GLuint swizzle)
1232{
1233   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
1234
1235   assert(msg_length < BRW_MAX_MRF);
1236
1237   brw_set_dest(insn, dest);
1238   brw_set_src0(insn, src0);
1239   brw_set_src1(insn, brw_imm_d(0));
1240
1241   insn->header.destreg__conditonalmod = msg_reg_nr;
1242
1243   brw_set_urb_message(insn,
1244		       allocate,
1245		       used,
1246		       msg_length,
1247		       response_length,
1248		       eot,
1249		       writes_complete,
1250		       offset,
1251		       swizzle);
1252}
1253
1254