brw_vec4.h revision bb020d09c382285210a5aebe412ddabfad19e4a0
1/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef BRW_VEC4_H
25#define BRW_VEC4_H
26
27#include <stdint.h>
28#include "brw_shader.h"
29#include "main/compiler.h"
30#include "program/hash_table.h"
31
32extern "C" {
33#include "brw_vs.h"
34#include "brw_context.h"
35#include "brw_eu.h"
36};
37
38#include "glsl/ir.h"
39
40namespace brw {
41
/* Forward declaration: src_reg declares an explicit constructor taking a
 * dst_reg before the dst_reg class itself is defined further down.
 */
class dst_reg;

/* Declared here, defined outside this header.
 * NOTE(review): presumably returns a swizzle that selects the first
 * \c size components -- confirm against the definition.
 */
unsigned
swizzle_for_size(int size);
46
/**
 * Register files a reg (and therefore a src_reg or dst_reg) can refer to;
 * stored in reg::file.
 *
 * The first four enumerators are given the values of the corresponding
 * BRW_* constants.  The remaining ones carry no explicit value, so they
 * number consecutively starting at IMM + 1.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   HW_REG, /* a struct brw_reg */
   ATTR, /* NOTE(review): presumably a vertex attribute input -- confirm */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE /* NOTE(review): presumably marks an unset/invalid register -- confirm */
};
57
/**
 * Fields common to source and destination operands; src_reg and dst_reg
 * both derive publicly from this class.
 *
 * The class declares no constructor of its own.  Note that it has a data
 * member named like the class ("reg"), which C++ only permits for classes
 * without a user-declared constructor -- keep that in mind before adding
 * one here.
 */
class reg
{
public:
   /** Register file: one of the register_file enumerators. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   /* NOTE(review): presumably the hardware register used when
    * file == HW_REG -- confirm against the users of this field.
    */
   struct brw_reg fixed_hw_reg;

   /** Value for file == IMM.
    *
    * NOTE(review): which union member is meaningful presumably follows
    * 'type' -- confirm.
    */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
78
79class src_reg : public reg
80{
81public:
82   /* Callers of this ralloc-based new need not call delete. It's
83    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
84   static void* operator new(size_t size, void *ctx)
85   {
86      void *node;
87
88      node = ralloc_size(ctx, size);
89      assert(node != NULL);
90
91      return node;
92   }
93
94   void init();
95
96   src_reg(register_file file, int reg, const glsl_type *type);
97   src_reg();
98   src_reg(float f);
99   src_reg(uint32_t u);
100   src_reg(int32_t i);
101
102   bool equals(src_reg *r);
103   bool is_zero() const;
104   bool is_one() const;
105
106   src_reg(class vec4_visitor *v, const struct glsl_type *type);
107
108   explicit src_reg(dst_reg reg);
109
110   GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
111   bool negate;
112   bool abs;
113
114   src_reg *reladdr;
115};
116
117class dst_reg : public reg
118{
119public:
120   /* Callers of this ralloc-based new need not call delete. It's
121    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
122   static void* operator new(size_t size, void *ctx)
123   {
124      void *node;
125
126      node = ralloc_size(ctx, size);
127      assert(node != NULL);
128
129      return node;
130   }
131
132   void init();
133
134   dst_reg();
135   dst_reg(register_file file, int reg);
136   dst_reg(register_file file, int reg, const glsl_type *type, int writemask);
137   dst_reg(struct brw_reg reg);
138   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
139
140   explicit dst_reg(src_reg reg);
141
142   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
143
144   src_reg *reladdr;
145};
146
147class vec4_instruction : public exec_node {
148public:
149   /* Callers of this ralloc-based new need not call delete. It's
150    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
151   static void* operator new(size_t size, void *ctx)
152   {
153      void *node;
154
155      node = rzalloc_size(ctx, size);
156      assert(node != NULL);
157
158      return node;
159   }
160
161   vec4_instruction(vec4_visitor *v, enum opcode opcode,
162		    dst_reg dst = dst_reg(),
163		    src_reg src0 = src_reg(),
164		    src_reg src1 = src_reg(),
165		    src_reg src2 = src_reg());
166
167   struct brw_reg get_dst(void);
168   struct brw_reg get_src(int i);
169
170   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
171   dst_reg dst;
172   src_reg src[3];
173
174   bool saturate;
175   bool predicate_inverse;
176   uint32_t predicate;
177
178   int conditional_mod; /**< BRW_CONDITIONAL_* */
179
180   int sampler;
181   uint32_t texture_offset; /**< Texture Offset bitfield */
182   int target; /**< MRT target. */
183   bool shadow_compare;
184
185   bool eot;
186   bool header_present;
187   int mlen; /**< SEND message length */
188   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
189
190   uint32_t offset; /* spill/unspill offset */
191   /** @{
192    * Annotation for the generated IR.  One of the two can be set.
193    */
194   ir_instruction *ir;
195   const char *annotation;
196
197   bool is_tex();
198   bool is_math();
199};
200
/**
 * ir_visitor subclass that declares the vec4 backend's interface: one
 * visit() overload per GLSL IR node type listed below, emit helpers that
 * return vec4_instruction pointers, register/uniform bookkeeping, and the
 * generate_* entry points taking struct brw_reg operands.
 *
 * Only dst_null_f() and dst_null_d() are defined inline; every other member
 * function is defined outside this header.
 */
class vec4_visitor : public ir_visitor
{
public:
   vec4_visitor(struct brw_vs_compile *c,
		struct gl_shader_program *prog, struct brw_shader *shader);
   ~vec4_visitor();

   /** Destination wrapping brw_null_reg() with its type left untouched. */
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   /** Destination wrapping brw_null_reg() retyped to BRW_REGISTER_TYPE_D. */
   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   struct brw_context *brw;
   const struct gl_vertex_program *vp;
   struct intel_context *intel;
   struct gl_context *ctx;
   struct brw_vs_compile *c;
   struct brw_vs_prog_data *prog_data;
   struct brw_compile *p;
   struct brw_shader *shader;
   struct gl_shader_program *prog;
   void *mem_ctx;
   /* NOTE(review): presumably the list of emitted vec4_instruction objects
    * (vec4_instruction derives from exec_node) -- confirm.
    */
   exec_list instructions;

   /* NOTE(review): presumably set together by fail() -- confirm. */
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   ir_instruction *base_ir;
   const char *current_annotation;

   /* Virtual GRF bookkeeping.
    * NOTE(review): the arrays are presumably indexed by virtual register
    * number and sized by virtual_grf_array_size -- confirm.
    */
   int *virtual_grf_sizes;
   int virtual_grf_count;
   int virtual_grf_array_size;
   int first_non_payload_grf;
   unsigned int max_grf;
   int *virtual_grf_def;
   int *virtual_grf_use;
   dst_reg userplane[MAX_CLIP_PLANES];

   /**
    * This is the size to be used for an array with an element per
    * reg_offset
    */
   int virtual_grf_reg_count;
   /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
   int *virtual_grf_reg_map;

   bool live_intervals_valid;

   dst_reg *variable_storage(ir_variable *var);

   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   /* NOTE(review): presumably where a visit() of an rvalue leaves its value
    * for the caller to pick up -- confirm.
    */
   src_reg result;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->location's used.
    */
   dst_reg output_reg[BRW_VERT_RESULT_MAX];
   const char *output_reg_annotation[BRW_VERT_RESULT_MAX];
   int uniform_size[MAX_UNIFORMS];
   int uniform_vector_size[MAX_UNIFORMS];
   int uniforms;

   struct hash_table *variable_ht;

   bool run(void);
   /* printf-style variadic signature: a format string followed by its
    * arguments.
    */
   void fail(const char *msg, ...);

   int virtual_grf_alloc(int size);
   void setup_uniform_clipplane_values();
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);
   int setup_attributes(int payload_reg);
   int setup_uniforms(int payload_reg);
   void setup_payload();
   void reg_allocate_trivial();
   void reg_allocate();
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void calculate_live_intervals();
   /* NOTE(review): the bool results of the passes below presumably report
    * whether anything was changed -- confirm.
    */
   bool dead_code_eliminate();
   bool virtual_grf_interferes(int a, int b);
   bool opt_copy_propagation();
   bool opt_algebraic();
   bool opt_compute_to_mrf();

   /* emit() overloads: take either a ready-made instruction or an opcode
    * plus zero to three source operands.
    */
   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);

   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);

   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
			  src_reg src0, src_reg src1);

   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
			  src_reg src0, src_reg src1, src_reg src2);

   vec4_instruction *emit_before(vec4_instruction *inst,
				 vec4_instruction *new_inst);

   /* Per-opcode helpers returning a vec4_instruction pointer.
    * NOTE(review): whether these only construct the instruction or also
    * append it to 'instructions' is not visible in this header -- confirm
    * before relying on either.
    */
   vec4_instruction *MOV(dst_reg dst, src_reg src0);
   vec4_instruction *NOT(dst_reg dst, src_reg src0);
   vec4_instruction *RNDD(dst_reg dst, src_reg src0);
   vec4_instruction *RNDE(dst_reg dst, src_reg src0);
   vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
   vec4_instruction *FRC(dst_reg dst, src_reg src0);
   vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
			 uint32_t condition);
   vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
   vec4_instruction *IF(uint32_t predicate);
   vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
   vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
   vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);

   int implied_mrf_writes(vec4_instruction *inst);

   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
			       dst_reg dst,
			       src_reg src,
			       vec4_instruction *pre_rhs_inst,
			       vec4_instruction *last_rhs_inst);

   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
   void visit_instructions(const exec_list *list);

   void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate);
   void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
   void emit_if_gen6(ir_if *ir);

   void emit_block_move(dst_reg *dst, src_reg *src,
			const struct glsl_type *type, uint32_t predicate);

   void emit_constant_values(dst_reg *dst, ir_constant *value);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0, src_reg src1);

   void emit_scs(ir_instruction *ir, enum prog_opcode op,
		 dst_reg dst, const src_reg &src);

   /* Math emission: the _gen4 and _gen6 variants sit next to an unsuffixed
    * emit_math() overload for one and for two sources.
    * NOTE(review): emit_math() presumably dispatches to the variant for
    * the current hardware generation -- confirm.
    */
   void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
   void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
   void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
   void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
   void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
   void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);

   void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);

   void emit_ndc_computation();
   void emit_psiz_and_flags(struct brw_reg reg);
   void emit_clip_distances(struct brw_reg reg, int offset);
   void emit_generic_urb_slot(dst_reg reg, int vert_result);
   void emit_urb_slot(int mrf, int vert_result);
   void emit_urb_writes(void);

   src_reg get_scratch_offset(vec4_instruction *inst,
			      src_reg *reladdr, int reg_offset);
   src_reg get_pull_constant_offset(vec4_instruction *inst,
				    src_reg *reladdr, int reg_offset);
   void emit_scratch_read(vec4_instruction *inst,
			  dst_reg dst,
			  src_reg orig_src,
			  int base_offset);
   void emit_scratch_write(vec4_instruction *inst,
			   src_reg temp,
			   dst_reg orig_dst,
			   int base_offset);
   void emit_pull_constant_load(vec4_instruction *inst,
				dst_reg dst,
				src_reg orig_src,
				int base_offset);

   bool try_emit_sat(ir_expression *ir);
   void resolve_ud_negate(src_reg *reg);

   bool process_move_condition(ir_rvalue *ir);

   /* Code generation entry points.  Unlike the emit_* helpers above, the
    * generate_* functions below take struct brw_reg operands rather than
    * dst_reg / src_reg.
    */
   void generate_code();
   void generate_vs_instruction(vec4_instruction *inst,
				struct brw_reg dst,
				struct brw_reg *src);

   void generate_math1_gen4(vec4_instruction *inst,
			    struct brw_reg dst,
			    struct brw_reg src);
   void generate_math1_gen6(vec4_instruction *inst,
			    struct brw_reg dst,
			    struct brw_reg src);
   void generate_math2_gen4(vec4_instruction *inst,
			    struct brw_reg dst,
			    struct brw_reg src0,
			    struct brw_reg src1);
   void generate_math2_gen6(vec4_instruction *inst,
			    struct brw_reg dst,
			    struct brw_reg src0,
			    struct brw_reg src1);
   void generate_math2_gen7(vec4_instruction *inst,
			    struct brw_reg dst,
			    struct brw_reg src0,
			    struct brw_reg src1);

   void generate_tex(vec4_instruction *inst,
		     struct brw_reg dst,
		     struct brw_reg src);

   void generate_urb_write(vec4_instruction *inst);
   void generate_oword_dual_block_offsets(struct brw_reg m1,
					  struct brw_reg index);
   void generate_scratch_write(vec4_instruction *inst,
			       struct brw_reg dst,
			       struct brw_reg src,
			       struct brw_reg index);
   void generate_scratch_read(vec4_instruction *inst,
			      struct brw_reg dst,
			      struct brw_reg index);
   void generate_pull_constant_load(vec4_instruction *inst,
				    struct brw_reg dst,
				    struct brw_reg index,
				    struct brw_reg offset);
};
482
483} /* namespace brw */
484
485#endif /* BRW_VEC4_H */
486