/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24#ifndef BRW_VEC4_H
25#define BRW_VEC4_H
26
27#include <stdint.h>
28#include "brw_shader.h"
29#include "main/compiler.h"
30#include "program/hash_table.h"
31
32extern "C" {
33#include "brw_vs.h"
34#include "brw_context.h"
35#include "brw_eu.h"
36};
37
38#include "glsl/ir.h"
39
40namespace brw {
41
/* Forward declaration: src_reg (below) declares a constructor taking a
 * dst_reg before dst_reg itself is defined.
 */
class dst_reg;

/* Prototype only; the definition is not in this header.
 * NOTE(review): presumably returns a swizzle covering the first 'size'
 * components -- confirm against the definition.
 */
unsigned swizzle_for_size(int size);
46
47enum register_file {
48   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
49   GRF = BRW_GENERAL_REGISTER_FILE,
50   MRF = BRW_MESSAGE_REGISTER_FILE,
51   IMM = BRW_IMMEDIATE_VALUE,
52   HW_REG, /* a struct brw_reg */
53   ATTR,
54   UNIFORM, /* prog_data->params[hw_reg] */
55   BAD_FILE
56};
57
58class reg
59{
60public:
61   /** Register file: ARF, GRF, MRF, IMM. */
62   enum register_file file;
63   /** virtual register number.  0 = fixed hw reg */
64   int reg;
65   /** Offset within the virtual register. */
66   int reg_offset;
67   /** Register type.  BRW_REGISTER_TYPE_* */
68   int type;
69   struct brw_reg fixed_hw_reg;
70
71   /** Value for file == BRW_IMMMEDIATE_FILE */
72   union {
73      int32_t i;
74      uint32_t u;
75      float f;
76   } imm;
77};
78
79class src_reg : public reg
80{
81public:
82   /* Callers of this ralloc-based new need not call delete. It's
83    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
84   static void* operator new(size_t size, void *ctx)
85   {
86      void *node;
87
88      node = ralloc_size(ctx, size);
89      assert(node != NULL);
90
91      return node;
92   }
93
94   void init();
95
96   src_reg(register_file file, int reg, const glsl_type *type);
97   src_reg();
98   src_reg(float f);
99   src_reg(uint32_t u);
100   src_reg(int32_t i);
101
102   bool equals(src_reg *r);
103   bool is_zero() const;
104   bool is_one() const;
105
106   src_reg(class vec4_visitor *v, const struct glsl_type *type);
107
108   explicit src_reg(dst_reg reg);
109
110   GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
111   bool negate;
112   bool abs;
113
114   src_reg *reladdr;
115};
116
117class dst_reg : public reg
118{
119public:
120   /* Callers of this ralloc-based new need not call delete. It's
121    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
122   static void* operator new(size_t size, void *ctx)
123   {
124      void *node;
125
126      node = ralloc_size(ctx, size);
127      assert(node != NULL);
128
129      return node;
130   }
131
132   void init();
133
134   dst_reg();
135   dst_reg(register_file file, int reg);
136   dst_reg(register_file file, int reg, const glsl_type *type, int writemask);
137   dst_reg(struct brw_reg reg);
138   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
139
140   explicit dst_reg(src_reg reg);
141
142   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
143
144   src_reg *reladdr;
145};
146
147class vec4_instruction : public exec_node {
148public:
149   /* Callers of this ralloc-based new need not call delete. It's
150    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
151   static void* operator new(size_t size, void *ctx)
152   {
153      void *node;
154
155      node = rzalloc_size(ctx, size);
156      assert(node != NULL);
157
158      return node;
159   }
160
161   vec4_instruction(vec4_visitor *v, enum opcode opcode,
162		    dst_reg dst = dst_reg(),
163		    src_reg src0 = src_reg(),
164		    src_reg src1 = src_reg(),
165		    src_reg src2 = src_reg());
166
167   struct brw_reg get_dst(void);
168   struct brw_reg get_src(int i);
169
170   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
171   dst_reg dst;
172   src_reg src[3];
173
174   bool saturate;
175   bool predicate_inverse;
176   uint32_t predicate;
177
178   int conditional_mod; /**< BRW_CONDITIONAL_* */
179
180   int sampler;
181   uint32_t texture_offset; /**< Texture Offset bitfield */
182   int target; /**< MRT target. */
183   bool shadow_compare;
184
185   bool eot;
186   bool header_present;
187   int mlen; /**< SEND message length */
188   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
189
190   uint32_t offset; /* spill/unspill offset */
191   /** @{
192    * Annotation for the generated IR.  One of the two can be set.
193    */
194   ir_instruction *ir;
195   const char *annotation;
196
197   bool is_tex();
198   bool is_math();
199};
200
201class vec4_visitor : public ir_visitor
202{
203public:
204   vec4_visitor(struct brw_vs_compile *c,
205		struct gl_shader_program *prog, struct brw_shader *shader);
206   ~vec4_visitor();
207
208   dst_reg dst_null_f()
209   {
210      return dst_reg(brw_null_reg());
211   }
212
213   dst_reg dst_null_d()
214   {
215      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
216   }
217
218   struct brw_context *brw;
219   const struct gl_vertex_program *vp;
220   struct intel_context *intel;
221   struct gl_context *ctx;
222   struct brw_vs_compile *c;
223   struct brw_vs_prog_data *prog_data;
224   struct brw_compile *p;
225   struct brw_shader *shader;
226   struct gl_shader_program *prog;
227   void *mem_ctx;
228   exec_list instructions;
229
230   char *fail_msg;
231   bool failed;
232
233   /**
234    * GLSL IR currently being processed, which is associated with our
235    * driver IR instructions for debugging purposes.
236    */
237   ir_instruction *base_ir;
238   const char *current_annotation;
239
240   int *virtual_grf_sizes;
241   int virtual_grf_count;
242   int virtual_grf_array_size;
243   int first_non_payload_grf;
244   unsigned int max_grf;
245   int *virtual_grf_def;
246   int *virtual_grf_use;
247   dst_reg userplane[MAX_CLIP_PLANES];
248
249   /**
250    * This is the size to be used for an array with an element per
251    * reg_offset
252    */
253   int virtual_grf_reg_count;
254   /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
255   int *virtual_grf_reg_map;
256
257   bool live_intervals_valid;
258
259   dst_reg *variable_storage(ir_variable *var);
260
261   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
262
263   src_reg src_reg_for_float(float val);
264
265   /**
266    * \name Visit methods
267    *
268    * As typical for the visitor pattern, there must be one \c visit method for
269    * each concrete subclass of \c ir_instruction.  Virtual base classes within
270    * the hierarchy should not have \c visit methods.
271    */
272   /*@{*/
273   virtual void visit(ir_variable *);
274   virtual void visit(ir_loop *);
275   virtual void visit(ir_loop_jump *);
276   virtual void visit(ir_function_signature *);
277   virtual void visit(ir_function *);
278   virtual void visit(ir_expression *);
279   virtual void visit(ir_swizzle *);
280   virtual void visit(ir_dereference_variable  *);
281   virtual void visit(ir_dereference_array *);
282   virtual void visit(ir_dereference_record *);
283   virtual void visit(ir_assignment *);
284   virtual void visit(ir_constant *);
285   virtual void visit(ir_call *);
286   virtual void visit(ir_return *);
287   virtual void visit(ir_discard *);
288   virtual void visit(ir_texture *);
289   virtual void visit(ir_if *);
290   /*@}*/
291
292   src_reg result;
293
294   /* Regs for vertex results.  Generated at ir_variable visiting time
295    * for the ir->location's used.
296    */
297   dst_reg output_reg[BRW_VERT_RESULT_MAX];
298   const char *output_reg_annotation[BRW_VERT_RESULT_MAX];
299   int uniform_size[MAX_UNIFORMS];
300   int uniform_vector_size[MAX_UNIFORMS];
301   int uniforms;
302
303   struct hash_table *variable_ht;
304
305   bool run(void);
306   void fail(const char *msg, ...);
307
308   int virtual_grf_alloc(int size);
309   void setup_uniform_clipplane_values();
310   int setup_uniform_values(int loc, const glsl_type *type);
311   void setup_builtin_uniform_values(ir_variable *ir);
312   int setup_attributes(int payload_reg);
313   int setup_uniforms(int payload_reg);
314   void setup_payload();
315   bool reg_allocate_trivial();
316   bool reg_allocate();
317   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
318   int choose_spill_reg(struct ra_graph *g);
319   void spill_reg(int spill_reg);
320   void move_grf_array_access_to_scratch();
321   void move_uniform_array_access_to_pull_constants();
322   void move_push_constants_to_pull_constants();
323   void split_uniform_registers();
324   void pack_uniform_registers();
325   void calculate_live_intervals();
326   bool dead_code_eliminate();
327   bool virtual_grf_interferes(int a, int b);
328   bool opt_copy_propagation();
329   bool opt_algebraic();
330   bool opt_compute_to_mrf();
331
332   vec4_instruction *emit(vec4_instruction *inst);
333
334   vec4_instruction *emit(enum opcode opcode);
335
336   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
337
338   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
339			  src_reg src0, src_reg src1);
340
341   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
342			  src_reg src0, src_reg src1, src_reg src2);
343
344   vec4_instruction *emit_before(vec4_instruction *inst,
345				 vec4_instruction *new_inst);
346
347   vec4_instruction *MOV(dst_reg dst, src_reg src0);
348   vec4_instruction *NOT(dst_reg dst, src_reg src0);
349   vec4_instruction *RNDD(dst_reg dst, src_reg src0);
350   vec4_instruction *RNDE(dst_reg dst, src_reg src0);
351   vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
352   vec4_instruction *FRC(dst_reg dst, src_reg src0);
353   vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
354   vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
355   vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
356   vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
357   vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
358   vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
359   vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
360   vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
361   vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
362   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
363			 uint32_t condition);
364   vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
365   vec4_instruction *IF(uint32_t predicate);
366   vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
367   vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
368   vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
369
370   int implied_mrf_writes(vec4_instruction *inst);
371
372   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
373			       dst_reg dst,
374			       src_reg src,
375			       vec4_instruction *pre_rhs_inst,
376			       vec4_instruction *last_rhs_inst);
377
378   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
379   void visit_instructions(const exec_list *list);
380
381   void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate);
382   void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
383   void emit_if_gen6(ir_if *ir);
384
385   void emit_block_move(dst_reg *dst, src_reg *src,
386			const struct glsl_type *type, uint32_t predicate);
387
388   void emit_constant_values(dst_reg *dst, ir_constant *value);
389
390   /**
391    * Emit the correct dot-product instruction for the type of arguments
392    */
393   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
394
395   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
396		    dst_reg dst, src_reg src0);
397
398   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
399		    dst_reg dst, src_reg src0, src_reg src1);
400
401   void emit_scs(ir_instruction *ir, enum prog_opcode op,
402		 dst_reg dst, const src_reg &src);
403
404   void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
405   void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
406   void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
407   void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
408   void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
409   void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
410
411   void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);
412
413   void emit_ndc_computation();
414   void emit_psiz_and_flags(struct brw_reg reg);
415   void emit_clip_distances(struct brw_reg reg, int offset);
416   void emit_generic_urb_slot(dst_reg reg, int vert_result);
417   void emit_urb_slot(int mrf, int vert_result);
418   void emit_urb_writes(void);
419
420   src_reg get_scratch_offset(vec4_instruction *inst,
421			      src_reg *reladdr, int reg_offset);
422   src_reg get_pull_constant_offset(vec4_instruction *inst,
423				    src_reg *reladdr, int reg_offset);
424   void emit_scratch_read(vec4_instruction *inst,
425			  dst_reg dst,
426			  src_reg orig_src,
427			  int base_offset);
428   void emit_scratch_write(vec4_instruction *inst,
429			   src_reg temp,
430			   dst_reg orig_dst,
431			   int base_offset);
432   void emit_pull_constant_load(vec4_instruction *inst,
433				dst_reg dst,
434				src_reg orig_src,
435				int base_offset);
436
437   bool try_emit_sat(ir_expression *ir);
438   void resolve_ud_negate(src_reg *reg);
439
440   bool process_move_condition(ir_rvalue *ir);
441
442   void generate_code();
443   void generate_vs_instruction(vec4_instruction *inst,
444				struct brw_reg dst,
445				struct brw_reg *src);
446
447   void generate_math1_gen4(vec4_instruction *inst,
448			    struct brw_reg dst,
449			    struct brw_reg src);
450   void generate_math1_gen6(vec4_instruction *inst,
451			    struct brw_reg dst,
452			    struct brw_reg src);
453   void generate_math2_gen4(vec4_instruction *inst,
454			    struct brw_reg dst,
455			    struct brw_reg src0,
456			    struct brw_reg src1);
457   void generate_math2_gen6(vec4_instruction *inst,
458			    struct brw_reg dst,
459			    struct brw_reg src0,
460			    struct brw_reg src1);
461   void generate_math2_gen7(vec4_instruction *inst,
462			    struct brw_reg dst,
463			    struct brw_reg src0,
464			    struct brw_reg src1);
465
466   void generate_tex(vec4_instruction *inst,
467		     struct brw_reg dst,
468		     struct brw_reg src);
469
470   void generate_urb_write(vec4_instruction *inst);
471   void generate_oword_dual_block_offsets(struct brw_reg m1,
472					  struct brw_reg index);
473   void generate_scratch_write(vec4_instruction *inst,
474			       struct brw_reg dst,
475			       struct brw_reg src,
476			       struct brw_reg index);
477   void generate_scratch_read(vec4_instruction *inst,
478			      struct brw_reg dst,
479			      struct brw_reg index);
480   void generate_pull_constant_load(vec4_instruction *inst,
481				    struct brw_reg dst,
482				    struct brw_reg index,
483				    struct brw_reg offset);
484};
485
486} /* namespace brw */
487
488#endif /* BRW_VEC4_H */
489