/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"
#include "brw_program.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

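/**
 * Emit native code for the vec4 instructions in \p cfg and return a pointer
 * to the generated assembly, storing its size in \p out_assembly_size.
 */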
const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           unsigned *out_assembly_size);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {

class vec4_live_variables;

/**
 * The vertex shader front-end.
 *
 * Translates the NIR for a shader (whether it originated as GLSL, or as
 * Mesa IR for ARB_vertex_program and fixed-function) into VS IR.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index);
   virtual ~vec4_visitor();

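   /**
    * Helpers returning the null register as an F/DF/D/UD destination, for
    * instructions whose result is discarded (e.g. a CMP that only needs to
    * update the flag register).
    */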
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned int max_grf;
   int *virtual_grf_start;
   int *virtual_grf_end;
   brw::vec4_live_variables *live_intervals;
   dst_reg userplane[MAX_CLIP_PLANES];

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->location values that are used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;

   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(int spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void calculate_live_intervals();
   void invalidate_live_intervals();
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   int var_range_start(unsigned v, unsigned n) const;
   int var_range_end(unsigned v, unsigned n) const;
   bool virtual_grf_interferes(int a, int b);
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();

   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

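   /*
    * Convenience emitters for ALU instructions: EMIT1/EMIT2/EMIT3 declare a
    * helper taking a destination and one, two or three sources for each of
    * the opcodes expanded below, and are #undef'd once the declarations are
    * done.  A typical call site builds the instruction with one of these
    * helpers and passes it to emit(), e.g. emit(MOV(dst, src)).
    */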
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   int implied_mrf_writes(vec4_instruction *inst);

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
                              const src_reg &y, const src_reg &a);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   src_reg fix_3src_operand(const src_reg &src);
   src_reg resolve_source_modifiers(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     const glsl_type *dest_type,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparator,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     uint32_t surface, src_reg surface_reg,
                     src_reg sampler_reg);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

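   /*
    * Scratch and pull-constant access: variably-indexed GRF arrays are moved
    * to scratch space and uniform arrays to the pull constant buffer by the
    * move_*() passes above; these helpers emit the corresponding read and
    * write messages.
    */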
   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset,
                                src_reg indirect);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   void dump_instruction(backend_instruction *inst);
   void dump_instruction(backend_instruction *inst, FILE *file);

   bool is_high_sampler(src_reg sampler);

   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src, bool saturate,
                                    brw_reg_type single_type);
   void emit_conversion_to_double(dst_reg dst, src_reg src, bool saturate,
                                  brw_reg_type single_type);

   src_reg setup_imm_df(double v);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);

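   /*
    * NIR translation: emit_nir_code() walks the shader and dispatches to the
    * per-instruction nir_emit_*() methods.  These are virtual so that
    * stage-specific visitors can override them.
    */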
   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_setup_system_values();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   virtual dst_reg *make_reg_for_system_value(int location) = 0;

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;
   dst_reg *nir_system_values;

protected:
   void emit_vertex();
   void lower_attributes_to_hw_regs(const int *attribute_map,
                                    bool interleaved);
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */