/* brw_fs.h revision 27bf9c1997b77f85c2099436e9ad5dfc0f1608c7 */
1fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville/* 264c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville * Copyright © 2010 Intel Corporation 3fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * 4fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * Permission is hereby granted, free of charge, to any person obtaining a 5fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * copy of this software and associated documentation files (the "Software"), 6fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * to deal in the Software without restriction, including without limitation 7fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * and/or sell copies of the Software, and to permit persons to whom the 9fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * Software is furnished to do so, subject to the following conditions: 10fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * 11fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * The above copyright notice and this permission notice (including the next 12fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * paragraph) shall be included in all copies or substantial portions of the 13fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * Software. 14fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * 15fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * IN THE SOFTWARE. 22fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * 23fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * Authors: 2464c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville * Eric Anholt <eric@anholt.net> 25fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville * 2664c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville */ 27fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville 28fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#pragma once 2964c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville 30fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "brw_shader.h" 3164c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville 3264c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Savilleextern "C" { 3364c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville 3464c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include <sys/types.h> 3564c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville 3664c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "main/macros.h" 37fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "main/shaderobj.h" 38fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "main/uniforms.h" 39fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "program/prog_parameter.h" 40fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "program/prog_print.h" 4164c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "program/prog_optimize.h" 4264c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "program/register_allocate.h" 
4364c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "program/sampler.h" 4464c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "program/hash_table.h" 4564c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "brw_context.h" 4664c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville#include "brw_eu.h" 47fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "brw_wm.h" 48fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville} 49fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "glsl/glsl_types.h" 50fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville#include "glsl/ir.h" 51fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville 52fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Savilleclass fs_bblock; 53fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Savillenamespace { 54fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville class acp_entry; 55fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville} 56fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville 57fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Savilleenum register_file { 58fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville BAD_FILE, 59fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville ARF, 6064c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville GRF, 6164c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville MRF, 62fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville IMM, 6364c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville FIXED_HW_REG, /* a struct brw_reg */ 64fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville UNIFORM, /* prog_data->params[reg] */ 65fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville}; 6664c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville 6764c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Savilleclass fs_reg { 6864c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Savillepublic: 69fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville /* Callers of this ralloc-based new need not call delete. 
It's 7064c42cae4482fe0157e977b8ddd0f2c2436b3f31Wink Saville * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 71fc5b4802a544b6ca304aa7e58a26018ef714d233Wink Saville static void* operator new(size_t size, void *ctx) 72 { 73 void *node; 74 75 node = ralloc_size(ctx, size); 76 assert(node != NULL); 77 78 return node; 79 } 80 81 void init(); 82 83 fs_reg(); 84 fs_reg(float f); 85 fs_reg(int32_t i); 86 fs_reg(uint32_t u); 87 fs_reg(struct brw_reg fixed_hw_reg); 88 fs_reg(enum register_file file, int reg); 89 fs_reg(enum register_file file, int reg, uint32_t type); 90 fs_reg(class fs_visitor *v, const struct glsl_type *type); 91 92 bool equals(const fs_reg &r) const; 93 94 /** Register file: ARF, GRF, MRF, IMM. */ 95 enum register_file file; 96 /** 97 * Register number. For ARF/MRF, it's the hardware register. For 98 * GRF, it's a virtual register number until register allocation 99 */ 100 int reg; 101 /** 102 * For virtual registers, this is a hardware register offset from 103 * the start of the register block (for example, a constant index 104 * in an array access). 105 */ 106 int reg_offset; 107 /** Register type. BRW_REGISTER_TYPE_* */ 108 int type; 109 bool negate; 110 bool abs; 111 bool sechalf; 112 struct brw_reg fixed_hw_reg; 113 int smear; /* -1, or a channel of the reg to smear to all channels. */ 114 115 /** Value for file == IMM */ 116 union { 117 int32_t i; 118 uint32_t u; 119 float f; 120 } imm; 121}; 122 123static const fs_reg reg_undef; 124static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); 125static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); 126 127class fs_inst : public exec_node { 128public: 129 /* Callers of this ralloc-based new need not call delete. It's 130 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 131 static void* operator new(size_t size, void *ctx) 132 { 133 void *node; 134 135 node = rzalloc_size(ctx, size); 136 assert(node != NULL); 137 138 return node; 139 } 140 141 void init(); 142 143 fs_inst(); 144 fs_inst(enum opcode opcode); 145 fs_inst(enum opcode opcode, fs_reg dst); 146 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0); 147 fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1); 148 fs_inst(enum opcode opcode, fs_reg dst, 149 fs_reg src0, fs_reg src1,fs_reg src2); 150 151 bool equals(fs_inst *inst); 152 int regs_written(); 153 bool overwrites_reg(const fs_reg ®); 154 bool is_tex(); 155 bool is_math(); 156 157 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 158 fs_reg dst; 159 fs_reg src[3]; 160 bool saturate; 161 bool predicated; 162 bool predicate_inverse; 163 int conditional_mod; /**< BRW_CONDITIONAL_* */ 164 165 int mlen; /**< SEND message length */ 166 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 167 uint32_t texture_offset; /**< Texture offset bitfield */ 168 int sampler; 169 int target; /**< MRT target. */ 170 bool eot; 171 bool header_present; 172 bool shadow_compare; 173 bool force_uncompressed; 174 bool force_sechalf; 175 uint32_t offset; /* spill/unspill offset */ 176 177 /** @{ 178 * Annotation for the generated IR. One of the two can be set. 
179 */ 180 ir_instruction *ir; 181 const char *annotation; 182 /** @} */ 183}; 184 185class fs_visitor : public ir_visitor 186{ 187public: 188 189 fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, 190 struct brw_shader *shader); 191 ~fs_visitor(); 192 193 fs_reg *variable_storage(ir_variable *var); 194 int virtual_grf_alloc(int size); 195 void import_uniforms(fs_visitor *v); 196 197 void visit(ir_variable *ir); 198 void visit(ir_assignment *ir); 199 void visit(ir_dereference_variable *ir); 200 void visit(ir_dereference_record *ir); 201 void visit(ir_dereference_array *ir); 202 void visit(ir_expression *ir); 203 void visit(ir_texture *ir); 204 void visit(ir_if *ir); 205 void visit(ir_constant *ir); 206 void visit(ir_swizzle *ir); 207 void visit(ir_return *ir); 208 void visit(ir_loop *ir); 209 void visit(ir_loop_jump *ir); 210 void visit(ir_discard *ir); 211 void visit(ir_call *ir); 212 void visit(ir_function *ir); 213 void visit(ir_function_signature *ir); 214 215 void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); 216 217 fs_inst *emit(fs_inst inst); 218 219 fs_inst *emit(enum opcode opcode); 220 fs_inst *emit(enum opcode opcode, fs_reg dst); 221 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0); 222 fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1); 223 fs_inst *emit(enum opcode opcode, fs_reg dst, 224 fs_reg src0, fs_reg src1, fs_reg src2); 225 226 int type_size(const struct glsl_type *type); 227 fs_inst *get_instruction_generating_reg(fs_inst *start, 228 fs_inst *end, 229 fs_reg reg); 230 231 bool run(); 232 void setup_paramvalues_refs(); 233 void assign_curb_setup(); 234 void calculate_urb_setup(); 235 void assign_urb_setup(); 236 bool assign_regs(); 237 void assign_regs_trivial(); 238 int choose_spill_reg(struct ra_graph *g); 239 void spill_reg(int spill_reg); 240 void split_virtual_grfs(); 241 void setup_pull_constants(); 242 void calculate_live_intervals(); 243 bool propagate_constants(); 244 
bool opt_algebraic(); 245 bool opt_cse(); 246 bool opt_cse_local(fs_bblock *block, exec_list *aeb); 247 bool opt_copy_propagate(); 248 bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); 249 bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, 250 exec_list *acp); 251 bool register_coalesce(); 252 bool register_coalesce_2(); 253 bool compute_to_mrf(); 254 bool dead_code_eliminate(); 255 bool remove_dead_constants(); 256 bool remove_duplicate_mrf_writes(); 257 bool virtual_grf_interferes(int a, int b); 258 void schedule_instructions(); 259 void fail(const char *msg, ...); 260 261 void push_force_uncompressed(); 262 void pop_force_uncompressed(); 263 void push_force_sechalf(); 264 void pop_force_sechalf(); 265 266 void generate_code(); 267 void generate_fb_write(fs_inst *inst); 268 void generate_pixel_xy(struct brw_reg dst, bool is_x); 269 void generate_linterp(fs_inst *inst, struct brw_reg dst, 270 struct brw_reg *src); 271 void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 272 void generate_math1_gen7(fs_inst *inst, 273 struct brw_reg dst, 274 struct brw_reg src); 275 void generate_math2_gen7(fs_inst *inst, 276 struct brw_reg dst, 277 struct brw_reg src0, 278 struct brw_reg src1); 279 void generate_math1_gen6(fs_inst *inst, 280 struct brw_reg dst, 281 struct brw_reg src); 282 void generate_math2_gen6(fs_inst *inst, 283 struct brw_reg dst, 284 struct brw_reg src0, 285 struct brw_reg src1); 286 void generate_math_gen4(fs_inst *inst, 287 struct brw_reg dst, 288 struct brw_reg src); 289 void generate_discard(fs_inst *inst); 290 void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); 291 void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, 292 bool negate_value); 293 void generate_spill(fs_inst *inst, struct brw_reg src); 294 void generate_unspill(fs_inst *inst, struct brw_reg dst); 295 void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst); 296 void 
generate_mov_dispatch_to_flags(); 297 298 void emit_dummy_fs(); 299 fs_reg *emit_fragcoord_interpolation(ir_variable *ir); 300 fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp, 301 glsl_interp_qualifier interpolation_mode, 302 bool is_centroid); 303 fs_reg *emit_frontfacing_interpolation(ir_variable *ir); 304 fs_reg *emit_general_interpolation(ir_variable *ir); 305 void emit_interpolation_setup_gen4(); 306 void emit_interpolation_setup_gen6(); 307 fs_reg emit_texcoord(ir_texture *ir, int sampler); 308 fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, 309 int sampler); 310 fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, 311 int sampler); 312 fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, 313 int sampler); 314 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); 315 fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); 316 bool try_emit_saturate(ir_expression *ir); 317 bool try_emit_mad(ir_expression *ir, int mul_arg); 318 void emit_bool_to_cond_code(ir_rvalue *condition); 319 void emit_if_gen6(ir_if *ir); 320 void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); 321 322 void emit_color_write(int target, int index, int first_color_mrf); 323 void emit_fb_writes(); 324 bool try_rewrite_rhs_to_dst(ir_assignment *ir, 325 fs_reg dst, 326 fs_reg src, 327 fs_inst *pre_rhs_inst, 328 fs_inst *last_rhs_inst); 329 void emit_assignment_writes(fs_reg &l, fs_reg &r, 330 const glsl_type *type, bool predicated); 331 void resolve_ud_negate(fs_reg *reg); 332 void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg); 333 334 struct brw_reg interp_reg(int location, int channel); 335 int setup_uniform_values(int loc, const glsl_type *type); 336 void setup_builtin_uniform_values(ir_variable *ir); 337 int implied_mrf_writes(fs_inst *inst); 338 339 struct brw_context *brw; 340 const struct gl_fragment_program *fp; 341 struct intel_context *intel; 342 
struct gl_context *ctx; 343 struct brw_wm_compile *c; 344 struct brw_compile *p; 345 struct brw_shader *shader; 346 struct gl_shader_program *prog; 347 void *mem_ctx; 348 exec_list instructions; 349 350 /* Delayed setup of c->prog_data.params[] due to realloc of 351 * ParamValues[] during compile. 352 */ 353 int param_index[MAX_UNIFORMS * 4]; 354 int param_offset[MAX_UNIFORMS * 4]; 355 356 int *virtual_grf_sizes; 357 int virtual_grf_count; 358 int virtual_grf_array_size; 359 int *virtual_grf_def; 360 int *virtual_grf_use; 361 bool live_intervals_valid; 362 363 /* This is the map from UNIFORM hw_reg + reg_offset as generated by 364 * the visitor to the packed uniform number after 365 * remove_dead_constants() that represents the actual uploaded 366 * uniform index. 367 */ 368 int *params_remap; 369 370 struct hash_table *variable_ht; 371 ir_variable *frag_depth; 372 fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; 373 unsigned output_components[BRW_MAX_DRAW_BUFFERS]; 374 fs_reg dual_src_output; 375 int first_non_payload_grf; 376 int max_grf; 377 int urb_setup[FRAG_ATTRIB_MAX]; 378 379 /** @{ debug annotation info */ 380 const char *current_annotation; 381 ir_instruction *base_ir; 382 /** @} */ 383 384 bool failed; 385 char *fail_msg; 386 387 /* Result of last visit() method. */ 388 fs_reg result; 389 390 fs_reg pixel_x; 391 fs_reg pixel_y; 392 fs_reg wpos_w; 393 fs_reg pixel_w; 394 fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 395 fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; 396 fs_reg reg_null_cmp; 397 398 int grf_used; 399 400 int force_uncompressed_stack; 401 int force_sechalf_stack; 402 403 class fs_bblock *bblock; 404}; 405 406bool brw_do_channel_expressions(struct exec_list *instructions); 407bool brw_do_vector_splitting(struct exec_list *instructions); 408bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); 409