/* brw_vec4.h revision 31874f074c2eaf2a9421c57f0798c79078d296c4 */
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef BRW_VEC4_H 25#define BRW_VEC4_H 26 27#include <stdint.h> 28#include "brw_shader.h" 29#include "main/compiler.h" 30#include "program/hash_table.h" 31 32extern "C" { 33#include "brw_vs.h" 34#include "brw_context.h" 35#include "brw_eu.h" 36}; 37 38#include "glsl/ir.h" 39 40namespace brw { 41 42class dst_reg; 43 44/** 45 * Common helper for constructing swizzles. When only a subset of 46 * channels of a vec4 are used, we don't want to reference the other 47 * channels, as that will tell optimization passes that those other 48 * channels are used. 
49 */ 50static unsigned 51swizzle_for_size(int size) 52{ 53 static const unsigned size_swizzles[4] = { 54 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 55 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 56 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 57 BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 58 }; 59 60 assert((size >= 1) && (size <= 4)); 61 return size_swizzles[size - 1]; 62} 63 64enum register_file { 65 ARF = BRW_ARCHITECTURE_REGISTER_FILE, 66 GRF = BRW_GENERAL_REGISTER_FILE, 67 MRF = BRW_MESSAGE_REGISTER_FILE, 68 IMM = BRW_IMMEDIATE_VALUE, 69 HW_REG, /* a struct brw_reg */ 70 ATTR, 71 UNIFORM, /* prog_data->params[hw_reg] */ 72 BAD_FILE 73}; 74 75class reg 76{ 77public: 78 /** Register file: ARF, GRF, MRF, IMM. */ 79 enum register_file file; 80 /** virtual register number. 0 = fixed hw reg */ 81 int reg; 82 /** Offset within the virtual register. */ 83 int reg_offset; 84 /** Register type. BRW_REGISTER_TYPE_* */ 85 int type; 86 struct brw_reg fixed_hw_reg; 87 88 /** Value for file == BRW_IMMMEDIATE_FILE */ 89 union { 90 int32_t i; 91 uint32_t u; 92 float f; 93 } imm; 94}; 95 96class src_reg : public reg 97{ 98public: 99 /* Callers of this ralloc-based new need not call delete. It's 100 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 101 static void* operator new(size_t size, void *ctx) 102 { 103 void *node; 104 105 node = ralloc_size(ctx, size); 106 assert(node != NULL); 107 108 return node; 109 } 110 111 void init() 112 { 113 memset(this, 0, sizeof(*this)); 114 115 this->file = BAD_FILE; 116 } 117 118 src_reg(register_file file, int reg, const glsl_type *type) 119 { 120 init(); 121 122 this->file = file; 123 this->reg = reg; 124 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 125 this->swizzle = swizzle_for_size(type->vector_elements); 126 else 127 this->swizzle = SWIZZLE_XYZW; 128 } 129 130 /** Generic unset register constructor. 
*/ 131 src_reg() 132 { 133 init(); 134 } 135 136 src_reg(float f) 137 { 138 init(); 139 140 this->file = IMM; 141 this->type = BRW_REGISTER_TYPE_F; 142 this->imm.f = f; 143 } 144 145 src_reg(uint32_t u) 146 { 147 init(); 148 149 this->file = IMM; 150 this->type = BRW_REGISTER_TYPE_UD; 151 this->imm.u = u; 152 } 153 154 src_reg(int32_t i) 155 { 156 init(); 157 158 this->file = IMM; 159 this->type = BRW_REGISTER_TYPE_D; 160 this->imm.i = i; 161 } 162 163 bool equals(src_reg *r); 164 bool is_zero() const; 165 bool is_one() const; 166 167 src_reg(class vec4_visitor *v, const struct glsl_type *type); 168 169 explicit src_reg(dst_reg reg); 170 171 GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ 172 bool negate; 173 bool abs; 174 175 src_reg *reladdr; 176}; 177 178class dst_reg : public reg 179{ 180public: 181 /* Callers of this ralloc-based new need not call delete. It's 182 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 183 static void* operator new(size_t size, void *ctx) 184 { 185 void *node; 186 187 node = ralloc_size(ctx, size); 188 assert(node != NULL); 189 190 return node; 191 } 192 193 void init() 194 { 195 memset(this, 0, sizeof(*this)); 196 this->file = BAD_FILE; 197 this->writemask = WRITEMASK_XYZW; 198 } 199 200 dst_reg() 201 { 202 init(); 203 } 204 205 dst_reg(register_file file, int reg) 206 { 207 init(); 208 209 this->file = file; 210 this->reg = reg; 211 } 212 213 dst_reg(struct brw_reg reg) 214 { 215 init(); 216 217 this->file = HW_REG; 218 this->fixed_hw_reg = reg; 219 } 220 221 dst_reg(class vec4_visitor *v, const struct glsl_type *type); 222 223 explicit dst_reg(src_reg reg); 224 225 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 226 227 src_reg *reladdr; 228}; 229 230class vec4_instruction : public exec_node { 231public: 232 /* Callers of this ralloc-based new need not call delete. It's 233 * easier to just ralloc_free 'ctx' (or any of its ancestors). 
*/ 234 static void* operator new(size_t size, void *ctx) 235 { 236 void *node; 237 238 node = rzalloc_size(ctx, size); 239 assert(node != NULL); 240 241 return node; 242 } 243 244 vec4_instruction(vec4_visitor *v, enum opcode opcode, 245 dst_reg dst = dst_reg(), 246 src_reg src0 = src_reg(), 247 src_reg src1 = src_reg(), 248 src_reg src2 = src_reg()); 249 250 struct brw_reg get_dst(void); 251 struct brw_reg get_src(int i); 252 253 enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ 254 dst_reg dst; 255 src_reg src[3]; 256 257 bool saturate; 258 bool predicate_inverse; 259 uint32_t predicate; 260 261 int conditional_mod; /**< BRW_CONDITIONAL_* */ 262 263 int sampler; 264 int target; /**< MRT target. */ 265 bool shadow_compare; 266 267 bool eot; 268 bool header_present; 269 int mlen; /**< SEND message length */ 270 int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ 271 272 uint32_t offset; /* spill/unspill offset */ 273 /** @{ 274 * Annotation for the generated IR. One of the two can be set. 
275 */ 276 ir_instruction *ir; 277 const char *annotation; 278 279 bool is_math(); 280}; 281 282class vec4_visitor : public ir_visitor 283{ 284public: 285 vec4_visitor(struct brw_vs_compile *c, 286 struct gl_shader_program *prog, struct brw_shader *shader); 287 ~vec4_visitor(); 288 289 dst_reg dst_null_f() 290 { 291 return dst_reg(brw_null_reg()); 292 } 293 294 dst_reg dst_null_d() 295 { 296 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 297 } 298 299 struct brw_context *brw; 300 const struct gl_vertex_program *vp; 301 struct intel_context *intel; 302 struct gl_context *ctx; 303 struct brw_vs_compile *c; 304 struct brw_vs_prog_data *prog_data; 305 struct brw_compile *p; 306 struct brw_shader *shader; 307 struct gl_shader_program *prog; 308 void *mem_ctx; 309 exec_list instructions; 310 311 char *fail_msg; 312 bool failed; 313 314 /** 315 * GLSL IR currently being processed, which is associated with our 316 * driver IR instructions for debugging purposes. 317 */ 318 ir_instruction *base_ir; 319 const char *current_annotation; 320 321 int *virtual_grf_sizes; 322 int virtual_grf_count; 323 int virtual_grf_array_size; 324 int first_non_payload_grf; 325 int *virtual_grf_def; 326 int *virtual_grf_use; 327 dst_reg userplane[MAX_CLIP_PLANES]; 328 329 /** 330 * This is the size to be used for an array with an element per 331 * reg_offset 332 */ 333 int virtual_grf_reg_count; 334 /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */ 335 int *virtual_grf_reg_map; 336 337 bool live_intervals_valid; 338 339 dst_reg *variable_storage(ir_variable *var); 340 341 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); 342 343 src_reg src_reg_for_float(float val); 344 345 /** 346 * \name Visit methods 347 * 348 * As typical for the visitor pattern, there must be one \c visit method for 349 * each concrete subclass of \c ir_instruction. Virtual base classes within 350 * the hierarchy should not have \c visit methods. 
351 */ 352 /*@{*/ 353 virtual void visit(ir_variable *); 354 virtual void visit(ir_loop *); 355 virtual void visit(ir_loop_jump *); 356 virtual void visit(ir_function_signature *); 357 virtual void visit(ir_function *); 358 virtual void visit(ir_expression *); 359 virtual void visit(ir_swizzle *); 360 virtual void visit(ir_dereference_variable *); 361 virtual void visit(ir_dereference_array *); 362 virtual void visit(ir_dereference_record *); 363 virtual void visit(ir_assignment *); 364 virtual void visit(ir_constant *); 365 virtual void visit(ir_call *); 366 virtual void visit(ir_return *); 367 virtual void visit(ir_discard *); 368 virtual void visit(ir_texture *); 369 virtual void visit(ir_if *); 370 /*@}*/ 371 372 src_reg result; 373 374 /* Regs for vertex results. Generated at ir_variable visiting time 375 * for the ir->location's used. 376 */ 377 dst_reg output_reg[BRW_VERT_RESULT_MAX]; 378 const char *output_reg_annotation[BRW_VERT_RESULT_MAX]; 379 int uniform_size[MAX_UNIFORMS]; 380 int uniform_vector_size[MAX_UNIFORMS]; 381 int uniforms; 382 383 struct hash_table *variable_ht; 384 385 bool run(void); 386 void fail(const char *msg, ...); 387 388 int virtual_grf_alloc(int size); 389 void setup_uniform_clipplane_values(); 390 int setup_uniform_values(int loc, const glsl_type *type); 391 void setup_builtin_uniform_values(ir_variable *ir); 392 int setup_attributes(int payload_reg); 393 int setup_uniforms(int payload_reg); 394 void setup_payload(); 395 void reg_allocate_trivial(); 396 void reg_allocate(); 397 void move_grf_array_access_to_scratch(); 398 void move_uniform_array_access_to_pull_constants(); 399 void move_push_constants_to_pull_constants(); 400 void split_uniform_registers(); 401 void pack_uniform_registers(); 402 void calculate_live_intervals(); 403 bool dead_code_eliminate(); 404 bool virtual_grf_interferes(int a, int b); 405 bool opt_copy_propagation(); 406 bool opt_algebraic(); 407 bool opt_compute_to_mrf(); 408 409 vec4_instruction 
*emit(vec4_instruction *inst); 410 411 vec4_instruction *emit(enum opcode opcode); 412 413 vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); 414 415 vec4_instruction *emit(enum opcode opcode, dst_reg dst, 416 src_reg src0, src_reg src1); 417 418 vec4_instruction *emit(enum opcode opcode, dst_reg dst, 419 src_reg src0, src_reg src1, src_reg src2); 420 421 vec4_instruction *emit_before(vec4_instruction *inst, 422 vec4_instruction *new_inst); 423 424 vec4_instruction *MOV(dst_reg dst, src_reg src0); 425 vec4_instruction *NOT(dst_reg dst, src_reg src0); 426 vec4_instruction *RNDD(dst_reg dst, src_reg src0); 427 vec4_instruction *RNDE(dst_reg dst, src_reg src0); 428 vec4_instruction *RNDZ(dst_reg dst, src_reg src0); 429 vec4_instruction *FRC(dst_reg dst, src_reg src0); 430 vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1); 431 vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1); 432 vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1); 433 vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1); 434 vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1); 435 vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1); 436 vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1); 437 vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1); 438 vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1); 439 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1, 440 uint32_t condition); 441 vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition); 442 vec4_instruction *IF(uint32_t predicate); 443 vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index); 444 vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); 445 vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); 446 447 int implied_mrf_writes(vec4_instruction *inst); 448 449 bool try_rewrite_rhs_to_dst(ir_assignment *ir, 450 dst_reg dst, 451 src_reg src, 452 vec4_instruction 
*pre_rhs_inst, 453 vec4_instruction *last_rhs_inst); 454 455 /** Walks an exec_list of ir_instruction and sends it through this visitor. */ 456 void visit_instructions(const exec_list *list); 457 458 void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate); 459 void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); 460 void emit_if_gen6(ir_if *ir); 461 462 void emit_block_move(dst_reg *dst, src_reg *src, 463 const struct glsl_type *type, uint32_t predicate); 464 465 void emit_constant_values(dst_reg *dst, ir_constant *value); 466 467 /** 468 * Emit the correct dot-product instruction for the type of arguments 469 */ 470 void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); 471 472 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 473 dst_reg dst, src_reg src0); 474 475 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 476 dst_reg dst, src_reg src0, src_reg src1); 477 478 void emit_scs(ir_instruction *ir, enum prog_opcode op, 479 dst_reg dst, const src_reg &src); 480 481 void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); 482 void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); 483 void emit_math(enum opcode opcode, dst_reg dst, src_reg src); 484 void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); 485 void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); 486 void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); 487 488 void emit_ndc_computation(); 489 void emit_psiz_and_flags(struct brw_reg reg); 490 void emit_clip_distances(struct brw_reg reg, int offset); 491 void emit_generic_urb_slot(dst_reg reg, int vert_result); 492 void emit_urb_slot(int mrf, int vert_result); 493 void emit_urb_writes(void); 494 495 src_reg get_scratch_offset(vec4_instruction *inst, 496 src_reg *reladdr, int reg_offset); 497 src_reg get_pull_constant_offset(vec4_instruction *inst, 498 src_reg *reladdr, int 
reg_offset); 499 void emit_scratch_read(vec4_instruction *inst, 500 dst_reg dst, 501 src_reg orig_src, 502 int base_offset); 503 void emit_scratch_write(vec4_instruction *inst, 504 src_reg temp, 505 dst_reg orig_dst, 506 int base_offset); 507 void emit_pull_constant_load(vec4_instruction *inst, 508 dst_reg dst, 509 src_reg orig_src, 510 int base_offset); 511 512 bool try_emit_sat(ir_expression *ir); 513 void resolve_ud_negate(src_reg *reg); 514 515 bool process_move_condition(ir_rvalue *ir); 516 517 void generate_code(); 518 void generate_vs_instruction(vec4_instruction *inst, 519 struct brw_reg dst, 520 struct brw_reg *src); 521 522 void generate_math1_gen4(vec4_instruction *inst, 523 struct brw_reg dst, 524 struct brw_reg src); 525 void generate_math1_gen6(vec4_instruction *inst, 526 struct brw_reg dst, 527 struct brw_reg src); 528 void generate_math2_gen4(vec4_instruction *inst, 529 struct brw_reg dst, 530 struct brw_reg src0, 531 struct brw_reg src1); 532 void generate_math2_gen6(vec4_instruction *inst, 533 struct brw_reg dst, 534 struct brw_reg src0, 535 struct brw_reg src1); 536 537 void generate_urb_write(vec4_instruction *inst); 538 void generate_oword_dual_block_offsets(struct brw_reg m1, 539 struct brw_reg index); 540 void generate_scratch_write(vec4_instruction *inst, 541 struct brw_reg dst, 542 struct brw_reg src, 543 struct brw_reg index); 544 void generate_scratch_read(vec4_instruction *inst, 545 struct brw_reg dst, 546 struct brw_reg index); 547 void generate_pull_constant_load(vec4_instruction *inst, 548 struct brw_reg dst, 549 struct brw_reg index); 550}; 551 552} /* namespace brw */ 553 554#endif /* BRW_VEC4_H */ 555