vc4_qir.h revision 8c5dcdbccb68b73d2856d9c1faafadc536e682e3
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "util/simple_list.h" 36#include "util/u_math.h" 37#include "tgsi/tgsi_parse.h" 38 39enum qfile { 40 QFILE_NULL, 41 QFILE_TEMP, 42 QFILE_VARY, 43 QFILE_UNIF, 44 QFILE_VPM, 45 46 /** 47 * Stores an immediate value in the index field that can be turned 48 * into a small immediate field by qpu_encode_small_immediate(). 49 */ 50 QFILE_SMALL_IMM, 51}; 52 53struct qreg { 54 enum qfile file; 55 uint32_t index; 56}; 57 58enum qop { 59 QOP_UNDEF, 60 QOP_MOV, 61 QOP_FADD, 62 QOP_FSUB, 63 QOP_FMUL, 64 QOP_MUL24, 65 QOP_FMIN, 66 QOP_FMAX, 67 QOP_FMINABS, 68 QOP_FMAXABS, 69 QOP_ADD, 70 QOP_SUB, 71 QOP_SHL, 72 QOP_SHR, 73 QOP_ASR, 74 QOP_MIN, 75 QOP_MAX, 76 QOP_AND, 77 QOP_OR, 78 QOP_XOR, 79 QOP_NOT, 80 81 /* Note: Orderings of these compares must be the same as in 82 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 83 * otherwise 0. */ 84 QOP_SEL_X_0_ZS, 85 QOP_SEL_X_0_ZC, 86 QOP_SEL_X_0_NS, 87 QOP_SEL_X_0_NC, 88 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 89 QOP_SEL_X_Y_ZS, 90 QOP_SEL_X_Y_ZC, 91 QOP_SEL_X_Y_NS, 92 QOP_SEL_X_Y_NC, 93 94 QOP_FTOI, 95 QOP_ITOF, 96 QOP_RCP, 97 QOP_RSQ, 98 QOP_EXP2, 99 QOP_LOG2, 100 QOP_VW_SETUP, 101 QOP_VR_SETUP, 102 QOP_PACK_SCALED, 103 QOP_PACK_8888_F, 104 QOP_PACK_8A_F, 105 QOP_PACK_8B_F, 106 QOP_PACK_8C_F, 107 QOP_PACK_8D_F, 108 QOP_TLB_DISCARD_SETUP, 109 QOP_TLB_STENCIL_SETUP, 110 QOP_TLB_Z_WRITE, 111 QOP_TLB_COLOR_WRITE, 112 QOP_TLB_COLOR_READ, 113 QOP_VARY_ADD_C, 114 115 QOP_FRAG_X, 116 QOP_FRAG_Y, 117 QOP_FRAG_Z, 118 QOP_FRAG_W, 119 QOP_FRAG_REV_FLAG, 120 121 QOP_UNPACK_8A_F, 122 QOP_UNPACK_8B_F, 123 QOP_UNPACK_8C_F, 124 QOP_UNPACK_8D_F, 125 QOP_UNPACK_16A_F, 126 QOP_UNPACK_16B_F, 127 128 QOP_UNPACK_8A_I, 129 QOP_UNPACK_8B_I, 130 QOP_UNPACK_8C_I, 131 QOP_UNPACK_8D_I, 132 QOP_UNPACK_16A_I, 133 QOP_UNPACK_16B_I, 134 135 /** Texture x coordinate parameter write */ 136 QOP_TEX_S, 137 /** Texture y coordinate parameter write */ 138 QOP_TEX_T, 139 /** Texture border color parameter or cube map z coordinate write */ 140 QOP_TEX_R, 141 /** Texture LOD bias parameter write */ 142 QOP_TEX_B, 143 144 /** 145 * Texture-unit 4-byte read with address provided direct in S 146 * cooordinate. 147 * 148 * The first operand is the offset from the start of the UBO, and the 149 * second is the uniform that has the UBO's base pointer. 150 */ 151 QOP_TEX_DIRECT, 152 153 /** 154 * Signal of texture read being necessary and then reading r4 into 155 * the destination 156 */ 157 QOP_TEX_RESULT, 158 QOP_R4_UNPACK_A, 159 QOP_R4_UNPACK_B, 160 QOP_R4_UNPACK_C, 161 QOP_R4_UNPACK_D 162}; 163 164struct queued_qpu_inst { 165 struct simple_node link; 166 uint64_t inst; 167}; 168 169struct qinst { 170 struct simple_node link; 171 172 enum qop op; 173 struct qreg dst; 174 struct qreg *src; 175 bool sf; 176}; 177 178enum qstage { 179 /** 180 * Coordinate shader, runs during binning, before the VS, and just 181 * outputs position. 182 */ 183 QSTAGE_COORD, 184 QSTAGE_VERT, 185 QSTAGE_FRAG, 186}; 187 188enum quniform_contents { 189 /** 190 * Indicates that a constant 32-bit value is copied from the program's 191 * uniform contents. 192 */ 193 QUNIFORM_CONSTANT, 194 /** 195 * Indicates that the program's uniform contents are used as an index 196 * into the GL uniform storage. 197 */ 198 QUNIFORM_UNIFORM, 199 200 /** @{ 201 * Scaling factors from clip coordinates to relative to the viewport 202 * center. 203 * 204 * This is used by the coordinate and vertex shaders to produce the 205 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 206 * point offsets from the viewport ccenter. 207 */ 208 QUNIFORM_VIEWPORT_X_SCALE, 209 QUNIFORM_VIEWPORT_Y_SCALE, 210 /** @} */ 211 212 QUNIFORM_VIEWPORT_Z_OFFSET, 213 QUNIFORM_VIEWPORT_Z_SCALE, 214 215 QUNIFORM_USER_CLIP_PLANE, 216 217 /** 218 * A reference to a texture config parameter 0 uniform. 219 * 220 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 221 * defines texture type, miplevels, and such. It will be found as a 222 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 223 */ 224 QUNIFORM_TEXTURE_CONFIG_P0, 225 226 /** 227 * A reference to a texture config parameter 1 uniform. 228 * 229 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 230 * defines texture width, height, filters, and wrap modes. It will be 231 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 232 * sequence. 233 */ 234 QUNIFORM_TEXTURE_CONFIG_P1, 235 236 /** A reference to a texture config parameter 2 cubemap stride uniform */ 237 QUNIFORM_TEXTURE_CONFIG_P2, 238 239 QUNIFORM_UBO_ADDR, 240 241 QUNIFORM_TEXRECT_SCALE_X, 242 QUNIFORM_TEXRECT_SCALE_Y, 243 244 QUNIFORM_TEXTURE_BORDER_COLOR, 245 246 QUNIFORM_BLEND_CONST_COLOR, 247 QUNIFORM_STENCIL, 248 249 QUNIFORM_ALPHA_REF, 250}; 251 252struct vc4_varying_semantic { 253 uint8_t semantic; 254 uint8_t index; 255 uint8_t swizzle; 256}; 257 258struct vc4_compiler_ubo_range { 259 /** 260 * offset in bytes from the start of the ubo where this range is 261 * uploaded. 262 * 263 * Only set once used is set. 264 */ 265 uint32_t dst_offset; 266 267 /** 268 * offset in bytes from the start of the gallium uniforms where the 269 * data comes from. 270 */ 271 uint32_t src_offset; 272 273 /** size in bytes of this ubo range */ 274 uint32_t size; 275 276 /** 277 * Set if this range is used by the shader for indirect uniforms 278 * access. 279 */ 280 bool used; 281}; 282 283struct vc4_compile { 284 struct vc4_context *vc4; 285 struct tgsi_parse_context parser; 286 struct qreg *temps; 287 /* For each temp, the instruction generating its value. */ 288 struct qinst **defs; 289 uint32_t defs_array_size; 290 /** 291 * Inputs to the shader, arranged by TGSI declaration order. 292 * 293 * Not all fragment shader QFILE_VARY reads are present in this array. 294 */ 295 struct qreg *inputs; 296 struct qreg *outputs; 297 struct qreg *consts; 298 struct qreg addr[4]; /* TGSI ARL destination. */ 299 uint32_t temps_array_size; 300 uint32_t inputs_array_size; 301 uint32_t outputs_array_size; 302 uint32_t uniforms_array_size; 303 uint32_t consts_array_size; 304 uint32_t num_consts; 305 306 struct vc4_compiler_ubo_range *ubo_ranges; 307 uint32_t ubo_ranges_array_size; 308 uint32_t num_ubo_ranges; 309 uint32_t next_ubo_dst_offset; 310 311 struct qreg line_x, point_x, point_y; 312 struct qreg discard; 313 314 uint8_t vattr_sizes[8]; 315 316 /** 317 * Array of the TGSI semantics of all FS QFILE_VARY reads. 318 * 319 * This includes those that aren't part of the VPM varyings, like 320 * point/line coordinates. 321 */ 322 struct vc4_varying_semantic *input_semantics; 323 uint32_t num_input_semantics; 324 uint32_t input_semantics_array_size; 325 326 /** 327 * An entry per outputs[] in the VS indicating what the semantic of 328 * the output is. Used to emit from the VS in the order that the FS 329 * needs. 330 */ 331 struct vc4_varying_semantic *output_semantics; 332 333 struct pipe_shader_state *shader_state; 334 struct vc4_key *key; 335 struct vc4_fs_key *fs_key; 336 struct vc4_vs_key *vs_key; 337 338 uint32_t *uniform_data; 339 enum quniform_contents *uniform_contents; 340 uint32_t uniform_array_size; 341 uint32_t num_uniforms; 342 uint32_t num_outputs; 343 uint32_t num_texture_samples; 344 uint32_t output_position_index; 345 uint32_t output_clipvertex_index; 346 uint32_t output_color_index; 347 uint32_t output_point_size_index; 348 349 struct qreg undef; 350 enum qstage stage; 351 uint32_t num_temps; 352 struct simple_node instructions; 353 uint32_t immediates[1024]; 354 355 struct simple_node qpu_inst_list; 356 uint64_t *qpu_insts; 357 uint32_t qpu_inst_count; 358 uint32_t qpu_inst_size; 359 uint32_t num_inputs; 360 361 uint32_t program_id; 362 uint32_t variant_id; 363}; 364 365struct vc4_compile *qir_compile_init(void); 366void qir_compile_destroy(struct vc4_compile *c); 367struct qinst *qir_inst(enum qop op, struct qreg dst, 368 struct qreg src0, struct qreg src1); 369struct qinst *qir_inst4(enum qop op, struct qreg dst, 370 struct qreg a, 371 struct qreg b, 372 struct qreg c, 373 struct qreg d); 374void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); 375struct qreg qir_uniform(struct vc4_compile *c, 376 enum quniform_contents contents, 377 uint32_t data); 378void qir_reorder_uniforms(struct vc4_compile *c); 379void qir_emit(struct vc4_compile *c, struct qinst *inst); 380struct qreg qir_get_temp(struct vc4_compile *c); 381int qir_get_op_nsrc(enum qop qop); 382bool qir_reg_equals(struct qreg a, struct qreg b); 383bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 384bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 385bool qir_is_multi_instruction(struct qinst *inst); 386bool qir_is_tex(struct qinst *inst); 387bool qir_depends_on_flags(struct qinst *inst); 388bool qir_writes_r4(struct qinst *inst); 389bool qir_reads_r4(struct qinst *inst); 390bool qir_src_needs_a_file(struct qinst *inst); 391struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); 392 393void qir_dump(struct vc4_compile *c); 394void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 395const char *qir_get_stage_name(enum qstage stage); 396 397void qir_optimize(struct vc4_compile *c); 398bool qir_opt_algebraic(struct vc4_compile *c); 399bool qir_opt_constant_folding(struct vc4_compile *c); 400bool qir_opt_copy_propagation(struct vc4_compile *c); 401bool qir_opt_cse(struct vc4_compile *c); 402bool qir_opt_dead_code(struct vc4_compile *c); 403bool qir_opt_small_immediates(struct vc4_compile *c); 404bool qir_opt_vpm_writes(struct vc4_compile *c); 405void qir_lower_uniforms(struct vc4_compile *c); 406 407void qpu_schedule_instructions(struct vc4_compile *c); 408 409void qir_SF(struct vc4_compile *c, struct qreg src); 410 411static inline struct qreg 412qir_uniform_ui(struct vc4_compile *c, uint32_t ui) 413{ 414 return qir_uniform(c, QUNIFORM_CONSTANT, ui); 415} 416 417static inline struct qreg 418qir_uniform_f(struct vc4_compile *c, float f) 419{ 420 return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 421} 422 423#define QIR_ALU0(name) \ 424static inline struct qreg \ 425qir_##name(struct vc4_compile *c) \ 426{ \ 427 struct qreg t = qir_get_temp(c); \ 428 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 429 return t; \ 430} 431 432#define QIR_ALU1(name) \ 433static inline struct qreg \ 434qir_##name(struct vc4_compile *c, struct qreg a) \ 435{ \ 436 struct qreg t = qir_get_temp(c); \ 437 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 438 return t; \ 439} 440 441#define QIR_ALU2(name) \ 442static inline struct qreg \ 443qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 444{ \ 445 struct qreg t = qir_get_temp(c); \ 446 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 447 return t; \ 448} 449 450#define QIR_NODST_1(name) \ 451static inline void \ 452qir_##name(struct vc4_compile *c, struct qreg a) \ 453{ \ 454 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 455} 456 457#define QIR_NODST_2(name) \ 458static inline void \ 459qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 460{ \ 461 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 462} 463 464QIR_ALU1(MOV) 465QIR_ALU2(FADD) 466QIR_ALU2(FSUB) 467QIR_ALU2(FMUL) 468QIR_ALU2(MUL24) 469QIR_ALU1(SEL_X_0_ZS) 470QIR_ALU1(SEL_X_0_ZC) 471QIR_ALU1(SEL_X_0_NS) 472QIR_ALU1(SEL_X_0_NC) 473QIR_ALU2(SEL_X_Y_ZS) 474QIR_ALU2(SEL_X_Y_ZC) 475QIR_ALU2(SEL_X_Y_NS) 476QIR_ALU2(SEL_X_Y_NC) 477QIR_ALU2(FMIN) 478QIR_ALU2(FMAX) 479QIR_ALU2(FMINABS) 480QIR_ALU2(FMAXABS) 481QIR_ALU1(FTOI) 482QIR_ALU1(ITOF) 483 484QIR_ALU2(ADD) 485QIR_ALU2(SUB) 486QIR_ALU2(SHL) 487QIR_ALU2(SHR) 488QIR_ALU2(ASR) 489QIR_ALU2(MIN) 490QIR_ALU2(MAX) 491QIR_ALU2(AND) 492QIR_ALU2(OR) 493QIR_ALU2(XOR) 494QIR_ALU1(NOT) 495 496QIR_ALU1(RCP) 497QIR_ALU1(RSQ) 498QIR_ALU1(EXP2) 499QIR_ALU1(LOG2) 500QIR_ALU2(PACK_SCALED) 501QIR_ALU1(PACK_8888_F) 502QIR_ALU2(PACK_8A_F) 503QIR_ALU2(PACK_8B_F) 504QIR_ALU2(PACK_8C_F) 505QIR_ALU2(PACK_8D_F) 506QIR_ALU1(VARY_ADD_C) 507QIR_NODST_2(TEX_S) 508QIR_NODST_2(TEX_T) 509QIR_NODST_2(TEX_R) 510QIR_NODST_2(TEX_B) 511QIR_NODST_2(TEX_DIRECT) 512QIR_ALU0(FRAG_X) 513QIR_ALU0(FRAG_Y) 514QIR_ALU0(FRAG_Z) 515QIR_ALU0(FRAG_W) 516QIR_ALU0(FRAG_REV_FLAG) 517QIR_ALU0(TEX_RESULT) 518QIR_ALU0(TLB_COLOR_READ) 519QIR_NODST_1(TLB_Z_WRITE) 520QIR_NODST_1(TLB_DISCARD_SETUP) 521QIR_NODST_1(TLB_STENCIL_SETUP) 522 523static inline struct qreg 524qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 525{ 526 struct qreg t = qir_get_temp(c); 527 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 528 return t; 529} 530 531static inline struct qreg 532qir_SEL_X_0_COND(struct vc4_compile *c, int i) 533{ 534 struct qreg t = qir_get_temp(c); 535 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 536 return t; 537} 538 539static inline struct qreg 540qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 541{ 542 struct qreg t = qir_get_temp(c); 543 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 544 return t; 545} 546 547static inline struct qreg 548qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 549{ 550 struct qreg t = qir_get_temp(c); 551 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 552 return t; 553} 554 555static inline struct qreg 556qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 557{ 558 struct qreg t = qir_get_temp(c); 559 qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); 560 return t; 561} 562 563static inline struct qreg 564qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 565{ 566 struct qreg t = qir_get_temp(c); 567 qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); 568 return t; 569} 570 571static inline struct qreg 572qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) 573{ 574 struct qreg t = qir_get_temp(c); 575 qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); 576 return t; 577} 578 579static inline struct qreg 580qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 581{ 582 return qir_EXP2(c, qir_FMUL(c, 583 y, 584 qir_LOG2(c, x))); 585} 586 587static inline void 588qir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 589{ 590 static const struct qreg vpm = { QFILE_VPM, 0 }; 591 qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef)); 592} 593 594#endif /* VC4_QIR_H */ 595