vc4_qir.h revision 85316d059c899ac096331251de6b233229aa0b4f
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "util/simple_list.h" 36#include "util/u_math.h" 37#include "tgsi/tgsi_parse.h" 38 39enum qfile { 40 QFILE_NULL, 41 QFILE_TEMP, 42 QFILE_VARY, 43 QFILE_UNIF, 44 QFILE_VPM, 45 46 /** 47 * Stores an immediate value in the index field that can be turned 48 * into a small immediate field by qpu_encode_small_immediate(). 49 */ 50 QFILE_SMALL_IMM, 51}; 52 53struct qreg { 54 enum qfile file; 55 uint32_t index; 56}; 57 58enum qop { 59 QOP_UNDEF, 60 QOP_MOV, 61 QOP_FADD, 62 QOP_FSUB, 63 QOP_FMUL, 64 QOP_MUL24, 65 QOP_FMIN, 66 QOP_FMAX, 67 QOP_FMINABS, 68 QOP_FMAXABS, 69 QOP_ADD, 70 QOP_SUB, 71 QOP_SHL, 72 QOP_SHR, 73 QOP_ASR, 74 QOP_MIN, 75 QOP_MAX, 76 QOP_AND, 77 QOP_OR, 78 QOP_XOR, 79 QOP_NOT, 80 81 /* Note: Orderings of these compares must be the same as in 82 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 83 * otherwise 0. */ 84 QOP_SEL_X_0_ZS, 85 QOP_SEL_X_0_ZC, 86 QOP_SEL_X_0_NS, 87 QOP_SEL_X_0_NC, 88 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 89 QOP_SEL_X_Y_ZS, 90 QOP_SEL_X_Y_ZC, 91 QOP_SEL_X_Y_NS, 92 QOP_SEL_X_Y_NC, 93 94 QOP_FTOI, 95 QOP_ITOF, 96 QOP_RCP, 97 QOP_RSQ, 98 QOP_EXP2, 99 QOP_LOG2, 100 QOP_VW_SETUP, 101 QOP_VR_SETUP, 102 QOP_PACK_SCALED, 103 QOP_PACK_8888_F, 104 QOP_PACK_8A_F, 105 QOP_PACK_8B_F, 106 QOP_PACK_8C_F, 107 QOP_PACK_8D_F, 108 QOP_TLB_DISCARD_SETUP, 109 QOP_TLB_STENCIL_SETUP, 110 QOP_TLB_Z_WRITE, 111 QOP_TLB_COLOR_WRITE, 112 QOP_TLB_COLOR_READ, 113 QOP_VARY_ADD_C, 114 115 QOP_FRAG_X, 116 QOP_FRAG_Y, 117 QOP_FRAG_Z, 118 QOP_FRAG_W, 119 QOP_FRAG_REV_FLAG, 120 121 QOP_UNPACK_8A_F, 122 QOP_UNPACK_8B_F, 123 QOP_UNPACK_8C_F, 124 QOP_UNPACK_8D_F, 125 QOP_UNPACK_16A_F, 126 QOP_UNPACK_16B_F, 127 128 QOP_UNPACK_8A_I, 129 QOP_UNPACK_8B_I, 130 QOP_UNPACK_8C_I, 131 QOP_UNPACK_8D_I, 132 QOP_UNPACK_16A_I, 133 QOP_UNPACK_16B_I, 134 135 /** Texture x coordinate parameter write */ 136 QOP_TEX_S, 137 /** Texture y coordinate parameter write */ 138 QOP_TEX_T, 139 /** Texture border color parameter or cube map z coordinate write */ 140 QOP_TEX_R, 141 /** Texture LOD bias parameter write */ 142 QOP_TEX_B, 143 144 /** 145 * Texture-unit 4-byte read with address provided direct in S 146 * cooordinate. 147 * 148 * The first operand is the offset from the start of the UBO, and the 149 * second is the uniform that has the UBO's base pointer. 150 */ 151 QOP_TEX_DIRECT, 152 153 /** 154 * Signal of texture read being necessary and then reading r4 into 155 * the destination 156 */ 157 QOP_TEX_RESULT, 158 QOP_R4_UNPACK_A, 159 QOP_R4_UNPACK_B, 160 QOP_R4_UNPACK_C, 161 QOP_R4_UNPACK_D 162}; 163 164struct queued_qpu_inst { 165 struct simple_node link; 166 uint64_t inst; 167}; 168 169struct qinst { 170 struct simple_node link; 171 172 enum qop op; 173 struct qreg dst; 174 struct qreg *src; 175 bool sf; 176}; 177 178enum qstage { 179 /** 180 * Coordinate shader, runs during binning, before the VS, and just 181 * outputs position. 182 */ 183 QSTAGE_COORD, 184 QSTAGE_VERT, 185 QSTAGE_FRAG, 186}; 187 188enum quniform_contents { 189 /** 190 * Indicates that a constant 32-bit value is copied from the program's 191 * uniform contents. 192 */ 193 QUNIFORM_CONSTANT, 194 /** 195 * Indicates that the program's uniform contents are used as an index 196 * into the GL uniform storage. 197 */ 198 QUNIFORM_UNIFORM, 199 200 /** @{ 201 * Scaling factors from clip coordinates to relative to the viewport 202 * center. 203 * 204 * This is used by the coordinate and vertex shaders to produce the 205 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 206 * point offsets from the viewport ccenter. 207 */ 208 QUNIFORM_VIEWPORT_X_SCALE, 209 QUNIFORM_VIEWPORT_Y_SCALE, 210 /** @} */ 211 212 QUNIFORM_VIEWPORT_Z_OFFSET, 213 QUNIFORM_VIEWPORT_Z_SCALE, 214 215 QUNIFORM_USER_CLIP_PLANE, 216 217 /** 218 * A reference to a texture config parameter 0 uniform. 219 * 220 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 221 * defines texture type, miplevels, and such. It will be found as a 222 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 223 */ 224 QUNIFORM_TEXTURE_CONFIG_P0, 225 226 /** 227 * A reference to a texture config parameter 1 uniform. 228 * 229 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 230 * defines texture width, height, filters, and wrap modes. It will be 231 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 232 * sequence. 233 */ 234 QUNIFORM_TEXTURE_CONFIG_P1, 235 236 /** A reference to a texture config parameter 2 cubemap stride uniform */ 237 QUNIFORM_TEXTURE_CONFIG_P2, 238 239 QUNIFORM_UBO_ADDR, 240 241 QUNIFORM_TEXRECT_SCALE_X, 242 QUNIFORM_TEXRECT_SCALE_Y, 243 244 QUNIFORM_TEXTURE_BORDER_COLOR, 245 246 QUNIFORM_BLEND_CONST_COLOR, 247 QUNIFORM_STENCIL, 248 249 QUNIFORM_ALPHA_REF, 250}; 251 252struct vc4_varying_semantic { 253 uint8_t semantic; 254 uint8_t index; 255 uint8_t swizzle; 256}; 257 258struct vc4_compiler_ubo_range { 259 /** 260 * offset in bytes from the start of the ubo where this range is 261 * uploaded. 262 * 263 * Only set once used is set. 264 */ 265 uint32_t dst_offset; 266 267 /** 268 * offset in bytes from the start of the gallium uniforms where the 269 * data comes from. 270 */ 271 uint32_t src_offset; 272 273 /** size in bytes of this ubo range */ 274 uint32_t size; 275 276 /** 277 * Set if this range is used by the shader for indirect uniforms 278 * access. 279 */ 280 bool used; 281}; 282 283struct vc4_compile { 284 struct vc4_context *vc4; 285 struct tgsi_parse_context parser; 286 struct qreg *temps; 287 /* For each temp, the instruction generating its value. */ 288 struct qinst **defs; 289 uint32_t defs_array_size; 290 /** 291 * Inputs to the shader, arranged by TGSI declaration order. 292 * 293 * Not all fragment shader QFILE_VARY reads are present in this array. 294 */ 295 struct qreg *inputs; 296 struct qreg *outputs; 297 struct qreg *consts; 298 struct qreg addr[4]; /* TGSI ARL destination. */ 299 uint32_t temps_array_size; 300 uint32_t inputs_array_size; 301 uint32_t outputs_array_size; 302 uint32_t uniforms_array_size; 303 uint32_t consts_array_size; 304 uint32_t num_consts; 305 306 struct vc4_compiler_ubo_range *ubo_ranges; 307 uint32_t ubo_ranges_array_size; 308 uint32_t num_ubo_ranges; 309 uint32_t next_ubo_dst_offset; 310 311 struct qreg line_x, point_x, point_y; 312 struct qreg discard; 313 314 uint8_t vattr_sizes[8]; 315 316 /** 317 * Array of the TGSI semantics of all FS QFILE_VARY reads. 318 * 319 * This includes those that aren't part of the VPM varyings, like 320 * point/line coordinates. 321 */ 322 struct vc4_varying_semantic *input_semantics; 323 uint32_t num_input_semantics; 324 uint32_t input_semantics_array_size; 325 326 /** 327 * An entry per outputs[] in the VS indicating what the semantic of 328 * the output is. Used to emit from the VS in the order that the FS 329 * needs. 330 */ 331 struct vc4_varying_semantic *output_semantics; 332 333 struct pipe_shader_state *shader_state; 334 struct vc4_key *key; 335 struct vc4_fs_key *fs_key; 336 struct vc4_vs_key *vs_key; 337 338 uint32_t *uniform_data; 339 enum quniform_contents *uniform_contents; 340 uint32_t uniform_array_size; 341 uint32_t num_uniforms; 342 uint32_t num_outputs; 343 uint32_t num_texture_samples; 344 uint32_t output_position_index; 345 uint32_t output_clipvertex_index; 346 uint32_t output_color_index; 347 uint32_t output_point_size_index; 348 349 struct qreg undef; 350 enum qstage stage; 351 uint32_t num_temps; 352 struct simple_node instructions; 353 uint32_t immediates[1024]; 354 355 struct simple_node qpu_inst_list; 356 uint64_t *qpu_insts; 357 uint32_t qpu_inst_count; 358 uint32_t qpu_inst_size; 359 uint32_t num_inputs; 360 361 uint32_t program_id; 362 uint32_t variant_id; 363}; 364 365struct vc4_compile *qir_compile_init(void); 366void qir_compile_destroy(struct vc4_compile *c); 367struct qinst *qir_inst(enum qop op, struct qreg dst, 368 struct qreg src0, struct qreg src1); 369struct qinst *qir_inst4(enum qop op, struct qreg dst, 370 struct qreg a, 371 struct qreg b, 372 struct qreg c, 373 struct qreg d); 374void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); 375struct qreg qir_uniform(struct vc4_compile *c, 376 enum quniform_contents contents, 377 uint32_t data); 378void qir_reorder_uniforms(struct vc4_compile *c); 379void qir_emit(struct vc4_compile *c, struct qinst *inst); 380struct qreg qir_get_temp(struct vc4_compile *c); 381int qir_get_op_nsrc(enum qop qop); 382bool qir_reg_equals(struct qreg a, struct qreg b); 383bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 384bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 385bool qir_is_multi_instruction(struct qinst *inst); 386bool qir_is_tex(struct qinst *inst); 387bool qir_depends_on_flags(struct qinst *inst); 388bool qir_writes_r4(struct qinst *inst); 389bool qir_reads_r4(struct qinst *inst); 390bool qir_src_needs_a_file(struct qinst *inst); 391struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); 392 393void qir_dump(struct vc4_compile *c); 394void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 395const char *qir_get_stage_name(enum qstage stage); 396 397void qir_optimize(struct vc4_compile *c); 398bool qir_opt_algebraic(struct vc4_compile *c); 399bool qir_opt_copy_propagation(struct vc4_compile *c); 400bool qir_opt_cse(struct vc4_compile *c); 401bool qir_opt_dead_code(struct vc4_compile *c); 402bool qir_opt_small_immediates(struct vc4_compile *c); 403bool qir_opt_vpm_writes(struct vc4_compile *c); 404void qir_lower_uniforms(struct vc4_compile *c); 405 406void qpu_schedule_instructions(struct vc4_compile *c); 407 408void qir_SF(struct vc4_compile *c, struct qreg src); 409 410static inline struct qreg 411qir_uniform_ui(struct vc4_compile *c, uint32_t ui) 412{ 413 return qir_uniform(c, QUNIFORM_CONSTANT, ui); 414} 415 416static inline struct qreg 417qir_uniform_f(struct vc4_compile *c, float f) 418{ 419 return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 420} 421 422#define QIR_ALU0(name) \ 423static inline struct qreg \ 424qir_##name(struct vc4_compile *c) \ 425{ \ 426 struct qreg t = qir_get_temp(c); \ 427 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 428 return t; \ 429} 430 431#define QIR_ALU1(name) \ 432static inline struct qreg \ 433qir_##name(struct vc4_compile *c, struct qreg a) \ 434{ \ 435 struct qreg t = qir_get_temp(c); \ 436 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 437 return t; \ 438} 439 440#define QIR_ALU2(name) \ 441static inline struct qreg \ 442qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 443{ \ 444 struct qreg t = qir_get_temp(c); \ 445 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 446 return t; \ 447} 448 449#define QIR_NODST_1(name) \ 450static inline void \ 451qir_##name(struct vc4_compile *c, struct qreg a) \ 452{ \ 453 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 454} 455 456#define QIR_NODST_2(name) \ 457static inline void \ 458qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 459{ \ 460 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 461} 462 463QIR_ALU1(MOV) 464QIR_ALU2(FADD) 465QIR_ALU2(FSUB) 466QIR_ALU2(FMUL) 467QIR_ALU2(MUL24) 468QIR_ALU1(SEL_X_0_ZS) 469QIR_ALU1(SEL_X_0_ZC) 470QIR_ALU1(SEL_X_0_NS) 471QIR_ALU1(SEL_X_0_NC) 472QIR_ALU2(SEL_X_Y_ZS) 473QIR_ALU2(SEL_X_Y_ZC) 474QIR_ALU2(SEL_X_Y_NS) 475QIR_ALU2(SEL_X_Y_NC) 476QIR_ALU2(FMIN) 477QIR_ALU2(FMAX) 478QIR_ALU2(FMINABS) 479QIR_ALU2(FMAXABS) 480QIR_ALU1(FTOI) 481QIR_ALU1(ITOF) 482 483QIR_ALU2(ADD) 484QIR_ALU2(SUB) 485QIR_ALU2(SHL) 486QIR_ALU2(SHR) 487QIR_ALU2(ASR) 488QIR_ALU2(MIN) 489QIR_ALU2(MAX) 490QIR_ALU2(AND) 491QIR_ALU2(OR) 492QIR_ALU2(XOR) 493QIR_ALU1(NOT) 494 495QIR_ALU1(RCP) 496QIR_ALU1(RSQ) 497QIR_ALU1(EXP2) 498QIR_ALU1(LOG2) 499QIR_ALU2(PACK_SCALED) 500QIR_ALU1(PACK_8888_F) 501QIR_ALU2(PACK_8A_F) 502QIR_ALU2(PACK_8B_F) 503QIR_ALU2(PACK_8C_F) 504QIR_ALU2(PACK_8D_F) 505QIR_ALU1(VARY_ADD_C) 506QIR_NODST_2(TEX_S) 507QIR_NODST_2(TEX_T) 508QIR_NODST_2(TEX_R) 509QIR_NODST_2(TEX_B) 510QIR_NODST_2(TEX_DIRECT) 511QIR_ALU0(FRAG_X) 512QIR_ALU0(FRAG_Y) 513QIR_ALU0(FRAG_Z) 514QIR_ALU0(FRAG_W) 515QIR_ALU0(FRAG_REV_FLAG) 516QIR_ALU0(TEX_RESULT) 517QIR_ALU0(TLB_COLOR_READ) 518QIR_NODST_1(TLB_Z_WRITE) 519QIR_NODST_1(TLB_DISCARD_SETUP) 520QIR_NODST_1(TLB_STENCIL_SETUP) 521 522static inline struct qreg 523qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 524{ 525 struct qreg t = qir_get_temp(c); 526 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 527 return t; 528} 529 530static inline struct qreg 531qir_SEL_X_0_COND(struct vc4_compile *c, int i) 532{ 533 struct qreg t = qir_get_temp(c); 534 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 535 return t; 536} 537 538static inline struct qreg 539qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 540{ 541 struct qreg t = qir_get_temp(c); 542 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 543 return t; 544} 545 546static inline struct qreg 547qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 548{ 549 struct qreg t = qir_get_temp(c); 550 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 551 return t; 552} 553 554static inline struct qreg 555qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 556{ 557 struct qreg t = qir_get_temp(c); 558 qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); 559 return t; 560} 561 562static inline struct qreg 563qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 564{ 565 struct qreg t = qir_get_temp(c); 566 qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); 567 return t; 568} 569 570static inline struct qreg 571qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) 572{ 573 struct qreg t = qir_get_temp(c); 574 qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); 575 return t; 576} 577 578static inline struct qreg 579qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 580{ 581 return qir_EXP2(c, qir_FMUL(c, 582 y, 583 qir_LOG2(c, x))); 584} 585 586static inline void 587qir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 588{ 589 static const struct qreg vpm = { QFILE_VPM, 0 }; 590 qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef)); 591} 592 593#endif /* VC4_QIR_H */ 594