vc4_qir.h revision 0f69d59b1c8f5314c1abe18659b96adcfc51a0e5
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "glsl/nir/nir.h" 36#include "util/list.h" 37#include "util/u_math.h" 38 39enum qfile { 40 QFILE_NULL, 41 QFILE_TEMP, 42 QFILE_VARY, 43 QFILE_UNIF, 44 QFILE_VPM, 45 46 /** 47 * Stores an immediate value in the index field that can be turned 48 * into a small immediate field by qpu_encode_small_immediate(). 49 */ 50 QFILE_SMALL_IMM, 51}; 52 53struct qreg { 54 enum qfile file; 55 uint32_t index; 56}; 57 58enum qop { 59 QOP_UNDEF, 60 QOP_MOV, 61 QOP_FADD, 62 QOP_FSUB, 63 QOP_FMUL, 64 QOP_MUL24, 65 QOP_FMIN, 66 QOP_FMAX, 67 QOP_FMINABS, 68 QOP_FMAXABS, 69 QOP_ADD, 70 QOP_SUB, 71 QOP_SHL, 72 QOP_SHR, 73 QOP_ASR, 74 QOP_MIN, 75 QOP_MAX, 76 QOP_AND, 77 QOP_OR, 78 QOP_XOR, 79 QOP_NOT, 80 81 /* Note: Orderings of these compares must be the same as in 82 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 83 * otherwise 0. */ 84 QOP_SEL_X_0_ZS, 85 QOP_SEL_X_0_ZC, 86 QOP_SEL_X_0_NS, 87 QOP_SEL_X_0_NC, 88 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 89 QOP_SEL_X_Y_ZS, 90 QOP_SEL_X_Y_ZC, 91 QOP_SEL_X_Y_NS, 92 QOP_SEL_X_Y_NC, 93 94 QOP_FTOI, 95 QOP_ITOF, 96 QOP_RCP, 97 QOP_RSQ, 98 QOP_EXP2, 99 QOP_LOG2, 100 QOP_VW_SETUP, 101 QOP_VR_SETUP, 102 QOP_PACK_SCALED, 103 QOP_PACK_8888_F, 104 QOP_PACK_8A_F, 105 QOP_PACK_8B_F, 106 QOP_PACK_8C_F, 107 QOP_PACK_8D_F, 108 QOP_TLB_DISCARD_SETUP, 109 QOP_TLB_STENCIL_SETUP, 110 QOP_TLB_Z_WRITE, 111 QOP_TLB_COLOR_WRITE, 112 QOP_TLB_COLOR_READ, 113 QOP_VARY_ADD_C, 114 115 QOP_FRAG_X, 116 QOP_FRAG_Y, 117 QOP_FRAG_Z, 118 QOP_FRAG_W, 119 QOP_FRAG_REV_FLAG, 120 121 QOP_UNPACK_8A_F, 122 QOP_UNPACK_8B_F, 123 QOP_UNPACK_8C_F, 124 QOP_UNPACK_8D_F, 125 QOP_UNPACK_16A_F, 126 QOP_UNPACK_16B_F, 127 128 QOP_UNPACK_8A_I, 129 QOP_UNPACK_8B_I, 130 QOP_UNPACK_8C_I, 131 QOP_UNPACK_8D_I, 132 QOP_UNPACK_16A_I, 133 QOP_UNPACK_16B_I, 134 135 /** Texture x coordinate parameter write */ 136 QOP_TEX_S, 137 /** Texture y coordinate parameter write */ 138 QOP_TEX_T, 139 /** Texture border color parameter or cube map z coordinate write */ 140 QOP_TEX_R, 141 /** Texture LOD bias parameter write */ 142 QOP_TEX_B, 143 144 /** 145 * Texture-unit 4-byte read with address provided direct in S 146 * cooordinate. 147 * 148 * The first operand is the offset from the start of the UBO, and the 149 * second is the uniform that has the UBO's base pointer. 150 */ 151 QOP_TEX_DIRECT, 152 153 /** 154 * Signal of texture read being necessary and then reading r4 into 155 * the destination 156 */ 157 QOP_TEX_RESULT, 158 QOP_R4_UNPACK_A, 159 QOP_R4_UNPACK_B, 160 QOP_R4_UNPACK_C, 161 QOP_R4_UNPACK_D 162}; 163 164struct queued_qpu_inst { 165 struct list_head link; 166 uint64_t inst; 167}; 168 169struct qinst { 170 struct list_head link; 171 172 enum qop op; 173 struct qreg dst; 174 struct qreg *src; 175 bool sf; 176}; 177 178enum qstage { 179 /** 180 * Coordinate shader, runs during binning, before the VS, and just 181 * outputs position. 182 */ 183 QSTAGE_COORD, 184 QSTAGE_VERT, 185 QSTAGE_FRAG, 186}; 187 188enum quniform_contents { 189 /** 190 * Indicates that a constant 32-bit value is copied from the program's 191 * uniform contents. 192 */ 193 QUNIFORM_CONSTANT, 194 /** 195 * Indicates that the program's uniform contents are used as an index 196 * into the GL uniform storage. 197 */ 198 QUNIFORM_UNIFORM, 199 200 /** @{ 201 * Scaling factors from clip coordinates to relative to the viewport 202 * center. 203 * 204 * This is used by the coordinate and vertex shaders to produce the 205 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 206 * point offsets from the viewport ccenter. 207 */ 208 QUNIFORM_VIEWPORT_X_SCALE, 209 QUNIFORM_VIEWPORT_Y_SCALE, 210 /** @} */ 211 212 QUNIFORM_VIEWPORT_Z_OFFSET, 213 QUNIFORM_VIEWPORT_Z_SCALE, 214 215 QUNIFORM_USER_CLIP_PLANE, 216 217 /** 218 * A reference to a texture config parameter 0 uniform. 219 * 220 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 221 * defines texture type, miplevels, and such. It will be found as a 222 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 223 */ 224 QUNIFORM_TEXTURE_CONFIG_P0, 225 226 /** 227 * A reference to a texture config parameter 1 uniform. 228 * 229 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 230 * defines texture width, height, filters, and wrap modes. It will be 231 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 232 * sequence. 233 */ 234 QUNIFORM_TEXTURE_CONFIG_P1, 235 236 /** A reference to a texture config parameter 2 cubemap stride uniform */ 237 QUNIFORM_TEXTURE_CONFIG_P2, 238 239 QUNIFORM_UBO_ADDR, 240 241 QUNIFORM_TEXRECT_SCALE_X, 242 QUNIFORM_TEXRECT_SCALE_Y, 243 244 QUNIFORM_TEXTURE_BORDER_COLOR, 245 246 QUNIFORM_BLEND_CONST_COLOR, 247 QUNIFORM_STENCIL, 248 249 QUNIFORM_ALPHA_REF, 250}; 251 252struct vc4_varying_semantic { 253 uint8_t semantic; 254 uint8_t index; 255 uint8_t swizzle; 256}; 257 258struct vc4_compiler_ubo_range { 259 /** 260 * offset in bytes from the start of the ubo where this range is 261 * uploaded. 262 * 263 * Only set once used is set. 264 */ 265 uint32_t dst_offset; 266 267 /** 268 * offset in bytes from the start of the gallium uniforms where the 269 * data comes from. 270 */ 271 uint32_t src_offset; 272 273 /** size in bytes of this ubo range */ 274 uint32_t size; 275 276 /** 277 * Set if this range is used by the shader for indirect uniforms 278 * access. 279 */ 280 bool used; 281}; 282 283struct vc4_compile { 284 struct vc4_context *vc4; 285 nir_shader *s; 286 nir_function_impl *impl; 287 struct exec_list *cf_node_list; 288 289 /** 290 * Mapping from nir_register * or nir_ssa_def * to array of struct 291 * qreg for the values. 292 */ 293 struct hash_table *def_ht; 294 295 /* For each temp, the instruction generating its value. */ 296 struct qinst **defs; 297 uint32_t defs_array_size; 298 299 /** 300 * Inputs to the shader, arranged by TGSI declaration order. 301 * 302 * Not all fragment shader QFILE_VARY reads are present in this array. 303 */ 304 struct qreg *inputs; 305 struct qreg *outputs; 306 uint32_t inputs_array_size; 307 uint32_t outputs_array_size; 308 uint32_t uniforms_array_size; 309 310 struct vc4_compiler_ubo_range *ubo_ranges; 311 uint32_t ubo_ranges_array_size; 312 /** Number of uniform areas declared in ubo_ranges. */ 313 uint32_t num_uniform_ranges; 314 /** Number of uniform areas used for indirect addressed loads. */ 315 uint32_t num_ubo_ranges; 316 uint32_t next_ubo_dst_offset; 317 318 struct qreg line_x, point_x, point_y; 319 struct qreg discard; 320 321 uint8_t vattr_sizes[8]; 322 323 /** 324 * Array of the TGSI semantics of all FS QFILE_VARY reads. 325 * 326 * This includes those that aren't part of the VPM varyings, like 327 * point/line coordinates. 328 */ 329 struct vc4_varying_semantic *input_semantics; 330 uint32_t num_input_semantics; 331 uint32_t input_semantics_array_size; 332 333 /** 334 * An entry per outputs[] in the VS indicating what the semantic of 335 * the output is. Used to emit from the VS in the order that the FS 336 * needs. 337 */ 338 struct vc4_varying_semantic *output_semantics; 339 340 struct pipe_shader_state *shader_state; 341 struct vc4_key *key; 342 struct vc4_fs_key *fs_key; 343 struct vc4_vs_key *vs_key; 344 345 uint32_t *uniform_data; 346 enum quniform_contents *uniform_contents; 347 uint32_t uniform_array_size; 348 uint32_t num_uniforms; 349 uint32_t num_outputs; 350 uint32_t num_texture_samples; 351 uint32_t output_position_index; 352 uint32_t output_clipvertex_index; 353 uint32_t output_color_index; 354 uint32_t output_point_size_index; 355 356 struct qreg undef; 357 enum qstage stage; 358 uint32_t num_temps; 359 struct list_head instructions; 360 uint32_t immediates[1024]; 361 362 struct list_head qpu_inst_list; 363 uint64_t *qpu_insts; 364 uint32_t qpu_inst_count; 365 uint32_t qpu_inst_size; 366 uint32_t num_inputs; 367 368 uint32_t program_id; 369 uint32_t variant_id; 370}; 371 372struct vc4_compile *qir_compile_init(void); 373void qir_compile_destroy(struct vc4_compile *c); 374struct qinst *qir_inst(enum qop op, struct qreg dst, 375 struct qreg src0, struct qreg src1); 376struct qinst *qir_inst4(enum qop op, struct qreg dst, 377 struct qreg a, 378 struct qreg b, 379 struct qreg c, 380 struct qreg d); 381void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); 382struct qreg qir_uniform(struct vc4_compile *c, 383 enum quniform_contents contents, 384 uint32_t data); 385void qir_reorder_uniforms(struct vc4_compile *c); 386void qir_emit(struct vc4_compile *c, struct qinst *inst); 387struct qreg qir_get_temp(struct vc4_compile *c); 388int qir_get_op_nsrc(enum qop qop); 389bool qir_reg_equals(struct qreg a, struct qreg b); 390bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 391bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 392bool qir_is_multi_instruction(struct qinst *inst); 393bool qir_is_tex(struct qinst *inst); 394bool qir_depends_on_flags(struct qinst *inst); 395bool qir_writes_r4(struct qinst *inst); 396bool qir_reads_r4(struct qinst *inst); 397bool qir_src_needs_a_file(struct qinst *inst); 398struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); 399 400void qir_dump(struct vc4_compile *c); 401void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 402const char *qir_get_stage_name(enum qstage stage); 403 404void qir_optimize(struct vc4_compile *c); 405bool qir_opt_algebraic(struct vc4_compile *c); 406bool qir_opt_constant_folding(struct vc4_compile *c); 407bool qir_opt_copy_propagation(struct vc4_compile *c); 408bool qir_opt_cse(struct vc4_compile *c); 409bool qir_opt_dead_code(struct vc4_compile *c); 410bool qir_opt_small_immediates(struct vc4_compile *c); 411bool qir_opt_vpm_writes(struct vc4_compile *c); 412void qir_lower_uniforms(struct vc4_compile *c); 413 414void qpu_schedule_instructions(struct vc4_compile *c); 415 416void qir_SF(struct vc4_compile *c, struct qreg src); 417 418static inline struct qreg 419qir_uniform_ui(struct vc4_compile *c, uint32_t ui) 420{ 421 return qir_uniform(c, QUNIFORM_CONSTANT, ui); 422} 423 424static inline struct qreg 425qir_uniform_f(struct vc4_compile *c, float f) 426{ 427 return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 428} 429 430#define QIR_ALU0(name) \ 431static inline struct qreg \ 432qir_##name(struct vc4_compile *c) \ 433{ \ 434 struct qreg t = qir_get_temp(c); \ 435 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 436 return t; \ 437} 438 439#define QIR_ALU1(name) \ 440static inline struct qreg \ 441qir_##name(struct vc4_compile *c, struct qreg a) \ 442{ \ 443 struct qreg t = qir_get_temp(c); \ 444 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 445 return t; \ 446} 447 448#define QIR_ALU2(name) \ 449static inline struct qreg \ 450qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 451{ \ 452 struct qreg t = qir_get_temp(c); \ 453 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 454 return t; \ 455} 456 457#define QIR_NODST_1(name) \ 458static inline void \ 459qir_##name(struct vc4_compile *c, struct qreg a) \ 460{ \ 461 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 462} 463 464#define QIR_NODST_2(name) \ 465static inline void \ 466qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 467{ \ 468 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 469} 470 471QIR_ALU1(MOV) 472QIR_ALU2(FADD) 473QIR_ALU2(FSUB) 474QIR_ALU2(FMUL) 475QIR_ALU2(MUL24) 476QIR_ALU1(SEL_X_0_ZS) 477QIR_ALU1(SEL_X_0_ZC) 478QIR_ALU1(SEL_X_0_NS) 479QIR_ALU1(SEL_X_0_NC) 480QIR_ALU2(SEL_X_Y_ZS) 481QIR_ALU2(SEL_X_Y_ZC) 482QIR_ALU2(SEL_X_Y_NS) 483QIR_ALU2(SEL_X_Y_NC) 484QIR_ALU2(FMIN) 485QIR_ALU2(FMAX) 486QIR_ALU2(FMINABS) 487QIR_ALU2(FMAXABS) 488QIR_ALU1(FTOI) 489QIR_ALU1(ITOF) 490 491QIR_ALU2(ADD) 492QIR_ALU2(SUB) 493QIR_ALU2(SHL) 494QIR_ALU2(SHR) 495QIR_ALU2(ASR) 496QIR_ALU2(MIN) 497QIR_ALU2(MAX) 498QIR_ALU2(AND) 499QIR_ALU2(OR) 500QIR_ALU2(XOR) 501QIR_ALU1(NOT) 502 503QIR_ALU1(RCP) 504QIR_ALU1(RSQ) 505QIR_ALU1(EXP2) 506QIR_ALU1(LOG2) 507QIR_ALU2(PACK_SCALED) 508QIR_ALU1(PACK_8888_F) 509QIR_ALU2(PACK_8A_F) 510QIR_ALU2(PACK_8B_F) 511QIR_ALU2(PACK_8C_F) 512QIR_ALU2(PACK_8D_F) 513QIR_ALU1(VARY_ADD_C) 514QIR_NODST_2(TEX_S) 515QIR_NODST_2(TEX_T) 516QIR_NODST_2(TEX_R) 517QIR_NODST_2(TEX_B) 518QIR_NODST_2(TEX_DIRECT) 519QIR_ALU0(FRAG_X) 520QIR_ALU0(FRAG_Y) 521QIR_ALU0(FRAG_Z) 522QIR_ALU0(FRAG_W) 523QIR_ALU0(FRAG_REV_FLAG) 524QIR_ALU0(TEX_RESULT) 525QIR_ALU0(TLB_COLOR_READ) 526QIR_NODST_1(TLB_COLOR_WRITE) 527QIR_NODST_1(TLB_Z_WRITE) 528QIR_NODST_1(TLB_DISCARD_SETUP) 529QIR_NODST_1(TLB_STENCIL_SETUP) 530 531static inline struct qreg 532qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 533{ 534 struct qreg t = qir_get_temp(c); 535 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 536 return t; 537} 538 539static inline struct qreg 540qir_SEL_X_0_COND(struct vc4_compile *c, int i) 541{ 542 struct qreg t = qir_get_temp(c); 543 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 544 return t; 545} 546 547static inline struct qreg 548qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 549{ 550 struct qreg t = qir_get_temp(c); 551 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 552 return t; 553} 554 555static inline struct qreg 556qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 557{ 558 struct qreg t = qir_get_temp(c); 559 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 560 return t; 561} 562 563static inline struct qreg 564qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 565{ 566 struct qreg t = qir_get_temp(c); 567 qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); 568 return t; 569} 570 571static inline struct qreg 572qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 573{ 574 struct qreg t = qir_get_temp(c); 575 qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); 576 return t; 577} 578 579static inline struct qreg 580qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) 581{ 582 struct qreg t = qir_get_temp(c); 583 qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); 584 return t; 585} 586 587static inline struct qreg 588qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 589{ 590 return qir_EXP2(c, qir_FMUL(c, 591 y, 592 qir_LOG2(c, x))); 593} 594 595static inline void 596qir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 597{ 598 static const struct qreg vpm = { QFILE_VPM, 0 }; 599 qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef)); 600} 601 602#endif /* VC4_QIR_H */ 603