vc4_qir.h revision 14dc281c1332518b6144718e1fb3845abbe23ff7
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "util/simple_list.h" 36#include "tgsi/tgsi_parse.h" 37 38enum qfile { 39 QFILE_NULL, 40 QFILE_TEMP, 41 QFILE_VARY, 42 QFILE_UNIF, 43 QFILE_VPM, 44 45 /** 46 * Stores an immediate value in the index field that can be turned 47 * into a small immediate field by qpu_encode_small_immediate(). 48 */ 49 QFILE_SMALL_IMM, 50}; 51 52struct qreg { 53 enum qfile file; 54 uint32_t index; 55}; 56 57enum qop { 58 QOP_UNDEF, 59 QOP_MOV, 60 QOP_FADD, 61 QOP_FSUB, 62 QOP_FMUL, 63 QOP_MUL24, 64 QOP_FMIN, 65 QOP_FMAX, 66 QOP_FMINABS, 67 QOP_FMAXABS, 68 QOP_ADD, 69 QOP_SUB, 70 QOP_SHL, 71 QOP_SHR, 72 QOP_ASR, 73 QOP_MIN, 74 QOP_MAX, 75 QOP_AND, 76 QOP_OR, 77 QOP_XOR, 78 QOP_NOT, 79 80 /* Note: Orderings of these compares must be the same as in 81 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 82 * otherwise 0. */ 83 QOP_SEL_X_0_ZS, 84 QOP_SEL_X_0_ZC, 85 QOP_SEL_X_0_NS, 86 QOP_SEL_X_0_NC, 87 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 88 QOP_SEL_X_Y_ZS, 89 QOP_SEL_X_Y_ZC, 90 QOP_SEL_X_Y_NS, 91 QOP_SEL_X_Y_NC, 92 93 QOP_FTOI, 94 QOP_ITOF, 95 QOP_RCP, 96 QOP_RSQ, 97 QOP_EXP2, 98 QOP_LOG2, 99 QOP_VW_SETUP, 100 QOP_VR_SETUP, 101 QOP_PACK_SCALED, 102 QOP_PACK_8888_F, 103 QOP_PACK_8A_F, 104 QOP_PACK_8B_F, 105 QOP_PACK_8C_F, 106 QOP_PACK_8D_F, 107 QOP_TLB_DISCARD_SETUP, 108 QOP_TLB_STENCIL_SETUP, 109 QOP_TLB_Z_WRITE, 110 QOP_TLB_COLOR_WRITE, 111 QOP_TLB_COLOR_READ, 112 QOP_VARY_ADD_C, 113 114 QOP_FRAG_X, 115 QOP_FRAG_Y, 116 QOP_FRAG_Z, 117 QOP_FRAG_W, 118 QOP_FRAG_REV_FLAG, 119 120 QOP_UNPACK_8A_F, 121 QOP_UNPACK_8B_F, 122 QOP_UNPACK_8C_F, 123 QOP_UNPACK_8D_F, 124 QOP_UNPACK_16A_F, 125 QOP_UNPACK_16B_F, 126 127 QOP_UNPACK_8A_I, 128 QOP_UNPACK_8B_I, 129 QOP_UNPACK_8C_I, 130 QOP_UNPACK_8D_I, 131 QOP_UNPACK_16A_I, 132 QOP_UNPACK_16B_I, 133 134 /** Texture x coordinate parameter write */ 135 QOP_TEX_S, 136 /** Texture y coordinate parameter write */ 137 QOP_TEX_T, 138 /** Texture border color parameter or cube map z coordinate write */ 139 QOP_TEX_R, 140 /** Texture LOD bias parameter write */ 141 QOP_TEX_B, 142 143 /** 144 * Texture-unit 4-byte read with address provided direct in S 145 * cooordinate. 146 * 147 * The first operand is the offset from the start of the UBO, and the 148 * second is the uniform that has the UBO's base pointer. 149 */ 150 QOP_TEX_DIRECT, 151 152 /** 153 * Signal of texture read being necessary and then reading r4 into 154 * the destination 155 */ 156 QOP_TEX_RESULT, 157 QOP_R4_UNPACK_A, 158 QOP_R4_UNPACK_B, 159 QOP_R4_UNPACK_C, 160 QOP_R4_UNPACK_D 161}; 162 163struct queued_qpu_inst { 164 struct simple_node link; 165 uint64_t inst; 166}; 167 168struct qinst { 169 struct simple_node link; 170 171 enum qop op; 172 struct qreg dst; 173 struct qreg *src; 174 bool sf; 175}; 176 177enum qstage { 178 /** 179 * Coordinate shader, runs during binning, before the VS, and just 180 * outputs position. 181 */ 182 QSTAGE_COORD, 183 QSTAGE_VERT, 184 QSTAGE_FRAG, 185}; 186 187enum quniform_contents { 188 /** 189 * Indicates that a constant 32-bit value is copied from the program's 190 * uniform contents. 191 */ 192 QUNIFORM_CONSTANT, 193 /** 194 * Indicates that the program's uniform contents are used as an index 195 * into the GL uniform storage. 196 */ 197 QUNIFORM_UNIFORM, 198 199 /** @{ 200 * Scaling factors from clip coordinates to relative to the viewport 201 * center. 202 * 203 * This is used by the coordinate and vertex shaders to produce the 204 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 205 * point offsets from the viewport ccenter. 206 */ 207 QUNIFORM_VIEWPORT_X_SCALE, 208 QUNIFORM_VIEWPORT_Y_SCALE, 209 /** @} */ 210 211 QUNIFORM_VIEWPORT_Z_OFFSET, 212 QUNIFORM_VIEWPORT_Z_SCALE, 213 214 QUNIFORM_USER_CLIP_PLANE, 215 216 /** 217 * A reference to a texture config parameter 0 uniform. 218 * 219 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 220 * defines texture type, miplevels, and such. It will be found as a 221 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 222 */ 223 QUNIFORM_TEXTURE_CONFIG_P0, 224 225 /** 226 * A reference to a texture config parameter 1 uniform. 227 * 228 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 229 * defines texture width, height, filters, and wrap modes. It will be 230 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 231 * sequence. 232 */ 233 QUNIFORM_TEXTURE_CONFIG_P1, 234 235 /** A reference to a texture config parameter 2 cubemap stride uniform */ 236 QUNIFORM_TEXTURE_CONFIG_P2, 237 238 QUNIFORM_UBO_ADDR, 239 240 QUNIFORM_TEXRECT_SCALE_X, 241 QUNIFORM_TEXRECT_SCALE_Y, 242 243 QUNIFORM_TEXTURE_BORDER_COLOR, 244 245 QUNIFORM_BLEND_CONST_COLOR, 246 QUNIFORM_STENCIL, 247 248 QUNIFORM_ALPHA_REF, 249}; 250 251struct vc4_varying_semantic { 252 uint8_t semantic; 253 uint8_t index; 254 uint8_t swizzle; 255}; 256 257struct vc4_compiler_ubo_range { 258 /** 259 * offset in bytes from the start of the ubo where this range is 260 * uploaded. 261 * 262 * Only set once used is set. 263 */ 264 uint32_t dst_offset; 265 266 /** 267 * offset in bytes from the start of the gallium uniforms where the 268 * data comes from. 269 */ 270 uint32_t src_offset; 271 272 /** size in bytes of this ubo range */ 273 uint32_t size; 274 275 /** 276 * Set if this range is used by the shader for indirect uniforms 277 * access. 278 */ 279 bool used; 280}; 281 282struct vc4_compile { 283 struct vc4_context *vc4; 284 struct tgsi_parse_context parser; 285 struct qreg *temps; 286 /** 287 * Inputs to the shader, arranged by TGSI declaration order. 288 * 289 * Not all fragment shader QFILE_VARY reads are present in this array. 290 */ 291 struct qreg *inputs; 292 struct qreg *outputs; 293 struct qreg *consts; 294 struct qreg addr[4]; /* TGSI ARL destination. */ 295 uint32_t temps_array_size; 296 uint32_t inputs_array_size; 297 uint32_t outputs_array_size; 298 uint32_t uniforms_array_size; 299 uint32_t consts_array_size; 300 uint32_t num_consts; 301 302 struct vc4_compiler_ubo_range *ubo_ranges; 303 uint32_t ubo_ranges_array_size; 304 uint32_t num_ubo_ranges; 305 uint32_t next_ubo_dst_offset; 306 307 struct qreg line_x, point_x, point_y; 308 struct qreg discard; 309 310 uint8_t vattr_sizes[8]; 311 312 /** 313 * Array of the TGSI semantics of all FS QFILE_VARY reads. 314 * 315 * This includes those that aren't part of the VPM varyings, like 316 * point/line coordinates. 317 */ 318 struct vc4_varying_semantic *input_semantics; 319 uint32_t num_input_semantics; 320 uint32_t input_semantics_array_size; 321 322 /** 323 * An entry per outputs[] in the VS indicating what the semantic of 324 * the output is. Used to emit from the VS in the order that the FS 325 * needs. 326 */ 327 struct vc4_varying_semantic *output_semantics; 328 329 struct pipe_shader_state *shader_state; 330 struct vc4_key *key; 331 struct vc4_fs_key *fs_key; 332 struct vc4_vs_key *vs_key; 333 334 uint32_t *uniform_data; 335 enum quniform_contents *uniform_contents; 336 uint32_t uniform_array_size; 337 uint32_t num_uniforms; 338 uint32_t num_outputs; 339 uint32_t num_texture_samples; 340 uint32_t output_position_index; 341 uint32_t output_clipvertex_index; 342 uint32_t output_color_index; 343 uint32_t output_point_size_index; 344 345 struct qreg undef; 346 enum qstage stage; 347 uint32_t num_temps; 348 struct simple_node instructions; 349 uint32_t immediates[1024]; 350 351 struct simple_node qpu_inst_list; 352 uint64_t *qpu_insts; 353 uint32_t qpu_inst_count; 354 uint32_t qpu_inst_size; 355 uint32_t num_inputs; 356 357 uint32_t program_id; 358 uint32_t variant_id; 359}; 360 361struct vc4_compile *qir_compile_init(void); 362void qir_compile_destroy(struct vc4_compile *c); 363struct qinst *qir_inst(enum qop op, struct qreg dst, 364 struct qreg src0, struct qreg src1); 365struct qinst *qir_inst4(enum qop op, struct qreg dst, 366 struct qreg a, 367 struct qreg b, 368 struct qreg c, 369 struct qreg d); 370void qir_remove_instruction(struct qinst *qinst); 371void qir_reorder_uniforms(struct vc4_compile *c); 372void qir_emit(struct vc4_compile *c, struct qinst *inst); 373struct qreg qir_get_temp(struct vc4_compile *c); 374int qir_get_op_nsrc(enum qop qop); 375bool qir_reg_equals(struct qreg a, struct qreg b); 376bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 377bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 378bool qir_is_multi_instruction(struct qinst *inst); 379bool qir_is_tex(struct qinst *inst); 380bool qir_depends_on_flags(struct qinst *inst); 381bool qir_writes_r4(struct qinst *inst); 382bool qir_reads_r4(struct qinst *inst); 383bool qir_src_needs_a_file(struct qinst *inst); 384struct qreg qir_follow_movs(struct qinst **defs, struct qreg reg); 385 386void qir_dump(struct vc4_compile *c); 387void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 388const char *qir_get_stage_name(enum qstage stage); 389 390void qir_optimize(struct vc4_compile *c); 391bool qir_opt_algebraic(struct vc4_compile *c); 392bool qir_opt_copy_propagation(struct vc4_compile *c); 393bool qir_opt_cse(struct vc4_compile *c); 394bool qir_opt_dead_code(struct vc4_compile *c); 395bool qir_opt_small_immediates(struct vc4_compile *c); 396bool qir_opt_vpm_writes(struct vc4_compile *c); 397void qir_lower_uniforms(struct vc4_compile *c); 398 399void qpu_schedule_instructions(struct vc4_compile *c); 400 401void qir_SF(struct vc4_compile *c, struct qreg src); 402 403#define QIR_ALU0(name) \ 404static inline struct qreg \ 405qir_##name(struct vc4_compile *c) \ 406{ \ 407 struct qreg t = qir_get_temp(c); \ 408 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 409 return t; \ 410} 411 412#define QIR_ALU1(name) \ 413static inline struct qreg \ 414qir_##name(struct vc4_compile *c, struct qreg a) \ 415{ \ 416 struct qreg t = qir_get_temp(c); \ 417 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 418 return t; \ 419} 420 421#define QIR_ALU2(name) \ 422static inline struct qreg \ 423qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 424{ \ 425 struct qreg t = qir_get_temp(c); \ 426 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 427 return t; \ 428} 429 430#define QIR_NODST_1(name) \ 431static inline void \ 432qir_##name(struct vc4_compile *c, struct qreg a) \ 433{ \ 434 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 435} 436 437#define QIR_NODST_2(name) \ 438static inline void \ 439qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 440{ \ 441 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 442} 443 444QIR_ALU1(MOV) 445QIR_ALU2(FADD) 446QIR_ALU2(FSUB) 447QIR_ALU2(FMUL) 448QIR_ALU2(MUL24) 449QIR_ALU1(SEL_X_0_ZS) 450QIR_ALU1(SEL_X_0_ZC) 451QIR_ALU1(SEL_X_0_NS) 452QIR_ALU1(SEL_X_0_NC) 453QIR_ALU2(SEL_X_Y_ZS) 454QIR_ALU2(SEL_X_Y_ZC) 455QIR_ALU2(SEL_X_Y_NS) 456QIR_ALU2(SEL_X_Y_NC) 457QIR_ALU2(FMIN) 458QIR_ALU2(FMAX) 459QIR_ALU2(FMINABS) 460QIR_ALU2(FMAXABS) 461QIR_ALU1(FTOI) 462QIR_ALU1(ITOF) 463 464QIR_ALU2(ADD) 465QIR_ALU2(SUB) 466QIR_ALU2(SHL) 467QIR_ALU2(SHR) 468QIR_ALU2(ASR) 469QIR_ALU2(MIN) 470QIR_ALU2(MAX) 471QIR_ALU2(AND) 472QIR_ALU2(OR) 473QIR_ALU2(XOR) 474QIR_ALU1(NOT) 475 476QIR_ALU1(RCP) 477QIR_ALU1(RSQ) 478QIR_ALU1(EXP2) 479QIR_ALU1(LOG2) 480QIR_ALU2(PACK_SCALED) 481QIR_ALU1(PACK_8888_F) 482QIR_ALU2(PACK_8A_F) 483QIR_ALU2(PACK_8B_F) 484QIR_ALU2(PACK_8C_F) 485QIR_ALU2(PACK_8D_F) 486QIR_ALU1(VARY_ADD_C) 487QIR_NODST_2(TEX_S) 488QIR_NODST_2(TEX_T) 489QIR_NODST_2(TEX_R) 490QIR_NODST_2(TEX_B) 491QIR_NODST_2(TEX_DIRECT) 492QIR_ALU0(FRAG_X) 493QIR_ALU0(FRAG_Y) 494QIR_ALU0(FRAG_Z) 495QIR_ALU0(FRAG_W) 496QIR_ALU0(FRAG_REV_FLAG) 497QIR_ALU0(TEX_RESULT) 498QIR_ALU0(TLB_COLOR_READ) 499QIR_NODST_1(TLB_Z_WRITE) 500QIR_NODST_1(TLB_DISCARD_SETUP) 501QIR_NODST_1(TLB_STENCIL_SETUP) 502 503static inline struct qreg 504qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 505{ 506 struct qreg t = qir_get_temp(c); 507 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 508 return t; 509} 510 511static inline struct qreg 512qir_SEL_X_0_COND(struct vc4_compile *c, int i) 513{ 514 struct qreg t = qir_get_temp(c); 515 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 516 return t; 517} 518 519static inline struct qreg 520qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 521{ 522 struct qreg t = qir_get_temp(c); 523 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 524 return t; 525} 526 527static inline struct qreg 528qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 529{ 530 struct qreg t = qir_get_temp(c); 531 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 532 return t; 533} 534 535static inline struct qreg 536qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 537{ 538 struct qreg t = qir_get_temp(c); 539 qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); 540 return t; 541} 542 543static inline struct qreg 544qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 545{ 546 struct qreg t = qir_get_temp(c); 547 qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); 548 return t; 549} 550 551static inline struct qreg 552qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) 553{ 554 struct qreg t = qir_get_temp(c); 555 qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); 556 return t; 557} 558 559static inline struct qreg 560qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 561{ 562 return qir_EXP2(c, qir_FMUL(c, 563 y, 564 qir_LOG2(c, x))); 565} 566 567static inline void 568qir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 569{ 570 static const struct qreg vpm = { QFILE_VPM, 0 }; 571 qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef)); 572} 573 574#endif /* VC4_QIR_H */ 575