vc4_qir.h revision 2142fd1f6f36ef9a384ef298fec02111dc826308
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <stdio.h> 28#include <stdlib.h> 29#include <stdbool.h> 30#include <stdint.h> 31#include <string.h> 32 33#include "util/u_simple_list.h" 34#include "tgsi/tgsi_parse.h" 35 36enum qfile { 37 QFILE_NULL, 38 QFILE_TEMP, 39 QFILE_VARY, 40 QFILE_UNIF, 41}; 42 43struct qreg { 44 enum qfile file; 45 uint32_t index; 46}; 47 48enum qop { 49 QOP_UNDEF, 50 QOP_MOV, 51 QOP_FADD, 52 QOP_FSUB, 53 QOP_FMUL, 54 QOP_MUL24, 55 QOP_FMIN, 56 QOP_FMAX, 57 QOP_FMINABS, 58 QOP_FMAXABS, 59 QOP_ADD, 60 QOP_SUB, 61 QOP_SHL, 62 QOP_SHR, 63 QOP_ASR, 64 QOP_MIN, 65 QOP_MAX, 66 QOP_AND, 67 QOP_OR, 68 QOP_XOR, 69 QOP_NOT, 70 71 /* Sets the flag register according to src. */ 72 QOP_SF, 73 74 /* Note: Orderings of these compares must be the same as in 75 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 76 * otherwise 0. */ 77 QOP_SEL_X_0_ZS, 78 QOP_SEL_X_0_ZC, 79 QOP_SEL_X_0_NS, 80 QOP_SEL_X_0_NC, 81 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 82 QOP_SEL_X_Y_ZS, 83 QOP_SEL_X_Y_ZC, 84 QOP_SEL_X_Y_NS, 85 QOP_SEL_X_Y_NC, 86 87 QOP_FTOI, 88 QOP_ITOF, 89 QOP_RCP, 90 QOP_RSQ, 91 QOP_EXP2, 92 QOP_LOG2, 93 QOP_VW_SETUP, 94 QOP_VR_SETUP, 95 QOP_PACK_SCALED, 96 QOP_PACK_COLORS, 97 QOP_VPM_WRITE, 98 QOP_VPM_READ, 99 QOP_TLB_DISCARD_SETUP, 100 QOP_TLB_STENCIL_SETUP, 101 QOP_TLB_Z_WRITE, 102 QOP_TLB_COLOR_WRITE, 103 QOP_TLB_COLOR_READ, 104 QOP_VARY_ADD_C, 105 106 QOP_FRAG_X, 107 QOP_FRAG_Y, 108 QOP_FRAG_Z, 109 QOP_FRAG_W, 110 QOP_FRAG_REV_FLAG, 111 112 QOP_UNPACK_8A_F, 113 QOP_UNPACK_8B_F, 114 QOP_UNPACK_8C_F, 115 QOP_UNPACK_8D_F, 116 117 QOP_UNPACK_8A_I, 118 QOP_UNPACK_8B_I, 119 QOP_UNPACK_8C_I, 120 QOP_UNPACK_8D_I, 121 122 /** Texture x coordinate parameter write */ 123 QOP_TEX_S, 124 /** Texture y coordinate parameter write */ 125 QOP_TEX_T, 126 /** Texture border color parameter or cube map z coordinate write */ 127 QOP_TEX_R, 128 /** Texture LOD bias parameter write */ 129 QOP_TEX_B, 130 131 /** 132 * Texture-unit 4-byte read with address provided direct in S 133 * cooordinate. 134 * 135 * The first operand is the offset from the start of the UBO, and the 136 * second is the uniform that has the UBO's base pointer. 137 */ 138 QOP_TEX_DIRECT, 139 140 /** 141 * Signal of texture read being necessary and then reading r4 into 142 * the destination 143 */ 144 QOP_TEX_RESULT, 145 QOP_R4_UNPACK_A, 146 QOP_R4_UNPACK_B, 147 QOP_R4_UNPACK_C, 148 QOP_R4_UNPACK_D 149}; 150 151struct simple_node { 152 struct simple_node *next; 153 struct simple_node *prev; 154}; 155 156struct queued_qpu_inst { 157 struct simple_node link; 158 uint64_t inst; 159}; 160 161struct qinst { 162 struct simple_node link; 163 164 enum qop op; 165 struct qreg dst; 166 struct qreg *src; 167}; 168 169enum qstage { 170 /** 171 * Coordinate shader, runs during binning, before the VS, and just 172 * outputs position. 173 */ 174 QSTAGE_COORD, 175 QSTAGE_VERT, 176 QSTAGE_FRAG, 177}; 178 179enum quniform_contents { 180 /** 181 * Indicates that a constant 32-bit value is copied from the program's 182 * uniform contents. 183 */ 184 QUNIFORM_CONSTANT, 185 /** 186 * Indicates that the program's uniform contents are used as an index 187 * into the GL uniform storage. 188 */ 189 QUNIFORM_UNIFORM, 190 191 /** @{ 192 * Scaling factors from clip coordinates to relative to the viewport 193 * center. 194 * 195 * This is used by the coordinate and vertex shaders to produce the 196 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 197 * point offsets from the viewport ccenter. 198 */ 199 QUNIFORM_VIEWPORT_X_SCALE, 200 QUNIFORM_VIEWPORT_Y_SCALE, 201 /** @} */ 202 203 QUNIFORM_VIEWPORT_Z_OFFSET, 204 QUNIFORM_VIEWPORT_Z_SCALE, 205 206 QUNIFORM_USER_CLIP_PLANE, 207 208 /** 209 * A reference to a texture config parameter 0 uniform. 210 * 211 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 212 * defines texture type, miplevels, and such. It will be found as a 213 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 214 */ 215 QUNIFORM_TEXTURE_CONFIG_P0, 216 217 /** 218 * A reference to a texture config parameter 1 uniform. 219 * 220 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 221 * defines texture width, height, filters, and wrap modes. It will be 222 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 223 * sequence. 224 */ 225 QUNIFORM_TEXTURE_CONFIG_P1, 226 227 /** A reference to a texture config parameter 2 cubemap stride uniform */ 228 QUNIFORM_TEXTURE_CONFIG_P2, 229 230 QUNIFORM_UBO_ADDR, 231 232 QUNIFORM_TEXRECT_SCALE_X, 233 QUNIFORM_TEXRECT_SCALE_Y, 234 235 QUNIFORM_TEXTURE_BORDER_COLOR, 236 237 QUNIFORM_BLEND_CONST_COLOR, 238 QUNIFORM_STENCIL, 239 240 QUNIFORM_ALPHA_REF, 241}; 242 243struct vc4_varying_semantic { 244 uint8_t semantic; 245 uint8_t index; 246 uint8_t swizzle; 247}; 248 249struct vc4_compiler_ubo_range { 250 /** 251 * offset in bytes from the start of the ubo where this range is 252 * uploaded. 253 * 254 * Only set once used is set. 255 */ 256 uint32_t dst_offset; 257 258 /** 259 * offset in bytes from the start of the gallium uniforms where the 260 * data comes from. 261 */ 262 uint32_t src_offset; 263 264 /** size in bytes of this ubo range */ 265 uint32_t size; 266 267 /** 268 * Set if this range is used by the shader for indirect uniforms 269 * access. 270 */ 271 bool used; 272}; 273 274struct vc4_compile { 275 struct vc4_context *vc4; 276 struct tgsi_parse_context parser; 277 struct qreg *temps; 278 /** 279 * Inputs to the shader, arranged by TGSI declaration order. 280 * 281 * Not all fragment shader QFILE_VARY reads are present in this array. 282 */ 283 struct qreg *inputs; 284 struct qreg *outputs; 285 struct qreg *consts; 286 struct qreg addr[4]; /* TGSI ARL destination. */ 287 uint32_t temps_array_size; 288 uint32_t inputs_array_size; 289 uint32_t outputs_array_size; 290 uint32_t uniforms_array_size; 291 uint32_t consts_array_size; 292 uint32_t num_consts; 293 294 struct vc4_compiler_ubo_range *ubo_ranges; 295 uint32_t ubo_ranges_array_size; 296 uint32_t num_ubo_ranges; 297 uint32_t next_ubo_dst_offset; 298 299 struct qreg line_x, point_x, point_y; 300 struct qreg discard; 301 302 /** 303 * Array of the TGSI semantics of all FS QFILE_VARY reads. 304 * 305 * This includes those that aren't part of the VPM varyings, like 306 * point/line coordinates. 307 */ 308 struct vc4_varying_semantic *input_semantics; 309 uint32_t num_input_semantics; 310 uint32_t input_semantics_array_size; 311 312 /** 313 * An entry per outputs[] in the VS indicating what the semantic of 314 * the output is. Used to emit from the VS in the order that the FS 315 * needs. 316 */ 317 struct vc4_varying_semantic *output_semantics; 318 319 struct pipe_shader_state *shader_state; 320 struct vc4_key *key; 321 struct vc4_fs_key *fs_key; 322 struct vc4_vs_key *vs_key; 323 324 uint32_t *uniform_data; 325 enum quniform_contents *uniform_contents; 326 uint32_t uniform_array_size; 327 uint32_t num_uniforms; 328 uint32_t num_outputs; 329 uint32_t num_texture_samples; 330 uint32_t output_position_index; 331 uint32_t output_clipvertex_index; 332 uint32_t output_color_index; 333 uint32_t output_point_size_index; 334 335 struct qreg undef; 336 enum qstage stage; 337 uint32_t num_temps; 338 struct simple_node instructions; 339 uint32_t immediates[1024]; 340 341 struct simple_node qpu_inst_list; 342 uint64_t *qpu_insts; 343 uint32_t qpu_inst_count; 344 uint32_t qpu_inst_size; 345 uint32_t num_inputs; 346 347 uint32_t program_id; 348 uint32_t variant_id; 349}; 350 351struct vc4_compile *qir_compile_init(void); 352void qir_compile_destroy(struct vc4_compile *c); 353struct qinst *qir_inst(enum qop op, struct qreg dst, 354 struct qreg src0, struct qreg src1); 355struct qinst *qir_inst4(enum qop op, struct qreg dst, 356 struct qreg a, 357 struct qreg b, 358 struct qreg c, 359 struct qreg d); 360void qir_remove_instruction(struct qinst *qinst); 361void qir_reorder_uniforms(struct vc4_compile *c); 362void qir_emit(struct vc4_compile *c, struct qinst *inst); 363struct qreg qir_get_temp(struct vc4_compile *c); 364int qir_get_op_nsrc(enum qop qop); 365bool qir_reg_equals(struct qreg a, struct qreg b); 366bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 367bool qir_depends_on_flags(struct qinst *inst); 368bool qir_writes_r4(struct qinst *inst); 369bool qir_reads_r4(struct qinst *inst); 370 371void qir_dump(struct vc4_compile *c); 372void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 373const char *qir_get_stage_name(enum qstage stage); 374 375void qir_optimize(struct vc4_compile *c); 376bool qir_opt_algebraic(struct vc4_compile *c); 377bool qir_opt_copy_propagation(struct vc4_compile *c); 378bool qir_opt_cse(struct vc4_compile *c); 379bool qir_opt_dead_code(struct vc4_compile *c); 380 381void qpu_schedule_instructions(struct vc4_compile *c); 382 383#define QIR_ALU0(name) \ 384static inline struct qreg \ 385qir_##name(struct vc4_compile *c) \ 386{ \ 387 struct qreg t = qir_get_temp(c); \ 388 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 389 return t; \ 390} 391 392#define QIR_ALU1(name) \ 393static inline struct qreg \ 394qir_##name(struct vc4_compile *c, struct qreg a) \ 395{ \ 396 struct qreg t = qir_get_temp(c); \ 397 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 398 return t; \ 399} 400 401#define QIR_ALU2(name) \ 402static inline struct qreg \ 403qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 404{ \ 405 struct qreg t = qir_get_temp(c); \ 406 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 407 return t; \ 408} 409 410#define QIR_NODST_1(name) \ 411static inline void \ 412qir_##name(struct vc4_compile *c, struct qreg a) \ 413{ \ 414 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 415} 416 417#define QIR_NODST_2(name) \ 418static inline void \ 419qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 420{ \ 421 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 422} 423 424QIR_ALU1(MOV) 425QIR_ALU2(FADD) 426QIR_ALU2(FSUB) 427QIR_ALU2(FMUL) 428QIR_ALU2(MUL24) 429QIR_NODST_1(SF) 430QIR_ALU1(SEL_X_0_ZS) 431QIR_ALU1(SEL_X_0_ZC) 432QIR_ALU1(SEL_X_0_NS) 433QIR_ALU1(SEL_X_0_NC) 434QIR_ALU2(SEL_X_Y_ZS) 435QIR_ALU2(SEL_X_Y_ZC) 436QIR_ALU2(SEL_X_Y_NS) 437QIR_ALU2(SEL_X_Y_NC) 438QIR_ALU2(FMIN) 439QIR_ALU2(FMAX) 440QIR_ALU2(FMINABS) 441QIR_ALU2(FMAXABS) 442QIR_ALU1(FTOI) 443QIR_ALU1(ITOF) 444 445QIR_ALU2(ADD) 446QIR_ALU2(SUB) 447QIR_ALU2(SHL) 448QIR_ALU2(SHR) 449QIR_ALU2(ASR) 450QIR_ALU2(MIN) 451QIR_ALU2(MAX) 452QIR_ALU2(AND) 453QIR_ALU2(OR) 454QIR_ALU2(XOR) 455QIR_ALU1(NOT) 456 457QIR_ALU1(RCP) 458QIR_ALU1(RSQ) 459QIR_ALU1(EXP2) 460QIR_ALU1(LOG2) 461QIR_ALU2(PACK_SCALED) 462QIR_ALU1(VARY_ADD_C) 463QIR_NODST_1(VPM_WRITE) 464QIR_NODST_2(TEX_S) 465QIR_NODST_2(TEX_T) 466QIR_NODST_2(TEX_R) 467QIR_NODST_2(TEX_B) 468QIR_NODST_2(TEX_DIRECT) 469QIR_ALU0(FRAG_X) 470QIR_ALU0(FRAG_Y) 471QIR_ALU0(FRAG_Z) 472QIR_ALU0(FRAG_W) 473QIR_ALU0(FRAG_REV_FLAG) 474QIR_ALU0(TEX_RESULT) 475QIR_ALU0(TLB_COLOR_READ) 476QIR_NODST_1(TLB_Z_WRITE) 477QIR_NODST_1(TLB_DISCARD_SETUP) 478QIR_NODST_1(TLB_STENCIL_SETUP) 479 480static inline struct qreg 481qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 482{ 483 struct qreg t = qir_get_temp(c); 484 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 485 return t; 486} 487 488static inline struct qreg 489qir_SEL_X_0_COND(struct vc4_compile *c, int i) 490{ 491 struct qreg t = qir_get_temp(c); 492 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 493 return t; 494} 495 496static inline struct qreg 497qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 498{ 499 struct qreg t = qir_get_temp(c); 500 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 501 return t; 502} 503 504static inline struct qreg 505qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 506{ 507 struct qreg t = qir_get_temp(c); 508 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 509 return t; 510} 511 512static inline struct qreg 513qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 514{ 515 return qir_EXP2(c, qir_FMUL(c, 516 y, 517 qir_LOG2(c, x))); 518} 519 520#endif /* VC4_QIR_H */ 521