vc4_qir.h revision 48a2154520351a22fc860efcdaa4329a51d29c8d
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <stdio.h> 28#include <stdlib.h> 29#include <stdbool.h> 30#include <stdint.h> 31#include <string.h> 32 33#include "util/u_simple_list.h" 34#include "tgsi/tgsi_parse.h" 35 36enum qfile { 37 QFILE_NULL, 38 QFILE_TEMP, 39 QFILE_VARY, 40 QFILE_UNIF, 41}; 42 43struct qreg { 44 enum qfile file; 45 uint32_t index; 46}; 47 48enum qop { 49 QOP_UNDEF, 50 QOP_MOV, 51 QOP_FADD, 52 QOP_FSUB, 53 QOP_FMUL, 54 QOP_MUL24, 55 QOP_FMIN, 56 QOP_FMAX, 57 QOP_FMINABS, 58 QOP_FMAXABS, 59 QOP_ADD, 60 QOP_SUB, 61 QOP_SHL, 62 QOP_SHR, 63 QOP_ASR, 64 QOP_MIN, 65 QOP_MAX, 66 QOP_AND, 67 QOP_OR, 68 QOP_XOR, 69 QOP_NOT, 70 71 /* Sets the flag register according to src. */ 72 QOP_SF, 73 74 /* Note: Orderings of these compares must be the same as in 75 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 76 * otherwise 0. */ 77 QOP_SEL_X_0_ZS, 78 QOP_SEL_X_0_ZC, 79 QOP_SEL_X_0_NS, 80 QOP_SEL_X_0_NC, 81 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 82 QOP_SEL_X_Y_ZS, 83 QOP_SEL_X_Y_ZC, 84 QOP_SEL_X_Y_NS, 85 QOP_SEL_X_Y_NC, 86 87 QOP_FTOI, 88 QOP_ITOF, 89 QOP_RCP, 90 QOP_RSQ, 91 QOP_EXP2, 92 QOP_LOG2, 93 QOP_VW_SETUP, 94 QOP_VR_SETUP, 95 QOP_PACK_SCALED, 96 QOP_PACK_COLORS, 97 QOP_VPM_WRITE, 98 QOP_VPM_READ, 99 QOP_TLB_DISCARD_SETUP, 100 QOP_TLB_STENCIL_SETUP, 101 QOP_TLB_Z_WRITE, 102 QOP_TLB_COLOR_WRITE, 103 QOP_TLB_COLOR_READ, 104 QOP_VARY_ADD_C, 105 106 QOP_FRAG_X, 107 QOP_FRAG_Y, 108 QOP_FRAG_Z, 109 QOP_FRAG_W, 110 QOP_FRAG_REV_FLAG, 111 112 QOP_UNPACK_8A_F, 113 QOP_UNPACK_8B_F, 114 QOP_UNPACK_8C_F, 115 QOP_UNPACK_8D_F, 116 QOP_UNPACK_16A_F, 117 QOP_UNPACK_16B_F, 118 119 QOP_UNPACK_8A_I, 120 QOP_UNPACK_8B_I, 121 QOP_UNPACK_8C_I, 122 QOP_UNPACK_8D_I, 123 QOP_UNPACK_16A_I, 124 QOP_UNPACK_16B_I, 125 126 /** Texture x coordinate parameter write */ 127 QOP_TEX_S, 128 /** Texture y coordinate parameter write */ 129 QOP_TEX_T, 130 /** Texture border color parameter or cube map z coordinate write */ 131 QOP_TEX_R, 132 /** Texture LOD bias parameter write */ 133 QOP_TEX_B, 134 135 /** 136 * Texture-unit 4-byte read with address provided direct in S 137 * cooordinate. 138 * 139 * The first operand is the offset from the start of the UBO, and the 140 * second is the uniform that has the UBO's base pointer. 141 */ 142 QOP_TEX_DIRECT, 143 144 /** 145 * Signal of texture read being necessary and then reading r4 into 146 * the destination 147 */ 148 QOP_TEX_RESULT, 149 QOP_R4_UNPACK_A, 150 QOP_R4_UNPACK_B, 151 QOP_R4_UNPACK_C, 152 QOP_R4_UNPACK_D 153}; 154 155struct simple_node { 156 struct simple_node *next; 157 struct simple_node *prev; 158}; 159 160struct queued_qpu_inst { 161 struct simple_node link; 162 uint64_t inst; 163}; 164 165struct qinst { 166 struct simple_node link; 167 168 enum qop op; 169 struct qreg dst; 170 struct qreg *src; 171}; 172 173enum qstage { 174 /** 175 * Coordinate shader, runs during binning, before the VS, and just 176 * outputs position. 177 */ 178 QSTAGE_COORD, 179 QSTAGE_VERT, 180 QSTAGE_FRAG, 181}; 182 183enum quniform_contents { 184 /** 185 * Indicates that a constant 32-bit value is copied from the program's 186 * uniform contents. 187 */ 188 QUNIFORM_CONSTANT, 189 /** 190 * Indicates that the program's uniform contents are used as an index 191 * into the GL uniform storage. 192 */ 193 QUNIFORM_UNIFORM, 194 195 /** @{ 196 * Scaling factors from clip coordinates to relative to the viewport 197 * center. 198 * 199 * This is used by the coordinate and vertex shaders to produce the 200 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 201 * point offsets from the viewport ccenter. 202 */ 203 QUNIFORM_VIEWPORT_X_SCALE, 204 QUNIFORM_VIEWPORT_Y_SCALE, 205 /** @} */ 206 207 QUNIFORM_VIEWPORT_Z_OFFSET, 208 QUNIFORM_VIEWPORT_Z_SCALE, 209 210 QUNIFORM_USER_CLIP_PLANE, 211 212 /** 213 * A reference to a texture config parameter 0 uniform. 214 * 215 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 216 * defines texture type, miplevels, and such. It will be found as a 217 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 218 */ 219 QUNIFORM_TEXTURE_CONFIG_P0, 220 221 /** 222 * A reference to a texture config parameter 1 uniform. 223 * 224 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 225 * defines texture width, height, filters, and wrap modes. It will be 226 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 227 * sequence. 228 */ 229 QUNIFORM_TEXTURE_CONFIG_P1, 230 231 /** A reference to a texture config parameter 2 cubemap stride uniform */ 232 QUNIFORM_TEXTURE_CONFIG_P2, 233 234 QUNIFORM_UBO_ADDR, 235 236 QUNIFORM_TEXRECT_SCALE_X, 237 QUNIFORM_TEXRECT_SCALE_Y, 238 239 QUNIFORM_TEXTURE_BORDER_COLOR, 240 241 QUNIFORM_BLEND_CONST_COLOR, 242 QUNIFORM_STENCIL, 243 244 QUNIFORM_ALPHA_REF, 245}; 246 247struct vc4_varying_semantic { 248 uint8_t semantic; 249 uint8_t index; 250 uint8_t swizzle; 251}; 252 253struct vc4_compiler_ubo_range { 254 /** 255 * offset in bytes from the start of the ubo where this range is 256 * uploaded. 257 * 258 * Only set once used is set. 259 */ 260 uint32_t dst_offset; 261 262 /** 263 * offset in bytes from the start of the gallium uniforms where the 264 * data comes from. 265 */ 266 uint32_t src_offset; 267 268 /** size in bytes of this ubo range */ 269 uint32_t size; 270 271 /** 272 * Set if this range is used by the shader for indirect uniforms 273 * access. 274 */ 275 bool used; 276}; 277 278struct vc4_compile { 279 struct vc4_context *vc4; 280 struct tgsi_parse_context parser; 281 struct qreg *temps; 282 /** 283 * Inputs to the shader, arranged by TGSI declaration order. 284 * 285 * Not all fragment shader QFILE_VARY reads are present in this array. 286 */ 287 struct qreg *inputs; 288 struct qreg *outputs; 289 struct qreg *consts; 290 struct qreg addr[4]; /* TGSI ARL destination. */ 291 uint32_t temps_array_size; 292 uint32_t inputs_array_size; 293 uint32_t outputs_array_size; 294 uint32_t uniforms_array_size; 295 uint32_t consts_array_size; 296 uint32_t num_consts; 297 298 struct vc4_compiler_ubo_range *ubo_ranges; 299 uint32_t ubo_ranges_array_size; 300 uint32_t num_ubo_ranges; 301 uint32_t next_ubo_dst_offset; 302 303 struct qreg line_x, point_x, point_y; 304 struct qreg discard; 305 306 /** 307 * Array of the TGSI semantics of all FS QFILE_VARY reads. 308 * 309 * This includes those that aren't part of the VPM varyings, like 310 * point/line coordinates. 311 */ 312 struct vc4_varying_semantic *input_semantics; 313 uint32_t num_input_semantics; 314 uint32_t input_semantics_array_size; 315 316 /** 317 * An entry per outputs[] in the VS indicating what the semantic of 318 * the output is. Used to emit from the VS in the order that the FS 319 * needs. 320 */ 321 struct vc4_varying_semantic *output_semantics; 322 323 struct pipe_shader_state *shader_state; 324 struct vc4_key *key; 325 struct vc4_fs_key *fs_key; 326 struct vc4_vs_key *vs_key; 327 328 uint32_t *uniform_data; 329 enum quniform_contents *uniform_contents; 330 uint32_t uniform_array_size; 331 uint32_t num_uniforms; 332 uint32_t num_outputs; 333 uint32_t num_texture_samples; 334 uint32_t output_position_index; 335 uint32_t output_clipvertex_index; 336 uint32_t output_color_index; 337 uint32_t output_point_size_index; 338 339 struct qreg undef; 340 enum qstage stage; 341 uint32_t num_temps; 342 struct simple_node instructions; 343 uint32_t immediates[1024]; 344 345 struct simple_node qpu_inst_list; 346 uint64_t *qpu_insts; 347 uint32_t qpu_inst_count; 348 uint32_t qpu_inst_size; 349 uint32_t num_inputs; 350 351 uint32_t program_id; 352 uint32_t variant_id; 353}; 354 355struct vc4_compile *qir_compile_init(void); 356void qir_compile_destroy(struct vc4_compile *c); 357struct qinst *qir_inst(enum qop op, struct qreg dst, 358 struct qreg src0, struct qreg src1); 359struct qinst *qir_inst4(enum qop op, struct qreg dst, 360 struct qreg a, 361 struct qreg b, 362 struct qreg c, 363 struct qreg d); 364void qir_remove_instruction(struct qinst *qinst); 365void qir_reorder_uniforms(struct vc4_compile *c); 366void qir_emit(struct vc4_compile *c, struct qinst *inst); 367struct qreg qir_get_temp(struct vc4_compile *c); 368int qir_get_op_nsrc(enum qop qop); 369bool qir_reg_equals(struct qreg a, struct qreg b); 370bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 371bool qir_depends_on_flags(struct qinst *inst); 372bool qir_writes_r4(struct qinst *inst); 373bool qir_reads_r4(struct qinst *inst); 374 375void qir_dump(struct vc4_compile *c); 376void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 377const char *qir_get_stage_name(enum qstage stage); 378 379void qir_optimize(struct vc4_compile *c); 380bool qir_opt_algebraic(struct vc4_compile *c); 381bool qir_opt_copy_propagation(struct vc4_compile *c); 382bool qir_opt_cse(struct vc4_compile *c); 383bool qir_opt_dead_code(struct vc4_compile *c); 384 385void qpu_schedule_instructions(struct vc4_compile *c); 386 387#define QIR_ALU0(name) \ 388static inline struct qreg \ 389qir_##name(struct vc4_compile *c) \ 390{ \ 391 struct qreg t = qir_get_temp(c); \ 392 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 393 return t; \ 394} 395 396#define QIR_ALU1(name) \ 397static inline struct qreg \ 398qir_##name(struct vc4_compile *c, struct qreg a) \ 399{ \ 400 struct qreg t = qir_get_temp(c); \ 401 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 402 return t; \ 403} 404 405#define QIR_ALU2(name) \ 406static inline struct qreg \ 407qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 408{ \ 409 struct qreg t = qir_get_temp(c); \ 410 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 411 return t; \ 412} 413 414#define QIR_NODST_1(name) \ 415static inline void \ 416qir_##name(struct vc4_compile *c, struct qreg a) \ 417{ \ 418 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 419} 420 421#define QIR_NODST_2(name) \ 422static inline void \ 423qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 424{ \ 425 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 426} 427 428QIR_ALU1(MOV) 429QIR_ALU2(FADD) 430QIR_ALU2(FSUB) 431QIR_ALU2(FMUL) 432QIR_ALU2(MUL24) 433QIR_NODST_1(SF) 434QIR_ALU1(SEL_X_0_ZS) 435QIR_ALU1(SEL_X_0_ZC) 436QIR_ALU1(SEL_X_0_NS) 437QIR_ALU1(SEL_X_0_NC) 438QIR_ALU2(SEL_X_Y_ZS) 439QIR_ALU2(SEL_X_Y_ZC) 440QIR_ALU2(SEL_X_Y_NS) 441QIR_ALU2(SEL_X_Y_NC) 442QIR_ALU2(FMIN) 443QIR_ALU2(FMAX) 444QIR_ALU2(FMINABS) 445QIR_ALU2(FMAXABS) 446QIR_ALU1(FTOI) 447QIR_ALU1(ITOF) 448 449QIR_ALU2(ADD) 450QIR_ALU2(SUB) 451QIR_ALU2(SHL) 452QIR_ALU2(SHR) 453QIR_ALU2(ASR) 454QIR_ALU2(MIN) 455QIR_ALU2(MAX) 456QIR_ALU2(AND) 457QIR_ALU2(OR) 458QIR_ALU2(XOR) 459QIR_ALU1(NOT) 460 461QIR_ALU1(RCP) 462QIR_ALU1(RSQ) 463QIR_ALU1(EXP2) 464QIR_ALU1(LOG2) 465QIR_ALU2(PACK_SCALED) 466QIR_ALU1(VARY_ADD_C) 467QIR_NODST_1(VPM_WRITE) 468QIR_NODST_2(TEX_S) 469QIR_NODST_2(TEX_T) 470QIR_NODST_2(TEX_R) 471QIR_NODST_2(TEX_B) 472QIR_NODST_2(TEX_DIRECT) 473QIR_ALU0(FRAG_X) 474QIR_ALU0(FRAG_Y) 475QIR_ALU0(FRAG_Z) 476QIR_ALU0(FRAG_W) 477QIR_ALU0(FRAG_REV_FLAG) 478QIR_ALU0(TEX_RESULT) 479QIR_ALU0(TLB_COLOR_READ) 480QIR_NODST_1(TLB_Z_WRITE) 481QIR_NODST_1(TLB_DISCARD_SETUP) 482QIR_NODST_1(TLB_STENCIL_SETUP) 483 484static inline struct qreg 485qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 486{ 487 struct qreg t = qir_get_temp(c); 488 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 489 return t; 490} 491 492static inline struct qreg 493qir_SEL_X_0_COND(struct vc4_compile *c, int i) 494{ 495 struct qreg t = qir_get_temp(c); 496 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 497 return t; 498} 499 500static inline struct qreg 501qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 502{ 503 struct qreg t = qir_get_temp(c); 504 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 505 return t; 506} 507 508static inline struct qreg 509qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 510{ 511 struct qreg t = qir_get_temp(c); 512 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 513 return t; 514} 515 516static inline struct qreg 517qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 518{ 519 struct qreg t = qir_get_temp(c); 520 qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); 521 return t; 522} 523 524static inline struct qreg 525qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 526{ 527 struct qreg t = qir_get_temp(c); 528 qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); 529 return t; 530} 531 532static inline struct qreg 533qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 534{ 535 return qir_EXP2(c, qir_FMUL(c, 536 y, 537 qir_LOG2(c, x))); 538} 539 540#endif /* VC4_QIR_H */ 541