vc4_qir.h revision 13ddd48b97474c261ef2d7412629748d6d91f2ad
1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef VC4_QIR_H 25#define VC4_QIR_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "glsl/nir/nir.h" 36#include "util/list.h" 37#include "util/u_math.h" 38 39#include "vc4_screen.h" 40#include "pipe/p_state.h" 41 42enum qfile { 43 QFILE_NULL, 44 QFILE_TEMP, 45 QFILE_VARY, 46 QFILE_UNIF, 47 QFILE_VPM, 48 49 /** 50 * Stores an immediate value in the index field that can be turned 51 * into a small immediate field by qpu_encode_small_immediate(). 52 */ 53 QFILE_SMALL_IMM, 54}; 55 56struct qreg { 57 enum qfile file; 58 uint32_t index; 59}; 60 61enum qop { 62 QOP_UNDEF, 63 QOP_MOV, 64 QOP_FADD, 65 QOP_FSUB, 66 QOP_FMUL, 67 QOP_MUL24, 68 QOP_FMIN, 69 QOP_FMAX, 70 QOP_FMINABS, 71 QOP_FMAXABS, 72 QOP_ADD, 73 QOP_SUB, 74 QOP_SHL, 75 QOP_SHR, 76 QOP_ASR, 77 QOP_MIN, 78 QOP_MAX, 79 QOP_AND, 80 QOP_OR, 81 QOP_XOR, 82 QOP_NOT, 83 84 /* Note: Orderings of these compares must be the same as in 85 * qpu_defines.h. Selects the src[0] if the ns flag bit is set, 86 * otherwise 0. */ 87 QOP_SEL_X_0_ZS, 88 QOP_SEL_X_0_ZC, 89 QOP_SEL_X_0_NS, 90 QOP_SEL_X_0_NC, 91 /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */ 92 QOP_SEL_X_Y_ZS, 93 QOP_SEL_X_Y_ZC, 94 QOP_SEL_X_Y_NS, 95 QOP_SEL_X_Y_NC, 96 97 QOP_FTOI, 98 QOP_ITOF, 99 QOP_RCP, 100 QOP_RSQ, 101 QOP_EXP2, 102 QOP_LOG2, 103 QOP_VW_SETUP, 104 QOP_VR_SETUP, 105 QOP_PACK_SCALED, 106 QOP_PACK_8888_F, 107 QOP_PACK_8A_F, 108 QOP_PACK_8B_F, 109 QOP_PACK_8C_F, 110 QOP_PACK_8D_F, 111 QOP_TLB_DISCARD_SETUP, 112 QOP_TLB_STENCIL_SETUP, 113 QOP_TLB_Z_WRITE, 114 QOP_TLB_COLOR_WRITE, 115 QOP_TLB_COLOR_READ, 116 QOP_VARY_ADD_C, 117 118 QOP_FRAG_X, 119 QOP_FRAG_Y, 120 QOP_FRAG_Z, 121 QOP_FRAG_W, 122 QOP_FRAG_REV_FLAG, 123 124 QOP_UNPACK_8A_F, 125 QOP_UNPACK_8B_F, 126 QOP_UNPACK_8C_F, 127 QOP_UNPACK_8D_F, 128 QOP_UNPACK_16A_F, 129 QOP_UNPACK_16B_F, 130 131 QOP_UNPACK_8A_I, 132 QOP_UNPACK_8B_I, 133 QOP_UNPACK_8C_I, 134 QOP_UNPACK_8D_I, 135 QOP_UNPACK_16A_I, 136 QOP_UNPACK_16B_I, 137 138 /** Texture x coordinate parameter write */ 139 QOP_TEX_S, 140 /** Texture y coordinate parameter write */ 141 QOP_TEX_T, 142 /** Texture border color parameter or cube map z coordinate write */ 143 QOP_TEX_R, 144 /** Texture LOD bias parameter write */ 145 QOP_TEX_B, 146 147 /** 148 * Texture-unit 4-byte read with address provided direct in S 149 * cooordinate. 150 * 151 * The first operand is the offset from the start of the UBO, and the 152 * second is the uniform that has the UBO's base pointer. 153 */ 154 QOP_TEX_DIRECT, 155 156 /** 157 * Signal of texture read being necessary and then reading r4 into 158 * the destination 159 */ 160 QOP_TEX_RESULT, 161 QOP_R4_UNPACK_A, 162 QOP_R4_UNPACK_B, 163 QOP_R4_UNPACK_C, 164 QOP_R4_UNPACK_D 165}; 166 167struct queued_qpu_inst { 168 struct list_head link; 169 uint64_t inst; 170}; 171 172struct qinst { 173 struct list_head link; 174 175 enum qop op; 176 struct qreg dst; 177 struct qreg *src; 178 bool sf; 179}; 180 181enum qstage { 182 /** 183 * Coordinate shader, runs during binning, before the VS, and just 184 * outputs position. 185 */ 186 QSTAGE_COORD, 187 QSTAGE_VERT, 188 QSTAGE_FRAG, 189}; 190 191enum quniform_contents { 192 /** 193 * Indicates that a constant 32-bit value is copied from the program's 194 * uniform contents. 195 */ 196 QUNIFORM_CONSTANT, 197 /** 198 * Indicates that the program's uniform contents are used as an index 199 * into the GL uniform storage. 200 */ 201 QUNIFORM_UNIFORM, 202 203 /** @{ 204 * Scaling factors from clip coordinates to relative to the viewport 205 * center. 206 * 207 * This is used by the coordinate and vertex shaders to produce the 208 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 209 * point offsets from the viewport ccenter. 210 */ 211 QUNIFORM_VIEWPORT_X_SCALE, 212 QUNIFORM_VIEWPORT_Y_SCALE, 213 /** @} */ 214 215 QUNIFORM_VIEWPORT_Z_OFFSET, 216 QUNIFORM_VIEWPORT_Z_SCALE, 217 218 QUNIFORM_USER_CLIP_PLANE, 219 220 /** 221 * A reference to a texture config parameter 0 uniform. 222 * 223 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 224 * defines texture type, miplevels, and such. It will be found as a 225 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 226 */ 227 QUNIFORM_TEXTURE_CONFIG_P0, 228 229 /** 230 * A reference to a texture config parameter 1 uniform. 231 * 232 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 233 * defines texture width, height, filters, and wrap modes. It will be 234 * found as a parameter to the second QOP_TEX_[STRB] instruction in a 235 * sequence. 236 */ 237 QUNIFORM_TEXTURE_CONFIG_P1, 238 239 /** A reference to a texture config parameter 2 cubemap stride uniform */ 240 QUNIFORM_TEXTURE_CONFIG_P2, 241 242 QUNIFORM_UBO_ADDR, 243 244 QUNIFORM_TEXRECT_SCALE_X, 245 QUNIFORM_TEXRECT_SCALE_Y, 246 247 QUNIFORM_TEXTURE_BORDER_COLOR, 248 249 QUNIFORM_BLEND_CONST_COLOR, 250 QUNIFORM_STENCIL, 251 252 QUNIFORM_ALPHA_REF, 253}; 254 255struct vc4_varying_semantic { 256 uint8_t semantic; 257 uint8_t index; 258 uint8_t swizzle; 259}; 260 261struct vc4_compiler_ubo_range { 262 /** 263 * offset in bytes from the start of the ubo where this range is 264 * uploaded. 265 * 266 * Only set once used is set. 267 */ 268 uint32_t dst_offset; 269 270 /** 271 * offset in bytes from the start of the gallium uniforms where the 272 * data comes from. 273 */ 274 uint32_t src_offset; 275 276 /** size in bytes of this ubo range */ 277 uint32_t size; 278 279 /** 280 * Set if this range is used by the shader for indirect uniforms 281 * access. 282 */ 283 bool used; 284}; 285 286struct vc4_key { 287 struct vc4_uncompiled_shader *shader_state; 288 struct { 289 enum pipe_format format; 290 unsigned compare_mode:1; 291 unsigned compare_func:3; 292 unsigned wrap_s:3; 293 unsigned wrap_t:3; 294 uint8_t swizzle[4]; 295 } tex[VC4_MAX_TEXTURE_SAMPLERS]; 296 uint8_t ucp_enables; 297}; 298 299struct vc4_fs_key { 300 struct vc4_key base; 301 enum pipe_format color_format; 302 bool depth_enabled; 303 bool stencil_enabled; 304 bool stencil_twoside; 305 bool stencil_full_writemasks; 306 bool is_points; 307 bool is_lines; 308 bool alpha_test; 309 bool point_coord_upper_left; 310 bool light_twoside; 311 uint8_t alpha_test_func; 312 uint8_t logicop_func; 313 uint32_t point_sprite_mask; 314 315 struct pipe_rt_blend_state blend; 316}; 317 318struct vc4_vs_key { 319 struct vc4_key base; 320 321 /** 322 * This is a proxy for the array of FS input semantics, which is 323 * larger than we would want to put in the key. 324 */ 325 uint64_t compiled_fs_id; 326 327 enum pipe_format attr_formats[8]; 328 bool is_coord; 329 bool per_vertex_point_size; 330}; 331 332struct vc4_compile { 333 struct vc4_context *vc4; 334 nir_shader *s; 335 nir_function_impl *impl; 336 struct exec_list *cf_node_list; 337 338 /** 339 * Mapping from nir_register * or nir_ssa_def * to array of struct 340 * qreg for the values. 341 */ 342 struct hash_table *def_ht; 343 344 /* For each temp, the instruction generating its value. */ 345 struct qinst **defs; 346 uint32_t defs_array_size; 347 348 /** 349 * Inputs to the shader, arranged by TGSI declaration order. 350 * 351 * Not all fragment shader QFILE_VARY reads are present in this array. 352 */ 353 struct qreg *inputs; 354 struct qreg *outputs; 355 uint32_t inputs_array_size; 356 uint32_t outputs_array_size; 357 uint32_t uniforms_array_size; 358 359 struct vc4_compiler_ubo_range *ubo_ranges; 360 uint32_t ubo_ranges_array_size; 361 /** Number of uniform areas declared in ubo_ranges. */ 362 uint32_t num_uniform_ranges; 363 /** Number of uniform areas used for indirect addressed loads. */ 364 uint32_t num_ubo_ranges; 365 uint32_t next_ubo_dst_offset; 366 367 struct qreg line_x, point_x, point_y; 368 struct qreg discard; 369 370 uint8_t vattr_sizes[8]; 371 372 /** 373 * Array of the TGSI semantics of all FS QFILE_VARY reads. 374 * 375 * This includes those that aren't part of the VPM varyings, like 376 * point/line coordinates. 377 */ 378 struct vc4_varying_semantic *input_semantics; 379 uint32_t num_input_semantics; 380 uint32_t input_semantics_array_size; 381 382 /** 383 * An entry per outputs[] in the VS indicating what the semantic of 384 * the output is. Used to emit from the VS in the order that the FS 385 * needs. 386 */ 387 struct vc4_varying_semantic *output_semantics; 388 389 struct pipe_shader_state *shader_state; 390 struct vc4_key *key; 391 struct vc4_fs_key *fs_key; 392 struct vc4_vs_key *vs_key; 393 394 uint32_t *uniform_data; 395 enum quniform_contents *uniform_contents; 396 uint32_t uniform_array_size; 397 uint32_t num_uniforms; 398 uint32_t num_outputs; 399 uint32_t num_texture_samples; 400 uint32_t output_position_index; 401 uint32_t output_clipvertex_index; 402 uint32_t output_color_index; 403 uint32_t output_point_size_index; 404 405 struct qreg undef; 406 enum qstage stage; 407 uint32_t num_temps; 408 struct list_head instructions; 409 uint32_t immediates[1024]; 410 411 struct list_head qpu_inst_list; 412 uint64_t *qpu_insts; 413 uint32_t qpu_inst_count; 414 uint32_t qpu_inst_size; 415 uint32_t num_inputs; 416 417 uint32_t program_id; 418 uint32_t variant_id; 419}; 420 421struct vc4_compile *qir_compile_init(void); 422void qir_compile_destroy(struct vc4_compile *c); 423struct qinst *qir_inst(enum qop op, struct qreg dst, 424 struct qreg src0, struct qreg src1); 425struct qinst *qir_inst4(enum qop op, struct qreg dst, 426 struct qreg a, 427 struct qreg b, 428 struct qreg c, 429 struct qreg d); 430void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); 431struct qreg qir_uniform(struct vc4_compile *c, 432 enum quniform_contents contents, 433 uint32_t data); 434void qir_reorder_uniforms(struct vc4_compile *c); 435void qir_emit(struct vc4_compile *c, struct qinst *inst); 436struct qreg qir_get_temp(struct vc4_compile *c); 437int qir_get_op_nsrc(enum qop qop); 438bool qir_reg_equals(struct qreg a, struct qreg b); 439bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 440bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 441bool qir_is_multi_instruction(struct qinst *inst); 442bool qir_is_tex(struct qinst *inst); 443bool qir_depends_on_flags(struct qinst *inst); 444bool qir_writes_r4(struct qinst *inst); 445bool qir_reads_r4(struct qinst *inst); 446bool qir_src_needs_a_file(struct qinst *inst); 447struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); 448 449void qir_dump(struct vc4_compile *c); 450void qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 451const char *qir_get_stage_name(enum qstage stage); 452 453void qir_optimize(struct vc4_compile *c); 454bool qir_opt_algebraic(struct vc4_compile *c); 455bool qir_opt_constant_folding(struct vc4_compile *c); 456bool qir_opt_copy_propagation(struct vc4_compile *c); 457bool qir_opt_cse(struct vc4_compile *c); 458bool qir_opt_dead_code(struct vc4_compile *c); 459bool qir_opt_small_immediates(struct vc4_compile *c); 460bool qir_opt_vpm_writes(struct vc4_compile *c); 461void vc4_nir_lower_io(struct vc4_compile *c); 462void qir_lower_uniforms(struct vc4_compile *c); 463 464void qpu_schedule_instructions(struct vc4_compile *c); 465 466void qir_SF(struct vc4_compile *c, struct qreg src); 467 468static inline struct qreg 469qir_uniform_ui(struct vc4_compile *c, uint32_t ui) 470{ 471 return qir_uniform(c, QUNIFORM_CONSTANT, ui); 472} 473 474static inline struct qreg 475qir_uniform_f(struct vc4_compile *c, float f) 476{ 477 return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 478} 479 480#define QIR_ALU0(name) \ 481static inline struct qreg \ 482qir_##name(struct vc4_compile *c) \ 483{ \ 484 struct qreg t = qir_get_temp(c); \ 485 qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ 486 return t; \ 487} 488 489#define QIR_ALU1(name) \ 490static inline struct qreg \ 491qir_##name(struct vc4_compile *c, struct qreg a) \ 492{ \ 493 struct qreg t = qir_get_temp(c); \ 494 qir_emit(c, qir_inst(QOP_##name, t, a, c->undef)); \ 495 return t; \ 496} 497 498#define QIR_ALU2(name) \ 499static inline struct qreg \ 500qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 501{ \ 502 struct qreg t = qir_get_temp(c); \ 503 qir_emit(c, qir_inst(QOP_##name, t, a, b)); \ 504 return t; \ 505} 506 507#define QIR_NODST_1(name) \ 508static inline void \ 509qir_##name(struct vc4_compile *c, struct qreg a) \ 510{ \ 511 qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ 512} 513 514#define QIR_NODST_2(name) \ 515static inline void \ 516qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 517{ \ 518 qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ 519} 520 521QIR_ALU1(MOV) 522QIR_ALU2(FADD) 523QIR_ALU2(FSUB) 524QIR_ALU2(FMUL) 525QIR_ALU2(MUL24) 526QIR_ALU1(SEL_X_0_ZS) 527QIR_ALU1(SEL_X_0_ZC) 528QIR_ALU1(SEL_X_0_NS) 529QIR_ALU1(SEL_X_0_NC) 530QIR_ALU2(SEL_X_Y_ZS) 531QIR_ALU2(SEL_X_Y_ZC) 532QIR_ALU2(SEL_X_Y_NS) 533QIR_ALU2(SEL_X_Y_NC) 534QIR_ALU2(FMIN) 535QIR_ALU2(FMAX) 536QIR_ALU2(FMINABS) 537QIR_ALU2(FMAXABS) 538QIR_ALU1(FTOI) 539QIR_ALU1(ITOF) 540 541QIR_ALU2(ADD) 542QIR_ALU2(SUB) 543QIR_ALU2(SHL) 544QIR_ALU2(SHR) 545QIR_ALU2(ASR) 546QIR_ALU2(MIN) 547QIR_ALU2(MAX) 548QIR_ALU2(AND) 549QIR_ALU2(OR) 550QIR_ALU2(XOR) 551QIR_ALU1(NOT) 552 553QIR_ALU1(RCP) 554QIR_ALU1(RSQ) 555QIR_ALU1(EXP2) 556QIR_ALU1(LOG2) 557QIR_ALU2(PACK_SCALED) 558QIR_ALU1(PACK_8888_F) 559QIR_ALU2(PACK_8A_F) 560QIR_ALU2(PACK_8B_F) 561QIR_ALU2(PACK_8C_F) 562QIR_ALU2(PACK_8D_F) 563QIR_ALU1(VARY_ADD_C) 564QIR_NODST_2(TEX_S) 565QIR_NODST_2(TEX_T) 566QIR_NODST_2(TEX_R) 567QIR_NODST_2(TEX_B) 568QIR_NODST_2(TEX_DIRECT) 569QIR_ALU0(FRAG_X) 570QIR_ALU0(FRAG_Y) 571QIR_ALU0(FRAG_Z) 572QIR_ALU0(FRAG_W) 573QIR_ALU0(FRAG_REV_FLAG) 574QIR_ALU0(TEX_RESULT) 575QIR_ALU0(TLB_COLOR_READ) 576QIR_NODST_1(TLB_COLOR_WRITE) 577QIR_NODST_1(TLB_Z_WRITE) 578QIR_NODST_1(TLB_DISCARD_SETUP) 579QIR_NODST_1(TLB_STENCIL_SETUP) 580 581static inline struct qreg 582qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) 583{ 584 struct qreg t = qir_get_temp(c); 585 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); 586 return t; 587} 588 589static inline struct qreg 590qir_SEL_X_0_COND(struct vc4_compile *c, int i) 591{ 592 struct qreg t = qir_get_temp(c); 593 qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); 594 return t; 595} 596 597static inline struct qreg 598qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 599{ 600 struct qreg t = qir_get_temp(c); 601 qir_emit(c, qir_inst(QOP_UNPACK_8A_F + i, t, src, c->undef)); 602 return t; 603} 604 605static inline struct qreg 606qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 607{ 608 struct qreg t = qir_get_temp(c); 609 qir_emit(c, qir_inst(QOP_UNPACK_8A_I + i, t, src, c->undef)); 610 return t; 611} 612 613static inline struct qreg 614qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 615{ 616 struct qreg t = qir_get_temp(c); 617 qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); 618 return t; 619} 620 621static inline struct qreg 622qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 623{ 624 struct qreg t = qir_get_temp(c); 625 qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); 626 return t; 627} 628 629static inline struct qreg 630qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan) 631{ 632 struct qreg t = qir_get_temp(c); 633 qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val)); 634 return t; 635} 636 637static inline struct qreg 638qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 639{ 640 return qir_EXP2(c, qir_FMUL(c, 641 y, 642 qir_LOG2(c, x))); 643} 644 645static inline void 646qir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 647{ 648 static const struct qreg vpm = { QFILE_VPM, 0 }; 649 qir_emit(c, qir_inst(QOP_MOV, vpm, val, c->undef)); 650} 651 652#endif /* VC4_QIR_H */ 653