instr-a3xx.h revision 660d5c1646f5d63f9626b24beabc9cfc318849d4
/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <stdint.h>
#include <stdbool.h>   /* bool/true/false are used by the inline helpers below */
#include <assert.h>

/*
 * Opcode values.  The numbering restarts for every instruction category,
 * so the same integer value appears under several names; an opc value is
 * only meaningful together with the category (opc_cat) it belongs to.
 */
typedef enum {
	/* category 0: */
	OPC_NOP = 0,
	OPC_BR = 1,
	OPC_JUMP = 2,
	OPC_CALL = 3,
	OPC_RET = 4,
	OPC_KILL = 5,
	OPC_END = 6,
	OPC_EMIT = 7,
	OPC_CUT = 8,
	OPC_CHMASK = 9,
	OPC_CHSH = 10,
	OPC_FLOW_REV = 11,

	/* category 1: */
	/* no opc.. all category 1 are variants of mov */

	/* category 2: */
	OPC_ADD_F = 0,
	OPC_MIN_F = 1,
	OPC_MAX_F = 2,
	OPC_MUL_F = 3,
	OPC_SIGN_F = 4,
	OPC_CMPS_F = 5,
	OPC_ABSNEG_F = 6,
	OPC_CMPV_F = 7,
	/* 8 - invalid */
	OPC_FLOOR_F = 9,
	OPC_CEIL_F = 10,
	OPC_RNDNE_F = 11,
	OPC_RNDAZ_F = 12,
	OPC_TRUNC_F = 13,
	/* 14-15 - invalid */
	OPC_ADD_U = 16,
	OPC_ADD_S = 17,
	OPC_SUB_U = 18,
	OPC_SUB_S = 19,
	OPC_CMPS_U = 20,
	OPC_CMPS_S = 21,
	OPC_MIN_U = 22,
	OPC_MIN_S = 23,
	OPC_MAX_U = 24,
	OPC_MAX_S = 25,
	OPC_ABSNEG_S = 26,
	/* 27 - invalid */
	OPC_AND_B = 28,
	OPC_OR_B = 29,
	OPC_NOT_B = 30,
	OPC_XOR_B = 31,
	/* 32 - invalid */
	OPC_CMPV_U = 33,
	OPC_CMPV_S = 34,
	/* 35-47 - invalid */
	OPC_MUL_U = 48,
	OPC_MUL_S = 49,
	OPC_MULL_U = 50,
	OPC_BFREV_B = 51,
	OPC_CLZ_S = 52,
	OPC_CLZ_B = 53,
	OPC_SHL_B = 54,
	OPC_SHR_B = 55,
	OPC_ASHR_B = 56,
	OPC_BARY_F = 57,
	OPC_MGEN_B = 58,
	OPC_GETBIT_B = 59,
	OPC_SETRM = 60,
	OPC_CBITS_B = 61,
	OPC_SHB = 62,
	OPC_MSAD = 63,

	/* category 3: */
	OPC_MAD_U16 = 0,
	OPC_MADSH_U16 = 1,
	OPC_MAD_S16 = 2,
	OPC_MADSH_M16 = 3,   /* should this be .s16? */
	OPC_MAD_U24 = 4,
	OPC_MAD_S24 = 5,
	OPC_MAD_F16 = 6,
	OPC_MAD_F32 = 7,
	OPC_SEL_B16 = 8,
	OPC_SEL_B32 = 9,
	OPC_SEL_S16 = 10,
	OPC_SEL_S32 = 11,
	OPC_SEL_F16 = 12,
	OPC_SEL_F32 = 13,
	OPC_SAD_S16 = 14,
	OPC_SAD_S32 = 15,

	/* category 4: */
	OPC_RCP = 0,
	OPC_RSQ = 1,
	OPC_LOG2 = 2,
	OPC_EXP2 = 3,
	OPC_SIN = 4,
	OPC_COS = 5,
	OPC_SQRT = 6,
	// 7-63 - invalid

	/* category 5: */
	OPC_ISAM = 0,
	OPC_ISAML = 1,
	OPC_ISAMM = 2,
	OPC_SAM = 3,
	OPC_SAMB = 4,
	OPC_SAML = 5,
	OPC_SAMGQ = 6,
	OPC_GETLOD = 7,
	OPC_CONV = 8,
	OPC_CONVM = 9,
	OPC_GETSIZE = 10,
	OPC_GETBUF = 11,
	OPC_GETPOS = 12,
	OPC_GETINFO = 13,
	OPC_DSX = 14,
	OPC_DSY = 15,
	OPC_GATHER4R = 16,
	OPC_GATHER4G = 17,
	OPC_GATHER4B = 18,
	OPC_GATHER4A = 19,
	OPC_SAMGP0 = 20,
	OPC_SAMGP1 = 21,
	OPC_SAMGP2 = 22,
	OPC_SAMGP3 = 23,
	OPC_DSXPP_1 = 24,
	OPC_DSYPP_1 = 25,
	OPC_RGETPOS = 26,
	OPC_RGETINFO = 27,

	/* category 6: */
	OPC_LDG = 0,        /* load-global */
	OPC_LDL = 1,
	OPC_LDP = 2,
	OPC_STG = 3,        /* store-global */
	OPC_STL = 4,
	OPC_STP = 5,
	OPC_STI = 6,
	OPC_G2L = 7,
	OPC_L2G = 8,
	OPC_PREFETCH = 9,
	OPC_LDLW = 10,
	OPC_STLW = 11,
	OPC_RESFMT = 14,
	OPC_RESINFO = 15,
	OPC_ATOMIC_ADD_L = 16,
	OPC_ATOMIC_SUB_L = 17,
	OPC_ATOMIC_XCHG_L = 18,
	OPC_ATOMIC_INC_L = 19,
	OPC_ATOMIC_DEC_L = 20,
	OPC_ATOMIC_CMPXCHG_L = 21,
	OPC_ATOMIC_MIN_L = 22,
	OPC_ATOMIC_MAX_L = 23,
	OPC_ATOMIC_AND_L = 24,
	OPC_ATOMIC_OR_L = 25,
	OPC_ATOMIC_XOR_L = 26,
	OPC_LDGB_TYPED_4D = 27,
	OPC_STGB_4D_4 = 28,
	OPC_STIB = 29,
	OPC_LDC_4 = 30,
	OPC_LDLV = 31,

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT = 0,
	OPC_META_PHI = 1,
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO = 2,
	OPC_META_FI = 3,

} opc_t;

/* Operand/result data types; values fit the 3-bit type fields used below. */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;

/*
 * Returns the size matching the numeric suffix of the type name:
 * 32 for F32/U32/S32, 16 for F16/U16/S16, 8 for U8/S8.
 * Any other value trips assert(0); when asserts are compiled out
 * (NDEBUG) the function returns 0 instead.
 */
static inline uint32_t type_size(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_U32:
	case TYPE_S32:
		return 32;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return 16;
	case TYPE_U8:
	case TYPE_S8:
		return 8;
	default:
		assert(0); /* invalid type */
		return 0;
	}
}

/* Non-zero when type is one of the float types (F32, F16). */
static inline int type_float(type_t type)
{
	return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero when type is one of the unsigned types (U32, U16, U8). */
static inline int type_uint(type_t type)
{
	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero when type is one of the signed types (S32, S16, S8). */
static inline int type_sint(type_t type)
{
	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

/*
 * Register operand, viewable either as a (comp, num) pair or as an
 * 11-bit signed immediate.  The dummyN members give same-storage views
 * of various widths.
 */
typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;
		uint32_t num   : 10;
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
} reg_t;

/* special registers: */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */

/* Non-zero when reg.num names one of the special registers above. */
static inline int reg_special(reg_t reg)
{
	return (reg.num == REG_A0) || (reg.num == REG_P0);
}

/* Category 0 instruction encoding (two 32-bit dwords). */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
	};

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;

/* Category 1 instruction encoding; has no opc field (see opc_t). */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like a address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;

/*
 * Category 2 instruction encoding.  dword0 holds two 16-bit source
 * descriptors, each with three alternative views (plain, rel, c).
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;

/*
 * Category 3 instruction encoding.  dword0 carries src1 and src3
 * descriptors (plus flag bits belonging to src2); src2 itself is the
 * 8-bit field in dword1.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;

/*
 * Returns false for the cat3 opcodes listed below (the *16 variants plus
 * OPC_SAD_S32), true for every other opc value.  Only cat3->opc is read.
 */
static inline bool instr_cat3_full(const instr_cat3_t *cat3)
{
	switch (cat3->opc) {
	case OPC_MAD_F16:
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_SEL_B16:
	case OPC_SEL_S16:
	case OPC_SEL_F16:
	case OPC_SAD_S16:
	case OPC_SAD_S32:  // really??
		return false;
	default:
		return true;
	}
}

/* Category 4 instruction encoding (single source). */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;

/*
 * Category 5 instruction encoding.  dword0 has a "norm" and an "s2en"
 * layout; the anonymous view exposes the fields both layouts share.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;

/* [src1 + off], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6a_t;

/* [src1], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t ignore0  : 13;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6b_t;

/* I think some of the other cat6 instructions use additional
 * sub-encodings..
 */

/*
 * Category 6 instruction: the "a" (with offset) and "b" (without offset)
 * encodings overlaid.  Bit 0 of dword0 is mustbe1 in "a" and mustbe0 in
 * "b"; the anonymous view exposes that same bit as has_off.
 */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	struct PACKED {
		/* dword0: */
		uint32_t has_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t dst      : 8;
		uint32_t dummy2   : 9;
		uint32_t type     : 3;
		uint32_t dummy3   : 2;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;

/*
 * One 64-bit instruction, viewable through any per-category layout or
 * through the generic anonymous view of the fields the categories share.
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	struct PACKED {
		/* all of dword0 plus the low 8 bits of dword1: */
		uint64_t pad1     : 40;
		/* remainder of dword1: */
		uint32_t repeat   : 3;  /* cat0-cat4 */
		uint32_t pad2     : 1;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;

/*
 * Returns the opc field of the layout selected by instr->opc_cat.
 * Category 1 has no opc field, so it yields 0; any opc_cat value other
 * than 0..6 also yields 0.
 */
static inline uint32_t instr_opc(const instr_t *instr)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.opc;
	case 1:  return 0;
	case 2:  return instr->cat2.opc;
	case 3:  return instr->cat3.opc;
	case 4:  return instr->cat4.opc;
	case 5:  return instr->cat5.opc;
	case 6:  return instr->cat6.opc;
	default: return 0;
	}
}

/*
 * True when opc equals one of the OPC_MAD_* values.  The comparison is
 * purely numeric: opc_t values repeat across categories, so the caller
 * must already know opc is a category 3 opcode.
 */
static inline bool is_mad(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_MAD_U24:
	case OPC_MAD_S24:
	case OPC_MAD_F16:
	case OPC_MAD_F32:
		return true;
	default:
		return false;
	}
}

/*
 * True when opc equals OPC_MADSH_U16 or OPC_MADSH_M16.  As with
 * is_mad(), the comparison is numeric only.
 */
static inline bool is_madsh(opc_t opc)
{
	switch (opc) {
	case OPC_MADSH_U16:
	case OPC_MADSH_M16:
		return true;
	default:
		return false;
	}
}

#endif /* INSTR_A3XX_H_ */