/* instr-a3xx.h revision 203f37540a698a812f0a66e2f3f1fff954af22ab */
/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <stdint.h>
#include <stdbool.h>   /* bool/true/false used by the inline helpers below */
#include <assert.h>

/* Opcode values.  Numbering restarts within each instruction category, so
 * many enumerators share the same integer value and an opc_t is only
 * meaningful together with the category it belongs to.
 */
typedef enum {
	/* category 0: */
	OPC_NOP = 0,
	OPC_BR = 1,
	OPC_JUMP = 2,
	OPC_CALL = 3,
	OPC_RET = 4,
	OPC_KILL = 5,
	OPC_END = 6,
	OPC_EMIT = 7,
	OPC_CUT = 8,
	OPC_CHMASK = 9,
	OPC_CHSH = 10,
	OPC_FLOW_REV = 11,

	/* category 1: */
	/* no opc.. all category 1 are variants of mov */

	/* category 2: */
	OPC_ADD_F = 0,
	OPC_MIN_F = 1,
	OPC_MAX_F = 2,
	OPC_MUL_F = 3,
	OPC_SIGN_F = 4,
	OPC_CMPS_F = 5,
	OPC_ABSNEG_F = 6,
	OPC_CMPV_F = 7,
	/* 8 - invalid */
	OPC_FLOOR_F = 9,
	OPC_CEIL_F = 10,
	OPC_RNDNE_F = 11,
	OPC_RNDAZ_F = 12,
	OPC_TRUNC_F = 13,
	/* 14-15 - invalid */
	OPC_ADD_U = 16,
	OPC_ADD_S = 17,
	OPC_SUB_U = 18,
	OPC_SUB_S = 19,
	OPC_CMPS_U = 20,
	OPC_CMPS_S = 21,
	OPC_MIN_U = 22,
	OPC_MIN_S = 23,
	OPC_MAX_U = 24,
	OPC_MAX_S = 25,
	OPC_ABSNEG_S = 26,
	/* 27 - invalid */
	OPC_AND_B = 28,
	OPC_OR_B = 29,
	OPC_NOT_B = 30,
	OPC_XOR_B = 31,
	/* 32 - invalid */
	OPC_CMPV_U = 33,
	OPC_CMPV_S = 34,
	/* 35-47 - invalid */
	OPC_MUL_U = 48,
	OPC_MUL_S = 49,
	OPC_MULL_U = 50,
	OPC_BFREV_B = 51,
	OPC_CLZ_S = 52,
	OPC_CLZ_B = 53,
	OPC_SHL_B = 54,
	OPC_SHR_B = 55,
	OPC_ASHR_B = 56,
	OPC_BARY_F = 57,
	OPC_MGEN_B = 58,
	OPC_GETBIT_B = 59,
	OPC_SETRM = 60,
	OPC_CBITS_B = 61,
	OPC_SHB = 62,
	OPC_MSAD = 63,

	/* category 3: */
	OPC_MAD_U16 = 0,
	OPC_MADSH_U16 = 1,
	OPC_MAD_S16 = 2,
	OPC_MADSH_M16 = 3,   /* should this be .s16? */
	OPC_MAD_U24 = 4,
	OPC_MAD_S24 = 5,
	OPC_MAD_F16 = 6,
	OPC_MAD_F32 = 7,
	OPC_SEL_B16 = 8,
	OPC_SEL_B32 = 9,
	OPC_SEL_S16 = 10,
	OPC_SEL_S32 = 11,
	OPC_SEL_F16 = 12,
	OPC_SEL_F32 = 13,
	OPC_SAD_S16 = 14,
	OPC_SAD_S32 = 15,

	/* category 4: */
	OPC_RCP = 0,
	OPC_RSQ = 1,
	OPC_LOG2 = 2,
	OPC_EXP2 = 3,
	OPC_SIN = 4,
	OPC_COS = 5,
	OPC_SQRT = 6,
	// 7-63 - invalid

	/* category 5: */
	OPC_ISAM = 0,
	OPC_ISAML = 1,
	OPC_ISAMM = 2,
	OPC_SAM = 3,
	OPC_SAMB = 4,
	OPC_SAML = 5,
	OPC_SAMGQ = 6,
	OPC_GETLOD = 7,
	OPC_CONV = 8,
	OPC_CONVM = 9,
	OPC_GETSIZE = 10,
	OPC_GETBUF = 11,
	OPC_GETPOS = 12,
	OPC_GETINFO = 13,
	OPC_DSX = 14,
	OPC_DSY = 15,
	OPC_GATHER4R = 16,
	OPC_GATHER4G = 17,
	OPC_GATHER4B = 18,
	OPC_GATHER4A = 19,
	OPC_SAMGP0 = 20,
	OPC_SAMGP1 = 21,
	OPC_SAMGP2 = 22,
	OPC_SAMGP3 = 23,
	OPC_DSXPP_1 = 24,
	OPC_DSYPP_1 = 25,
	OPC_RGETPOS = 26,
	OPC_RGETINFO = 27,

	/* category 6: */
	OPC_LDG = 0,        /* load-global */
	OPC_LDL = 1,
	OPC_LDP = 2,
	OPC_STG = 3,        /* store-global */
	OPC_STL = 4,
	OPC_STP = 5,
	OPC_STI = 6,
	OPC_G2L = 7,
	OPC_L2G = 8,
	OPC_PREFETCH = 9,
	OPC_LDLW = 10,
	OPC_STLW = 11,
	OPC_RESFMT = 14,
	OPC_RESINFO = 15,
	OPC_ATOMIC_ADD_L = 16,
	OPC_ATOMIC_SUB_L = 17,
	OPC_ATOMIC_XCHG_L = 18,
	OPC_ATOMIC_INC_L = 19,
	OPC_ATOMIC_DEC_L = 20,
	OPC_ATOMIC_CMPXCHG_L = 21,
	OPC_ATOMIC_MIN_L = 22,
	OPC_ATOMIC_MAX_L = 23,
	OPC_ATOMIC_AND_L = 24,
	OPC_ATOMIC_OR_L = 25,
	OPC_ATOMIC_XOR_L = 26,
	OPC_LDGB_TYPED_4D = 27,
	OPC_STGB_4D_4 = 28,
	OPC_STIB = 29,
	OPC_LDC_4 = 30,
	OPC_LDLV = 31,

	/* meta instructions (category -1): */
	/* placeholder instr to mark inputs/outputs: */
	OPC_META_INPUT = 0,
	OPC_META_OUTPUT = 1,
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO = 2,
	OPC_META_FI = 3,
	/* branches/flow control */
	OPC_META_FLOW = 4,
	OPC_META_PHI = 5,

} opc_t;

/* Operand data type, as stored in the 3-bit type fields of the
 * instruction encodings below.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;

/* Returns 32, 16 or 8 according to the numeric suffix of the type name
 * (presumably the width in bits).  Any value outside type_t trips the
 * assert; when asserts are compiled out 0 is returned instead.
 */
static inline uint32_t type_size(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_U32:
	case TYPE_S32:
		return 32;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return 16;
	case TYPE_U8:
	case TYPE_S8:
		return 8;
	default:
		assert(0); /* invalid type */
		return 0;
	}
}

/* Non-zero if type is one of the TYPE_F* values. */
static inline int type_float(type_t type)
{
	return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero if type is one of the TYPE_U* values. */
static inline int type_uint(type_t type)
{
	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero if type is one of the TYPE_S* values. */
static inline int type_sint(type_t type)
{
	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

/* Register / immediate operand as it appears in a src or dst field. */
typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp : 2;
		uint32_t num  : 10;
	};
	/* for immediate val: */
	int32_t  iim_val : 11;
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10 : 10;
	uint32_t dummy11 : 11;
	uint32_t dummy12 : 12;
	uint32_t dummy13 : 13;
	uint32_t dummy8  : 8;
} reg_t;

/* special registers: */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */

/* Non-zero if reg.num names one of the special registers above. */
static inline int reg_special(reg_t reg)
{
	return (reg.num == REG_A0) || (reg.num == REG_P0);
}

/* Category 0 instruction encoding (two dwords). */
typedef struct PACKED {
	/* dword0: */
	int16_t  immed    : 16;
	uint32_t dummy1   : 16;

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;

/* Category 1 instruction encoding (two dwords); has no opc field. */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like a address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;

/* Category 2 instruction encoding (two dwords).  Each 16-bit src slot
 * in dword0 has three alternative views: plain, relative and const.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1 : 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2 : 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;

/* Category 3 instruction encoding (two dwords).  src1 and src3 live in
 * dword0 (each with plain/relative/const views); src2 lives in dword1.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1 : 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2 : 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;

/* cat3 has no explicit "full" (not half) bit like cat2/cat4 do, so the
 * answer is derived from the opcode: returns false for the opcodes listed
 * below (the *16 variants plus OPC_SAD_S32) and true for every other
 * cat3 opc value.
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	switch (cat3->opc) {
	case OPC_MAD_F16:
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_SEL_B16:
	case OPC_SEL_S16:
	case OPC_SEL_F16:
	case OPC_SAD_S16:
	case OPC_SAD_S32:  // really??
		return false;
	default:
		return true;
	}
}

/* Category 4 instruction encoding (two dwords); single src operand. */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1 : 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;

/* Category 5 instruction encoding (two dwords).  dword0 has a "normal"
 * layout and an alternate "s2en" layout.
 * NOTE(review): presumably is_s2en in dword1 selects between them - confirm.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;

/* [src1 + off], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6a_t;

/* [src1], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t ignore0  : 13;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t dummy2   : 9;
	uint32_t type     : 3;
	uint32_t dummy3   : 2;
	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6b_t;

/* I think some of the other cat6 instructions use additional
 * sub-encodings..
 */

/* Category 6 instruction: the low bit of dword0 (has_off here, mustbe1 in
 * the 'a' form, mustbe0 in the 'b' form) distinguishes the two layouts.
 */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	struct PACKED {
		/* dword0: */
		uint32_t has_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 17;
		uint32_t type     : 3;
		uint32_t pad3     : 2;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;

/* One 64-bit instruction, viewable as any category-specific encoding or
 * through the generic fields in the anonymous struct.
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	struct PACKED {
		/* all of dword0 plus the low 8 bits of dword1: */
		uint64_t pad1     : 40;
		/* remainder of dword1: */
		uint32_t repeat   : 3;  /* cat0-cat4 */
		uint32_t pad2     : 1;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;

/* Returns the opc field of instr, read through the encoding selected by
 * instr->opc_cat.  Category 1 has no opc field, so 0 is returned for it;
 * 0 is also returned for any category value other than 0..6.
 */
static inline uint32_t instr_opc(instr_t *instr)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.opc;
	case 1:  return 0;
	case 2:  return instr->cat2.opc;
	case 3:  return instr->cat3.opc;
	case 4:  return instr->cat4.opc;
	case 5:  return instr->cat5.opc;
	case 6:  return instr->cat6.opc;
	default: return 0;
	}
}

/* True if opc is one of the category 3 OPC_MAD* / OPC_MADSH* opcodes.
 *
 * Opcode numbering restarts in every category, so this compares raw
 * values 0..7: callers must already know that opc belongs to category 3,
 * otherwise e.g. OPC_NOP (also 0) would match as well.
 */
static inline bool is_mad(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_U16:
	case OPC_MADSH_U16:
	case OPC_MAD_S16:
	case OPC_MADSH_M16:
	case OPC_MAD_U24:
	case OPC_MAD_S24:
	case OPC_MAD_F16:
	case OPC_MAD_F32:
		return true;
	default:
		return false;
	}
}

#endif /* INSTR_A3XX_H_ */