/* r600_asm.c revision 56227f875bdff6ef4fd53b09ba267c786ae9dac2 */
13aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev/* 23aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 33aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * 43aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * Permission is hereby granted, free of charge, to any person obtaining a 53aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * copy of this software and associated documentation files (the "Software"), 63aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * to deal in the Software without restriction, including without limitation 73aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * on the rights to use, copy, modify, merge, publish, distribute, sub 83aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * license, and/or sell copies of the Software, and to permit persons to whom 93aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * the Software is furnished to do so, subject to the following conditions: 103aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * 113aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * The above copyright notice and this permission notice (including the next 123aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * paragraph) shall be included in all copies or substantial portions of the 133aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * Software. 143aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * 153aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 163aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 173aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL 183aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 193aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 203aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 213aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev * USE OR OTHER DEALINGS IN THE SOFTWARE. 223aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev */ 233aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include "r600_sq.h" 243aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include "r600_opcodes.h" 253aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include "r600_formats.h" 263aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include "r600d.h" 273aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev 283aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include <errno.h> 293aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include <byteswap.h> 303aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include "util/u_memory.h" 313aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#include "pipe/p_shader_tokens.h" 323aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev 333aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#define NUM_OF_CYCLES 3 343aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev#define NUM_OF_COMPONENTS 4 353aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev 363aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishevstatic inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 373aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev{ 383aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev if(alu->is_op3) 393aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev return 3; 403aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev 
413aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev switch (bc->chip_class) { 423aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case R600: 433aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case R700: 443aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev switch (alu->inst) { 453aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: 463aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev return 0; 473aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: 483aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: 493aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT: 503aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT: 513aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT: 523aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: 533aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: 543aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: 553aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: 563aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: 573aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE: 583aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT: 593aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT: 603aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: 613aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT: 623aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: 633aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: 643aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT: 653aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT: 663aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT: 673aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT: 683aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: 693aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT: 703aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: 713aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT: 723aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: 733aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT: 743aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT: 753aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: 763aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT: 773aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT: 783aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: 793aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: 803aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: 813aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: 823aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT: 833aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT: 843aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: 853aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: 863aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: 873aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT: 883aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT: 893aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT: 903aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT: 913aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev return 2; 923aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev 933aa430dc5437a98734b36f996f9b17081a589143Yavor Goulishev case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: 94 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA: 95 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: 96 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT: 97 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: 98 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: 99 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: 100 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: 101 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: 102 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: 103 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: 104 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: 105 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: 106 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: 107 case 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT: 108 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT: 109 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: 110 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: 111 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: 112 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: 113 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT: 114 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT: 115 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: 116 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: 117 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: 118 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT: 119 return 1; 120 default: R600_ERR( 121 "Need instruction operand number for 0x%x.\n", alu->inst); 122 } 123 break; 124 case EVERGREEN: 125 case CAYMAN: 126 switch (alu->inst) { 127 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: 128 return 0; 129 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: 130 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT: 131 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT: 132 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT: 133 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT: 134 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: 135 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: 136 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: 137 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: 138 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: 139 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE: 140 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT: 141 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT: 142 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT: 143 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT: 144 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: 145 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: 146 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT: 147 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT: 148 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT: 149 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT: 150 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: 151 case 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT: 152 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: 153 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT: 154 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: 155 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT: 156 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT: 157 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: 158 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT: 159 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT: 160 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: 161 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT: 162 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: 163 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: 164 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: 165 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT: 166 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: 167 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: 168 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: 169 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY: 170 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW: 171 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT: 172 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT: 173 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT: 174 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT: 175 return 2; 176 177 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: 178 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: 179 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: 180 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: 181 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: 182 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: 183 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: 184 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: 185 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: 186 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED: 187 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: 188 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED: 189 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: 190 case 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: 191 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR: 192 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT: 193 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT: 194 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT: 195 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: 196 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: 197 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE: 198 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT: 199 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0: 200 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT: 201 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT: 202 return 1; 203 default: R600_ERR( 204 "Need instruction operand number for 0x%x.\n", alu->inst); 205 } 206 break; 207 } 208 209 return 3; 210} 211 212int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); 213 214static struct r600_bytecode_cf *r600_bytecode_cf(void) 215{ 216 struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); 217 218 if (cf == NULL) 219 return NULL; 220 LIST_INITHEAD(&cf->list); 221 LIST_INITHEAD(&cf->alu); 222 LIST_INITHEAD(&cf->vtx); 223 LIST_INITHEAD(&cf->tex); 224 return cf; 225} 226 227static struct r600_bytecode_alu *r600_bytecode_alu(void) 228{ 229 struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); 230 231 if (alu == NULL) 232 return NULL; 233 LIST_INITHEAD(&alu->list); 234 return alu; 235} 236 237static struct r600_bytecode_vtx *r600_bytecode_vtx(void) 238{ 239 struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); 240 241 if (vtx == NULL) 242 return NULL; 243 LIST_INITHEAD(&vtx->list); 244 return vtx; 245} 246 247static struct r600_bytecode_tex *r600_bytecode_tex(void) 248{ 249 struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); 250 251 if (tex == NULL) 252 return NULL; 253 LIST_INITHEAD(&tex->list); 254 return tex; 255} 256 257void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class, enum 
radeon_family family) 258{ 259 if ((chip_class == R600) && (family != CHIP_RV670)) 260 bc->ar_handling = AR_HANDLE_RV6XX; 261 else 262 bc->ar_handling = AR_HANDLE_NORMAL; 263 264 if ((chip_class == R600) && (family != CHIP_RV670 && family != CHIP_RS780 && 265 family != CHIP_RS880)) 266 bc->r6xx_nop_after_rel_dst = 1; 267 else 268 bc->r6xx_nop_after_rel_dst = 0; 269 LIST_INITHEAD(&bc->cf); 270 bc->chip_class = chip_class; 271} 272 273static int r600_bytecode_add_cf(struct r600_bytecode *bc) 274{ 275 struct r600_bytecode_cf *cf = r600_bytecode_cf(); 276 277 if (cf == NULL) 278 return -ENOMEM; 279 LIST_ADDTAIL(&cf->list, &bc->cf); 280 if (bc->cf_last) { 281 cf->id = bc->cf_last->id + 2; 282 if (bc->cf_last->eg_alu_extended) { 283 /* take into account extended alu size */ 284 cf->id += 2; 285 bc->ndw += 2; 286 } 287 } 288 bc->cf_last = cf; 289 bc->ncf++; 290 bc->ndw += 2; 291 bc->force_add_cf = 0; 292 bc->ar_loaded = 0; 293 return 0; 294} 295 296int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output) 297{ 298 int r; 299 300 if (output->gpr >= bc->ngpr) 301 bc->ngpr = output->gpr + 1; 302 303 if (bc->cf_last && (bc->cf_last->inst == output->inst || 304 (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) && 305 output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) && 306 output->type == bc->cf_last->output.type && 307 output->elem_size == bc->cf_last->output.elem_size && 308 output->swizzle_x == bc->cf_last->output.swizzle_x && 309 output->swizzle_y == bc->cf_last->output.swizzle_y && 310 output->swizzle_z == bc->cf_last->output.swizzle_z && 311 output->swizzle_w == bc->cf_last->output.swizzle_w && 312 (output->burst_count + bc->cf_last->output.burst_count) <= 16) { 313 314 if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && 315 (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { 316 317 bc->cf_last->output.end_of_program |= 
output->end_of_program; 318 bc->cf_last->output.inst = output->inst; 319 bc->cf_last->output.gpr = output->gpr; 320 bc->cf_last->output.array_base = output->array_base; 321 bc->cf_last->output.burst_count += output->burst_count; 322 return 0; 323 324 } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && 325 output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { 326 327 bc->cf_last->output.end_of_program |= output->end_of_program; 328 bc->cf_last->output.inst = output->inst; 329 bc->cf_last->output.burst_count += output->burst_count; 330 return 0; 331 } 332 } 333 334 r = r600_bytecode_add_cf(bc); 335 if (r) 336 return r; 337 bc->cf_last->inst = output->inst; 338 memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); 339 return 0; 340} 341 342/* alu instructions that can ony exits once per group */ 343static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 344{ 345 switch (bc->chip_class) { 346 case R600: 347 case R700: 348 return !alu->is_op3 && ( 349 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || 350 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || 351 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || 352 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || 353 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || 354 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || 355 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || 356 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || 357 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || 358 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || 359 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || 360 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || 361 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || 362 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || 363 alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || 364 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || 365 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || 366 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || 367 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || 368 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || 369 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || 370 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || 371 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || 372 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || 373 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || 374 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || 375 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || 376 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || 377 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || 378 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || 379 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || 380 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || 381 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || 382 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); 383 case EVERGREEN: 384 case CAYMAN: 385 default: 386 return !alu->is_op3 && ( 387 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || 388 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT || 389 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE || 390 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE || 391 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT || 392 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT || 393 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT || 394 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT || 395 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT || 396 alu->inst == 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT || 397 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT || 398 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT || 399 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE || 400 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT || 401 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE || 402 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE || 403 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV || 404 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP || 405 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR || 406 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE || 407 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH || 408 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH || 409 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH || 410 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH || 411 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT || 412 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT || 413 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT || 414 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT || 415 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT || 416 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT || 417 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT || 418 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT || 419 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || 420 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); 421 } 422} 423 424static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 425{ 426 switch (bc->chip_class) { 427 case R600: 428 case R700: 429 return !alu->is_op3 && ( 430 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || 431 alu->inst == 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || 432 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || 433 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); 434 case EVERGREEN: 435 case CAYMAN: 436 default: 437 return !alu->is_op3 && ( 438 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || 439 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 || 440 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || 441 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); 442 } 443} 444 445static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 446{ 447 switch (bc->chip_class) { 448 case R600: 449 case R700: 450 return !alu->is_op3 && 451 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; 452 case EVERGREEN: 453 case CAYMAN: 454 default: 455 return !alu->is_op3 && 456 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; 457 } 458} 459 460static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 461{ 462 switch (bc->chip_class) { 463 case R600: 464 case R700: 465 return !alu->is_op3 && ( 466 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA || 467 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || 468 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT || 469 alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT); 470 case EVERGREEN: 471 case CAYMAN: 472 default: 473 return !alu->is_op3 && ( 474 alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); 475 } 476} 477 478static int is_opcode_in_range(unsigned opcode, unsigned min, unsigned max) 479{ 480 return min <= opcode && opcode <= max; 481} 482 483/* ALU instructions that can only execute on the vector unit: 484 * 485 * opcode ranges: 486 * R6xx/R7xx: 487 * op3 : [0x08 - 0x0B] 488 * op2 : 0x07, [0x15 - 0x18], [0x1B - 0x1D], [0x50 - 0x53], [0x7A - 0x7E] 489 * 490 * EVERGREEN: 491 * op3: [0x04 - 0x11] 492 * op2: [0xA0 - 0xE2] 493 */ 494static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 495{ 496 switch 
(bc->chip_class) { 497 case R600: 498 case R700: 499 if (alu->is_op3) 500 return is_opcode_in_range(alu->inst, 501 V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_64, 502 V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD_64_D2); 503 else 504 return (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FREXP_64) || 505 is_opcode_in_range(alu->inst, 506 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA, 507 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT) || 508 is_opcode_in_range(alu->inst, 509 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_64, 510 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT32_TO_FLT64) || 511 is_opcode_in_range(alu->inst, 512 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 513 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4) || 514 is_opcode_in_range(alu->inst, 515 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LDEXP_64, 516 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_64); 517 518 case EVERGREEN: 519 if (alu->is_op3) 520 return is_opcode_in_range(alu->inst, 521 EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_BFE_UINT, 522 EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_LDS_IDX_OP); 523 else 524 return is_opcode_in_range(alu->inst, 525 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_BFM_INT, 526 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P20); 527 case CAYMAN: 528 default: 529 assert(0); 530 return 0; 531 } 532} 533 534/* ALU instructions that can only execute on the trans unit: 535 * 536 * opcode ranges: 537 * R600: 538 * op3: 0x0C 539 * op2: [0x60 - 0x79] 540 * 541 * R700: 542 * op3: 0x0C 543 * op2: [0x60 - 0x6F], [0x73 - 0x79] 544 * 545 * EVERGREEN: 546 * op3: 0x1F 547 * op2: [0x81 - 0x9C] 548 */ 549static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 550{ 551 552 switch (bc->chip_class) { 553 case R600: 554 if (alu->is_op3) 555 return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 556 else 557 return is_opcode_in_range(alu->inst, 558 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 559 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 560 case R700: 561 if (alu->is_op3) 562 return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 563 else 564 return 
is_opcode_in_range(alu->inst, 565 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT, 566 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS) || 567 is_opcode_in_range(alu->inst, 568 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, 569 V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); 570 case EVERGREEN: 571 if (alu->is_op3) 572 return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; 573 else 574 return is_opcode_in_range(alu->inst, 575 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 576 EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); 577 case CAYMAN: 578 default: 579 assert(0); 580 return 0; 581 } 582} 583 584/* alu instructions that can execute on any unit */ 585static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 586{ 587 return !is_alu_vec_unit_inst(bc, alu) && 588 !is_alu_trans_unit_inst(bc, alu); 589} 590 591static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) 592{ 593 switch (bc->chip_class) { 594 case R600: 595 case R700: 596 return (!alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 597 case EVERGREEN: 598 case CAYMAN: 599 default: 600 return (!alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 601 } 602} 603 604static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, 605 struct r600_bytecode_alu *assignment[5]) 606{ 607 struct r600_bytecode_alu *alu; 608 unsigned i, chan, trans; 609 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 610 611 for (i = 0; i < max_slots; i++) 612 assignment[i] = NULL; 613 614 for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) { 615 chan = alu->dst.chan; 616 if (max_slots == 4) 617 trans = 0; 618 else if (is_alu_trans_unit_inst(bc, alu)) 619 trans = 1; 620 else if (is_alu_vec_unit_inst(bc, alu)) 621 trans = 0; 622 else if (assignment[chan]) 623 trans = 1; /* Assume ALU_INST_PREFER_VECTOR. 
*/ 624 else 625 trans = 0; 626 627 if (trans) { 628 if (assignment[4]) { 629 assert(0); /* ALU.Trans has already been allocated. */ 630 return -1; 631 } 632 assignment[4] = alu; 633 } else { 634 if (assignment[chan]) { 635 assert(0); /* ALU.chan has already been allocated. */ 636 return -1; 637 } 638 assignment[chan] = alu; 639 } 640 641 if (alu->last) 642 break; 643 } 644 return 0; 645} 646 647struct alu_bank_swizzle { 648 int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; 649 int hw_cfile_addr[4]; 650 int hw_cfile_elem[4]; 651}; 652 653static const unsigned cycle_for_bank_swizzle_vec[][3] = { 654 [SQ_ALU_VEC_012] = { 0, 1, 2 }, 655 [SQ_ALU_VEC_021] = { 0, 2, 1 }, 656 [SQ_ALU_VEC_120] = { 1, 2, 0 }, 657 [SQ_ALU_VEC_102] = { 1, 0, 2 }, 658 [SQ_ALU_VEC_201] = { 2, 0, 1 }, 659 [SQ_ALU_VEC_210] = { 2, 1, 0 } 660}; 661 662static const unsigned cycle_for_bank_swizzle_scl[][3] = { 663 [SQ_ALU_SCL_210] = { 2, 1, 0 }, 664 [SQ_ALU_SCL_122] = { 1, 2, 2 }, 665 [SQ_ALU_SCL_212] = { 2, 1, 2 }, 666 [SQ_ALU_SCL_221] = { 2, 2, 1 } 667}; 668 669static void init_bank_swizzle(struct alu_bank_swizzle *bs) 670{ 671 int i, cycle, component; 672 /* set up gpr use */ 673 for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) 674 for (component = 0; component < NUM_OF_COMPONENTS; component++) 675 bs->hw_gpr[cycle][component] = -1; 676 for (i = 0; i < 4; i++) 677 bs->hw_cfile_addr[i] = -1; 678 for (i = 0; i < 4; i++) 679 bs->hw_cfile_elem[i] = -1; 680} 681 682static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) 683{ 684 if (bs->hw_gpr[cycle][chan] == -1) 685 bs->hw_gpr[cycle][chan] = sel; 686 else if (bs->hw_gpr[cycle][chan] != (int)sel) { 687 /* Another scalar operation has already used the GPR read port for the channel. 
*/ 688 return -1; 689 } 690 return 0; 691} 692 693static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) 694{ 695 int res, num_res = 4; 696 if (bc->chip_class >= R700) { 697 num_res = 2; 698 chan /= 2; 699 } 700 for (res = 0; res < num_res; ++res) { 701 if (bs->hw_cfile_addr[res] == -1) { 702 bs->hw_cfile_addr[res] = sel; 703 bs->hw_cfile_elem[res] = chan; 704 return 0; 705 } else if (bs->hw_cfile_addr[res] == sel && 706 bs->hw_cfile_elem[res] == chan) 707 return 0; /* Read for this scalar element already reserved, nothing to do here. */ 708 } 709 /* All cfile read ports are used, cannot reference vector element. */ 710 return -1; 711} 712 713static int is_gpr(unsigned sel) 714{ 715 return (sel >= 0 && sel <= 127); 716} 717 718/* CB constants start at 512, and get translated to a kcache index when ALU 719 * clauses are constructed. Note that we handle kcache constants the same way 720 * as (the now gone) cfile constants, is that really required? */ 721static int is_cfile(unsigned sel) 722{ 723 return (sel > 255 && sel < 512) || 724 (sel > 511 && sel < 4607) || /* Kcache before translation. */ 725 (sel > 127 && sel < 192); /* Kcache after translation. */ 726} 727 728static int is_const(int sel) 729{ 730 return is_cfile(sel) || 731 (sel >= V_SQ_ALU_SRC_0 && 732 sel <= V_SQ_ALU_SRC_LITERAL); 733} 734 735static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, 736 struct alu_bank_swizzle *bs, int bank_swizzle) 737{ 738 int r, src, num_src, sel, elem, cycle; 739 740 num_src = r600_bytecode_get_num_operands(bc, alu); 741 for (src = 0; src < num_src; src++) { 742 sel = alu->src[src].sel; 743 elem = alu->src[src].chan; 744 if (is_gpr(sel)) { 745 cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; 746 if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) 747 /* Nothing to do; special-case optimization, 748 * second source uses first source’s reservation. 
*/ 749 continue; 750 else { 751 r = reserve_gpr(bs, sel, elem, cycle); 752 if (r) 753 return r; 754 } 755 } else if (is_cfile(sel)) { 756 r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 757 if (r) 758 return r; 759 } 760 /* No restrictions on PV, PS, literal or special constants. */ 761 } 762 return 0; 763} 764 765static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, 766 struct alu_bank_swizzle *bs, int bank_swizzle) 767{ 768 int r, src, num_src, const_count, sel, elem, cycle; 769 770 num_src = r600_bytecode_get_num_operands(bc, alu); 771 for (const_count = 0, src = 0; src < num_src; ++src) { 772 sel = alu->src[src].sel; 773 elem = alu->src[src].chan; 774 if (is_const(sel)) { /* Any constant, including literal and inline constants. */ 775 if (const_count >= 2) 776 /* More than two references to a constant in 777 * transcendental operation. */ 778 return -1; 779 else 780 const_count++; 781 } 782 if (is_cfile(sel)) { 783 r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem); 784 if (r) 785 return r; 786 } 787 } 788 for (src = 0; src < num_src; ++src) { 789 sel = alu->src[src].sel; 790 elem = alu->src[src].chan; 791 if (is_gpr(sel)) { 792 cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 793 if (cycle < const_count) 794 /* Cycle for GPR load conflicts with 795 * constant load in transcendental operation. */ 796 return -1; 797 r = reserve_gpr(bs, sel, elem, cycle); 798 if (r) 799 return r; 800 } 801 /* PV PS restrictions */ 802 if (const_count && (sel == 254 || sel == 255)) { 803 cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; 804 if (cycle < const_count) 805 return -1; 806 } 807 } 808 return 0; 809} 810 811static int check_and_set_bank_swizzle(struct r600_bytecode *bc, 812 struct r600_bytecode_alu *slots[5]) 813{ 814 struct alu_bank_swizzle bs; 815 int bank_swizzle[5]; 816 int i, r = 0, forced = 1; 817 boolean scalar_only = bc->chip_class == CAYMAN ? 
false : true; 818 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 819 820 for (i = 0; i < max_slots; i++) { 821 if (slots[i]) { 822 if (slots[i]->bank_swizzle_force) { 823 slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; 824 } else { 825 forced = 0; 826 } 827 } 828 829 if (i < 4 && slots[i]) 830 scalar_only = false; 831 } 832 if (forced) 833 return 0; 834 835 /* Just check every possible combination of bank swizzle. 836 * Not very efficent, but works on the first try in most of the cases. */ 837 for (i = 0; i < 4; i++) 838 if (!slots[i] || !slots[i]->bank_swizzle_force) 839 bank_swizzle[i] = SQ_ALU_VEC_012; 840 else 841 bank_swizzle[i] = slots[i]->bank_swizzle; 842 843 bank_swizzle[4] = SQ_ALU_SCL_210; 844 while(bank_swizzle[4] <= SQ_ALU_SCL_221) { 845 846 if (max_slots == 4) { 847 for (i = 0; i < max_slots; i++) { 848 if (bank_swizzle[i] == SQ_ALU_VEC_210) 849 return -1; 850 } 851 } 852 init_bank_swizzle(&bs); 853 if (scalar_only == false) { 854 for (i = 0; i < 4; i++) { 855 if (slots[i]) { 856 r = check_vector(bc, slots[i], &bs, bank_swizzle[i]); 857 if (r) 858 break; 859 } 860 } 861 } else 862 r = 0; 863 864 if (!r && slots[4] && max_slots == 5) { 865 r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); 866 } 867 if (!r) { 868 for (i = 0; i < max_slots; i++) { 869 if (slots[i]) 870 slots[i]->bank_swizzle = bank_swizzle[i]; 871 } 872 return 0; 873 } 874 875 if (scalar_only) { 876 bank_swizzle[4]++; 877 } else { 878 for (i = 0; i < max_slots; i++) { 879 if (!slots[i] || !slots[i]->bank_swizzle_force) { 880 bank_swizzle[i]++; 881 if (bank_swizzle[i] <= SQ_ALU_VEC_210) 882 break; 883 else 884 bank_swizzle[i] = SQ_ALU_VEC_012; 885 } 886 } 887 } 888 } 889 890 /* Couldn't find a working swizzle. 
*/ 891 return -1; 892} 893 894static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, 895 struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev) 896{ 897 struct r600_bytecode_alu *prev[5]; 898 int gpr[5], chan[5]; 899 int i, j, r, src, num_src; 900 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 901 902 r = assign_alu_units(bc, alu_prev, prev); 903 if (r) 904 return r; 905 906 for (i = 0; i < max_slots; ++i) { 907 if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) { 908 gpr[i] = prev[i]->dst.sel; 909 /* cube writes more than PV.X */ 910 if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i])) 911 chan[i] = 0; 912 else 913 chan[i] = prev[i]->dst.chan; 914 } else 915 gpr[i] = -1; 916 } 917 918 for (i = 0; i < max_slots; ++i) { 919 struct r600_bytecode_alu *alu = slots[i]; 920 if(!alu) 921 continue; 922 923 num_src = r600_bytecode_get_num_operands(bc, alu); 924 for (src = 0; src < num_src; ++src) { 925 if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) 926 continue; 927 928 if (bc->chip_class < CAYMAN) { 929 if (alu->src[src].sel == gpr[4] && 930 alu->src[src].chan == chan[4] && 931 alu_prev->pred_sel == alu->pred_sel) { 932 alu->src[src].sel = V_SQ_ALU_SRC_PS; 933 alu->src[src].chan = 0; 934 continue; 935 } 936 } 937 938 for (j = 0; j < 4; ++j) { 939 if (alu->src[src].sel == gpr[j] && 940 alu->src[src].chan == j && 941 alu_prev->pred_sel == alu->pred_sel) { 942 alu->src[src].sel = V_SQ_ALU_SRC_PV; 943 alu->src[src].chan = chan[j]; 944 break; 945 } 946 } 947 } 948 } 949 950 return 0; 951} 952 953void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg) 954{ 955 switch(value) { 956 case 0: 957 *sel = V_SQ_ALU_SRC_0; 958 break; 959 case 1: 960 *sel = V_SQ_ALU_SRC_1_INT; 961 break; 962 case -1: 963 *sel = V_SQ_ALU_SRC_M_1_INT; 964 break; 965 case 0x3F800000: /* 1.0f */ 966 *sel = V_SQ_ALU_SRC_1; 967 break; 968 case 0x3F000000: /* 0.5f */ 969 *sel = V_SQ_ALU_SRC_0_5; 970 break; 
971 case 0xBF800000: /* -1.0f */ 972 *sel = V_SQ_ALU_SRC_1; 973 *neg ^= 1; 974 break; 975 case 0xBF000000: /* -0.5f */ 976 *sel = V_SQ_ALU_SRC_0_5; 977 *neg ^= 1; 978 break; 979 default: 980 *sel = V_SQ_ALU_SRC_LITERAL; 981 break; 982 } 983} 984 985/* compute how many literal are needed */ 986static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, 987 uint32_t literal[4], unsigned *nliteral) 988{ 989 unsigned num_src = r600_bytecode_get_num_operands(bc, alu); 990 unsigned i, j; 991 992 for (i = 0; i < num_src; ++i) { 993 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 994 uint32_t value = alu->src[i].value; 995 unsigned found = 0; 996 for (j = 0; j < *nliteral; ++j) { 997 if (literal[j] == value) { 998 found = 1; 999 break; 1000 } 1001 } 1002 if (!found) { 1003 if (*nliteral >= 4) 1004 return -EINVAL; 1005 literal[(*nliteral)++] = value; 1006 } 1007 } 1008 } 1009 return 0; 1010} 1011 1012static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc, 1013 struct r600_bytecode_alu *alu, 1014 uint32_t literal[4], unsigned nliteral) 1015{ 1016 unsigned num_src = r600_bytecode_get_num_operands(bc, alu); 1017 unsigned i, j; 1018 1019 for (i = 0; i < num_src; ++i) { 1020 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1021 uint32_t value = alu->src[i].value; 1022 for (j = 0; j < nliteral; ++j) { 1023 if (literal[j] == value) { 1024 alu->src[i].chan = j; 1025 break; 1026 } 1027 } 1028 } 1029 } 1030} 1031 1032static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5], 1033 struct r600_bytecode_alu *alu_prev) 1034{ 1035 struct r600_bytecode_alu *prev[5]; 1036 struct r600_bytecode_alu *result[5] = { NULL }; 1037 1038 uint32_t literal[4], prev_literal[4]; 1039 unsigned nliteral = 0, prev_nliteral = 0; 1040 1041 int i, j, r, src, num_src; 1042 int num_once_inst = 0; 1043 int have_mova = 0, have_rel = 0; 1044 int max_slots = bc->chip_class == CAYMAN ? 
4 : 5; 1045 1046 r = assign_alu_units(bc, alu_prev, prev); 1047 if (r) 1048 return r; 1049 1050 for (i = 0; i < max_slots; ++i) { 1051 struct r600_bytecode_alu *alu; 1052 1053 /* check number of literals */ 1054 if (prev[i]) { 1055 if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral)) 1056 return 0; 1057 if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral)) 1058 return 0; 1059 if (is_alu_mova_inst(bc, prev[i])) { 1060 if (have_rel) 1061 return 0; 1062 have_mova = 1; 1063 } 1064 num_once_inst += is_alu_once_inst(bc, prev[i]); 1065 } 1066 if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral)) 1067 return 0; 1068 1069 /* Let's check used slots. */ 1070 if (prev[i] && !slots[i]) { 1071 result[i] = prev[i]; 1072 continue; 1073 } else if (prev[i] && slots[i]) { 1074 if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { 1075 /* Trans unit is still free try to use it. */ 1076 if (is_alu_any_unit_inst(bc, slots[i])) { 1077 result[i] = prev[i]; 1078 result[4] = slots[i]; 1079 } else if (is_alu_any_unit_inst(bc, prev[i])) { 1080 if (slots[i]->dst.sel == prev[i]->dst.sel && 1081 (slots[i]->dst.write == 1 || slots[i]->is_op3) && 1082 (prev[i]->dst.write == 1 || prev[i]->is_op3)) 1083 return 0; 1084 1085 result[i] = slots[i]; 1086 result[4] = prev[i]; 1087 } else 1088 return 0; 1089 } else 1090 return 0; 1091 } else if(!slots[i]) { 1092 continue; 1093 } else { 1094 if (max_slots == 5 && slots[i] && prev[4] && 1095 slots[i]->dst.sel == prev[4]->dst.sel && 1096 slots[i]->dst.chan == prev[4]->dst.chan && 1097 (slots[i]->dst.write == 1 || slots[i]->is_op3) && 1098 (prev[4]->dst.write == 1 || prev[4]->is_op3)) 1099 return 0; 1100 1101 result[i] = slots[i]; 1102 } 1103 1104 alu = slots[i]; 1105 num_once_inst += is_alu_once_inst(bc, alu); 1106 1107 /* don't reschedule NOPs */ 1108 if (is_nop_inst(bc, alu)) 1109 return 0; 1110 1111 /* Let's check dst gpr. 
*/ 1112 if (alu->dst.rel) { 1113 if (have_mova) 1114 return 0; 1115 have_rel = 1; 1116 } 1117 1118 /* Let's check source gprs */ 1119 num_src = r600_bytecode_get_num_operands(bc, alu); 1120 for (src = 0; src < num_src; ++src) { 1121 if (alu->src[src].rel) { 1122 if (have_mova) 1123 return 0; 1124 have_rel = 1; 1125 } 1126 1127 /* Constants don't matter. */ 1128 if (!is_gpr(alu->src[src].sel)) 1129 continue; 1130 1131 for (j = 0; j < max_slots; ++j) { 1132 if (!prev[j] || !(prev[j]->dst.write || prev[j]->is_op3)) 1133 continue; 1134 1135 /* If it's relative then we can't determin which gpr is really used. */ 1136 if (prev[j]->dst.chan == alu->src[src].chan && 1137 (prev[j]->dst.sel == alu->src[src].sel || 1138 prev[j]->dst.rel || alu->src[src].rel)) 1139 return 0; 1140 } 1141 } 1142 } 1143 1144 /* more than one PRED_ or KILL_ ? */ 1145 if (num_once_inst > 1) 1146 return 0; 1147 1148 /* check if the result can still be swizzlet */ 1149 r = check_and_set_bank_swizzle(bc, result); 1150 if (r) 1151 return 0; 1152 1153 /* looks like everything worked out right, apply the changes */ 1154 1155 /* undo adding previus literals */ 1156 bc->cf_last->ndw -= align(prev_nliteral, 2); 1157 1158 /* sort instructions */ 1159 for (i = 0; i < max_slots; ++i) { 1160 slots[i] = result[i]; 1161 if (result[i]) { 1162 LIST_DEL(&result[i]->list); 1163 result[i]->last = 0; 1164 LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu); 1165 } 1166 } 1167 1168 /* determine new last instruction */ 1169 LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1; 1170 1171 /* determine new first instruction */ 1172 for (i = 0; i < max_slots; ++i) { 1173 if (result[i]) { 1174 bc->cf_last->curr_bs_head = result[i]; 1175 break; 1176 } 1177 } 1178 1179 bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head; 1180 bc->cf_last->prev2_bs_head = NULL; 1181 1182 return 0; 1183} 1184 1185/* we'll keep kcache sets sorted by bank & addr */ 1186static int r600_bytecode_alloc_kcache_line(struct 
r600_bytecode *bc, 1187 struct r600_bytecode_kcache *kcache, 1188 unsigned bank, unsigned line) 1189{ 1190 int i, kcache_banks = bc->chip_class >= EVERGREEN ? 4 : 2; 1191 1192 for (i = 0; i < kcache_banks; i++) { 1193 if (kcache[i].mode) { 1194 int d; 1195 1196 if (kcache[i].bank < bank) 1197 continue; 1198 1199 if ((kcache[i].bank == bank && kcache[i].addr > line+1) || 1200 kcache[i].bank > bank) { 1201 /* try to insert new line */ 1202 if (kcache[kcache_banks-1].mode) { 1203 /* all sets are in use */ 1204 return -ENOMEM; 1205 } 1206 1207 memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache)); 1208 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 1209 kcache[i].bank = bank; 1210 kcache[i].addr = line; 1211 return 0; 1212 } 1213 1214 d = line - kcache[i].addr; 1215 1216 if (d == -1) { 1217 kcache[i].addr--; 1218 if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) { 1219 /* we are prepending the line to the current set, 1220 * discarding the existing second line, 1221 * so we'll have to insert line+2 after it */ 1222 line += 2; 1223 continue; 1224 } else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) { 1225 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1226 return 0; 1227 } else { 1228 /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */ 1229 return -ENOMEM; 1230 } 1231 } else if (d == 1) { 1232 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2; 1233 return 0; 1234 } else if (d == 0) 1235 return 0; 1236 } else { /* free kcache set - use it */ 1237 kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1; 1238 kcache[i].bank = bank; 1239 kcache[i].addr = line; 1240 return 0; 1241 } 1242 } 1243 return -ENOMEM; 1244} 1245 1246static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc, 1247 struct r600_bytecode_kcache *kcache, 1248 struct r600_bytecode_alu *alu) 1249{ 1250 int i, r; 1251 1252 for (i = 0; i < 3; i++) { 1253 unsigned bank, line, sel = alu->src[i].sel; 1254 1255 if (sel < 512) 1256 continue; 1257 1258 bank = alu->src[i].kc_bank; 1259 line = (sel-512)>>4; 1260 
1261 if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line))) 1262 return r; 1263 } 1264 return 0; 1265} 1266 1267static int r600_bytecode_assign_kcache_banks(struct r600_bytecode *bc, 1268 struct r600_bytecode_alu *alu, 1269 struct r600_bytecode_kcache * kcache) 1270{ 1271 int i, j; 1272 1273 /* Alter the src operands to refer to the kcache. */ 1274 for (i = 0; i < 3; ++i) { 1275 static const unsigned int base[] = {128, 160, 256, 288}; 1276 unsigned int line, sel = alu->src[i].sel, found = 0; 1277 1278 if (sel < 512) 1279 continue; 1280 1281 sel -= 512; 1282 line = sel>>4; 1283 1284 for (j = 0; j < 4 && !found; ++j) { 1285 switch (kcache[j].mode) { 1286 case V_SQ_CF_KCACHE_NOP: 1287 case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX: 1288 R600_ERR("unexpected kcache line mode\n"); 1289 return -ENOMEM; 1290 default: 1291 if (kcache[j].bank == alu->src[i].kc_bank && 1292 kcache[j].addr <= line && 1293 line < kcache[j].addr + kcache[j].mode) { 1294 alu->src[i].sel = sel - (kcache[j].addr<<4); 1295 alu->src[i].sel += base[j]; 1296 found=1; 1297 } 1298 } 1299 } 1300 } 1301 return 0; 1302} 1303 1304static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type) 1305{ 1306 struct r600_bytecode_kcache kcache_sets[4]; 1307 struct r600_bytecode_kcache *kcache = kcache_sets; 1308 int r; 1309 1310 memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache)); 1311 1312 if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1313 /* can't alloc, need to start new clause */ 1314 if ((r = r600_bytecode_add_cf(bc))) { 1315 return r; 1316 } 1317 bc->cf_last->inst = type; 1318 1319 /* retry with the new clause */ 1320 kcache = bc->cf_last->kcache; 1321 if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) { 1322 /* can't alloc again- should never happen */ 1323 return r; 1324 } 1325 } else { 1326 /* update kcache sets */ 1327 memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache)); 
1328 } 1329 1330 /* if we actually used more than 2 kcache sets - use ALU_EXTENDED on eg+ */ 1331 if (kcache[2].mode != V_SQ_CF_KCACHE_NOP) { 1332 if (bc->chip_class < EVERGREEN) 1333 return -ENOMEM; 1334 bc->cf_last->eg_alu_extended = 1; 1335 } 1336 1337 return 0; 1338} 1339 1340static int insert_nop_r6xx(struct r600_bytecode *bc) 1341{ 1342 struct r600_bytecode_alu alu; 1343 int r, i; 1344 1345 for (i = 0; i < 4; i++) { 1346 memset(&alu, 0, sizeof(alu)); 1347 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; 1348 alu.src[0].chan = i; 1349 alu.dst.chan = i; 1350 alu.last = (i == 3); 1351 r = r600_bytecode_add_alu(bc, &alu); 1352 if (r) 1353 return r; 1354 } 1355 return 0; 1356} 1357 1358/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1359static int load_ar_r6xx(struct r600_bytecode *bc) 1360{ 1361 struct r600_bytecode_alu alu; 1362 int r; 1363 1364 if (bc->ar_loaded) 1365 return 0; 1366 1367 /* hack to avoid making MOVA the last instruction in the clause */ 1368 if ((bc->cf_last->ndw>>1) >= 110) 1369 bc->force_add_cf = 1; 1370 1371 memset(&alu, 0, sizeof(alu)); 1372 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT; 1373 alu.src[0].sel = bc->ar_reg; 1374 alu.last = 1; 1375 alu.index_mode = INDEX_MODE_LOOP; 1376 r = r600_bytecode_add_alu(bc, &alu); 1377 if (r) 1378 return r; 1379 1380 /* no requirement to set uses waterfall on MOVA_GPR_INT */ 1381 bc->ar_loaded = 1; 1382 return 0; 1383} 1384 1385/* load AR register from gpr (bc->ar_reg) with MOVA_INT */ 1386static int load_ar(struct r600_bytecode *bc) 1387{ 1388 struct r600_bytecode_alu alu; 1389 int r; 1390 1391 if (bc->ar_handling) 1392 return load_ar_r6xx(bc); 1393 1394 if (bc->ar_loaded) 1395 return 0; 1396 1397 /* hack to avoid making MOVA the last instruction in the clause */ 1398 if ((bc->cf_last->ndw>>1) >= 110) 1399 bc->force_add_cf = 1; 1400 1401 memset(&alu, 0, sizeof(alu)); 1402 alu.inst = BC_INST(bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); 1403 alu.src[0].sel = bc->ar_reg; 1404 alu.last 
= 1; 1405 r = r600_bytecode_add_alu(bc, &alu); 1406 if (r) 1407 return r; 1408 1409 bc->cf_last->r6xx_uses_waterfall = 1; 1410 bc->ar_loaded = 1; 1411 return 0; 1412} 1413 1414int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type) 1415{ 1416 struct r600_bytecode_alu *nalu = r600_bytecode_alu(); 1417 struct r600_bytecode_alu *lalu; 1418 int i, r; 1419 1420 if (nalu == NULL) 1421 return -ENOMEM; 1422 memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); 1423 1424 if (bc->cf_last != NULL && bc->cf_last->inst != type) { 1425 /* check if we could add it anyway */ 1426 if (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) && 1427 type == BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)) { 1428 LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { 1429 if (lalu->execute_mask) { 1430 bc->force_add_cf = 1; 1431 break; 1432 } 1433 } 1434 } else 1435 bc->force_add_cf = 1; 1436 } 1437 1438 /* cf can contains only alu or only vtx or only tex */ 1439 if (bc->cf_last == NULL || bc->force_add_cf) { 1440 r = r600_bytecode_add_cf(bc); 1441 if (r) { 1442 free(nalu); 1443 return r; 1444 } 1445 } 1446 bc->cf_last->inst = type; 1447 1448 /* Check AR usage and load it if required */ 1449 for (i = 0; i < 3; i++) 1450 if (nalu->src[i].rel && !bc->ar_loaded) 1451 load_ar(bc); 1452 1453 if (nalu->dst.rel && !bc->ar_loaded) 1454 load_ar(bc); 1455 1456 /* Setup the kcache for this ALU instruction. This will start a new 1457 * ALU clause if needed. 
*/ 1458 if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) { 1459 free(nalu); 1460 return r; 1461 } 1462 1463 if (!bc->cf_last->curr_bs_head) { 1464 bc->cf_last->curr_bs_head = nalu; 1465 } 1466 /* number of gpr == the last gpr used in any alu */ 1467 for (i = 0; i < 3; i++) { 1468 if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { 1469 bc->ngpr = nalu->src[i].sel + 1; 1470 } 1471 if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) 1472 r600_bytecode_special_constants(nalu->src[i].value, 1473 &nalu->src[i].sel, &nalu->src[i].neg); 1474 } 1475 if (nalu->dst.sel >= bc->ngpr) { 1476 bc->ngpr = nalu->dst.sel + 1; 1477 } 1478 LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); 1479 /* each alu use 2 dwords */ 1480 bc->cf_last->ndw += 2; 1481 bc->ndw += 2; 1482 1483 /* process cur ALU instructions for bank swizzle */ 1484 if (nalu->last) { 1485 uint32_t literal[4]; 1486 unsigned nliteral; 1487 struct r600_bytecode_alu *slots[5]; 1488 int max_slots = bc->chip_class == CAYMAN ? 4 : 5; 1489 r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); 1490 if (r) 1491 return r; 1492 1493 if (bc->cf_last->prev_bs_head) { 1494 r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head); 1495 if (r) 1496 return r; 1497 } 1498 1499 if (bc->cf_last->prev_bs_head) { 1500 r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head); 1501 if (r) 1502 return r; 1503 } 1504 1505 r = check_and_set_bank_swizzle(bc, slots); 1506 if (r) 1507 return r; 1508 1509 for (i = 0, nliteral = 0; i < max_slots; i++) { 1510 if (slots[i]) { 1511 r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral); 1512 if (r) 1513 return r; 1514 } 1515 } 1516 bc->cf_last->ndw += align(nliteral, 2); 1517 1518 /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots) 1519 * worst case */ 1520 if ((bc->cf_last->ndw >> 1) >= 120) { 1521 bc->force_add_cf = 1; 1522 } 1523 1524 bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head; 1525 bc->cf_last->prev_bs_head = 
bc->cf_last->curr_bs_head; 1526 bc->cf_last->curr_bs_head = NULL; 1527 } 1528 1529 if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst) 1530 insert_nop_r6xx(bc); 1531 1532 return 0; 1533} 1534 1535int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu) 1536{ 1537 return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 1538} 1539 1540static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc) 1541{ 1542 switch (bc->chip_class) { 1543 case R600: 1544 return 8; 1545 1546 case R700: 1547 case EVERGREEN: 1548 case CAYMAN: 1549 return 16; 1550 1551 default: 1552 R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1553 return 8; 1554 } 1555} 1556 1557static inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc) 1558{ 1559 switch (bc->chip_class) { 1560 case R700: 1561 case R600: 1562 return bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && 1563 bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC; 1564 case EVERGREEN: 1565 return bc->cf_last->inst != EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX; 1566 case CAYMAN: 1567 return bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; 1568 default: 1569 R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1570 return FALSE; 1571 } 1572} 1573 1574int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx) 1575{ 1576 struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); 1577 int r; 1578 1579 if (nvtx == NULL) 1580 return -ENOMEM; 1581 memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); 1582 1583 /* cf can contains only alu or only vtx or only tex */ 1584 if (bc->cf_last == NULL || 1585 last_inst_was_not_vtx_fetch(bc) || 1586 bc->force_add_cf) { 1587 r = r600_bytecode_add_cf(bc); 1588 if (r) { 1589 free(nvtx); 1590 return r; 1591 } 1592 switch (bc->chip_class) { 1593 case R600: 1594 case R700: 1595 bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; 1596 break; 1597 case EVERGREEN: 1598 bc->cf_last->inst 
= EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX; 1599 break; 1600 case CAYMAN: 1601 bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; 1602 break; 1603 default: 1604 R600_ERR("Unknown chip class %d.\n", bc->chip_class); 1605 return -EINVAL; 1606 } 1607 } 1608 LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); 1609 /* each fetch use 4 dwords */ 1610 bc->cf_last->ndw += 4; 1611 bc->ndw += 4; 1612 if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1613 bc->force_add_cf = 1; 1614 1615 bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1); 1616 bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1); 1617 1618 return 0; 1619} 1620 1621int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex) 1622{ 1623 struct r600_bytecode_tex *ntex = r600_bytecode_tex(); 1624 int r; 1625 1626 if (ntex == NULL) 1627 return -ENOMEM; 1628 memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); 1629 1630 /* we can't fetch data und use it as texture lookup address in the same TEX clause */ 1631 if (bc->cf_last != NULL && 1632 bc->cf_last->inst == BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX)) { 1633 struct r600_bytecode_tex *ttex; 1634 LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) { 1635 if (ttex->dst_gpr == ntex->src_gpr) { 1636 bc->force_add_cf = 1; 1637 break; 1638 } 1639 } 1640 /* slight hack to make gradients always go into same cf */ 1641 if (ntex->inst == SQ_TEX_INST_SET_GRADIENTS_H) 1642 bc->force_add_cf = 1; 1643 } 1644 1645 /* cf can contains only alu or only vtx or only tex */ 1646 if (bc->cf_last == NULL || 1647 bc->cf_last->inst != BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX) || 1648 bc->force_add_cf) { 1649 r = r600_bytecode_add_cf(bc); 1650 if (r) { 1651 free(ntex); 1652 return r; 1653 } 1654 bc->cf_last->inst = BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX); 1655 } 1656 if (ntex->src_gpr >= bc->ngpr) { 1657 bc->ngpr = ntex->src_gpr + 1; 1658 } 1659 if (ntex->dst_gpr >= bc->ngpr) { 1660 bc->ngpr = ntex->dst_gpr + 1; 1661 } 1662 LIST_ADDTAIL(&ntex->list, 
&bc->cf_last->tex); 1663 /* each texture fetch use 4 dwords */ 1664 bc->cf_last->ndw += 4; 1665 bc->ndw += 4; 1666 if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) 1667 bc->force_add_cf = 1; 1668 return 0; 1669} 1670 1671int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst) 1672{ 1673 int r; 1674 r = r600_bytecode_add_cf(bc); 1675 if (r) 1676 return r; 1677 1678 bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; 1679 bc->cf_last->inst = inst; 1680 return 0; 1681} 1682 1683int cm_bytecode_add_cf_end(struct r600_bytecode *bc) 1684{ 1685 return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END); 1686} 1687 1688/* common to all 3 families */ 1689static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) 1690{ 1691 bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | 1692 S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | 1693 S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | 1694 S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); 1695 if (bc->chip_class < CAYMAN) 1696 bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); 1697 id++; 1698 bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | 1699 S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | 1700 S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | 1701 S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) | 1702 S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) | 1703 S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) | 1704 S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) | 1705 S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | 1706 S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | 1707 S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); 1708 bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| 1709 S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); 1710 if (bc->chip_class < CAYMAN) 1711 bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); 1712 id++; 1713 bc->bytecode[id++] = 0; 1714 return 0; 1715} 1716 1717/* common to all 3 families */ 1718static int 
r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id) 1719{ 1720 bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | 1721 S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) | 1722 S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) | 1723 S_SQ_TEX_WORD0_SRC_REL(tex->src_rel); 1724 bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) | 1725 S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) | 1726 S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) | 1727 S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) | 1728 S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) | 1729 S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) | 1730 S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) | 1731 S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) | 1732 S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) | 1733 S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) | 1734 S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w); 1735 bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) | 1736 S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) | 1737 S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) | 1738 S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) | 1739 S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) | 1740 S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) | 1741 S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) | 1742 S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w); 1743 bc->bytecode[id++] = 0; 1744 return 0; 1745} 1746 1747/* r600 only, r700/eg bits in r700_asm.c */ 1748static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id) 1749{ 1750 /* don't replace gpr by pv or ps for destination register */ 1751 bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | 1752 S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | 1753 S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | 1754 S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | 1755 S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | 1756 S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | 1757 S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | 1758 S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | 1759 
S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) | 1760 S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) | 1761 S_SQ_ALU_WORD0_LAST(alu->last); 1762 1763 if (alu->is_op3) { 1764 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1765 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1766 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1767 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1768 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | 1769 S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | 1770 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | 1771 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | 1772 S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | 1773 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle); 1774 } else { 1775 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | 1776 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | 1777 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | 1778 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | 1779 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | 1780 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | 1781 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | 1782 S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) | 1783 S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | 1784 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | 1785 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) | 1786 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred); 1787 } 1788 return 0; 1789} 1790 1791static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf) 1792{ 1793 *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1); 1794 *bytecode++ = cf->inst | 1795 S_SQ_CF_WORD1_BARRIER(1) | 1796 S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1); 1797} 1798 1799/* common for r600/r700 - eg in eg_asm.c */ 1800static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) 1801{ 1802 unsigned id = cf->id; 1803 1804 switch (cf->inst) { 1805 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 1806 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 1807 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 1808 case 
V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 1809 bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | 1810 S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) | 1811 S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) | 1812 S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank); 1813 1814 bc->bytecode[id++] = cf->inst | 1815 S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) | 1816 S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) | 1817 S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) | 1818 S_SQ_CF_ALU_WORD1_BARRIER(1) | 1819 S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chip_class == R600 ? cf->r6xx_uses_waterfall : 0) | 1820 S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); 1821 break; 1822 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 1823 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 1824 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 1825 if (bc->chip_class == R700) 1826 r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1827 else 1828 r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf); 1829 break; 1830 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 1831 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 1832 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1833 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1834 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1835 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); 1836 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1837 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | 1838 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | 1839 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | 1840 S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | 1841 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | 1842 cf->output.inst | 1843 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); 1844 break; 1845 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 1846 case 
V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 1847 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 1848 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 1849 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | 1850 S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | 1851 S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | 1852 S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); 1853 bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | 1854 S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | 1855 cf->output.inst | 1856 S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) | 1857 S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) | 1858 S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask); 1859 break; 1860 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 1861 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 1862 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 1863 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 1864 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 1865 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 1866 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 1867 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 1868 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 1869 bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); 1870 bc->bytecode[id++] = cf->inst | 1871 S_SQ_CF_WORD1_BARRIER(1) | 1872 S_SQ_CF_WORD1_COND(cf->cond) | 1873 S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); 1874 1875 break; 1876 default: 1877 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 1878 return -EINVAL; 1879 } 1880 return 0; 1881} 1882 1883int r600_bytecode_build(struct r600_bytecode *bc) 1884{ 1885 struct r600_bytecode_cf *cf; 1886 struct r600_bytecode_alu *alu; 1887 struct r600_bytecode_vtx *vtx; 1888 struct r600_bytecode_tex *tex; 1889 uint32_t literal[4]; 1890 unsigned nliteral; 1891 unsigned addr; 1892 int i, r; 1893 1894 if (bc->callstack[0].max > 0) 1895 bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; 
1896 if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) { 1897 bc->nstack = 1; 1898 } 1899 1900 /* first path compute addr of each CF block */ 1901 /* addr start after all the CF instructions */ 1902 addr = bc->cf_last->id + 2; 1903 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1904 if (bc->chip_class >= EVERGREEN) { 1905 switch (cf->inst) { 1906 case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: 1907 case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: 1908 /* fetch node need to be 16 bytes aligned*/ 1909 addr += 3; 1910 addr &= 0xFFFFFFFCUL; 1911 break; 1912 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 1913 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 1914 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 1915 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 1916 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 1917 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 1918 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0: 1919 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1: 1920 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2: 1921 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3: 1922 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0: 1923 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1: 1924 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2: 1925 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3: 1926 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0: 1927 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1: 1928 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2: 1929 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3: 1930 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0: 1931 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: 1932 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: 1933 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: 1934 case 
EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 1935 case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 1936 case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: 1937 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 1938 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 1939 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 1940 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 1941 case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 1942 case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 1943 case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: 1944 case CF_NATIVE: 1945 break; 1946 default: 1947 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 1948 return -EINVAL; 1949 } 1950 } else { 1951 switch (cf->inst) { 1952 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 1953 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 1954 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 1955 /* fetch node need to be 16 bytes aligned*/ 1956 addr += 3; 1957 addr &= 0xFFFFFFFCUL; 1958 break; 1959 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 1960 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 1961 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 1962 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 1963 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 1964 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 1965 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 1966 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 1967 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 1968 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 1969 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 1970 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 1971 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 1972 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 1973 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 1974 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 1975 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 1976 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 1977 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 1978 break; 1979 default: 1980 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 1981 return -EINVAL; 1982 } 1983 } 1984 cf->addr = addr; 1985 addr 
+= cf->ndw; 1986 bc->ndw = cf->addr + cf->ndw; 1987 } 1988 free(bc->bytecode); 1989 bc->bytecode = calloc(1, bc->ndw * 4); 1990 if (bc->bytecode == NULL) 1991 return -ENOMEM; 1992 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 1993 addr = cf->addr; 1994 if (bc->chip_class >= EVERGREEN) { 1995 r = eg_bytecode_cf_build(bc, cf); 1996 if (r) 1997 return r; 1998 1999 switch (cf->inst) { 2000 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2001 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2002 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2003 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2004 nliteral = 0; 2005 memset(literal, 0, sizeof(literal)); 2006 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2007 r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); 2008 if (r) 2009 return r; 2010 r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); 2011 r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache); 2012 2013 switch(bc->chip_class) { 2014 case EVERGREEN: /* eg alu is same encoding as r700 */ 2015 case CAYMAN: 2016 r = r700_bytecode_alu_build(bc, alu, addr); 2017 break; 2018 default: 2019 R600_ERR("unknown chip class %d.\n", bc->chip_class); 2020 return -EINVAL; 2021 } 2022 if (r) 2023 return r; 2024 addr += 2; 2025 if (alu->last) { 2026 for (i = 0; i < align(nliteral, 2); ++i) { 2027 bc->bytecode[addr++] = literal[i]; 2028 } 2029 nliteral = 0; 2030 memset(literal, 0, sizeof(literal)); 2031 } 2032 } 2033 break; 2034 case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2035 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2036 r = r600_bytecode_vtx_build(bc, vtx, addr); 2037 if (r) 2038 return r; 2039 addr += 4; 2040 } 2041 break; 2042 case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2043 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2044 assert(bc->chip_class >= EVERGREEN); 2045 r = r600_bytecode_vtx_build(bc, vtx, addr); 2046 if (r) 2047 return r; 2048 addr += 4; 2049 } 2050 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2051 r = r600_bytecode_tex_build(bc, tex, addr); 2052 if (r) 
2053 return r; 2054 addr += 4; 2055 } 2056 break; 2057 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2058 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2059 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0: 2060 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1: 2061 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2: 2062 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3: 2063 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0: 2064 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1: 2065 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2: 2066 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3: 2067 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0: 2068 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1: 2069 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2: 2070 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3: 2071 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0: 2072 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: 2073 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: 2074 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: 2075 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2076 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2077 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2078 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2079 case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2080 case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2081 case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: 2082 case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2083 case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2084 case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: 2085 break; 2086 case CF_NATIVE: 2087 break; 2088 default: 2089 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 2090 return -EINVAL; 2091 } 2092 } else { 2093 r = r600_bytecode_cf_build(bc, cf); 2094 if (r) 2095 return r; 2096 2097 
switch (cf->inst) { 2098 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2099 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2100 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2101 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2102 nliteral = 0; 2103 memset(literal, 0, sizeof(literal)); 2104 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2105 r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); 2106 if (r) 2107 return r; 2108 r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral); 2109 r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache); 2110 2111 switch(bc->chip_class) { 2112 case R600: 2113 r = r600_bytecode_alu_build(bc, alu, addr); 2114 break; 2115 case R700: 2116 r = r700_bytecode_alu_build(bc, alu, addr); 2117 break; 2118 default: 2119 R600_ERR("unknown chip class %d.\n", bc->chip_class); 2120 return -EINVAL; 2121 } 2122 if (r) 2123 return r; 2124 addr += 2; 2125 if (alu->last) { 2126 for (i = 0; i < align(nliteral, 2); ++i) { 2127 bc->bytecode[addr++] = literal[i]; 2128 } 2129 nliteral = 0; 2130 memset(literal, 0, sizeof(literal)); 2131 } 2132 } 2133 break; 2134 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2135 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 2136 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2137 r = r600_bytecode_vtx_build(bc, vtx, addr); 2138 if (r) 2139 return r; 2140 addr += 4; 2141 } 2142 break; 2143 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2144 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2145 r = r600_bytecode_tex_build(bc, tex, addr); 2146 if (r) 2147 return r; 2148 addr += 4; 2149 } 2150 break; 2151 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2152 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2153 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 2154 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 2155 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 2156 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 2157 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2158 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 
2159 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2160 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2161 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2162 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2163 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 2164 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2165 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2166 break; 2167 default: 2168 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); 2169 return -EINVAL; 2170 } 2171 } 2172 } 2173 return 0; 2174} 2175 2176void r600_bytecode_clear(struct r600_bytecode *bc) 2177{ 2178 struct r600_bytecode_cf *cf = NULL, *next_cf; 2179 2180 free(bc->bytecode); 2181 bc->bytecode = NULL; 2182 2183 LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { 2184 struct r600_bytecode_alu *alu = NULL, *next_alu; 2185 struct r600_bytecode_tex *tex = NULL, *next_tex; 2186 struct r600_bytecode_tex *vtx = NULL, *next_vtx; 2187 2188 LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { 2189 free(alu); 2190 } 2191 2192 LIST_INITHEAD(&cf->alu); 2193 2194 LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) { 2195 free(tex); 2196 } 2197 2198 LIST_INITHEAD(&cf->tex); 2199 2200 LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) { 2201 free(vtx); 2202 } 2203 2204 LIST_INITHEAD(&cf->vtx); 2205 2206 free(cf); 2207 } 2208 2209 LIST_INITHEAD(&cf->list); 2210} 2211 2212void r600_bytecode_dump(struct r600_bytecode *bc) 2213{ 2214 struct r600_bytecode_cf *cf = NULL; 2215 struct r600_bytecode_alu *alu = NULL; 2216 struct r600_bytecode_vtx *vtx = NULL; 2217 struct r600_bytecode_tex *tex = NULL; 2218 2219 unsigned i, id; 2220 uint32_t literal[4]; 2221 unsigned nliteral; 2222 char chip = '6'; 2223 2224 switch (bc->chip_class) { 2225 case R700: 2226 chip = '7'; 2227 break; 2228 case EVERGREEN: 2229 chip = 'E'; 2230 break; 2231 case CAYMAN: 2232 chip = 'C'; 2233 break; 2234 case R600: 2235 default: 2236 chip = '6'; 2237 break; 2238 } 2239 fprintf(stderr, "bytecode %d dw -- %d gprs ---------------------\n", bc->ndw, bc->ngpr); 2240 fprintf(stderr, " 
%c\n", chip); 2241 2242 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 2243 id = cf->id; 2244 2245 if (bc->chip_class >= EVERGREEN) { 2246 switch (cf->inst) { 2247 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2248 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2249 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2250 case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2251 if (cf->eg_alu_extended) { 2252 fprintf(stderr, "%04d %08X ALU_EXT0 ", id, bc->bytecode[id]); 2253 fprintf(stderr, "KCACHE_BANK2:%X ", cf->kcache[2].bank); 2254 fprintf(stderr, "KCACHE_BANK3:%X ", cf->kcache[3].bank); 2255 fprintf(stderr, "KCACHE_MODE2:%X\n", cf->kcache[2].mode); 2256 id++; 2257 fprintf(stderr, "%04d %08X ALU_EXT1 ", id, bc->bytecode[id]); 2258 fprintf(stderr, "KCACHE_MODE3:%X ", cf->kcache[3].mode); 2259 fprintf(stderr, "KCACHE_ADDR2:%X ", cf->kcache[2].addr); 2260 fprintf(stderr, "KCACHE_ADDR3:%X\n", cf->kcache[3].addr); 2261 id++; 2262 } 2263 2264 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2265 fprintf(stderr, "ADDR:%d ", cf->addr); 2266 fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); 2267 fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); 2268 fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); 2269 id++; 2270 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2271 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_ALU_WORD1_CF_INST(cf->inst)); 2272 fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); 2273 fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); 2274 fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); 2275 fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); 2276 break; 2277 case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2278 case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2279 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2280 fprintf(stderr, "ADDR:%d\n", cf->addr); 2281 id++; 2282 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2283 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2284 
fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); 2285 break; 2286 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2287 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2288 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2289 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2290 fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); 2291 fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); 2292 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2293 id++; 2294 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2295 fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); 2296 fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); 2297 fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); 2298 fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); 2299 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2300 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst)); 2301 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2302 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2303 break; 2304 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0: 2305 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1: 2306 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2: 2307 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3: 2308 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0: 2309 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1: 2310 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2: 2311 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3: 2312 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0: 2313 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1: 2314 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2: 2315 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3: 2316 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0: 2317 case 
EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: 2318 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: 2319 case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: 2320 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i_BUF%i ", id, bc->bytecode[id], 2321 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2322 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) / 4, 2323 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2324 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) % 4); 2325 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2326 fprintf(stderr, "ELEM_SIZE:%i ", cf->output.elem_size); 2327 fprintf(stderr, "ARRAY_BASE:%i ", cf->output.array_base); 2328 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2329 id++; 2330 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i_BUF%i ", id, bc->bytecode[id], 2331 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2332 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) / 4, 2333 (EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2334 EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0)) % 4); 2335 fprintf(stderr, "ARRAY_SIZE:%i ", cf->output.array_size); 2336 fprintf(stderr, "COMP_MASK:%X ", cf->output.comp_mask); 2337 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2338 fprintf(stderr, "INST:%d ", cf->output.inst); 2339 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2340 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2341 break; 2342 case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2343 case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2344 case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: 2345 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2346 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2347 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2348 case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2349 case 
EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2350 case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2351 case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: 2352 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2353 fprintf(stderr, "ADDR:%d\n", cf->cf_addr); 2354 id++; 2355 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2356 fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2357 fprintf(stderr, "COND:%X ", cf->cond); 2358 fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); 2359 break; 2360 case CF_NATIVE: 2361 fprintf(stderr, "%04d %08X CF NATIVE\n", id, bc->bytecode[id]); 2362 fprintf(stderr, "%04d %08X CF NATIVE\n", id + 1, bc->bytecode[id + 1]); 2363 break; 2364 default: 2365 R600_ERR("Unknown instruction %0x\n", cf->inst); 2366 } 2367 } else { 2368 switch (cf->inst) { 2369 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU: 2370 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER: 2371 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER: 2372 case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE: 2373 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2374 fprintf(stderr, "ADDR:%d ", cf->addr); 2375 fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode); 2376 fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank); 2377 fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank); 2378 id++; 2379 fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]); 2380 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_ALU_WORD1_CF_INST(cf->inst)); 2381 fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode); 2382 fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr); 2383 fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr); 2384 fprintf(stderr, "COUNT:%d\n", cf->ndw / 2); 2385 break; 2386 case V_SQ_CF_WORD1_SQ_CF_INST_TEX: 2387 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 2388 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 2389 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2390 fprintf(stderr, "ADDR:%d\n", cf->addr); 2391 id++; 2392 fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]); 2393 
fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2394 fprintf(stderr, "COUNT:%d\n", cf->ndw / 4); 2395 break; 2396 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: 2397 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: 2398 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2399 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2400 fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); 2401 fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base); 2402 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2403 id++; 2404 fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); 2405 fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x); 2406 fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y); 2407 fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z); 2408 fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w); 2409 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2410 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst)); 2411 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2412 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2413 break; 2414 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0: 2415 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1: 2416 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: 2417 case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: 2418 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i ", id, bc->bytecode[id], 2419 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2420 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0)); 2421 fprintf(stderr, "GPR:%X ", cf->output.gpr); 2422 fprintf(stderr, "ELEM_SIZE:%i ", cf->output.elem_size); 2423 fprintf(stderr, "ARRAY_BASE:%i ", cf->output.array_base); 2424 fprintf(stderr, "TYPE:%X\n", cf->output.type); 2425 id++; 2426 fprintf(stderr, "%04d %08X EXPORT MEM_STREAM%i ", id, bc->bytecode[id], 2427 R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) - 2428 
R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0)); 2429 fprintf(stderr, "ARRAY_SIZE:%i ", cf->output.array_size); 2430 fprintf(stderr, "COMP_MASK:%X ", cf->output.comp_mask); 2431 fprintf(stderr, "BARRIER:%X ", cf->output.barrier); 2432 fprintf(stderr, "INST:%d ", cf->output.inst); 2433 fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count); 2434 fprintf(stderr, "EOP:%X\n", cf->output.end_of_program); 2435 break; 2436 case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: 2437 case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: 2438 case V_SQ_CF_WORD1_SQ_CF_INST_POP: 2439 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: 2440 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: 2441 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: 2442 case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: 2443 case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: 2444 case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: 2445 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2446 fprintf(stderr, "ADDR:%d\n", cf->cf_addr); 2447 id++; 2448 fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); 2449 fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst)); 2450 fprintf(stderr, "COND:%X ", cf->cond); 2451 fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); 2452 break; 2453 default: 2454 R600_ERR("Unknown instruction %0x\n", cf->inst); 2455 } 2456 } 2457 2458 id = cf->addr; 2459 nliteral = 0; 2460 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { 2461 r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral); 2462 2463 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2464 fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); 2465 fprintf(stderr, "REL:%d ", alu->src[0].rel); 2466 fprintf(stderr, "CHAN:%d ", alu->src[0].chan); 2467 fprintf(stderr, "NEG:%d) ", alu->src[0].neg); 2468 fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel); 2469 fprintf(stderr, "REL:%d ", alu->src[1].rel); 2470 fprintf(stderr, "CHAN:%d ", alu->src[1].chan); 2471 fprintf(stderr, "NEG:%d ", alu->src[1].neg); 2472 fprintf(stderr, "IM:%d) ", alu->index_mode); 
2473 fprintf(stderr, "PRED_SEL:%d ", alu->pred_sel); 2474 fprintf(stderr, "LAST:%d)\n", alu->last); 2475 id++; 2476 fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' '); 2477 fprintf(stderr, "INST:0x%x ", alu->inst); 2478 fprintf(stderr, "DST(SEL:%d ", alu->dst.sel); 2479 fprintf(stderr, "CHAN:%d ", alu->dst.chan); 2480 fprintf(stderr, "REL:%d ", alu->dst.rel); 2481 fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp); 2482 fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle); 2483 if (alu->is_op3) { 2484 fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel); 2485 fprintf(stderr, "REL:%d ", alu->src[2].rel); 2486 fprintf(stderr, "CHAN:%d ", alu->src[2].chan); 2487 fprintf(stderr, "NEG:%d)\n", alu->src[2].neg); 2488 } else { 2489 fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs); 2490 fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs); 2491 fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write); 2492 fprintf(stderr, "OMOD:%d ", alu->omod); 2493 fprintf(stderr, "EXECUTE_MASK:%d ", alu->execute_mask); 2494 fprintf(stderr, "UPDATE_PRED:%d\n", alu->update_pred); 2495 } 2496 2497 id++; 2498 if (alu->last) { 2499 for (i = 0; i < nliteral; i++, id++) { 2500 float *f = (float*)(bc->bytecode + id); 2501 fprintf(stderr, "%04d %08X\t%f (%d)\n", id, bc->bytecode[id], *f, 2502 *(bc->bytecode + id)); 2503 } 2504 id += nliteral & 1; 2505 nliteral = 0; 2506 } 2507 } 2508 2509 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { 2510 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2511 fprintf(stderr, "INST:0x%x ", tex->inst); 2512 fprintf(stderr, "RESOURCE_ID:%d ", tex->resource_id); 2513 fprintf(stderr, "SRC(GPR:%d ", tex->src_gpr); 2514 fprintf(stderr, "REL:%d)\n", tex->src_rel); 2515 id++; 2516 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2517 fprintf(stderr, "DST(GPR:%d ", tex->dst_gpr); 2518 fprintf(stderr, "REL:%d ", tex->dst_rel); 2519 fprintf(stderr, "SEL_X:%d ", tex->dst_sel_x); 2520 fprintf(stderr, "SEL_Y:%d ", tex->dst_sel_y); 2521 fprintf(stderr, "SEL_Z:%d ", 
tex->dst_sel_z); 2522 fprintf(stderr, "SEL_W:%d) ", tex->dst_sel_w); 2523 fprintf(stderr, "LOD_BIAS:%d ", tex->lod_bias); 2524 fprintf(stderr, "COORD_TYPE_X:%d ", tex->coord_type_x); 2525 fprintf(stderr, "COORD_TYPE_Y:%d ", tex->coord_type_y); 2526 fprintf(stderr, "COORD_TYPE_Z:%d ", tex->coord_type_z); 2527 fprintf(stderr, "COORD_TYPE_W:%d\n", tex->coord_type_w); 2528 id++; 2529 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2530 fprintf(stderr, "OFFSET_X:%d ", tex->offset_x); 2531 fprintf(stderr, "OFFSET_Y:%d ", tex->offset_y); 2532 fprintf(stderr, "OFFSET_Z:%d ", tex->offset_z); 2533 fprintf(stderr, "SAMPLER_ID:%d ", tex->sampler_id); 2534 fprintf(stderr, "SRC(SEL_X:%d ", tex->src_sel_x); 2535 fprintf(stderr, "SEL_Y:%d ", tex->src_sel_y); 2536 fprintf(stderr, "SEL_Z:%d ", tex->src_sel_z); 2537 fprintf(stderr, "SEL_W:%d)\n", tex->src_sel_w); 2538 id++; 2539 fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); 2540 id++; 2541 } 2542 2543 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 2544 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2545 fprintf(stderr, "INST:%d ", vtx->inst); 2546 fprintf(stderr, "FETCH_TYPE:%d ", vtx->fetch_type); 2547 fprintf(stderr, "BUFFER_ID:%d\n", vtx->buffer_id); 2548 id++; 2549 /* This assumes that no semantic fetches exist */ 2550 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2551 fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); 2552 fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); 2553 if (bc->chip_class < CAYMAN) 2554 fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); 2555 else 2556 fprintf(stderr, "SEL_Y:%d) ", 0); 2557 fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); 2558 fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); 2559 fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); 2560 fprintf(stderr, "SEL_Z:%d ", vtx->dst_sel_z); 2561 fprintf(stderr, "SEL_W:%d) ", vtx->dst_sel_w); 2562 fprintf(stderr, "USE_CONST_FIELDS:%d ", vtx->use_const_fields); 2563 fprintf(stderr, "FORMAT(DATA:%d ", vtx->data_format); 2564 
fprintf(stderr, "NUM:%d ", vtx->num_format_all); 2565 fprintf(stderr, "COMP:%d ", vtx->format_comp_all); 2566 fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); 2567 id++; 2568 fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); 2569 fprintf(stderr, "ENDIAN:%d ", vtx->endian); 2570 fprintf(stderr, "OFFSET:%d\n", vtx->offset); 2571 /* XXX */ 2572 id++; 2573 fprintf(stderr, "%04d %08X \n", id, bc->bytecode[id]); 2574 id++; 2575 } 2576 } 2577 2578 fprintf(stderr, "--------------------------------------\n"); 2579} 2580 2581static void r600_vertex_data_type(enum pipe_format pformat, 2582 unsigned *format, 2583 unsigned *num_format, unsigned *format_comp, unsigned *endian) 2584{ 2585 const struct util_format_description *desc; 2586 unsigned i; 2587 2588 *format = 0; 2589 *num_format = 0; 2590 *format_comp = 0; 2591 *endian = ENDIAN_NONE; 2592 2593 desc = util_format_description(pformat); 2594 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { 2595 goto out_unknown; 2596 } 2597 2598 /* Find the first non-VOID channel. 
*/ 2599 for (i = 0; i < 4; i++) { 2600 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2601 break; 2602 } 2603 } 2604 2605 *endian = r600_endian_swap(desc->channel[i].size); 2606 2607 switch (desc->channel[i].type) { 2608 /* Half-floats, floats, ints */ 2609 case UTIL_FORMAT_TYPE_FLOAT: 2610 switch (desc->channel[i].size) { 2611 case 16: 2612 switch (desc->nr_channels) { 2613 case 1: 2614 *format = FMT_16_FLOAT; 2615 break; 2616 case 2: 2617 *format = FMT_16_16_FLOAT; 2618 break; 2619 case 3: 2620 case 4: 2621 *format = FMT_16_16_16_16_FLOAT; 2622 break; 2623 } 2624 break; 2625 case 32: 2626 switch (desc->nr_channels) { 2627 case 1: 2628 *format = FMT_32_FLOAT; 2629 break; 2630 case 2: 2631 *format = FMT_32_32_FLOAT; 2632 break; 2633 case 3: 2634 *format = FMT_32_32_32_FLOAT; 2635 break; 2636 case 4: 2637 *format = FMT_32_32_32_32_FLOAT; 2638 break; 2639 } 2640 break; 2641 default: 2642 goto out_unknown; 2643 } 2644 break; 2645 /* Unsigned ints */ 2646 case UTIL_FORMAT_TYPE_UNSIGNED: 2647 /* Signed ints */ 2648 case UTIL_FORMAT_TYPE_SIGNED: 2649 switch (desc->channel[i].size) { 2650 case 8: 2651 switch (desc->nr_channels) { 2652 case 1: 2653 *format = FMT_8; 2654 break; 2655 case 2: 2656 *format = FMT_8_8; 2657 break; 2658 case 3: 2659 case 4: 2660 *format = FMT_8_8_8_8; 2661 break; 2662 } 2663 break; 2664 case 10: 2665 if (desc->nr_channels != 4) 2666 goto out_unknown; 2667 2668 *format = FMT_2_10_10_10; 2669 break; 2670 case 16: 2671 switch (desc->nr_channels) { 2672 case 1: 2673 *format = FMT_16; 2674 break; 2675 case 2: 2676 *format = FMT_16_16; 2677 break; 2678 case 3: 2679 case 4: 2680 *format = FMT_16_16_16_16; 2681 break; 2682 } 2683 break; 2684 case 32: 2685 switch (desc->nr_channels) { 2686 case 1: 2687 *format = FMT_32; 2688 break; 2689 case 2: 2690 *format = FMT_32_32; 2691 break; 2692 case 3: 2693 *format = FMT_32_32_32; 2694 break; 2695 case 4: 2696 *format = FMT_32_32_32_32; 2697 break; 2698 } 2699 break; 2700 default: 2701 goto out_unknown; 
2702 } 2703 break; 2704 default: 2705 goto out_unknown; 2706 } 2707 2708 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2709 *format_comp = 1; 2710 } 2711 2712 *num_format = 0; 2713 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || 2714 desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2715 if (!desc->channel[i].normalized) { 2716 if (desc->channel[i].pure_integer) 2717 *num_format = 1; 2718 else 2719 *num_format = 2; 2720 } 2721 } 2722 return; 2723out_unknown: 2724 R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); 2725} 2726 2727int r600_vertex_elements_build_fetch_shader(struct r600_context *rctx, struct r600_vertex_element *ve) 2728{ 2729 static int dump_shaders = -1; 2730 2731 struct r600_bytecode bc; 2732 struct r600_bytecode_vtx vtx; 2733 struct pipe_vertex_element *elements = ve->elements; 2734 const struct util_format_description *desc; 2735 unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160; 2736 unsigned format, num_format, format_comp, endian; 2737 uint32_t *bytecode; 2738 int i, r; 2739 2740 memset(&bc, 0, sizeof(bc)); 2741 r600_bytecode_init(&bc, rctx->chip_class, rctx->family); 2742 2743 for (i = 0; i < ve->count; i++) { 2744 if (elements[i].instance_divisor > 1) { 2745 struct r600_bytecode_alu alu; 2746 2747 memset(&alu, 0, sizeof(alu)); 2748 alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2749 alu.src[0].sel = 0; 2750 alu.src[0].chan = 3; 2751 2752 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2753 alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; 2754 2755 alu.dst.sel = i + 1; 2756 alu.dst.chan = 3; 2757 alu.dst.write = 1; 2758 alu.last = 1; 2759 2760 if ((r = r600_bytecode_add_alu(&bc, &alu))) { 2761 r600_bytecode_clear(&bc); 2762 return r; 2763 } 2764 } 2765 } 2766 2767 for (i = 0; i < ve->count; i++) { 2768 r600_vertex_data_type(ve->elements[i].src_format, 2769 &format, &num_format, &format_comp, &endian); 2770 2771 desc = 
util_format_description(ve->elements[i].src_format); 2772 if (desc == NULL) { 2773 r600_bytecode_clear(&bc); 2774 R600_ERR("unknown format %d\n", ve->elements[i].src_format); 2775 return -EINVAL; 2776 } 2777 2778 if (elements[i].src_offset > 65535) { 2779 r600_bytecode_clear(&bc); 2780 R600_ERR("too big src_offset: %u\n", elements[i].src_offset); 2781 return -EINVAL; 2782 } 2783 2784 memset(&vtx, 0, sizeof(vtx)); 2785 vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; 2786 vtx.fetch_type = elements[i].instance_divisor ? 1 : 0; 2787 vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; 2788 vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; 2789 vtx.mega_fetch_count = 0x1F; 2790 vtx.dst_gpr = i + 1; 2791 vtx.dst_sel_x = desc->swizzle[0]; 2792 vtx.dst_sel_y = desc->swizzle[1]; 2793 vtx.dst_sel_z = desc->swizzle[2]; 2794 vtx.dst_sel_w = desc->swizzle[3]; 2795 vtx.data_format = format; 2796 vtx.num_format_all = num_format; 2797 vtx.format_comp_all = format_comp; 2798 vtx.srf_mode_all = 1; 2799 vtx.offset = elements[i].src_offset; 2800 vtx.endian = endian; 2801 2802 if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { 2803 r600_bytecode_clear(&bc); 2804 return r; 2805 } 2806 } 2807 2808 r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 2809 2810 if ((r = r600_bytecode_build(&bc))) { 2811 r600_bytecode_clear(&bc); 2812 return r; 2813 } 2814 2815 if (dump_shaders == -1) 2816 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 2817 2818 if (dump_shaders) { 2819 fprintf(stderr, "--------------------------------------------------------------\n"); 2820 r600_bytecode_dump(&bc); 2821 fprintf(stderr, "______________________________________________________________\n"); 2822 } 2823 2824 ve->fs_size = bc.ndw*4; 2825 2826 ve->fetch_shader = (struct r600_resource*) 2827 pipe_buffer_create(rctx->context.screen, 2828 PIPE_BIND_CUSTOM, 2829 PIPE_USAGE_IMMUTABLE, ve->fs_size); 2830 if (ve->fetch_shader == NULL) { 2831 
r600_bytecode_clear(&bc); 2832 return -ENOMEM; 2833 } 2834 2835 bytecode = rctx->ws->buffer_map(ve->fetch_shader->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 2836 if (bytecode == NULL) { 2837 r600_bytecode_clear(&bc); 2838 pipe_resource_reference((struct pipe_resource**)&ve->fetch_shader, NULL); 2839 return -ENOMEM; 2840 } 2841 2842 if (R600_BIG_ENDIAN) { 2843 for (i = 0; i < ve->fs_size / 4; ++i) { 2844 bytecode[i] = bswap_32(bc.bytecode[i]); 2845 } 2846 } else { 2847 memcpy(bytecode, bc.bytecode, ve->fs_size); 2848 } 2849 2850 rctx->ws->buffer_unmap(ve->fetch_shader->cs_buf); 2851 r600_bytecode_clear(&bc); 2852 2853 if (rctx->chip_class >= EVERGREEN) 2854 evergreen_fetch_shader(&rctx->context, ve); 2855 else 2856 r600_fetch_shader(&rctx->context, ve); 2857 2858 return 0; 2859} 2860