radeonsi_shader.c revision 82cd9c0fc2838a153006a646b0d356ed54b8680e
#include "gallivm/lp_bld_tgsi_action.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_tgsi.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"

#include "radeonsi_pipe.h"
#include "radeonsi_shader.h"
#include "si_state.h"
#include "sid.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>

/*
static ps_remap_inputs(
	struct tgsi_llvm_context * tl_ctx,
	unsigned tgsi_index,
	unsigned tgsi_chan)
{
 :
}

struct si_input
{
	struct list_head head;
	unsigned tgsi_index;
	unsigned tgsi_chan;
	unsigned order;
};
*/


struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;
	struct r600_context *rctx;
	struct tgsi_parse_context parse;
	struct tgsi_token * tokens;
	struct si_pipe_shader *shader;
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
/*	unsigned num_inputs; */
/*	struct list_head inputs; */
/*	unsigned * input_mappings *//* From TGSI to SI hw */
/*	struct tgsi_shader_info info; */
};

static struct si_shader_context * si_shader_context(
	struct lp_build_tgsi_context * bld_base)
{
	return (struct si_shader_context *)bld_base;
}


#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
#define CONST_ADDR_SPACE 2
#define USER_SGPR_ADDR_SPACE 8

enum sgpr_type {
	SGPR_CONST_PTR_F32,
	SGPR_CONST_PTR_V4I32,
	SGPR_CONST_PTR_V8I32,
	SGPR_I32,
	SGPR_I64
};

/**
 * Build an LLVM IR indexed load using LLVMBuildGEP + LLVMBuildLoad
 *
 * @param offset The offset parameter specifies the number of elements to
 * offset, not the number of bytes or dwords.  An element is the type pointed
 * to by the base_ptr parameter (e.g. int is the element of an int* pointer).
 *
 * When LLVM lowers the load instruction, it will convert the element offset
 * into a dword offset automatically.
 */
static LLVMValueRef build_indexed_load(
	struct gallivm_state * gallivm,
	LLVMValueRef base_ptr,
	LLVMValueRef offset)
{
	LLVMValueRef computed_ptr = LLVMBuildGEP(
		gallivm->builder, base_ptr, &offset, 1, "");

	return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
}

/**
 * Load a value stored in one of the user SGPRs
 *
 * @param sgpr This is the sgpr to load the value from.  If you need to load a
 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
 * then you should pass the index of the first SGPR that holds the value.  For
 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
 * pass 2 for the sgpr parameter.
 *
 * The sgpr parameter must also be aligned to the width of the type being
 * loaded, so that it is divisible by the dword width of the type.  For
 * example, if the value being loaded is two dwords wide, then the sgpr
 * parameter must be divisible by two.
 */
static LLVMValueRef use_sgpr(
	struct gallivm_state * gallivm,
	enum sgpr_type type,
	unsigned sgpr)
{
	LLVMValueRef sgpr_index;
	LLVMTypeRef ret_type;
	LLVMValueRef ptr;

	sgpr_index = lp_build_const_int32(gallivm, sgpr);

	switch (type) {
	case SGPR_CONST_PTR_F32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMFloatTypeInContext(gallivm->context);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_I32:
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		break;

	case SGPR_I64:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt64TypeInContext(gallivm->context);
		break;

	case SGPR_CONST_PTR_V4I32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 4);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_CONST_PTR_V8I32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 8);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	default:
		assert(!"Unsupported SGPR type in use_sgpr()");
		return NULL;
	}

	ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
	ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
	return LLVMBuildLoad(gallivm->builder, ptr, "");
}

static void declare_input_vs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index_reg;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;
	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct r600_context *rctx = si_shader_ctx->rctx;
	/* struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; */
	unsigned chan;

	/* Load the T list */
	/* XXX: Communicate with the rest of the driver about which SGPR the T#
	 * list pointer is going to be stored in.  Hard code to SGPR[6:7] for
	 * now. */
	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6);

	t_offset = lp_build_const_int32(base->gallivm, input_index);

	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, 0);

	/* Load the buffer index, which is always stored in VGPR0
	 * for Vertex Shaders. */
	buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index_reg;
	input = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this.  There is one in
		 * tgsi_llvm.c.
		 */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
				LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}

static void declare_input_fs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	const char * intr_name;
	unsigned chan;
	struct lp_build_context * base =
				&si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state * gallivm = base->gallivm;

	/* This value is:
	 * [15:0]  NewPrimMask (Bit mask for each quad.  It is set if the
	 *                      quad begins a new primitive.  Bit 0 always
	 *                      needs to be unset.)
	 * [31:16] ParamOffset
	 */
	/* XXX: This register number must be identical to the S_00B02C_USER_SGPR
	 * register field value.
	 */
	LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6);

	/* XXX: Is this the input_index? */
	LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);

	/* XXX: Handle all possible interpolation modes */
	switch (decl->Interp.Interpolate) {
	case TGSI_INTERPOLATE_COLOR:
		/* XXX: Flat shading hangs the GPU */
		if (si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
#if 0
			intr_name = "llvm.SI.fs.interp.constant";
#else
			intr_name = "llvm.SI.fs.interp.linear.center";
#endif
		} else {
			if (decl->Interp.Centroid)
				intr_name = "llvm.SI.fs.interp.persp.centroid";
			else
				intr_name = "llvm.SI.fs.interp.persp.center";
		}
		break;
	case TGSI_INTERPOLATE_CONSTANT:
		/* XXX: Flat shading hangs the GPU */
#if 0
		intr_name = "llvm.SI.fs.interp.constant";
		break;
#endif
		/* Fall through to linear interpolation while constant
		 * interpolation is disabled above. */
	case TGSI_INTERPOLATE_LINEAR:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.linear.centroid";
		else
			intr_name = "llvm.SI.fs.interp.linear.center";
		break;
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.persp.centroid";
		else
			intr_name = "llvm.SI.fs.interp.persp.center";
		break;
	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return;
	}

	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4)?
	 */
	for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
		LLVMValueRef args[3];
		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
		unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
		LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		si_shader_ctx->radeon_bld.inputs[soa_index] =
			lp_build_intrinsic(gallivm->builder, intr_name,
						input_type, args, 3);
	}
}

static void declare_input(
	struct radeon_llvm_context * radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader_context * si_shader_ctx =
				si_shader_context(&radeon_bld->soa.bld_base);
	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
		declare_input_vs(si_shader_ctx, input_index, decl);
	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		declare_input_fs(si_shader_ctx, input_index, decl);
	} else {
		fprintf(stderr, "Warning: Unsupported shader type.\n");
	}
}

static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct lp_build_context * base = &bld_base->base;

	LLVMValueRef const_ptr;
	LLVMValueRef offset;
	LLVMValueRef load;

	/* XXX: Assume the pointer to the constant buffer is being stored in
	 * SGPR[0:1]. */
	const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0);

	/* XXX: This assumes that the constant buffer is not packed, so
	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
	 * offset of 4. */
	offset = lp_build_const_int32(base->gallivm,
					(reg->Register.Index * 4) + swizzle);

	load = build_indexed_load(base->gallivm, const_ptr, offset);
	return bitcast(bld_base, type, load);
}

/* XXX: This is only partially implemented, for VS, at this point.  It is not
 * complete. */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef last_args[9] = { 0 };
	unsigned color_count = 0;
	unsigned param_count = 0;

	while (!tgsi_parse_end_of_tokens(parse)) {
		/* XXX: component_bits controls which components of the output
		 * registers actually get exported (e.g. bit 0 means export
		 * the X component, bit 1 means export the Y component, etc.).
		 * I'm hard coding this to 0xf for now.  In the future, we
		 * might want to do something else.
		 */
		unsigned component_bits = 0xf;
		unsigned chan;
		struct tgsi_full_declaration *d =
					&parse->FullToken.FullDeclaration;
		LLVMValueRef args[9];
		unsigned target;
		unsigned index;
		int i;

		tgsi_parse_token(parse);
		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Interp.Interpolate;
			shader->input[i].centroid = d->Interp.Centroid;
			continue;

		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Interp.Interpolate;
			break;

		default:
			continue;
		}

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			for (chan = 0; chan < 4; chan++) {
				LLVMValueRef out_ptr =
					si_shader_ctx->radeon_bld.soa.outputs
					[index][chan];
				/* +5 because the first output value will be
				 * the 6th argument to the intrinsic. */
				args[chan + 5] = LLVMBuildLoad(
					base->gallivm->builder, out_ptr, "");
			}

			/* XXX: We probably need to keep track of the output
			 * values, so we know what we are passing to the next
			 * stage. */

			/* Select the correct target */
			switch (d->Semantic.Name) {
			case TGSI_SEMANTIC_POSITION:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_COLOR:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					shader->output[i].param_offset = param_count;
					param_count++;
				} else {
					target = V_008DFC_SQ_EXP_MRT + color_count;
					color_count++;
				}
				break;
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				shader->output[i].param_offset = param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			/* Specify which components to enable */
			args[0] = lp_build_const_int32(base->gallivm,
							component_bits);

			/* Specify whether the EXEC mask represents the valid mask */
			args[1] = lp_build_const_int32(base->gallivm, 0);

			/* Specify whether this is the last export */
			args[2] = lp_build_const_int32(base->gallivm, 0);

			/* Specify the target we are exporting */
			args[3] = lp_build_const_int32(base->gallivm, target);

			/* Set COMPR flag to zero to export data as 32-bit */
			args[4] = uint->zero;

			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
						"llvm.SI.export",
						LLVMVoidTypeInContext(base->gallivm->context),
						last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
					"llvm.SI.export",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 9);
			}

		}
	}

	if (!last_args[0]) {
		assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

		/* Specify which components to enable */
		last_args[0] = lp_build_const_int32(base->gallivm, 0x0);

		/* Specify the target we are exporting */
		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);

		/* Set COMPR flag to zero to export data as 32-bit */
		last_args[4] = uint->zero;

		/* dummy bits */
		last_args[5] = uint->zero;
		last_args[6] = uint->zero;
		last_args[7] = uint->zero;
		last_args[8] = uint->zero;
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
				si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
			   "llvm.SI.export",
			   LLVMVoidTypeInContext(base->gallivm->context),
			   last_args, 9);

/* XXX: Look up what this function does */
/*	ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); */
}

static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	const struct tgsi_full_instruction * inst = emit_data->inst;
	LLVMValueRef ptr;
	LLVMValueRef offset;

	/* WriteMask */
	/* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask */
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

	/* Coordinates */
	/* XXX: Not all sample instructions need 4 address arguments. */
	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef src_w;
		unsigned chan;
		LLVMValueRef coords[4];

		emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
		src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);

		for (chan = 0; chan < 3; chan++) {
			LLVMValueRef arg = lp_build_emit_fetch(bld_base,
							emit_data->inst, 0, chan);
			coords[chan] = lp_build_emit_llvm_binary(bld_base,
							TGSI_OPCODE_DIV,
							arg, src_w);
		}
		coords[3] = bld_base->base.one;
		emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
							coords, 4);
	} else
		emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
							0, LP_CHAN_ALL);

	/* Resource */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Src[1].Register.Index);
	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
				ptr, offset);

	/* Sampler */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Src[1].Register.Index);
	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
				ptr, offset);

	/* Dimensions */
	/* XXX: We might want to pass this information to the shader at some
	 * point.
	 */
/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
		emit_data->inst->Texture.Texture);
*/

	emit_data->arg_count = 4;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear. */
	emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
			4);
}

static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = lp_build_tgsi_intrinsic,
	.intr_name = "llvm.SI.sample"
};


int si_pipe_shader_create(
	struct pipe_context *ctx,
	struct si_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct si_shader_context si_shader_ctx;
	struct tgsi_shader_info shader_info;
	struct lp_build_tgsi_context * bld_base;
	LLVMModuleRef mod;
	unsigned char * inst_bytes;
	unsigned inst_byte_count;
	unsigned i;
	uint32_t *ptr;
	bool dump;

	dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);

	memset(&si_shader_ctx.radeon_bld, 0, sizeof(si_shader_ctx.radeon_bld));
	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;

	tgsi_scan_shader(shader->tokens, &shader_info);
	bld_base->info = &shader_info;
	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
	bld_base->emit_epilogue = si_llvm_emit_epilogue;

	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;

	si_shader_ctx.radeon_bld.load_input = declare_input;
	si_shader_ctx.tokens = shader->tokens;
	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
	si_shader_ctx.shader = shader;
	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
	si_shader_ctx.rctx = rctx;

	shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;

	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
	 * conversion fails.
	 */
	if (dump) {
		tgsi_dump(shader->tokens, 0);
	}

	if (!lp_build_tgsi_llvm(bld_base, shader->tokens)) {
		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
		return -EINVAL;
	}

	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

	mod = bld_base->base.gallivm->module;
	if (dump) {
		LLVMDumpModule(mod);
	}
	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
	if (dump) {
		fprintf(stderr, "SI CODE:\n");
		for (i = 0; i < inst_byte_count; i += 4) {
			fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
				inst_bytes[i + 2], inst_bytes[i + 1],
				inst_bytes[i]);
		}
	}

	/* The first three dwords of inst_bytes hold resource usage information;
	 * the shader code itself starts at offset 12. */
	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));

	tgsi_parse_free(&si_shader_ctx.parse);

	/* copy new shader */
	si_resource_reference(&shader->bo, NULL);
	shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
					       inst_byte_count - 12);
	if (shader->bo == NULL) {
		return -ENOMEM;
	}

	ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
	if (0 /*R600_BIG_ENDIAN*/) {
		for (i = 0; i < (inst_byte_count - 12) / 4; ++i) {
			ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes + 12 + i*4));
		}
	} else {
		memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
	}
	rctx->ws->buffer_unmap(shader->bo->cs_buf);

	free(inst_bytes);

	return 0;
}

void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	si_resource_reference(&shader->bo, NULL);

	memset(&shader->shader, 0, sizeof(struct si_shader));
}