radeonsi_shader.c revision ce40e4726cf30196b87df387255c64ddc2a97638
1 2/* 3 * Copyright 2012 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: 25 * Tom Stellard <thomas.stellard@amd.com> 26 * Michel Dänzer <michel.daenzer@amd.com> 27 * Christian König <christian.koenig@amd.com> 28 */ 29 30#include "gallivm/lp_bld_tgsi_action.h" 31#include "gallivm/lp_bld_const.h" 32#include "gallivm/lp_bld_gather.h" 33#include "gallivm/lp_bld_intr.h" 34#include "gallivm/lp_bld_tgsi.h" 35#include "radeon_llvm.h" 36#include "radeon_llvm_emit.h" 37#include "tgsi/tgsi_info.h" 38#include "tgsi/tgsi_parse.h" 39#include "tgsi/tgsi_scan.h" 40#include "tgsi/tgsi_dump.h" 41 42#include "radeonsi_pipe.h" 43#include "radeonsi_shader.h" 44#include "si_state.h" 45#include "sid.h" 46 47#include <assert.h> 48#include <errno.h> 49#include <stdio.h> 50 51/* 52static ps_remap_inputs( 53 struct tgsi_llvm_context * tl_ctx, 54 unsigned tgsi_index, 55 unsigned tgsi_chan) 56{ 57 : 58} 59 60struct si_input 61{ 62 struct list_head head; 63 unsigned tgsi_index; 64 unsigned tgsi_chan; 65 unsigned order; 66}; 67*/ 68 69 70struct si_shader_context 71{ 72 struct radeon_llvm_context radeon_bld; 73 struct r600_context *rctx; 74 struct tgsi_parse_context parse; 75 struct tgsi_token * tokens; 76 struct si_pipe_shader *shader; 77 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. 
*/ 78/* unsigned num_inputs; */ 79/* struct list_head inputs; */ 80/* unsigned * input_mappings *//* From TGSI to SI hw */ 81/* struct tgsi_shader_info info;*/ 82}; 83 84static struct si_shader_context * si_shader_context( 85 struct lp_build_tgsi_context * bld_base) 86{ 87 return (struct si_shader_context *)bld_base; 88} 89 90 91#define PERSPECTIVE_BASE 0 92#define LINEAR_BASE 9 93 94#define SAMPLE_OFFSET 0 95#define CENTER_OFFSET 2 96#define CENTROID_OFSET 4 97 98#define USE_SGPR_MAX_SUFFIX_LEN 5 99#define CONST_ADDR_SPACE 2 100#define USER_SGPR_ADDR_SPACE 8 101 102enum sgpr_type { 103 SGPR_CONST_PTR_F32, 104 SGPR_CONST_PTR_V4I32, 105 SGPR_CONST_PTR_V8I32, 106 SGPR_I32, 107 SGPR_I64 108}; 109 110/** 111 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad 112 * 113 * @param offset The offset parameter specifies the number of 114 * elements to offset, not the number of bytes or dwords. An element is the 115 * the type pointed to by the base_ptr parameter (e.g. int is the element of 116 * an int* pointer) 117 * 118 * When LLVM lowers the load instruction, it will convert the element offset 119 * into a dword offset automatically. 120 * 121 */ 122static LLVMValueRef build_indexed_load( 123 struct gallivm_state * gallivm, 124 LLVMValueRef base_ptr, 125 LLVMValueRef offset) 126{ 127 LLVMValueRef computed_ptr = LLVMBuildGEP( 128 gallivm->builder, base_ptr, &offset, 1, ""); 129 130 return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); 131} 132 133/** 134 * Load a value stored in one of the user SGPRs 135 * 136 * @param sgpr This is the sgpr to load the value from. If you need to load a 137 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer), 138 * then you should pass the index of the first SGPR that holds the value. For 139 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then 140 * use pass 2 for the sgpr parameter. 
 *
 * The value of the sgpr parameter must also be aligned to the width of the type
 * being loaded, so that the sgpr parameter is divisible by the dword width of the
 * type.  For example, if the value being loaded is two dwords wide, then the sgpr
 * parameter must be divisible by two.
 */
static LLVMValueRef use_sgpr(
	struct gallivm_state * gallivm,
	enum sgpr_type type,
	unsigned sgpr)
{
	LLVMValueRef sgpr_index;
	LLVMTypeRef ret_type;
	LLVMValueRef ptr;

	sgpr_index = lp_build_const_int32(gallivm, sgpr);

	switch (type) {
	case SGPR_CONST_PTR_F32:
		/* Pointer is two dwords wide -> must start on an even SGPR. */
		assert(sgpr % 2 == 0);
		ret_type = LLVMFloatTypeInContext(gallivm->context);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_I32:
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		break;

	case SGPR_I64:
		/* Two dwords wide -> must start on an even SGPR. */
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt64TypeInContext(gallivm->context);
		break;

	case SGPR_CONST_PTR_V4I32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 4);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	case SGPR_CONST_PTR_V8I32:
		assert(sgpr % 2 == 0);
		ret_type = LLVMInt32TypeInContext(gallivm->context);
		ret_type = LLVMVectorType(ret_type, 8);
		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
		break;

	default:
		assert(!"Unsupported SGPR type in use_sgpr()");
		return NULL;
	}

	/* Encode the SGPR index as an inttoptr in the USER_SGPR address
	 * space and load through it; presumably the backend lowers this to a
	 * direct SGPR read — confirm against radeon_llvm_emit. */
	ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
	ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
	return LLVMBuildLoad(gallivm->builder, ptr, "");
}

/* Declare a vertex shader input: fetch the vec4 attribute through the
 * llvm.SI.vs.load.input intrinsic and store the four scalar components into
 * the radeon_llvm SoA input array. */
static void declare_input_vs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index_reg;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;
	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	/* rctx is currently unused here (vertex-element lookup below is
	 * commented out). */
	struct r600_context *rctx = si_shader_ctx->rctx;
	//struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
	unsigned chan;

	/* Load the T list */
	/* XXX: Communicate with the rest of the driver about which SGPR the T#
	 * list pointer is going to be stored in.  Hard code to SGPR[6:7] for
	 * now */
	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6);

	t_offset = lp_build_const_int32(base->gallivm, input_index);

	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, 0);

	/* Load the buffer index, which is always stored in VGPR0 for
	 * Vertex Shaders */
	buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index_reg;
	input = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this.  There is one in
		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
				LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}

/* Declare a fragment shader input: pick the llvm.SI.fs.interp.* intrinsic
 * matching the declared interpolation mode and emit one call per channel,
 * storing the results in the radeon_llvm SoA input array. */
static void declare_input_fs(
	struct si_shader_context * si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	const char * intr_name;
	unsigned chan;
	struct lp_build_context * base =
				&si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state * gallivm = base->gallivm;

	/* This value is:
	 * [15:0] NewPrimMask (Bit mask for each quad.  It is set if the
	 *                     quad begins a new primitive.  Bit 0 always needs
	 *                     to be unset)
	 * [32:16] ParamOffset
	 *
	 */
	/* XXX: This register number must be identical to the S_00B02C_USER_SGPR
	 * register field value
	 */
	LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6);


	/* XXX: Is this the input_index? */
	LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);

	/* XXX: Handle all possible interpolation modes */
	switch (decl->Interp.Interpolate) {
	case TGSI_INTERPOLATE_COLOR:
		/* XXX: Flat shading hangs the GPU */
		if (si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
#if 0
			intr_name = "llvm.SI.fs.interp.constant";
#else
			intr_name = "llvm.SI.fs.interp.linear.center";
#endif
		} else {
			if (decl->Interp.Centroid)
				intr_name = "llvm.SI.fs.interp.persp.centroid";
			else
				intr_name = "llvm.SI.fs.interp.persp.center";
		}
		break;
	case TGSI_INTERPOLATE_CONSTANT:
		/* XXX: Flat shading hangs the GPU */
#if 0
		intr_name = "llvm.SI.fs.interp.constant";
		break;
#endif
		/* fall through: treat as linear until flat shading works */
	case TGSI_INTERPOLATE_LINEAR:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.linear.centroid";
		else
			intr_name = "llvm.SI.fs.interp.linear.center";
		break;
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (decl->Interp.Centroid)
			intr_name = "llvm.SI.fs.interp.persp.centroid";
		else
			intr_name = "llvm.SI.fs.interp.persp.center";
		break;
	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return;
	}

	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
	for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
		LLVMValueRef args[3];
		LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
		unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
		LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		si_shader_ctx->radeon_bld.inputs[soa_index] =
			lp_build_intrinsic(gallivm->builder, intr_name,
						input_type, args, 3);
	}
}

/* radeon_llvm load_input hook: dispatch an input declaration to the VS or
 * FS handler based on the shader processor type. */
static void declare_input(
	struct radeon_llvm_context * radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader_context * si_shader_ctx =
				si_shader_context(&radeon_bld->soa.bld_base);
	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
		declare_input_vs(si_shader_ctx, input_index, decl);
	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		declare_input_fs(si_shader_ctx, input_index, decl);
	} else {
		fprintf(stderr, "Warning: Unsupported shader type,\n");
	}
}

/* TGSI_FILE_CONSTANT fetch callback: load one scalar component of a
 * constant register from the constant buffer and bitcast it to the
 * requested TGSI type. */
static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context * bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct lp_build_context * base = &bld_base->base;

	LLVMValueRef const_ptr;
	LLVMValueRef offset;
	LLVMValueRef load;

	/* XXX: Assume the pointer to the constant buffer is being stored in
	 * SGPR[0:1] */
	const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0);

	/* XXX: This assumes that the constant buffer is not packed, so
	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
	 * offset of 4.
	 */
	offset = lp_build_const_int32(base->gallivm,
			(reg->Register.Index * 4) + swizzle);

	load = build_indexed_load(base->gallivm, const_ptr, offset);
	return bitcast(bld_base, type, load);
}

/* XXX: This is partially implemented for VS only at this point.  It is not complete */
/* Epilogue emission: walk the token stream again to record input/output
 * metadata in shader->input[]/output[] and emit one llvm.SI.export per
 * output register.  The export of the VS position (or FS color) is held
 * back in last_args so it can be emitted last with the "done" bit set. */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct si_shader * shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef last_args[9] = { 0 };
	unsigned color_count = 0;
	unsigned param_count = 0;

	while (!tgsi_parse_end_of_tokens(parse)) {
		/* XXX: component_bits controls which components of the output
		 * registers actually get exported. (e.g bit 0 means export
		 * X component, bit 1 means export Y component, etc.)  I'm
		 * hard coding this to 0xf for now.  In the future, we might
		 * want to do something else. */
		unsigned component_bits = 0xf;
		unsigned chan;
		/* d points into parse, so it is valid only after
		 * tgsi_parse_token() below fills the token in. */
		struct tgsi_full_declaration *d =
					&parse->FullToken.FullDeclaration;
		LLVMValueRef args[9];
		unsigned target;
		unsigned index;
		int i;

		tgsi_parse_token(parse);
		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			/* Inputs are only recorded, not exported. */
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Interp.Interpolate;
			shader->input[i].centroid = d->Interp.Centroid;
			continue;

		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Interp.Interpolate;
			break;

		default:
			continue;
		}

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			for (chan = 0; chan < 4; chan++ ) {
				LLVMValueRef out_ptr =
					si_shader_ctx->radeon_bld.soa.outputs
					[index][chan];
				/* +5 because the first output value will be
				 * the 6th argument to the intrinsic. */
				args[chan + 5] = LLVMBuildLoad(
					base->gallivm->builder, out_ptr, "");
			}

			/* XXX: We probably need to keep track of the output
			 * values, so we know what we are passing to the next
			 * stage. */

			/* Select the correct target */
			switch(d->Semantic.Name) {
			case TGSI_SEMANTIC_POSITION:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_COLOR:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
					/* VS colors go out as parameters... */
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					shader->output[i].param_offset = param_count;
					param_count++;
				} else {
					/* ...FS colors as MRT exports. */
					target = V_008DFC_SQ_EXP_MRT + color_count;
					color_count++;
				}
				break;
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				shader->output[i].param_offset = param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			/* Specify which components to enable */
			args[0] = lp_build_const_int32(base->gallivm,
						       component_bits);

			/* Specify whether the EXEC mask represents the valid mask */
			args[1] = lp_build_const_int32(base->gallivm, 0);

			/* Specify whether this is the last export */
			args[2] = lp_build_const_int32(base->gallivm, 0);

			/* Specify the target we are exporting */
			args[3] = lp_build_const_int32(base->gallivm, target);

			/* Set COMPR flag to zero to export data as 32-bit */
			args[4] = uint->zero;

			/* Defer the VS position / FS color export so it can
			 * be emitted last (with the "done" flags set below);
			 * a previously deferred export is flushed first. */
			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
						"llvm.SI.export",
						LLVMVoidTypeInContext(base->gallivm->context),
						last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
					"llvm.SI.export",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 9);
			}

		}
	}

	if (!last_args[0]) {
		/* No deferred export: only legal for a fragment shader,
		 * which always terminates with a (possibly null) MRT export. */
		assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

		/* Specify which components to enable */
		last_args[0] = lp_build_const_int32(base->gallivm, 0x0);

		/* Specify the target we are exporting */
		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);

		/* Set COMPR flag to zero to export data as 32-bit */
		last_args[4] = uint->zero;

		/* dummy bits */
		last_args[5] = uint->zero;
		last_args[6] = uint->zero;
		last_args[7] = uint->zero;
		last_args[8] = uint->zero;
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
					    si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
			   "llvm.SI.export",
			   LLVMVoidTypeInContext(base->gallivm->context),
			   last_args, 9);

/* XXX: Look up what this function does */
/*		ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
}

/* Build the argument list for the llvm.SI.sample intrinsic:
 * args[0] = write mask, args[1] = coordinates (projected for TXP),
 * args[2] = resource descriptor, args[3] = sampler state. */
static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	const struct tgsi_full_instruction * inst = emit_data->inst;
	LLVMValueRef ptr;
	LLVMValueRef offset;

	/* WriteMask */
	/* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

	/* Coordinates */
	/* XXX: Not all sample instructions need 4 address arguments. */
	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef src_w;
		unsigned chan;
		LLVMValueRef coords[4];

		emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
		src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);

		/* Projective texturing: divide x/y/z by w, force w to 1. */
		for (chan = 0; chan < 3; chan++ ) {
			LLVMValueRef arg = lp_build_emit_fetch(bld_base,
							       emit_data->inst, 0, chan);
			coords[chan] = lp_build_emit_llvm_binary(bld_base,
								 TGSI_OPCODE_DIV,
								 arg, src_w);
		}
		coords[3] = bld_base->base.one;
		emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
							    coords, 4);
	} else
		emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
							 0, LP_CHAN_ALL);

	/* Resource */
	/* XXX: hard-coded SGPR pair, like the T# list in declare_input_vs(). */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				      emit_data->inst->Src[1].Register.Index);
	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Sampler */
	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				      emit_data->inst->Src[1].Register.Index);
	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
						ptr, offset);

	/* Dimensions */
	/* XXX: We might want to pass this information to the shader at some
	 * point.
	 */
/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Texture.Texture);
*/

	emit_data->arg_count = 4;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear */
	emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
			4);
}

/* TEX/TXP both lower to llvm.SI.sample with the argument layout produced
 * by tex_fetch_args(). */
static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = lp_build_tgsi_intrinsic,
	.intr_name = "llvm.SI.sample"
};


/* Compile a TGSI shader to SI machine code and upload it into a buffer
 * object.
 *
 * Pipeline: TGSI -> LLVM IR (lp_build_tgsi_llvm with the callbacks set up
 * below) -> machine code (radeon_llvm_compile).  The first three dwords of
 * the compiled blob hold num_sgprs, num_vgprs and spi_ps_input_ena; the
 * actual code starts at byte 12 and is copied into shader->bo.
 *
 * Returns 0 on success, -EINVAL if the TGSI->LLVM translation fails,
 * -ENOMEM if the code buffer cannot be allocated. */
int si_pipe_shader_create(
	struct pipe_context *ctx,
	struct si_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct si_shader_context si_shader_ctx;
	struct tgsi_shader_info shader_info;
	struct lp_build_tgsi_context * bld_base;
	LLVMModuleRef mod;
	unsigned char * inst_bytes;
	unsigned inst_byte_count;
	unsigned i;
	uint32_t *ptr;
	bool dump;

	dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);

	memset(&si_shader_ctx.radeon_bld, 0, sizeof(si_shader_ctx.radeon_bld));
	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;

	tgsi_scan_shader(shader->tokens, &shader_info);
	bld_base->info = &shader_info;
	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
	bld_base->emit_epilogue = si_llvm_emit_epilogue;

	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;

	si_shader_ctx.radeon_bld.load_input = declare_input;
	si_shader_ctx.tokens = shader->tokens;
	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
	si_shader_ctx.shader = shader;
	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
	si_shader_ctx.rctx = rctx;

	shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;

	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
	 * conversion fails. */
	if (dump) {
		tgsi_dump(shader->tokens, 0);
	}

	/* NOTE(review): on this error path si_shader_ctx.parse is never
	 * passed to tgsi_parse_free() — possible leak, verify. */
	if (!lp_build_tgsi_llvm(bld_base, shader->tokens)) {
		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
		return -EINVAL;
	}

	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

	mod = bld_base->base.gallivm->module;
	if (dump) {
		LLVMDumpModule(mod);
	}
	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
	if (dump) {
		fprintf(stderr, "SI CODE:\n");
		/* Print one little-endian dword per line. */
		for (i = 0; i < inst_byte_count; i+=4 ) {
			fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
				inst_bytes[i + 2], inst_bytes[i + 1],
				inst_bytes[i]);
		}
	}

	/* Header produced by radeon_llvm_compile: three config dwords. */
	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));

	tgsi_parse_free(&si_shader_ctx.parse);

	/* copy new shader */
	si_resource_reference(&shader->bo, NULL);
	shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
					       inst_byte_count - 12);
	/* NOTE(review): inst_bytes is not freed on this error path — verify. */
	if (shader->bo == NULL) {
		return -ENOMEM;
	}

	ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
	if (0 /*R600_BIG_ENDIAN*/) {
		for (i = 0; i < (inst_byte_count-12)/4; ++i) {
			ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
		}
	} else {
		memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
	}
	rctx->ws->buffer_unmap(shader->bo->cs_buf);

	free(inst_bytes);

	return 0;
}

/* Release the shader's code buffer and clear the recorded input/output
 * metadata. */
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	si_resource_reference(&shader->bo, NULL);

	memset(&shader->shader,0,sizeof(struct si_shader));
}