/* radeonsi_shader.c revision d1e40b3d40b2e90ad4f275565f1ae27fe6f964cc */
1 2/* 3 * Copyright 2012 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: 25 * Tom Stellard <thomas.stellard@amd.com> 26 * Michel Dänzer <michel.daenzer@amd.com> 27 * Christian König <christian.koenig@amd.com> 28 */ 29 30#include "gallivm/lp_bld_tgsi_action.h" 31#include "gallivm/lp_bld_const.h" 32#include "gallivm/lp_bld_gather.h" 33#include "gallivm/lp_bld_intr.h" 34#include "gallivm/lp_bld_tgsi.h" 35#include "radeon_llvm.h" 36#include "radeon_llvm_emit.h" 37#include "tgsi/tgsi_info.h" 38#include "tgsi/tgsi_parse.h" 39#include "tgsi/tgsi_scan.h" 40#include "tgsi/tgsi_dump.h" 41 42#include "radeonsi_pipe.h" 43#include "radeonsi_shader.h" 44#include "si_state.h" 45#include "sid.h" 46 47#include <assert.h> 48#include <errno.h> 49#include <stdio.h> 50 51/* 52static ps_remap_inputs( 53 struct tgsi_llvm_context * tl_ctx, 54 unsigned tgsi_index, 55 unsigned tgsi_chan) 56{ 57 : 58} 59 60struct si_input 61{ 62 struct list_head head; 63 unsigned tgsi_index; 64 unsigned tgsi_chan; 65 unsigned order; 66}; 67*/ 68 69 70struct si_shader_context 71{ 72 struct radeon_llvm_context radeon_bld; 73 struct r600_context *rctx; 74 struct tgsi_parse_context parse; 75 struct tgsi_token * tokens; 76 struct si_pipe_shader *shader; 77 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. 
*/ 78/* unsigned num_inputs; */ 79/* struct list_head inputs; */ 80/* unsigned * input_mappings *//* From TGSI to SI hw */ 81/* struct tgsi_shader_info info;*/ 82}; 83 84static struct si_shader_context * si_shader_context( 85 struct lp_build_tgsi_context * bld_base) 86{ 87 return (struct si_shader_context *)bld_base; 88} 89 90 91#define PERSPECTIVE_BASE 0 92#define LINEAR_BASE 9 93 94#define SAMPLE_OFFSET 0 95#define CENTER_OFFSET 2 96#define CENTROID_OFSET 4 97 98#define USE_SGPR_MAX_SUFFIX_LEN 5 99#define CONST_ADDR_SPACE 2 100#define USER_SGPR_ADDR_SPACE 8 101 102enum sgpr_type { 103 SGPR_CONST_PTR_F32, 104 SGPR_CONST_PTR_V4I32, 105 SGPR_CONST_PTR_V8I32, 106 SGPR_I32, 107 SGPR_I64 108}; 109 110/** 111 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad 112 * 113 * @param offset The offset parameter specifies the number of 114 * elements to offset, not the number of bytes or dwords. An element is the 115 * the type pointed to by the base_ptr parameter (e.g. int is the element of 116 * an int* pointer) 117 * 118 * When LLVM lowers the load instruction, it will convert the element offset 119 * into a dword offset automatically. 120 * 121 */ 122static LLVMValueRef build_indexed_load( 123 struct gallivm_state * gallivm, 124 LLVMValueRef base_ptr, 125 LLVMValueRef offset) 126{ 127 LLVMValueRef computed_ptr = LLVMBuildGEP( 128 gallivm->builder, base_ptr, &offset, 1, ""); 129 130 return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); 131} 132 133/** 134 * Load a value stored in one of the user SGPRs 135 * 136 * @param sgpr This is the sgpr to load the value from. If you need to load a 137 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer), 138 * then you should pass the index of the first SGPR that holds the value. For 139 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then 140 * use pass 2 for the sgpr parameter. 
141 * 142 * The value of the sgpr parameter must also be aligned to the width of the type 143 * being loaded, so that the sgpr parameter is divisible by the dword width of the 144 * type. For example, if the value being loaded is two dwords wide, then the sgpr 145 * parameter must be divisible by two. 146 */ 147static LLVMValueRef use_sgpr( 148 struct gallivm_state * gallivm, 149 enum sgpr_type type, 150 unsigned sgpr) 151{ 152 LLVMValueRef sgpr_index; 153 LLVMTypeRef ret_type; 154 LLVMValueRef ptr; 155 156 sgpr_index = lp_build_const_int32(gallivm, sgpr); 157 158 switch (type) { 159 case SGPR_CONST_PTR_F32: 160 assert(sgpr % 2 == 0); 161 ret_type = LLVMFloatTypeInContext(gallivm->context); 162 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 163 break; 164 165 case SGPR_I32: 166 ret_type = LLVMInt32TypeInContext(gallivm->context); 167 break; 168 169 case SGPR_I64: 170 assert(sgpr % 2 == 0); 171 ret_type= LLVMInt64TypeInContext(gallivm->context); 172 break; 173 174 case SGPR_CONST_PTR_V4I32: 175 assert(sgpr % 2 == 0); 176 ret_type = LLVMInt32TypeInContext(gallivm->context); 177 ret_type = LLVMVectorType(ret_type, 4); 178 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 179 break; 180 181 case SGPR_CONST_PTR_V8I32: 182 assert(sgpr % 2 == 0); 183 ret_type = LLVMInt32TypeInContext(gallivm->context); 184 ret_type = LLVMVectorType(ret_type, 8); 185 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 186 break; 187 188 default: 189 assert(!"Unsupported SGPR type in use_sgpr()"); 190 return NULL; 191 } 192 193 ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE); 194 ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, ""); 195 return LLVMBuildLoad(gallivm->builder, ptr, ""); 196} 197 198static void declare_input_vs( 199 struct si_shader_context * si_shader_ctx, 200 unsigned input_index, 201 const struct tgsi_full_declaration *decl) 202{ 203 LLVMValueRef t_list_ptr; 204 LLVMValueRef t_offset; 205 LLVMValueRef t_list; 206 LLVMValueRef 
attribute_offset; 207 LLVMValueRef buffer_index_reg; 208 LLVMValueRef args[3]; 209 LLVMTypeRef vec4_type; 210 LLVMValueRef input; 211 struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 212 struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; 213 struct r600_context *rctx = si_shader_ctx->rctx; 214 //struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; 215 unsigned chan; 216 217 /* Load the T list */ 218 /* XXX: Communicate with the rest of the driver about which SGPR the T# 219 * list pointer is going to be stored in. Hard code to SGPR[6:7] for 220 * now */ 221 t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6); 222 223 t_offset = lp_build_const_int32(base->gallivm, input_index); 224 225 t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset); 226 227 /* Build the attribute offset */ 228 attribute_offset = lp_build_const_int32(base->gallivm, 0); 229 230 /* Load the buffer index is always, which is always stored in VGPR0 231 * for Vertex Shaders */ 232 buffer_index_reg = build_intrinsic(base->gallivm->builder, 233 "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0, 234 LLVMReadNoneAttribute); 235 236 vec4_type = LLVMVectorType(base->elem_type, 4); 237 args[0] = t_list; 238 args[1] = attribute_offset; 239 args[2] = buffer_index_reg; 240 input = lp_build_intrinsic(base->gallivm->builder, 241 "llvm.SI.vs.load.input", vec4_type, args, 3); 242 243 /* Break up the vec4 into individual components */ 244 for (chan = 0; chan < 4; chan++) { 245 LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan); 246 /* XXX: Use a helper function for this. There is one in 247 * tgsi_llvm.c. 
*/ 248 si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] = 249 LLVMBuildExtractElement(base->gallivm->builder, 250 input, llvm_chan, ""); 251 } 252} 253 254static void declare_input_fs( 255 struct si_shader_context * si_shader_ctx, 256 unsigned input_index, 257 const struct tgsi_full_declaration *decl) 258{ 259 const char * intr_name; 260 unsigned chan; 261 struct lp_build_context * base = 262 &si_shader_ctx->radeon_bld.soa.bld_base.base; 263 struct gallivm_state * gallivm = base->gallivm; 264 265 /* This value is: 266 * [15:0] NewPrimMask (Bit mask for each quad. It is set it the 267 * quad begins a new primitive. Bit 0 always needs 268 * to be unset) 269 * [32:16] ParamOffset 270 * 271 */ 272 /* XXX: This register number must be identical to the S_00B02C_USER_SGPR 273 * register field value 274 */ 275 LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6); 276 277 278 /* XXX: Is this the input_index? */ 279 LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index); 280 281 /* XXX: Handle all possible interpolation modes */ 282 switch (decl->Interp.Interpolate) { 283 case TGSI_INTERPOLATE_COLOR: 284 /* XXX: Flat shading hangs the GPU */ 285 if (si_shader_ctx->rctx->queued.named.rasterizer && 286 si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { 287#if 0 288 intr_name = "llvm.SI.fs.interp.constant"; 289#else 290 intr_name = "llvm.SI.fs.interp.linear.center"; 291#endif 292 } else { 293 if (decl->Interp.Centroid) 294 intr_name = "llvm.SI.fs.interp.persp.centroid"; 295 else 296 intr_name = "llvm.SI.fs.interp.persp.center"; 297 } 298 break; 299 case TGSI_INTERPOLATE_CONSTANT: 300 /* XXX: Flat shading hangs the GPU */ 301#if 0 302 intr_name = "llvm.SI.fs.interp.constant"; 303 break; 304#endif 305 case TGSI_INTERPOLATE_LINEAR: 306 if (decl->Interp.Centroid) 307 intr_name = "llvm.SI.fs.interp.linear.centroid"; 308 else 309 intr_name = "llvm.SI.fs.interp.linear.center"; 310 break; 311 case TGSI_INTERPOLATE_PERSPECTIVE: 
312 if (decl->Interp.Centroid) 313 intr_name = "llvm.SI.fs.interp.persp.centroid"; 314 else 315 intr_name = "llvm.SI.fs.interp.persp.center"; 316 break; 317 default: 318 fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); 319 return; 320 } 321 322 /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */ 323 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 324 LLVMValueRef args[3]; 325 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); 326 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); 327 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); 328 args[0] = llvm_chan; 329 args[1] = attr_number; 330 args[2] = params; 331 si_shader_ctx->radeon_bld.inputs[soa_index] = 332 lp_build_intrinsic(gallivm->builder, intr_name, 333 input_type, args, 3); 334 } 335} 336 337static void declare_input( 338 struct radeon_llvm_context * radeon_bld, 339 unsigned input_index, 340 const struct tgsi_full_declaration *decl) 341{ 342 struct si_shader_context * si_shader_ctx = 343 si_shader_context(&radeon_bld->soa.bld_base); 344 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { 345 declare_input_vs(si_shader_ctx, input_index, decl); 346 } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { 347 declare_input_fs(si_shader_ctx, input_index, decl); 348 } else { 349 fprintf(stderr, "Warning: Unsupported shader type,\n"); 350 } 351} 352 353static LLVMValueRef fetch_constant( 354 struct lp_build_tgsi_context * bld_base, 355 const struct tgsi_full_src_register *reg, 356 enum tgsi_opcode_type type, 357 unsigned swizzle) 358{ 359 struct lp_build_context * base = &bld_base->base; 360 361 LLVMValueRef const_ptr; 362 LLVMValueRef offset; 363 LLVMValueRef load; 364 365 /* XXX: Assume the pointer to the constant buffer is being stored in 366 * SGPR[0:1] */ 367 const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0); 368 369 /* XXX: This assumes that the constant buffer is not packed, so 370 * CONST[0].x will have an offset of 0 and 
CONST[1].x will have an 371 * offset of 4. */ 372 offset = lp_build_const_int32(base->gallivm, 373 (reg->Register.Index * 4) + swizzle); 374 375 load = build_indexed_load(base->gallivm, const_ptr, offset); 376 return bitcast(bld_base, type, load); 377} 378 379/* XXX: This is partially implemented for VS only at this point. It is not complete */ 380static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) 381{ 382 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); 383 struct si_shader * shader = &si_shader_ctx->shader->shader; 384 struct lp_build_context * base = &bld_base->base; 385 struct lp_build_context * uint = 386 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 387 struct tgsi_parse_context *parse = &si_shader_ctx->parse; 388 LLVMValueRef last_args[9] = { 0 }; 389 unsigned color_count = 0; 390 unsigned param_count = 0; 391 392 while (!tgsi_parse_end_of_tokens(parse)) { 393 /* XXX: component_bits controls which components of the output 394 * registers actually get exported. (e.g bit 0 means export 395 * X component, bit 1 means export Y component, etc.) I'm 396 * hard coding this to 0xf for now. In the future, we might 397 * want to do something else. 
*/ 398 unsigned component_bits = 0xf; 399 unsigned chan; 400 struct tgsi_full_declaration *d = 401 &parse->FullToken.FullDeclaration; 402 LLVMValueRef args[9]; 403 unsigned target; 404 unsigned index; 405 int i; 406 407 tgsi_parse_token(parse); 408 if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) 409 continue; 410 411 switch (d->Declaration.File) { 412 case TGSI_FILE_INPUT: 413 i = shader->ninput++; 414 shader->input[i].name = d->Semantic.Name; 415 shader->input[i].sid = d->Semantic.Index; 416 shader->input[i].interpolate = d->Interp.Interpolate; 417 shader->input[i].centroid = d->Interp.Centroid; 418 continue; 419 420 case TGSI_FILE_OUTPUT: 421 i = shader->noutput++; 422 shader->output[i].name = d->Semantic.Name; 423 shader->output[i].sid = d->Semantic.Index; 424 shader->output[i].interpolate = d->Interp.Interpolate; 425 break; 426 427 default: 428 continue; 429 } 430 431 for (index = d->Range.First; index <= d->Range.Last; index++) { 432 for (chan = 0; chan < 4; chan++ ) { 433 LLVMValueRef out_ptr = 434 si_shader_ctx->radeon_bld.soa.outputs 435 [index][chan]; 436 /* +5 because the first output value will be 437 * the 6th argument to the intrinsic. */ 438 args[chan + 5]= LLVMBuildLoad( 439 base->gallivm->builder, out_ptr, ""); 440 } 441 442 /* XXX: We probably need to keep track of the output 443 * values, so we know what we are passing to the next 444 * stage. 
*/ 445 446 /* Select the correct target */ 447 switch(d->Semantic.Name) { 448 case TGSI_SEMANTIC_POSITION: 449 target = V_008DFC_SQ_EXP_POS; 450 break; 451 case TGSI_SEMANTIC_COLOR: 452 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { 453 target = V_008DFC_SQ_EXP_PARAM + param_count; 454 shader->output[i].param_offset = param_count; 455 param_count++; 456 } else { 457 target = V_008DFC_SQ_EXP_MRT + color_count; 458 color_count++; 459 } 460 break; 461 case TGSI_SEMANTIC_GENERIC: 462 target = V_008DFC_SQ_EXP_PARAM + param_count; 463 shader->output[i].param_offset = param_count; 464 param_count++; 465 break; 466 default: 467 target = 0; 468 fprintf(stderr, 469 "Warning: SI unhandled output type:%d\n", 470 d->Semantic.Name); 471 } 472 473 /* Specify which components to enable */ 474 args[0] = lp_build_const_int32(base->gallivm, 475 component_bits); 476 477 /* Specify whether the EXEC mask represents the valid mask */ 478 args[1] = lp_build_const_int32(base->gallivm, 0); 479 480 /* Specify whether this is the last export */ 481 args[2] = lp_build_const_int32(base->gallivm, 0); 482 483 /* Specify the target we are exporting */ 484 args[3] = lp_build_const_int32(base->gallivm, target); 485 486 /* Set COMPR flag to zero to export data as 32-bit */ 487 args[4] = uint->zero; 488 489 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ? 
490 (d->Semantic.Name == TGSI_SEMANTIC_POSITION) : 491 (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) { 492 if (last_args[0]) { 493 lp_build_intrinsic(base->gallivm->builder, 494 "llvm.SI.export", 495 LLVMVoidTypeInContext(base->gallivm->context), 496 last_args, 9); 497 } 498 499 memcpy(last_args, args, sizeof(args)); 500 } else { 501 lp_build_intrinsic(base->gallivm->builder, 502 "llvm.SI.export", 503 LLVMVoidTypeInContext(base->gallivm->context), 504 args, 9); 505 } 506 507 } 508 } 509 510 if (!last_args[0]) { 511 assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); 512 513 /* Specify which components to enable */ 514 last_args[0] = lp_build_const_int32(base->gallivm, 0x0); 515 516 /* Specify the target we are exporting */ 517 last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); 518 519 /* Set COMPR flag to zero to export data as 32-bit */ 520 last_args[4] = uint->zero; 521 522 /* dummy bits */ 523 last_args[5]= uint->zero; 524 last_args[6]= uint->zero; 525 last_args[7]= uint->zero; 526 last_args[8]= uint->zero; 527 } 528 529 /* Specify whether the EXEC mask represents the valid mask */ 530 last_args[1] = lp_build_const_int32(base->gallivm, 531 si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); 532 533 /* Specify that this is the last export */ 534 last_args[2] = lp_build_const_int32(base->gallivm, 1); 535 536 lp_build_intrinsic(base->gallivm->builder, 537 "llvm.SI.export", 538 LLVMVoidTypeInContext(base->gallivm->context), 539 last_args, 9); 540 541/* XXX: Look up what this function does */ 542/* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/ 543} 544 545static void tex_fetch_args( 546 struct lp_build_tgsi_context * bld_base, 547 struct lp_build_emit_data * emit_data) 548{ 549 const struct tgsi_full_instruction * inst = emit_data->inst; 550 LLVMValueRef ptr; 551 LLVMValueRef offset; 552 553 /* WriteMask */ 554 /* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/ 555 emit_data->args[0] = 
lp_build_const_int32(bld_base->base.gallivm, 0xf); 556 557 /* Coordinates */ 558 /* XXX: Not all sample instructions need 4 address arguments. */ 559 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 560 LLVMValueRef src_w; 561 unsigned chan; 562 LLVMValueRef coords[4]; 563 564 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); 565 src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); 566 567 for (chan = 0; chan < 3; chan++ ) { 568 LLVMValueRef arg = lp_build_emit_fetch(bld_base, 569 emit_data->inst, 0, chan); 570 coords[chan] = lp_build_emit_llvm_binary(bld_base, 571 TGSI_OPCODE_DIV, 572 arg, src_w); 573 } 574 coords[3] = bld_base->base.one; 575 emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm, 576 coords, 4); 577 } else 578 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 579 0, LP_CHAN_ALL); 580 581 /* Resource */ 582 ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4); 583 offset = lp_build_const_int32(bld_base->base.gallivm, 584 emit_data->inst->Src[1].Register.Index); 585 emit_data->args[2] = build_indexed_load(bld_base->base.gallivm, 586 ptr, offset); 587 588 /* Sampler */ 589 ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2); 590 offset = lp_build_const_int32(bld_base->base.gallivm, 591 emit_data->inst->Src[1].Register.Index); 592 emit_data->args[3] = build_indexed_load(bld_base->base.gallivm, 593 ptr, offset); 594 595 /* Dimensions */ 596 /* XXX: We might want to pass this information to the shader at some. 
*/ 597/* emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, 598 emit_data->inst->Texture.Texture); 599*/ 600 601 emit_data->arg_count = 4; 602 /* XXX: To optimize, we could use a float or v2f32, if the last bits of 603 * the writemask are clear */ 604 emit_data->dst_type = LLVMVectorType( 605 LLVMFloatTypeInContext(bld_base->base.gallivm->context), 606 4); 607} 608 609static const struct lp_build_tgsi_action tex_action = { 610 .fetch_args = tex_fetch_args, 611 .emit = lp_build_tgsi_intrinsic, 612 .intr_name = "llvm.SI.sample" 613}; 614 615 616int si_pipe_shader_create( 617 struct pipe_context *ctx, 618 struct si_pipe_shader *shader) 619{ 620 struct r600_context *rctx = (struct r600_context*)ctx; 621 struct si_pipe_shader_selector *sel = shader->selector; 622 struct si_shader_context si_shader_ctx; 623 struct tgsi_shader_info shader_info; 624 struct lp_build_tgsi_context * bld_base; 625 LLVMModuleRef mod; 626 unsigned char * inst_bytes; 627 unsigned inst_byte_count; 628 unsigned i; 629 uint32_t *ptr; 630 bool dump; 631 632 dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE); 633 634 memset(&si_shader_ctx.radeon_bld, 0, sizeof(si_shader_ctx.radeon_bld)); 635 radeon_llvm_context_init(&si_shader_ctx.radeon_bld); 636 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; 637 638 tgsi_scan_shader(sel->tokens, &shader_info); 639 bld_base->info = &shader_info; 640 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; 641 bld_base->emit_epilogue = si_llvm_emit_epilogue; 642 643 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; 644 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; 645 646 si_shader_ctx.radeon_bld.load_input = declare_input; 647 si_shader_ctx.tokens = sel->tokens; 648 tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); 649 si_shader_ctx.shader = shader; 650 si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; 651 si_shader_ctx.rctx = rctx; 652 653 shader->shader.nr_cbufs = 
rctx->framebuffer.nr_cbufs; 654 655 /* Dump TGSI code before doing TGSI->LLVM conversion in case the 656 * conversion fails. */ 657 if (dump) { 658 tgsi_dump(sel->tokens, 0); 659 } 660 661 if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { 662 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); 663 return -EINVAL; 664 } 665 666 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); 667 668 mod = bld_base->base.gallivm->module; 669 if (dump) { 670 LLVMDumpModule(mod); 671 } 672 radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump); 673 if (dump) { 674 fprintf(stderr, "SI CODE:\n"); 675 for (i = 0; i < inst_byte_count; i+=4 ) { 676 fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3], 677 inst_bytes[i + 2], inst_bytes[i + 1], 678 inst_bytes[i]); 679 } 680 } 681 682 shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes); 683 shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4)); 684 shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8)); 685 686 radeon_llvm_dispose(&si_shader_ctx.radeon_bld); 687 tgsi_parse_free(&si_shader_ctx.parse); 688 689 /* copy new shader */ 690 si_resource_reference(&shader->bo, NULL); 691 shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE, 692 inst_byte_count - 12); 693 if (shader->bo == NULL) { 694 return -ENOMEM; 695 } 696 697 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 698 if (0 /*R600_BIG_ENDIAN*/) { 699 for (i = 0; i < (inst_byte_count-12)/4; ++i) { 700 ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4)); 701 } 702 } else { 703 memcpy(ptr, inst_bytes + 12, inst_byte_count - 12); 704 } 705 rctx->ws->buffer_unmap(shader->bo->cs_buf); 706 707 free(inst_bytes); 708 709 return 0; 710} 711 712void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) 713{ 714 si_resource_reference(&shader->bo, NULL); 715} 716