/* evergreen_compute_internal.c revision 0c4b19ac63efa41242c515824301e6161aceeea5 */
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a 3 * copy of this software and associated documentation files (the "Software"), 4 * to deal in the Software without restriction, including without limitation 5 * on the rights to use, copy, modify, merge, publish, distribute, sub 6 * license, and/or sell copies of the Software, and to permit persons to whom 7 * the Software is furnished to do so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice (including the next 10 * paragraph) shall be included in all copies or substantial portions of the 11 * Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 * 21 * Authors: 22 * Adam Rak <adam.rak@streamnovation.com> 23 */ 24 25#include <stdlib.h> 26#include <stdio.h> 27 28#include "pipe/p_defines.h" 29#include "pipe/p_state.h" 30#include "pipe/p_context.h" 31#include "util/u_blitter.h" 32#include "util/u_double_list.h" 33#include "util/u_transfer.h" 34#include "util/u_surface.h" 35#include "util/u_pack_color.h" 36#include "util/u_memory.h" 37#include "util/u_inlines.h" 38#include "util/u_framebuffer.h" 39#include "r600.h" 40#include "r600_resource.h" 41#include "r600_shader.h" 42#include "r600_pipe.h" 43#include "r600_formats.h" 44#include "evergreend.h" 45#include "evergreen_compute_internal.h" 46#include "r600_hw_context_priv.h" 47 48int get_compute_resource_num(void) 49{ 50 int num = 0; 51#define DECL_COMPUTE_RESOURCE(name, n) num += n; 52#include "compute_resource.def" 53#undef DECL_COMPUTE_RESOURCE 54 return num; 55} 56 57void evergreen_emit_raw_value( 58 struct evergreen_compute_resource* res, 59 unsigned value) 60{ 61 res->cs[res->cs_end++] = value; 62} 63 64void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value) 65{ 66 ctx->cs->buf[ctx->cs->cdw++] = value; 67} 68 69void evergreen_mult_reg_set_( 70 struct evergreen_compute_resource* res, 71 int index, 72 u32* array, 73 int size) 74{ 75 int i = 0; 76 77 evergreen_emit_raw_reg_set(res, index, size / 4); 78 79 for (i = 0; i < size; i+=4) { 80 res->cs[res->cs_end++] = array[i / 4]; 81 } 82} 83 84void evergreen_reg_set( 85 struct evergreen_compute_resource* res, 86 unsigned index, 87 unsigned value) 88{ 89 evergreen_emit_raw_reg_set(res, index, 1); 90 res->cs[res->cs_end++] = value; 91} 92 93struct evergreen_compute_resource* get_empty_res( 94 struct r600_pipe_compute* pipe, 95 enum evergreen_compute_resources res_code, 96 int offset_index) 97{ 98 int code_index = -1; 99 int code_size = -1; 100 101 { 102 int i = 0; 103 #define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name == res_code) {code_index = i; code_size = n;} i += n; 104 
#include "compute_resource.def" 105 #undef DECL_COMPUTE_RESOURCE 106 } 107 108 assert(code_index != -1 && "internal error: resouce index not found"); 109 assert(offset_index < code_size && "internal error: overindexing resource"); 110 111 int index = code_index + offset_index; 112 113 struct evergreen_compute_resource* res = &pipe->resources[index]; 114 115 res->enabled = true; 116 res->bo = NULL; 117 res->cs_end = 0; 118 bzero(&res->do_reloc, sizeof(res->do_reloc)); 119 120 return res; 121} 122 123void evergreen_emit_raw_reg_set( 124 struct evergreen_compute_resource* res, 125 unsigned index, 126 int num) 127{ 128 res->enabled = 1; 129 int cs_end = res->cs_end; 130 131 if (index >= EVERGREEN_CONFIG_REG_OFFSET 132 && index < EVERGREEN_CONFIG_REG_END) { 133 res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0); 134 res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2; 135 } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET 136 && index < EVERGREEN_CONTEXT_REG_END) { 137 res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0); 138 res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; 139 } else if (index >= EVERGREEN_RESOURCE_OFFSET 140 && index < EVERGREEN_RESOURCE_END) { 141 res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0); 142 res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2; 143 } else if (index >= EVERGREEN_SAMPLER_OFFSET 144 && index < EVERGREEN_SAMPLER_END) { 145 res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0); 146 res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2; 147 } else if (index >= EVERGREEN_CTL_CONST_OFFSET 148 && index < EVERGREEN_CTL_CONST_END) { 149 res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0); 150 res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2; 151 } else if (index >= EVERGREEN_LOOP_CONST_OFFSET 152 && index < EVERGREEN_LOOP_CONST_END) { 153 res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0); 154 res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2; 155 } else if (index >= 
EVERGREEN_BOOL_CONST_OFFSET 156 && index < EVERGREEN_BOOL_CONST_END) { 157 res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0); 158 res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2; 159 } else { 160 res->cs[cs_end] = PKT0(index, num-1); 161 res->cs_end--; 162 } 163 164 res->cs_end += 2; 165} 166 167void evergreen_emit_force_reloc(struct evergreen_compute_resource* res) 168{ 169 res->do_reloc[res->cs_end] += 1; 170} 171 172void evergreen_emit_ctx_reg_set( 173 struct r600_context *ctx, 174 unsigned index, 175 int num) 176{ 177 178 if (index >= EVERGREEN_CONFIG_REG_OFFSET 179 && index < EVERGREEN_CONFIG_REG_END) { 180 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0); 181 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2; 182 } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET 183 && index < EVERGREEN_CONTEXT_REG_END) { 184 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0); 185 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; 186 } else if (index >= EVERGREEN_RESOURCE_OFFSET 187 && index < EVERGREEN_RESOURCE_END) { 188 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0); 189 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2; 190 } else if (index >= EVERGREEN_SAMPLER_OFFSET 191 && index < EVERGREEN_SAMPLER_END) { 192 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0); 193 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2; 194 } else if (index >= EVERGREEN_CTL_CONST_OFFSET 195 && index < EVERGREEN_CTL_CONST_END) { 196 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0); 197 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2; 198 } else if (index >= EVERGREEN_LOOP_CONST_OFFSET 199 && index < EVERGREEN_LOOP_CONST_END) { 200 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0); 201 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2; 202 } else 
if (index >= EVERGREEN_BOOL_CONST_OFFSET 203 && index < EVERGREEN_BOOL_CONST_END) { 204 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0); 205 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2; 206 } else { 207 ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1); 208 } 209} 210 211void evergreen_emit_ctx_reloc( 212 struct r600_context *ctx, 213 struct r600_resource *bo, 214 enum radeon_bo_usage usage) 215{ 216 assert(bo); 217 218 ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0); 219 u32 rr = r600_context_bo_reloc(ctx, bo, usage); 220 ctx->cs->buf[ctx->cs->cdw++] = rr; 221} 222 223void evergreen_set_buffer_sync( 224 struct r600_context *ctx, 225 struct r600_resource* bo, 226 int size, 227 int flags, 228 enum radeon_bo_usage usage) 229{ 230 assert(bo); 231 int32_t cp_coher_size = 0; 232 233 if (size == 0xffffffff || size == 0) { 234 cp_coher_size = 0xffffffff; 235 } 236 else { 237 cp_coher_size = ((size + 255) >> 8); 238 } 239 240 uint32_t sync_flags = 0; 241 242 if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) { 243 sync_flags |= S_0085F0_TC_ACTION_ENA(1); 244 } 245 246 if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) { 247 sync_flags |= S_0085F0_VC_ACTION_ENA(1); 248 } 249 250 if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) { 251 sync_flags |= S_0085F0_SH_ACTION_ENA(1); 252 } 253 254 if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) { 255 sync_flags |= S_0085F0_CB_ACTION_ENA(1); 256 257 switch((flags >> 8) & 0xF) { 258 case 0: 259 sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1); 260 break; 261 case 1: 262 sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1); 263 break; 264 case 2: 265 sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1); 266 break; 267 case 3: 268 sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1); 269 break; 270 case 4: 271 sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1); 272 break; 273 case 5: 274 sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1); 275 break; 276 case 6: 277 sync_flags |= 
S_0085F0_CB6_DEST_BASE_ENA(1); 278 break; 279 case 7: 280 sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1); 281 break; 282 case 8: 283 sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1); 284 break; 285 case 9: 286 sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1); 287 break; 288 case 10: 289 sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1); 290 break; 291 case 11: 292 sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1); 293 break; 294 default: 295 assert(0); 296 } 297 } 298 299 int32_t poll_interval = 10; 300 301 ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); 302 ctx->cs->buf[ctx->cs->cdw++] = sync_flags; 303 ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size; 304 ctx->cs->buf[ctx->cs->cdw++] = 0; 305 ctx->cs->buf[ctx->cs->cdw++] = poll_interval; 306 307 if (cp_coher_size != 0xffffffff) { 308 evergreen_emit_ctx_reloc(ctx, bo, usage); 309 } 310} 311 312int evergreen_compute_get_gpu_format( 313 struct number_type_and_format* fmt, 314 struct r600_resource *bo) 315{ 316 switch (bo->b.b.format) 317 { 318 case PIPE_FORMAT_R8_UNORM: 319 case PIPE_FORMAT_R32_UNORM: 320 fmt->format = V_028C70_COLOR_32; 321 fmt->number_type = V_028C70_NUMBER_UNORM; 322 fmt->num_format_all = 0; 323 break; 324 case PIPE_FORMAT_R32_FLOAT: 325 fmt->format = V_028C70_COLOR_32_FLOAT; 326 fmt->number_type = V_028C70_NUMBER_FLOAT; 327 fmt->num_format_all = 0; 328 break; 329 case PIPE_FORMAT_R32G32B32A32_FLOAT: 330 fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT; 331 fmt->number_type = V_028C70_NUMBER_FLOAT; 332 fmt->num_format_all = 0; 333 break; 334 335 ///TODO: other formats... 
336 337 default: 338 return 0; 339 } 340 341 return 1; 342} 343 344void evergreen_set_rat( 345 struct r600_pipe_compute *pipe, 346 int id, 347 struct r600_resource* bo, 348 int start, 349 int size) 350{ 351 assert(id < 12); 352 assert((size & 3) == 0); 353 assert((start & 0xFF) == 0); 354 355 int offset; 356 COMPUTE_DBG("bind rat: %i \n", id); 357 358 if (id < 8) { 359 offset = id*0x3c; 360 } 361 else { 362 offset = 8*0x3c + (id-8)*0x1c; 363 } 364 365 int linear = 0; 366 367 if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1 368 && bo->b.b.target == PIPE_BUFFER) { 369 linear = 1; 370 } 371 372 struct evergreen_compute_resource* res = 373 get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id); 374 375 evergreen_emit_force_reloc(res); 376 377 evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D? 378 evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0); 379 380 struct number_type_and_format fmt; 381 382 ///default config 383 if (bo->b.b.format == PIPE_FORMAT_NONE) { 384 fmt.format = V_028C70_COLOR_32; 385 fmt.number_type = V_028C70_NUMBER_FLOAT; 386 } else { 387 evergreen_compute_get_gpu_format(&fmt, bo); 388 } 389 390 if (linear) { 391 evergreen_reg_set(res, 392 R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) 393 | S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED) 394 | S_028C70_FORMAT(fmt.format) 395 | S_028C70_NUMBER_TYPE(fmt.number_type) 396 ); 397 evergreen_emit_force_reloc(res); 398 } else { 399 assert(0 && "TODO"); 400 ///TODO 401// evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????)); 402// evergreen_emit_force_reloc(res); 403 } 404 405 evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1)); 406 evergreen_emit_force_reloc(res); 407 408 if (linear) { 409 /* XXX: Why are we using size instead of bo->b.b.b.width0 ? 
*/ 410 evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size); 411 } else { 412 evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, 413 S_028C78_WIDTH_MAX(bo->b.b.width0) 414 | S_028C78_HEIGHT_MAX(bo->b.b.height0)); 415 } 416 417 if (id < 8) { 418 evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0); 419 evergreen_emit_force_reloc(res); 420 evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0); 421 evergreen_emit_force_reloc(res); 422 } 423 424 evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8); 425 426 res->bo = bo; 427 res->usage = RADEON_USAGE_READWRITE; 428 res->coher_bo_size = size; 429 res->flags = COMPUTE_RES_CB_FLUSH(id); 430} 431 432void evergreen_set_lds( 433 struct r600_pipe_compute *pipe, 434 int num_lds, 435 int size, 436 int num_waves) 437{ 438 struct evergreen_compute_resource* res = 439 get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0); 440 441 if (pipe->ctx->chip_class < CAYMAN) { 442 evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT, 443 S_008E2C_NUM_LS_LDS(num_lds)); 444 } else { 445 evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT, 446 S_0286FC_NUM_LS_LDS(num_lds)); 447 } 448 evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14); 449} 450 451void evergreen_set_gds( 452 struct r600_pipe_compute *pipe, 453 uint32_t addr, 454 uint32_t size) 455{ 456 struct evergreen_compute_resource* res = 457 get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0); 458 459 evergreen_reg_set(res, R_028728_GDS_ORDERED_WAVE_PER_SE, 1); 460 evergreen_reg_set(res, R_028720_GDS_ADDR_BASE, addr); 461 evergreen_reg_set(res, R_028724_GDS_ADDR_SIZE, size); 462} 463 464void evergreen_set_export( 465 struct r600_pipe_compute *pipe, 466 struct r600_resource* bo, 467 int offset, int size) 468{ 469 #define SX_MEMORY_EXPORT_BASE 0x9010 470 #define SX_MEMORY_EXPORT_SIZE 0x9014 471 472 struct evergreen_compute_resource* res = 473 get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0); 474 475 evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size); 476 477 if (size) { 478 
evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset); 479 res->bo = bo; 480 res->usage = RADEON_USAGE_WRITE; 481 res->coher_bo_size = size; 482 res->flags = 0; 483 } 484} 485 486void evergreen_set_loop_const( 487 struct r600_pipe_compute *pipe, 488 int id, int count, int init, int inc) { 489 490 struct evergreen_compute_resource* res = 491 get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id); 492 493 assert(id < 32); 494 assert(count <= 0xFFF); 495 assert(init <= 0xFF); 496 assert(inc <= 0xFF); 497 498 /* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to 499 * SQ_LOOP_CONST_191 */ 500 evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4), 501 count | init << 12 | inc << 24); 502} 503 504void evergreen_set_tmp_ring( 505 struct r600_pipe_compute *pipe, 506 struct r600_resource* bo, 507 int offset, int size, int se) 508{ 509 #define SQ_LSTMP_RING_BASE 0x00008e10 510 #define SQ_LSTMP_RING_SIZE 0x00008e14 511 #define GRBM_GFX_INDEX 0x802C 512 #define INSTANCE_INDEX(x) ((x) << 0) 513 #define SE_INDEX(x) ((x) << 16) 514 #define INSTANCE_BROADCAST_WRITES (1 << 30) 515 #define SE_BROADCAST_WRITES (1 << 31) 516 517 struct evergreen_compute_resource* res = 518 get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se); 519 520 evergreen_reg_set(res, 521 GRBM_GFX_INDEX,INSTANCE_INDEX(0) 522 | SE_INDEX(se) 523 | INSTANCE_BROADCAST_WRITES); 524 evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size); 525 526 if (size) { 527 assert(bo); 528 529 evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset); 530 res->bo = bo; 531 res->usage = RADEON_USAGE_WRITE; 532 res->coher_bo_size = 0; 533 res->flags = 0; 534 } 535 536 if (size) { 537 evergreen_emit_force_reloc(res); 538 } 539 540 evergreen_reg_set(res, 541 GRBM_GFX_INDEX,INSTANCE_INDEX(0) 542 | SE_INDEX(0) 543 | INSTANCE_BROADCAST_WRITES 544 | SE_BROADCAST_WRITES); 545} 546 547static uint32_t r600_colorformat_endian_swap(uint32_t colorformat) 548{ 549 if (R600_BIG_ENDIAN) { 550 switch(colorformat) { 551 case V_028C70_COLOR_4_4: 
552 return ENDIAN_NONE; 553 554 /* 8-bit buffers. */ 555 case V_028C70_COLOR_8: 556 return ENDIAN_NONE; 557 558 /* 16-bit buffers. */ 559 case V_028C70_COLOR_5_6_5: 560 case V_028C70_COLOR_1_5_5_5: 561 case V_028C70_COLOR_4_4_4_4: 562 case V_028C70_COLOR_16: 563 case V_028C70_COLOR_8_8: 564 return ENDIAN_8IN16; 565 566 /* 32-bit buffers. */ 567 case V_028C70_COLOR_8_8_8_8: 568 case V_028C70_COLOR_2_10_10_10: 569 case V_028C70_COLOR_8_24: 570 case V_028C70_COLOR_24_8: 571 case V_028C70_COLOR_32_FLOAT: 572 case V_028C70_COLOR_16_16_FLOAT: 573 case V_028C70_COLOR_16_16: 574 return ENDIAN_8IN32; 575 576 /* 64-bit buffers. */ 577 case V_028C70_COLOR_16_16_16_16: 578 case V_028C70_COLOR_16_16_16_16_FLOAT: 579 return ENDIAN_8IN16; 580 581 case V_028C70_COLOR_32_32_FLOAT: 582 case V_028C70_COLOR_32_32: 583 case V_028C70_COLOR_X24_8_32_FLOAT: 584 return ENDIAN_8IN32; 585 586 /* 96-bit buffers. */ 587 case V_028C70_COLOR_32_32_32_FLOAT: 588 /* 128-bit buffers. */ 589 case V_028C70_COLOR_32_32_32_32_FLOAT: 590 case V_028C70_COLOR_32_32_32_32: 591 return ENDIAN_8IN32; 592 default: 593 return ENDIAN_NONE; /* Unsupported. 
*/ 594 } 595 } else { 596 return ENDIAN_NONE; 597 } 598} 599 600static unsigned r600_tex_dim(unsigned dim) 601{ 602 switch (dim) { 603 default: 604 case PIPE_TEXTURE_1D: 605 return V_030000_SQ_TEX_DIM_1D; 606 case PIPE_TEXTURE_1D_ARRAY: 607 return V_030000_SQ_TEX_DIM_1D_ARRAY; 608 case PIPE_TEXTURE_2D: 609 case PIPE_TEXTURE_RECT: 610 return V_030000_SQ_TEX_DIM_2D; 611 case PIPE_TEXTURE_2D_ARRAY: 612 return V_030000_SQ_TEX_DIM_2D_ARRAY; 613 case PIPE_TEXTURE_3D: 614 return V_030000_SQ_TEX_DIM_3D; 615 case PIPE_TEXTURE_CUBE: 616 return V_030000_SQ_TEX_DIM_CUBEMAP; 617 } 618} 619 620void evergreen_set_vtx_resource( 621 struct r600_pipe_compute *pipe, 622 struct r600_resource* bo, 623 int id, uint64_t offset, int writable) 624{ 625 assert(id < 16); 626 uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; 627 struct number_type_and_format fmt; 628 uint64_t va; 629 630 fmt.format = 0; 631 632 assert(bo->b.b.height0 <= 1); 633 assert(bo->b.b.depth0 <= 1); 634 635 int e = evergreen_compute_get_gpu_format(&fmt, bo); 636 637 assert(e && "unknown format"); 638 639 struct evergreen_compute_resource* res = 640 get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id); 641 642 unsigned size = bo->b.b.width0; 643 unsigned stride = 1; 644 645// size = (size * util_format_get_blockwidth(bo->b.b.b.format) * 646// util_format_get_blocksize(bo->b.b.b.format)); 647 648 va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset; 649 650 COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n", 651 id, size, bo->b.b.width0); 652 653 sq_vtx_constant_word2 = 654 S_030008_BASE_ADDRESS_HI(va >> 32) | 655 S_030008_STRIDE(stride) | 656 S_030008_DATA_FORMAT(fmt.format) | 657 S_030008_NUM_FORMAT_ALL(fmt.num_format_all) | 658 S_030008_ENDIAN_SWAP(0); 659 660 COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset, 661 stride, fmt.format, fmt.num_format_all); 662 663 sq_vtx_constant_word3 = 664 S_03000C_DST_SEL_X(0) | 665 S_03000C_DST_SEL_Y(1) | 666 
S_03000C_DST_SEL_Z(2) | 667 S_03000C_DST_SEL_W(3); 668 669 sq_vtx_constant_word4 = 0; 670 671 evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0)); 672 evergreen_emit_raw_value(res, (id+816)*32 >> 2); 673 evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff)); 674 evergreen_emit_raw_value(res, size - 1); 675 evergreen_emit_raw_value(res, sq_vtx_constant_word2); 676 evergreen_emit_raw_value(res, sq_vtx_constant_word3); 677 evergreen_emit_raw_value(res, sq_vtx_constant_word4); 678 evergreen_emit_raw_value(res, 0); 679 evergreen_emit_raw_value(res, 0); 680 evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER)); 681 682 res->bo = bo; 683 684 if (writable) { 685 res->usage = RADEON_USAGE_READWRITE; 686 } 687 else { 688 res->usage = RADEON_USAGE_READ; 689 } 690 691 res->coher_bo_size = size; 692 res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH; 693} 694 695void evergreen_set_tex_resource( 696 struct r600_pipe_compute *pipe, 697 struct r600_pipe_sampler_view* view, 698 int id) 699{ 700 struct evergreen_compute_resource* res = 701 get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id); 702 struct r600_resource_texture *tmp = 703 (struct r600_resource_texture*)view->base.texture; 704 705 unsigned format, endian; 706 uint32_t word4 = 0, yuv_format = 0, pitch = 0; 707 unsigned char swizzle[4], array_mode = 0, tile_type = 0; 708 unsigned height, depth; 709 710 swizzle[0] = 0; 711 swizzle[1] = 1; 712 swizzle[2] = 2; 713 swizzle[3] = 3; 714 715 format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen, 716 view->base.format, swizzle, &word4, &yuv_format); 717 718 if (format == ~0) { 719 format = 0; 720 } 721 722 endian = r600_colorformat_endian_swap(format); 723 724 height = view->base.texture->height0; 725 depth = view->base.texture->depth0; 726 727 pitch = align(tmp->pitch_in_blocks[0] * 728 util_format_get_blockwidth(tmp->real_format), 8); 729 array_mode = tmp->array_mode[0]; 730 tile_type = tmp->tile_type; 731 732 
assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY); 733 assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY); 734 735 evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0)); 736 evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line 737 evergreen_emit_raw_value(res, 738 (S_030000_DIM(r600_tex_dim(view->base.texture->target)) | 739 S_030000_PITCH((pitch / 8) - 1) | 740 S_030000_NON_DISP_TILING_ORDER(tile_type) | 741 S_030000_TEX_WIDTH(view->base.texture->width0 - 1))); 742 evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) | 743 S_030004_TEX_DEPTH(depth - 1) | 744 S_030004_ARRAY_MODE(array_mode))); 745 evergreen_emit_raw_value(res, tmp->offset[0] >> 8); 746 evergreen_emit_raw_value(res, tmp->offset[0] >> 8); 747 evergreen_emit_raw_value(res, (word4 | 748 S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | 749 S_030010_ENDIAN_SWAP(endian) | 750 S_030010_BASE_LEVEL(0))); 751 evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) | 752 S_030014_BASE_ARRAY(0) | 753 S_030014_LAST_ARRAY(0))); 754 evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */))); 755 evergreen_emit_raw_value(res, 756 S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE) 757 | S_03001C_DATA_FORMAT(format)); 758 759 res->bo = (struct r600_resource*)view->base.texture; 760 761 res->usage = RADEON_USAGE_READ; 762 763 res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth; 764 res->flags = COMPUTE_RES_TC_FLUSH; 765 766 evergreen_emit_force_reloc(res); 767 evergreen_emit_force_reloc(res); 768} 769 770void evergreen_set_sampler_resource( 771 struct r600_pipe_compute *pipe, 772 struct compute_sampler_state *sampler, 773 int id) 774{ 775 struct evergreen_compute_resource* res = 776 get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id); 777 778 unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 
2 : 0; 779 780 evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0)); 781 evergreen_emit_raw_value(res, (id + 90)*3); 782 evergreen_emit_raw_value(res, 783 S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) | 784 S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) | 785 S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) | 786 S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) | 787 S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) | 788 S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK) 789 ); 790 evergreen_emit_raw_value(res, 791 S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) | 792 S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8)) 793 ); 794 evergreen_emit_raw_value(res, 795 S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) | 796 (sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) | 797 S_03C008_TYPE(1) 798 ); 799} 800 801void evergreen_set_const_cache( 802 struct r600_pipe_compute *pipe, 803 int cache_id, 804 struct r600_resource* cbo, 805 int size, int offset) 806{ 807 #define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0 808 #define SQ_ALU_CONST_CACHE_LS_0 0x00028f40 809 810 struct evergreen_compute_resource* res = 811 get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id); 812 813 assert(size < 0x200); 814 assert((offset & 0xFF) == 0); 815 assert(cache_id < 16); 816 817 evergreen_reg_set(res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + cache_id*4, size); 818 evergreen_reg_set(res, SQ_ALU_CONST_CACHE_LS_0 + cache_id*4, offset >> 8); 819 res->bo = cbo; 820 res->usage = RADEON_USAGE_READ; 821 res->coher_bo_size = size; 822 res->flags = COMPUTE_RES_SH_FLUSH; 823} 824 825struct r600_resource* r600_compute_buffer_alloc_vram( 826 struct r600_screen *screen, 827 unsigned size) 828{ 829 assert(size); 830 831 struct pipe_resource * buffer = pipe_buffer_create( 832 (struct pipe_screen*) 
screen, 833 PIPE_BIND_CUSTOM, 834 PIPE_USAGE_IMMUTABLE, 835 size); 836 837 return (struct r600_resource *)buffer; 838} 839