/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2014,2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 */

#include "sid.h"
#include "si_pipe.h"

static void cik_sdma_copy_buffer(struct si_context *ctx,
				 struct pipe_resource *dst,
				 struct pipe_resource *src,
				 uint64_t dst_offset,
				 uint64_t src_offset,
				 uint64_t size)
{
	struct radeon_winsys_cs *cs = ctx->b.dma.cs;
	unsigned i, ncopy, csize;
	struct r600_resource *rdst = r600_resource(dst);
	struct r600_resource *rsrc = r600_resource(src);

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&rdst->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += rdst->gpu_address;
	src_offset += rsrc->gpu_address;

	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
	r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);

	for (i = 0; i < ncopy; i++) {
		csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
						CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
						0));
		radeon_emit(cs, csize);
		radeon_emit(cs, 0); /* src/dst endian swap */
		radeon_emit(cs, src_offset);
		radeon_emit(cs, src_offset >> 32);
		radeon_emit(cs, dst_offset);
		radeon_emit(cs, dst_offset >> 32);
		dst_offset += csize;
		src_offset += csize;
		size -= csize;
	}
}
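
/* Clear a buffer with the SDMA CONSTANT_FILL packet. The packet writes
 * whole dwords only, so unaligned offsets/sizes (and contexts without a
 * DMA ring) take the pipe_context::clear_buffer fallback instead. */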
static void cik_sdma_clear_buffer(struct pipe_context *ctx,
				  struct pipe_resource *dst,
				  uint64_t offset,
				  uint64_t size,
				  unsigned clear_value)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct radeon_winsys_cs *cs = sctx->b.dma.cs;
	unsigned i, ncopy, csize;
	struct r600_resource *rdst = r600_resource(dst);

	if (!cs || offset % 4 != 0 || size % 4 != 0) {
		ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
		return;
	}

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&rdst->valid_buffer_range, offset, offset + size);

	offset += rdst->gpu_address;

	/* the same maximum size as for copying */
	ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
	r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);

	for (i = 0; i < ncopy; i++) {
		csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0,
						0x8000 /* dword copy */));
		radeon_emit(cs, offset);
		radeon_emit(cs, offset >> 32);
		radeon_emit(cs, clear_value);
		radeon_emit(cs, csize);
		offset += csize;
		size -= csize;
	}
}

static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
{
	width = u_minify(width, level);
	return DIV_ROUND_UP(width, blk_w);
}

static unsigned encode_tile_info(struct si_context *sctx,
				 struct r600_texture *tex, unsigned level,
				 bool set_bpp)
{
	struct radeon_info *info = &sctx->screen->b.info;
	unsigned tile_index = tex->surface.tiling_index[level];
	unsigned macro_tile_index = tex->surface.macro_tile_index;
	unsigned tile_mode = info->si_tile_mode_array[tile_index];
	unsigned macro_tile_mode = info->cik_macrotile_mode_array[macro_tile_index];

	return (set_bpp ? util_logbase2(tex->surface.bpe) : 0) |
	       (G_009910_ARRAY_MODE(tile_mode) << 3) |
	       (G_009910_MICRO_TILE_MODE_NEW(tile_mode) << 8) |
	       /* Non-depth modes don't have TILE_SPLIT set. */
	       ((util_logbase2(tex->surface.tile_split >> 6)) << 11) |
	       (G_009990_BANK_WIDTH(macro_tile_mode) << 15) |
	       (G_009990_BANK_HEIGHT(macro_tile_mode) << 18) |
	       (G_009990_NUM_BANKS(macro_tile_mode) << 21) |
	       (G_009990_MACRO_TILE_ASPECT(macro_tile_mode) << 24) |
	       (G_009910_PIPE_CONFIG(tile_mode) << 26);
}
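
/* Try to copy a texture with one of the three SDMA sub-window packets
 * (linear->linear, tiled<->linear, tiled->tiled). Returns false when the
 * copy doesn't fit the packet encodings or would hit a hardware erratum,
 * so that the caller can fall back to a regular blit. */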
static bool cik_sdma_copy_texture(struct si_context *sctx,
				  struct pipe_resource *dst,
				  unsigned dst_level,
				  unsigned dstx, unsigned dsty, unsigned dstz,
				  struct pipe_resource *src,
				  unsigned src_level,
				  const struct pipe_box *src_box)
{
	struct radeon_info *info = &sctx->screen->b.info;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct r600_texture *rdst = (struct r600_texture*)dst;
	unsigned bpp = rdst->surface.bpe;
	uint64_t dst_address = rdst->resource.gpu_address +
			       rdst->surface.level[dst_level].offset;
	uint64_t src_address = rsrc->resource.gpu_address +
			       rsrc->surface.level[src_level].offset;
	unsigned dst_mode = rdst->surface.level[dst_level].mode;
	unsigned src_mode = rsrc->surface.level[src_level].mode;
	unsigned dst_tile_index = rdst->surface.tiling_index[dst_level];
	unsigned src_tile_index = rsrc->surface.tiling_index[src_level];
	unsigned dst_tile_mode = info->si_tile_mode_array[dst_tile_index];
	unsigned src_tile_mode = info->si_tile_mode_array[src_tile_index];
	unsigned dst_micro_mode = G_009910_MICRO_TILE_MODE_NEW(dst_tile_mode);
	unsigned src_micro_mode = G_009910_MICRO_TILE_MODE_NEW(src_tile_mode);
	unsigned dst_pitch = rdst->surface.level[dst_level].nblk_x;
	unsigned src_pitch = rsrc->surface.level[src_level].nblk_x;
	uint64_t dst_slice_pitch = rdst->surface.level[dst_level].slice_size / bpp;
	uint64_t src_slice_pitch = rsrc->surface.level[src_level].slice_size / bpp;
	unsigned dst_width = minify_as_blocks(rdst->resource.b.b.width0,
					      dst_level, rdst->surface.blk_w);
	unsigned src_width = minify_as_blocks(rsrc->resource.b.b.width0,
					      src_level, rsrc->surface.blk_w);
	unsigned dst_height = minify_as_blocks(rdst->resource.b.b.height0,
					       dst_level, rdst->surface.blk_h);
	unsigned src_height = minify_as_blocks(rsrc->resource.b.b.height0,
					       src_level, rsrc->surface.blk_h);
	unsigned srcx = src_box->x / rsrc->surface.blk_w;
	unsigned srcy = src_box->y / rsrc->surface.blk_h;
	unsigned srcz = src_box->z;
	unsigned copy_width = DIV_ROUND_UP(src_box->width, rsrc->surface.blk_w);
	unsigned copy_height = DIV_ROUND_UP(src_box->height, rsrc->surface.blk_h);
	unsigned copy_depth = src_box->depth;

	assert(src_level <= src->last_level);
	assert(dst_level <= dst->last_level);
	assert(rdst->surface.level[dst_level].offset +
	       dst_slice_pitch * bpp * (dstz + src_box->depth) <=
	       rdst->resource.buf->size);
	assert(rsrc->surface.level[src_level].offset +
	       src_slice_pitch * bpp * (srcz + src_box->depth) <=
	       rsrc->resource.buf->size);

	if (!r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
				       dstz, rsrc, src_level, src_box))
		return false;

	dstx /= rdst->surface.blk_w;
	dsty /= rdst->surface.blk_h;

	if (srcx >= (1 << 14) ||
	    srcy >= (1 << 14) ||
	    srcz >= (1 << 11) ||
	    dstx >= (1 << 14) ||
	    dsty >= (1 << 14) ||
	    dstz >= (1 << 11))
		return false;

	/* Linear -> linear sub-window copy. */
	if (dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
	    src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED &&
	    /* check if everything fits into the bitfields */
	    src_pitch <= (1 << 14) &&
	    dst_pitch <= (1 << 14) &&
	    src_slice_pitch <= (1 << 28) &&
	    dst_slice_pitch <= (1 << 28) &&
	    copy_width <= (1 << 14) &&
	    copy_height <= (1 << 14) &&
	    copy_depth <= (1 << 11) &&
	    /* HW limitation - CIK: */
	    (sctx->b.chip_class != CIK ||
	     (copy_width < (1 << 14) &&
	      copy_height < (1 << 14) &&
	      copy_depth < (1 << 11))) &&
	    /* HW limitation - some CIK parts: */
	    ((sctx->b.family != CHIP_BONAIRE &&
	      sctx->b.family != CHIP_KAVERI) ||
	     (srcx + copy_width != (1 << 14) &&
	      srcy + copy_height != (1 << 14)))) {
		struct radeon_winsys_cs *cs = sctx->b.dma.cs;

		r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);

		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
				CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
			    (util_logbase2(bpp) << 29));
		radeon_emit(cs, src_address);
		radeon_emit(cs, src_address >> 32);
		radeon_emit(cs, srcx | (srcy << 16));
		radeon_emit(cs, srcz | ((src_pitch - 1) << 16));
		radeon_emit(cs, src_slice_pitch - 1);
		radeon_emit(cs, dst_address);
		radeon_emit(cs, dst_address >> 32);
		radeon_emit(cs, dstx | (dsty << 16));
		radeon_emit(cs, dstz | ((dst_pitch - 1) << 16));
		radeon_emit(cs, dst_slice_pitch - 1);
		if (sctx->b.chip_class == CIK) {
			radeon_emit(cs, copy_width | (copy_height << 16));
			radeon_emit(cs, copy_depth);
		} else {
			radeon_emit(cs, (copy_width - 1) | ((copy_height - 1) << 16));
			radeon_emit(cs, (copy_depth - 1));
		}
		return true;
	}
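
	/* Note: on CIK the sub-window packets encode the copy size directly,
	 * while on VI the size fields are biased (by -1 here and in the
	 * tiled<->linear path, by -8/-1 in the tiled->tiled path); the
	 * chip_class checks before the final size dwords handle this. */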
	/* Tiled <-> linear sub-window copy. */
	if ((src_mode >= RADEON_SURF_MODE_1D) != (dst_mode >= RADEON_SURF_MODE_1D)) {
		struct r600_texture *tiled = src_mode >= RADEON_SURF_MODE_1D ? rsrc : rdst;
		struct r600_texture *linear = tiled == rsrc ? rdst : rsrc;
		unsigned tiled_level = tiled == rsrc ? src_level : dst_level;
		unsigned linear_level = linear == rsrc ? src_level : dst_level;
		unsigned tiled_x = tiled == rsrc ? srcx : dstx;
		unsigned linear_x = linear == rsrc ? srcx : dstx;
		unsigned tiled_y = tiled == rsrc ? srcy : dsty;
		unsigned linear_y = linear == rsrc ? srcy : dsty;
		unsigned tiled_z = tiled == rsrc ? srcz : dstz;
		unsigned linear_z = linear == rsrc ? srcz : dstz;
		unsigned tiled_width = tiled == rsrc ? src_width : dst_width;
		unsigned linear_width = linear == rsrc ? src_width : dst_width;
		unsigned tiled_pitch = tiled == rsrc ? src_pitch : dst_pitch;
		unsigned linear_pitch = linear == rsrc ? src_pitch : dst_pitch;
		unsigned tiled_slice_pitch = tiled == rsrc ? src_slice_pitch : dst_slice_pitch;
		unsigned linear_slice_pitch = linear == rsrc ? src_slice_pitch : dst_slice_pitch;
		uint64_t tiled_address = tiled == rsrc ? src_address : dst_address;
		uint64_t linear_address = linear == rsrc ? src_address : dst_address;
		unsigned tiled_micro_mode = tiled == rsrc ? src_micro_mode : dst_micro_mode;

		assert(tiled_pitch % 8 == 0);
		assert(tiled_slice_pitch % 64 == 0);
		unsigned pitch_tile_max = tiled_pitch / 8 - 1;
		unsigned slice_tile_max = tiled_slice_pitch / 64 - 1;
		unsigned xalign = MAX2(1, 4 / bpp);
		unsigned copy_width_aligned = copy_width;

		/* If the region ends at the last pixel and is unaligned, we
		 * can copy the remainder of the line that is not visible to
		 * make it aligned.
		 */
		if (copy_width % xalign != 0 &&
		    linear_x + copy_width == linear_width &&
		    tiled_x + copy_width == tiled_width &&
		    linear_x + align(copy_width, xalign) <= linear_pitch &&
		    tiled_x + align(copy_width, xalign) <= tiled_pitch)
			copy_width_aligned = align(copy_width, xalign);

		/* HW limitations. */
		if ((sctx->b.family == CHIP_BONAIRE ||
		     sctx->b.family == CHIP_KAVERI) &&
		    linear_pitch - 1 == 0x3fff &&
		    bpp == 16)
			return false;

		if (sctx->b.chip_class == CIK &&
		    (copy_width_aligned == (1 << 14) ||
		     copy_height == (1 << 14) ||
		     copy_depth == (1 << 11)))
			return false;

		if ((sctx->b.family == CHIP_BONAIRE ||
		     sctx->b.family == CHIP_KAVERI ||
		     sctx->b.family == CHIP_KABINI ||
		     sctx->b.family == CHIP_MULLINS) &&
		    (tiled_x + copy_width == (1 << 14) ||
		     tiled_y + copy_height == (1 << 14)))
			return false;

		/* The hw can read outside of the given linear buffer bounds,
		 * or, in the case of writes, access those pages without
		 * touching the memory (which still causes a VM fault).
		 *
		 * Out-of-bounds memory accesses and page directory accesses
		 * must be prevented.
		 */
		int64_t start_linear_address, end_linear_address;
		unsigned granularity;

		/* Deduce the size of reads from the linear surface. */
		switch (tiled_micro_mode) {
		case V_009910_ADDR_SURF_DISPLAY_MICRO_TILING:
			granularity = bpp == 1 ? 64 / (8*bpp) :
						 128 / (8*bpp);
			break;
		case V_009910_ADDR_SURF_THIN_MICRO_TILING:
		case V_009910_ADDR_SURF_DEPTH_MICRO_TILING:
			if (0 /* TODO: THICK microtiling */)
				granularity = bpp == 1 ? 32 / (8*bpp) :
					      bpp == 2 ? 64 / (8*bpp) :
					      bpp <= 8 ? 128 / (8*bpp) :
							 256 / (8*bpp);
			else
				granularity = bpp <= 2 ? 64 / (8*bpp) :
					      bpp <= 8 ? 128 / (8*bpp) :
							 256 / (8*bpp);
			break;
		default:
			return false;
		}
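
		/* Example: DISPLAY micro-tiling with bpp == 4 gives
		 * granularity = 128 / (8*4) = 4, i.e. the hw accesses the
		 * linear surface in 4-element (16-byte) chunks. */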
		/* The linear reads start at tiled_x & ~(granularity - 1).
		 * If linear_x == 0 && tiled_x % granularity != 0, the hw
		 * starts reading from an address preceding linear_address!!!
		 */
		start_linear_address =
			linear->surface.level[linear_level].offset +
			bpp * (linear_z * linear_slice_pitch +
			       linear_y * linear_pitch +
			       linear_x);
		start_linear_address -= (int)(bpp * (tiled_x % granularity));

		end_linear_address =
			linear->surface.level[linear_level].offset +
			bpp * ((linear_z + copy_depth - 1) * linear_slice_pitch +
			       (linear_y + copy_height - 1) * linear_pitch +
			       (linear_x + copy_width));

		if ((tiled_x + copy_width) % granularity)
			end_linear_address += granularity -
					      (tiled_x + copy_width) % granularity;

		if (start_linear_address < 0 ||
		    end_linear_address > linear->surface.surf_size)
			return false;

		/* Check requirements. */
		if (tiled_address % 256 == 0 &&
		    linear_address % 4 == 0 &&
		    linear_pitch % xalign == 0 &&
		    linear_x % xalign == 0 &&
		    tiled_x % xalign == 0 &&
		    copy_width_aligned % xalign == 0 &&
		    tiled_micro_mode != V_009910_ADDR_SURF_ROTATED_MICRO_TILING &&
		    /* check if everything fits into the bitfields */
		    tiled->surface.tile_split <= 4096 &&
		    pitch_tile_max < (1 << 11) &&
		    slice_tile_max < (1 << 22) &&
		    linear_pitch <= (1 << 14) &&
		    linear_slice_pitch <= (1 << 28) &&
		    copy_width_aligned <= (1 << 14) &&
		    copy_height <= (1 << 14) &&
		    copy_depth <= (1 << 11)) {
			struct radeon_winsys_cs *cs = sctx->b.dma.cs;
			uint32_t direction = linear == rdst ? 1u << 31 : 0;

			r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);

			radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
					CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
				    direction);
			radeon_emit(cs, tiled_address);
			radeon_emit(cs, tiled_address >> 32);
			radeon_emit(cs, tiled_x | (tiled_y << 16));
			radeon_emit(cs, tiled_z | (pitch_tile_max << 16));
			radeon_emit(cs, slice_tile_max);
			radeon_emit(cs, encode_tile_info(sctx, tiled, tiled_level, true));
			radeon_emit(cs, linear_address);
			radeon_emit(cs, linear_address >> 32);
			radeon_emit(cs, linear_x | (linear_y << 16));
			radeon_emit(cs, linear_z | ((linear_pitch - 1) << 16));
			radeon_emit(cs, linear_slice_pitch - 1);
			if (sctx->b.chip_class == CIK) {
				radeon_emit(cs, copy_width_aligned | (copy_height << 16));
				radeon_emit(cs, copy_depth);
			} else {
				radeon_emit(cs, (copy_width_aligned - 1) | ((copy_height - 1) << 16));
				radeon_emit(cs, (copy_depth - 1));
			}
			return true;
		}
	}
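
	/* T2T copies operate on whole 8x8 micro-tiles: x/y coordinates must
	 * be 8-aligned, the pitch is encoded as (pitch / 8) - 1 and the
	 * slice pitch as (slice_pitch / 64) - 1, hence the checks below. */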
	/* Tiled -> Tiled sub-window copy. */
	if (dst_mode >= RADEON_SURF_MODE_1D &&
	    src_mode >= RADEON_SURF_MODE_1D &&
	    /* check if these fit into the bitfields */
	    src_address % 256 == 0 &&
	    dst_address % 256 == 0 &&
	    rsrc->surface.tile_split <= 4096 &&
	    rdst->surface.tile_split <= 4096 &&
	    dstx % 8 == 0 &&
	    dsty % 8 == 0 &&
	    srcx % 8 == 0 &&
	    srcy % 8 == 0 &&
	    /* this can either be equal, or display->rotated (VI only) */
	    (src_micro_mode == dst_micro_mode ||
	     (sctx->b.chip_class == VI &&
	      src_micro_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING &&
	      dst_micro_mode == V_009910_ADDR_SURF_ROTATED_MICRO_TILING))) {
		assert(src_pitch % 8 == 0);
		assert(dst_pitch % 8 == 0);
		assert(src_slice_pitch % 64 == 0);
		assert(dst_slice_pitch % 64 == 0);
		unsigned src_pitch_tile_max = src_pitch / 8 - 1;
		unsigned dst_pitch_tile_max = dst_pitch / 8 - 1;
		unsigned src_slice_tile_max = src_slice_pitch / 64 - 1;
		unsigned dst_slice_tile_max = dst_slice_pitch / 64 - 1;
		unsigned copy_width_aligned = copy_width;
		unsigned copy_height_aligned = copy_height;

		/* If the region ends at the last pixel and is unaligned, we
		 * can copy the remainder of the tile that is not visible to
		 * make it aligned.
		 */
		if (copy_width % 8 != 0 &&
		    srcx + copy_width == src_width &&
		    dstx + copy_width == dst_width)
			copy_width_aligned = align(copy_width, 8);

		if (copy_height % 8 != 0 &&
		    srcy + copy_height == src_height &&
		    dsty + copy_height == dst_height)
			copy_height_aligned = align(copy_height, 8);

		/* check if these fit into the bitfields */
		if (src_pitch_tile_max < (1 << 11) &&
		    dst_pitch_tile_max < (1 << 11) &&
		    src_slice_tile_max < (1 << 22) &&
		    dst_slice_tile_max < (1 << 22) &&
		    copy_width_aligned <= (1 << 14) &&
		    copy_height_aligned <= (1 << 14) &&
		    copy_depth <= (1 << 11) &&
		    copy_width_aligned % 8 == 0 &&
		    copy_height_aligned % 8 == 0 &&
		    /* HW limitation - CIK: */
		    (sctx->b.chip_class != CIK ||
		     (copy_width_aligned < (1 << 14) &&
		      copy_height_aligned < (1 << 14) &&
		      copy_depth < (1 << 11))) &&
		    /* HW limitation - some CIK parts: */
		    ((sctx->b.family != CHIP_BONAIRE &&
		      sctx->b.family != CHIP_KAVERI &&
		      sctx->b.family != CHIP_KABINI &&
		      sctx->b.family != CHIP_MULLINS) ||
		     (srcx + copy_width_aligned != (1 << 14) &&
		      srcy + copy_height_aligned != (1 << 14) &&
		      dstx + copy_width != (1 << 14)))) {
			struct radeon_winsys_cs *cs = sctx->b.dma.cs;

			r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);

			radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
					CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0));
			radeon_emit(cs, src_address);
			radeon_emit(cs, src_address >> 32);
			radeon_emit(cs, srcx | (srcy << 16));
			radeon_emit(cs, srcz | (src_pitch_tile_max << 16));
			radeon_emit(cs, src_slice_tile_max);
			radeon_emit(cs, encode_tile_info(sctx, rsrc, src_level, true));
			radeon_emit(cs, dst_address);
			radeon_emit(cs, dst_address >> 32);
			radeon_emit(cs, dstx | (dsty << 16));
			radeon_emit(cs, dstz | (dst_pitch_tile_max << 16));
			radeon_emit(cs, dst_slice_tile_max);
			radeon_emit(cs, encode_tile_info(sctx, rdst, dst_level, false));
			if (sctx->b.chip_class == CIK) {
				radeon_emit(cs, copy_width_aligned |
					    (copy_height_aligned << 16));
				radeon_emit(cs, copy_depth);
			} else {
				radeon_emit(cs, (copy_width_aligned - 8) |
					    ((copy_height_aligned - 8) << 16));
				radeon_emit(cs, (copy_depth - 1));
			}
			return true;
		}
	}

	return false;
}
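
/* The dma_copy hook: buffer copies go through the linear COPY packet;
 * texture copies try the sub-window paths above and fall back to
 * si_resource_copy_region otherwise. */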
static void cik_sdma_copy(struct pipe_context *ctx,
			  struct pipe_resource *dst,
			  unsigned dst_level,
			  unsigned dstx, unsigned dsty, unsigned dstz,
			  struct pipe_resource *src,
			  unsigned src_level,
			  const struct pipe_box *src_box)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (!sctx->b.dma.cs)
		goto fallback;

	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
		cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
		return;
	}

	if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
				  src, src_level, src_box))
		return;

fallback:
	si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
				src, src_level, src_box);
}

void cik_init_sdma_functions(struct si_context *sctx)
{
	sctx->b.dma_copy = cik_sdma_copy;
	sctx->b.dma_clear_buffer = cik_sdma_clear_buffer;
}
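
/* Note: these hooks are CIK+ only; callers are expected to install them
 * per-context (presumably from si_create_context) only when chip_class >=
 * CIK, with the older SI DMA paths used otherwise. */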