/* r300_texture_desc.c revision 7b42ed6eb508e2f0b89f66f3f985ef1d76a0ef91 */
1/* 2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> 3 * Copyright 2010 Marek Olšák <maraeo@gmail.com> 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 23 24#include "r300_texture_desc.h" 25#include "r300_context.h" 26 27#include "util/u_format.h" 28 29/* Returns the number of pixels that the texture should be aligned to 30 * in the given dimension. 
*/ 31unsigned r300_get_pixel_alignment(enum pipe_format format, 32 unsigned num_samples, 33 enum radeon_bo_layout microtile, 34 enum radeon_bo_layout macrotile, 35 enum r300_dim dim, boolean is_rs690) 36{ 37 static const unsigned table[2][5][3][2] = 38 { 39 { 40 /* Macro: linear linear linear 41 Micro: linear tiled square-tiled */ 42 {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ 43 {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ 44 {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ 45 {{ 4, 1}, { 2, 2}, { 0, 0}}, /* 64 bits per pixel */ 46 {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ 47 }, 48 { 49 /* Macro: tiled tiled tiled 50 Micro: linear tiled square-tiled */ 51 {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ 52 {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ 53 {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ 54 {{ 32, 8}, {16, 16}, { 0, 0}}, /* 64 bits per pixel */ 55 {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ 56 } 57 }; 58 59 static const unsigned aa_block[2] = {4, 8}; 60 unsigned tile = 0; 61 unsigned pixsize = util_format_get_blocksize(format); 62 63 assert(macrotile <= RADEON_LAYOUT_TILED); 64 assert(microtile <= RADEON_LAYOUT_SQUARETILED); 65 assert(pixsize <= 16); 66 assert(dim <= DIM_HEIGHT); 67 68 if (num_samples > 1) { 69 /* Multisampled textures have their own alignment scheme. */ 70 if (pixsize == 4) 71 tile = aa_block[dim]; 72 /* XXX FP16 AA. */ 73 } else { 74 /* Standard alignment. */ 75 tile = table[macrotile][util_logbase2(pixsize)][microtile][dim]; 76 if (macrotile == 0 && is_rs690 && dim == DIM_WIDTH) { 77 int align; 78 int h_tile; 79 h_tile = table[macrotile][util_logbase2(pixsize)][microtile][DIM_HEIGHT]; 80 align = 64 / (pixsize * h_tile); 81 if (tile < align) 82 tile = align; 83 } 84 } 85 86 assert(tile); 87 return tile; 88} 89 90/* Return true if macrotiling should be enabled on the miplevel. 
*/ 91static boolean r300_texture_macro_switch(struct r300_resource *tex, 92 unsigned level, 93 boolean rv350_mode, 94 enum r300_dim dim) 95{ 96 unsigned tile, texdim; 97 98 tile = r300_get_pixel_alignment(tex->b.b.b.format, tex->b.b.b.nr_samples, 99 tex->tex.microtile, RADEON_LAYOUT_TILED, dim, 0); 100 if (dim == DIM_WIDTH) { 101 texdim = u_minify(tex->tex.width0, level); 102 } else { 103 texdim = u_minify(tex->tex.height0, level); 104 } 105 106 /* See TX_FILTER1_n.MACRO_SWITCH. */ 107 if (rv350_mode) { 108 return texdim >= tile; 109 } else { 110 return texdim > tile; 111 } 112} 113 114/** 115 * Return the stride, in bytes, of the texture image of the given texture 116 * at the given level. 117 */ 118static unsigned r300_texture_get_stride(struct r300_screen *screen, 119 struct r300_resource *tex, 120 unsigned level) 121{ 122 unsigned tile_width, width, stride; 123 boolean is_rs690 = (screen->caps.family == CHIP_FAMILY_RS600 || 124 screen->caps.family == CHIP_FAMILY_RS690 || 125 screen->caps.family == CHIP_FAMILY_RS740); 126 127 if (tex->tex.stride_in_bytes_override) 128 return tex->tex.stride_in_bytes_override; 129 130 /* Check the level. */ 131 if (level > tex->b.b.b.last_level) { 132 SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n", 133 __FUNCTION__, level, tex->b.b.b.last_level); 134 return 0; 135 } 136 137 width = u_minify(tex->tex.width0, level); 138 139 if (util_format_is_plain(tex->b.b.b.format)) { 140 tile_width = r300_get_pixel_alignment(tex->b.b.b.format, 141 tex->b.b.b.nr_samples, 142 tex->tex.microtile, 143 tex->tex.macrotile[level], 144 DIM_WIDTH, is_rs690); 145 width = align(width, tile_width); 146 147 stride = util_format_get_stride(tex->b.b.b.format, width); 148 /* The alignment to 32 bytes is sort of implied by the layout... */ 149 return stride; 150 } else { 151 return align(util_format_get_stride(tex->b.b.b.format, width), is_rs690 ? 
64 : 32); 152 } 153} 154 155static unsigned r300_texture_get_nblocksy(struct r300_resource *tex, 156 unsigned level, 157 boolean *out_aligned_for_cbzb) 158{ 159 unsigned height, tile_height; 160 161 height = u_minify(tex->tex.height0, level); 162 163 /* Mipmapped and 3D textures must have their height aligned to POT. */ 164 if ((tex->b.b.b.target != PIPE_TEXTURE_1D && 165 tex->b.b.b.target != PIPE_TEXTURE_2D && 166 tex->b.b.b.target != PIPE_TEXTURE_RECT) || 167 tex->b.b.b.last_level != 0) { 168 height = util_next_power_of_two(height); 169 } 170 171 if (util_format_is_plain(tex->b.b.b.format)) { 172 tile_height = r300_get_pixel_alignment(tex->b.b.b.format, 173 tex->b.b.b.nr_samples, 174 tex->tex.microtile, 175 tex->tex.macrotile[level], 176 DIM_HEIGHT, 0); 177 height = align(height, tile_height); 178 179 /* See if the CBZB clear can be used on the buffer, 180 * taking the texture size into account. */ 181 if (out_aligned_for_cbzb) { 182 if (tex->tex.macrotile[level]) { 183 /* When clearing, the layer (width*height) is horizontally split 184 * into two, and the upper and lower halves are cleared by the CB 185 * and ZB units, respectively. Therefore, the number of macrotiles 186 * in the Y direction must be even. */ 187 188 /* Align the height so that there is an even number of macrotiles. 189 * Do so for 3 or more macrotiles in the Y direction. */ 190 if (level == 0 && tex->b.b.b.last_level == 0 && 191 (tex->b.b.b.target == PIPE_TEXTURE_1D || 192 tex->b.b.b.target == PIPE_TEXTURE_2D || 193 tex->b.b.b.target == PIPE_TEXTURE_RECT) && 194 height >= tile_height * 3) { 195 height = align(height, tile_height * 2); 196 } 197 198 *out_aligned_for_cbzb = height % (tile_height * 2) == 0; 199 } else { 200 *out_aligned_for_cbzb = FALSE; 201 } 202 } 203 } 204 205 return util_format_get_nblocksy(tex->b.b.b.format, height); 206} 207 208/* Get a width in pixels from a stride in bytes. 
*/ 209static unsigned stride_to_width(enum pipe_format format, 210 unsigned stride_in_bytes) 211{ 212 return (stride_in_bytes / util_format_get_blocksize(format)) * 213 util_format_get_blockwidth(format); 214} 215 216static void r300_setup_miptree(struct r300_screen *screen, 217 struct r300_resource *tex, 218 boolean align_for_cbzb) 219{ 220 struct pipe_resource *base = &tex->b.b.b; 221 unsigned stride, size, layer_size, nblocksy, i; 222 boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; 223 boolean aligned_for_cbzb; 224 225 tex->tex.size_in_bytes = 0; 226 227 SCREEN_DBG(screen, DBG_TEXALLOC, 228 "r300: Making miptree for texture, format %s\n", 229 util_format_short_name(base->format)); 230 231 for (i = 0; i <= base->last_level; i++) { 232 /* Let's see if this miplevel can be macrotiled. */ 233 tex->tex.macrotile[i] = 234 (tex->tex.macrotile[0] == RADEON_LAYOUT_TILED && 235 r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && 236 r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? 237 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 238 239 stride = r300_texture_get_stride(screen, tex, i); 240 241 /* Compute the number of blocks in Y, see if the CBZB clear can be 242 * used on the texture. 
*/ 243 aligned_for_cbzb = FALSE; 244 if (align_for_cbzb && tex->tex.cbzb_allowed[i]) 245 nblocksy = r300_texture_get_nblocksy(tex, i, &aligned_for_cbzb); 246 else 247 nblocksy = r300_texture_get_nblocksy(tex, i, NULL); 248 249 layer_size = stride * nblocksy; 250 251 if (base->nr_samples) { 252 layer_size *= base->nr_samples; 253 } 254 255 if (base->target == PIPE_TEXTURE_CUBE) 256 size = layer_size * 6; 257 else 258 size = layer_size * u_minify(tex->tex.depth0, i); 259 260 tex->tex.offset_in_bytes[i] = tex->tex.size_in_bytes; 261 tex->tex.size_in_bytes = tex->tex.offset_in_bytes[i] + size; 262 tex->tex.layer_size_in_bytes[i] = layer_size; 263 tex->tex.stride_in_bytes[i] = stride; 264 tex->tex.stride_in_pixels[i] = stride_to_width(tex->b.b.b.format, stride); 265 tex->tex.cbzb_allowed[i] = tex->tex.cbzb_allowed[i] && aligned_for_cbzb; 266 267 SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " 268 "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", 269 i, u_minify(tex->tex.width0, i), u_minify(tex->tex.height0, i), 270 u_minify(tex->tex.depth0, i), stride, tex->tex.size_in_bytes, 271 tex->tex.macrotile[i] ? "TRUE" : "FALSE"); 272 } 273} 274 275static void r300_setup_flags(struct r300_resource *tex) 276{ 277 tex->tex.uses_stride_addressing = 278 !util_is_power_of_two(tex->b.b.b.width0) || 279 (tex->tex.stride_in_bytes_override && 280 stride_to_width(tex->b.b.b.format, 281 tex->tex.stride_in_bytes_override) != tex->b.b.b.width0); 282 283 tex->tex.is_npot = 284 tex->tex.uses_stride_addressing || 285 !util_is_power_of_two(tex->b.b.b.height0) || 286 !util_is_power_of_two(tex->b.b.b.depth0); 287} 288 289static void r300_setup_cbzb_flags(struct r300_screen *rscreen, 290 struct r300_resource *tex) 291{ 292 unsigned i, bpp; 293 boolean first_level_valid; 294 295 bpp = util_format_get_blocksizebits(tex->b.b.b.format); 296 297 /* 1) The texture must be point-sampled, 298 * 2) The depth must be 16 or 32 bits. 
299 * 3) If the midpoint ZB offset is not aligned to 2048, it returns garbage 300 * with certain texture sizes. Macrotiling ensures the alignment. */ 301 first_level_valid = tex->b.b.b.nr_samples <= 1 && 302 (bpp == 16 || bpp == 32) && 303 tex->tex.macrotile[0]; 304 305 if (SCREEN_DBG_ON(rscreen, DBG_NO_CBZB)) 306 first_level_valid = FALSE; 307 308 for (i = 0; i <= tex->b.b.b.last_level; i++) 309 tex->tex.cbzb_allowed[i] = first_level_valid && tex->tex.macrotile[i]; 310} 311 312static unsigned r300_pixels_to_dwords(unsigned stride, 313 unsigned height, 314 unsigned xblock, unsigned yblock) 315{ 316 return (util_align_npot(stride, xblock) * align(height, yblock)) / (xblock * yblock); 317} 318 319static void r300_setup_hyperz_properties(struct r300_screen *screen, 320 struct r300_resource *tex) 321{ 322 /* The tile size of 1 DWORD in ZMASK RAM is: 323 * 324 * GPU Pipes 4x4 mode 8x8 mode 325 * ------------------------------------------ 326 * R580 4P/1Z 32x32 64x64 327 * RV570 3P/1Z 48x16 96x32 328 * RV530 1P/2Z 32x16 64x32 329 * 1P/1Z 16x16 32x32 330 */ 331 static unsigned zmask_blocks_x_per_dw[4] = {4, 8, 12, 8}; 332 static unsigned zmask_blocks_y_per_dw[4] = {4, 4, 4, 8}; 333 334 /* In HIZ RAM, one dword is always 8x8 pixels (each byte is 4x4 pixels), 335 * but the blocks have very weird ordering. 336 * 337 * With 2 pipes and an image of size 8xY, where Y >= 1, 338 * clearing 4 dwords clears blocks like this: 339 * 340 * 01012323 341 * 342 * where numbers correspond to dword indices. The blocks are interleaved 343 * in the X direction, so the alignment must be 4x1 blocks (32x8 pixels). 344 * 345 * With 4 pipes and an image of size 8xY, where Y >= 4, 346 * clearing 8 dwords clears blocks like this: 347 * 01012323 348 * 45456767 349 * 01012323 350 * 45456767 351 * where numbers correspond to dword indices. 
The blocks are interleaved 352 * in both directions, so the alignment must be 4x4 blocks (32x32 pixels) 353 */ 354 static unsigned hiz_align_x[4] = {8, 32, 48, 32}; 355 static unsigned hiz_align_y[4] = {8, 8, 8, 32}; 356 357 if (util_format_is_depth_or_stencil(tex->b.b.b.format) && 358 util_format_get_blocksizebits(tex->b.b.b.format) == 32 && 359 tex->tex.microtile) { 360 unsigned i, pipes; 361 362 if (screen->caps.family == CHIP_FAMILY_RV530) { 363 pipes = screen->info.r300_num_z_pipes; 364 } else { 365 pipes = screen->info.r300_num_gb_pipes; 366 } 367 368 for (i = 0; i <= tex->b.b.b.last_level; i++) { 369 unsigned zcomp_numdw, zcompsize, hiz_numdw, stride, height; 370 371 stride = align(tex->tex.stride_in_pixels[i], 16); 372 height = u_minify(tex->b.b.b.height0, i); 373 374 /* The 8x8 compression mode needs macrotiling. */ 375 zcompsize = screen->caps.z_compress == R300_ZCOMP_8X8 && 376 tex->tex.macrotile[i] && 377 tex->b.b.b.nr_samples <= 1 ? 8 : 4; 378 379 /* Get the ZMASK buffer size in dwords. */ 380 zcomp_numdw = r300_pixels_to_dwords(stride, height, 381 zmask_blocks_x_per_dw[pipes-1] * zcompsize, 382 zmask_blocks_y_per_dw[pipes-1] * zcompsize); 383 384 /* Check whether we have enough ZMASK memory. */ 385 if (util_format_get_blocksizebits(tex->b.b.b.format) == 32 && 386 zcomp_numdw <= screen->caps.zmask_ram * pipes) { 387 tex->tex.zmask_dwords[i] = zcomp_numdw; 388 tex->tex.zcomp8x8[i] = zcompsize == 8; 389 390 tex->tex.zmask_stride_in_pixels[i] = 391 util_align_npot(stride, zmask_blocks_x_per_dw[pipes-1] * zcompsize); 392 } else { 393 tex->tex.zmask_dwords[i] = 0; 394 tex->tex.zcomp8x8[i] = FALSE; 395 tex->tex.zmask_stride_in_pixels[i] = 0; 396 } 397 398 /* Now setup HIZ. */ 399 stride = util_align_npot(stride, hiz_align_x[pipes-1]); 400 height = align(height, hiz_align_y[pipes-1]); 401 402 /* Get the HIZ buffer size in dwords. */ 403 hiz_numdw = (stride * height) / (8*8 * pipes); 404 405 /* Check whether we have enough HIZ memory. 
*/ 406 if (hiz_numdw <= screen->caps.hiz_ram * pipes) { 407 tex->tex.hiz_dwords[i] = hiz_numdw; 408 tex->tex.hiz_stride_in_pixels[i] = stride; 409 } else { 410 tex->tex.hiz_dwords[i] = 0; 411 tex->tex.hiz_stride_in_pixels[i] = 0; 412 } 413 } 414 } 415} 416 417static void r300_setup_tiling(struct r300_screen *screen, 418 struct r300_resource *tex) 419{ 420 enum pipe_format format = tex->b.b.b.format; 421 boolean rv350_mode = screen->caps.family >= CHIP_FAMILY_R350; 422 boolean is_zb = util_format_is_depth_or_stencil(format); 423 boolean dbg_no_tiling = SCREEN_DBG_ON(screen, DBG_NO_TILING); 424 425 tex->tex.microtile = RADEON_LAYOUT_LINEAR; 426 tex->tex.macrotile[0] = RADEON_LAYOUT_LINEAR; 427 428 if (!util_format_is_plain(format)) { 429 return; 430 } 431 432 /* If height == 1, disable microtiling except for zbuffer. */ 433 if (!is_zb && (tex->b.b.b.height0 == 1 || dbg_no_tiling)) { 434 return; 435 } 436 437 /* Set microtiling. */ 438 switch (util_format_get_blocksize(format)) { 439 case 1: 440 case 4: 441 case 8: 442 tex->tex.microtile = RADEON_LAYOUT_TILED; 443 break; 444 445 case 2: 446 tex->tex.microtile = RADEON_LAYOUT_SQUARETILED; 447 break; 448 } 449 450 if (dbg_no_tiling) { 451 return; 452 } 453 454 /* Set macrotiling. */ 455 if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && 456 r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { 457 tex->tex.macrotile[0] = RADEON_LAYOUT_TILED; 458 } 459} 460 461static void r300_tex_print_info(struct r300_resource *tex, 462 const char *func) 463{ 464 fprintf(stderr, 465 "r300: %s: Macro: %s, Micro: %s, Pitch: %i, Dim: %ix%ix%i, " 466 "LastLevel: %i, Size: %i, Format: %s\n", 467 func, 468 tex->tex.macrotile[0] ? "YES" : " NO", 469 tex->tex.microtile ? 
"YES" : " NO", 470 tex->tex.stride_in_pixels[0], 471 tex->b.b.b.width0, tex->b.b.b.height0, tex->b.b.b.depth0, 472 tex->b.b.b.last_level, tex->tex.size_in_bytes, 473 util_format_short_name(tex->b.b.b.format)); 474} 475 476boolean r300_texture_desc_init(struct r300_screen *rscreen, 477 struct r300_resource *tex, 478 const struct pipe_resource *base) 479{ 480 tex->b.b.b.target = base->target; 481 tex->b.b.b.format = base->format; 482 tex->b.b.b.width0 = base->width0; 483 tex->b.b.b.height0 = base->height0; 484 tex->b.b.b.depth0 = base->depth0; 485 tex->b.b.b.array_size = base->array_size; 486 tex->b.b.b.last_level = base->last_level; 487 tex->b.b.b.nr_samples = base->nr_samples; 488 tex->tex.width0 = base->width0; 489 tex->tex.height0 = base->height0; 490 tex->tex.depth0 = base->depth0; 491 492 r300_setup_flags(tex); 493 494 /* Align a 3D NPOT texture to POT. */ 495 if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) { 496 tex->tex.width0 = util_next_power_of_two(tex->tex.width0); 497 tex->tex.height0 = util_next_power_of_two(tex->tex.height0); 498 tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0); 499 } 500 501 /* Setup tiling. */ 502 if (tex->tex.microtile == RADEON_LAYOUT_UNKNOWN) { 503 r300_setup_tiling(rscreen, tex); 504 } 505 506 r300_setup_cbzb_flags(rscreen, tex); 507 508 /* Setup the miptree description. */ 509 r300_setup_miptree(rscreen, tex, TRUE); 510 /* If the required buffer size is larger than the given max size, 511 * try again without the alignment for the CBZB clear. */ 512 if (tex->buf && tex->tex.size_in_bytes > tex->buf->size) { 513 r300_setup_miptree(rscreen, tex, FALSE); 514 515 /* Make sure the buffer we got is large enough. */ 516 if (tex->tex.size_in_bytes > tex->buf->size) { 517 fprintf(stderr, "r300: texture_desc_init: The buffer is not " 518 "large enough. 
Got: %i, Need: %i, Info:\n", 519 tex->buf->size, tex->tex.size_in_bytes); 520 r300_tex_print_info(tex, "texture_desc_init"); 521 return FALSE; 522 } 523 } 524 525 r300_setup_hyperz_properties(rscreen, tex); 526 527 if (SCREEN_DBG_ON(rscreen, DBG_TEX)) 528 r300_tex_print_info(tex, "texture_desc_init"); 529 530 return TRUE; 531} 532 533unsigned r300_texture_get_offset(struct r300_resource *tex, 534 unsigned level, unsigned layer) 535{ 536 unsigned offset = tex->tex.offset_in_bytes[level]; 537 538 switch (tex->b.b.b.target) { 539 case PIPE_TEXTURE_3D: 540 case PIPE_TEXTURE_CUBE: 541 return offset + layer * tex->tex.layer_size_in_bytes[level]; 542 543 default: 544 assert(layer == 0); 545 return offset; 546 } 547} 548