1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28#include <drm/drmP.h> 29#include <drm/radeon_drm.h> 30#include "radeon_reg.h" 31#include "radeon.h" 32#include "radeon_asic.h" 33 34#include "r100d.h" 35#include "r200_reg_safe.h" 36 37#include "r100_track.h" 38 39static int r200_get_vtx_size_0(uint32_t vtx_fmt_0) 40{ 41 int vtx_size, i; 42 vtx_size = 2; 43 44 if (vtx_fmt_0 & R200_VTX_Z0) 45 vtx_size++; 46 if (vtx_fmt_0 & R200_VTX_W0) 47 vtx_size++; 48 /* blend weight */ 49 if (vtx_fmt_0 & (0x7 << R200_VTX_WEIGHT_COUNT_SHIFT)) 50 vtx_size += (vtx_fmt_0 >> R200_VTX_WEIGHT_COUNT_SHIFT) & 0x7; 51 if (vtx_fmt_0 & R200_VTX_PV_MATRIX_SEL) 52 vtx_size++; 53 if (vtx_fmt_0 & R200_VTX_N0) 54 vtx_size += 3; 55 if (vtx_fmt_0 & R200_VTX_POINT_SIZE) 56 vtx_size++; 57 if (vtx_fmt_0 & R200_VTX_DISCRETE_FOG) 58 vtx_size++; 59 if (vtx_fmt_0 & R200_VTX_SHININESS_0) 60 vtx_size++; 61 if (vtx_fmt_0 & R200_VTX_SHININESS_1) 62 vtx_size++; 63 for (i = 0; i < 8; i++) { 64 int color_size = (vtx_fmt_0 >> (11 + 2*i)) & 0x3; 65 switch (color_size) { 66 case 0: break; 67 case 1: vtx_size++; break; 68 case 2: vtx_size += 3; break; 69 case 3: vtx_size += 4; break; 70 } 71 } 72 if (vtx_fmt_0 & R200_VTX_XY1) 73 vtx_size += 2; 74 if (vtx_fmt_0 & R200_VTX_Z1) 75 vtx_size++; 76 if (vtx_fmt_0 & R200_VTX_W1) 77 vtx_size++; 78 if (vtx_fmt_0 & R200_VTX_N1) 79 vtx_size += 3; 80 return vtx_size; 81} 82 83struct radeon_fence *r200_copy_dma(struct radeon_device *rdev, 84 uint64_t src_offset, 85 uint64_t dst_offset, 86 unsigned num_gpu_pages, 87 struct reservation_object *resv) 88{ 89 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 90 struct radeon_fence *fence; 91 uint32_t size; 92 uint32_t cur_size; 93 int i, num_loops; 94 int r = 0; 95 96 /* radeon pitch is /64 */ 97 size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT; 98 num_loops = DIV_ROUND_UP(size, 0x1FFFFF); 99 r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64); 100 if (r) { 101 DRM_ERROR("radeon: moving bo (%d).\n", r); 102 return ERR_PTR(r); 103 } 104 /* Must wait for 2D idle & clean before DMA or hangs might happen */ 105 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 106 radeon_ring_write(ring, (1 << 16)); 107 for (i = 0; i < num_loops; i++) { 108 cur_size = size; 109 if (cur_size > 0x1FFFFF) { 110 cur_size = 0x1FFFFF; 111 } 112 size -= cur_size; 113 radeon_ring_write(ring, PACKET0(0x720, 2)); 114 radeon_ring_write(ring, src_offset); 115 radeon_ring_write(ring, dst_offset); 116 radeon_ring_write(ring, cur_size | (1 << 31) | (1 << 30)); 117 src_offset += cur_size; 118 dst_offset += cur_size; 119 } 120 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 121 radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); 122 r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); 123 if (r) { 124 radeon_ring_unlock_undo(rdev, ring); 125 return ERR_PTR(r); 126 } 127 radeon_ring_unlock_commit(rdev, ring, false); 128 return fence; 129} 130 131 132static int r200_get_vtx_size_1(uint32_t vtx_fmt_1) 133{ 134 int vtx_size, i, tex_size; 135 vtx_size = 0; 136 for (i = 0; i < 6; i++) { 137 tex_size = (vtx_fmt_1 >> (i * 3)) & 0x7; 138 if (tex_size > 4) 139 continue; 140 vtx_size += tex_size; 141 } 142 return vtx_size; 143} 144 145int r200_packet0_check(struct radeon_cs_parser *p, 146 struct radeon_cs_packet *pkt, 147 unsigned idx, unsigned reg) 148{ 149 struct radeon_cs_reloc *reloc; 150 struct r100_cs_track *track; 151 volatile uint32_t *ib; 152 uint32_t tmp; 153 int r; 154 int i; 155 int face; 156 u32 tile_flags = 0; 157 u32 idx_value; 158 159 ib = p->ib.ptr; 160 track = (struct r100_cs_track *)p->track; 161 idx_value = radeon_get_ib_value(p, idx); 162 switch (reg) { 163 case RADEON_CRTC_GUI_TRIG_VLINE: 164 r = r100_cs_packet_parse_vline(p); 165 if (r) { 166 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 167 idx, reg); 168 radeon_cs_dump_packet(p, pkt); 169 return r; 170 } 171 break; 172 /* FIXME: only allow PACKET3 blit? easier to check for out of 173 * range access */ 174 case RADEON_DST_PITCH_OFFSET: 175 case RADEON_SRC_PITCH_OFFSET: 176 r = r100_reloc_pitch_offset(p, pkt, idx, reg); 177 if (r) 178 return r; 179 break; 180 case RADEON_RB3D_DEPTHOFFSET: 181 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 182 if (r) { 183 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 184 idx, reg); 185 radeon_cs_dump_packet(p, pkt); 186 return r; 187 } 188 track->zb.robj = reloc->robj; 189 track->zb.offset = idx_value; 190 track->zb_dirty = true; 191 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 192 break; 193 case RADEON_RB3D_COLOROFFSET: 194 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 195 if (r) { 196 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 197 idx, reg); 198 radeon_cs_dump_packet(p, pkt); 199 return r; 200 } 201 track->cb[0].robj = reloc->robj; 202 track->cb[0].offset = idx_value; 203 track->cb_dirty = true; 204 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 205 break; 206 case R200_PP_TXOFFSET_0: 207 case R200_PP_TXOFFSET_1: 208 case R200_PP_TXOFFSET_2: 209 case R200_PP_TXOFFSET_3: 210 case R200_PP_TXOFFSET_4: 211 case R200_PP_TXOFFSET_5: 212 i = (reg - R200_PP_TXOFFSET_0) / 24; 213 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 214 if (r) { 215 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 216 idx, reg); 217 radeon_cs_dump_packet(p, pkt); 218 return r; 219 } 220 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 221 if (reloc->tiling_flags & RADEON_TILING_MACRO) 222 tile_flags |= R200_TXO_MACRO_TILE; 223 if (reloc->tiling_flags & RADEON_TILING_MICRO) 224 tile_flags |= R200_TXO_MICRO_TILE; 225 226 tmp = idx_value & ~(0x7 << 2); 227 tmp |= tile_flags; 228 ib[idx] = tmp + ((u32)reloc->gpu_offset); 229 } else 230 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 231 track->textures[i].robj = reloc->robj; 232 track->tex_dirty = true; 233 break; 234 case R200_PP_CUBIC_OFFSET_F1_0: 235 case R200_PP_CUBIC_OFFSET_F2_0: 236 case R200_PP_CUBIC_OFFSET_F3_0: 237 case R200_PP_CUBIC_OFFSET_F4_0: 238 case R200_PP_CUBIC_OFFSET_F5_0: 239 case R200_PP_CUBIC_OFFSET_F1_1: 240 case R200_PP_CUBIC_OFFSET_F2_1: 241 case R200_PP_CUBIC_OFFSET_F3_1: 242 case R200_PP_CUBIC_OFFSET_F4_1: 243 case R200_PP_CUBIC_OFFSET_F5_1: 244 case R200_PP_CUBIC_OFFSET_F1_2: 245 case R200_PP_CUBIC_OFFSET_F2_2: 246 case R200_PP_CUBIC_OFFSET_F3_2: 247 case R200_PP_CUBIC_OFFSET_F4_2: 248 case R200_PP_CUBIC_OFFSET_F5_2: 249 case R200_PP_CUBIC_OFFSET_F1_3: 250 case R200_PP_CUBIC_OFFSET_F2_3: 251 case R200_PP_CUBIC_OFFSET_F3_3: 252 case R200_PP_CUBIC_OFFSET_F4_3: 253 case R200_PP_CUBIC_OFFSET_F5_3: 254 case R200_PP_CUBIC_OFFSET_F1_4: 255 case R200_PP_CUBIC_OFFSET_F2_4: 256 case R200_PP_CUBIC_OFFSET_F3_4: 257 case R200_PP_CUBIC_OFFSET_F4_4: 258 case R200_PP_CUBIC_OFFSET_F5_4: 259 case R200_PP_CUBIC_OFFSET_F1_5: 260 case R200_PP_CUBIC_OFFSET_F2_5: 261 case R200_PP_CUBIC_OFFSET_F3_5: 262 case R200_PP_CUBIC_OFFSET_F4_5: 263 case R200_PP_CUBIC_OFFSET_F5_5: 264 i = (reg - R200_PP_TXOFFSET_0) / 24; 265 face = (reg - ((i * 24) + R200_PP_TXOFFSET_0)) / 4; 266 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 267 if (r) { 268 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 269 idx, reg); 270 radeon_cs_dump_packet(p, pkt); 271 return r; 272 } 273 track->textures[i].cube_info[face - 1].offset = idx_value; 274 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 275 track->textures[i].cube_info[face - 1].robj = reloc->robj; 276 track->tex_dirty = true; 277 break; 278 case RADEON_RE_WIDTH_HEIGHT: 279 track->maxy = ((idx_value >> 16) & 0x7FF); 280 track->cb_dirty = true; 281 track->zb_dirty = true; 282 break; 283 case RADEON_RB3D_COLORPITCH: 284 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 285 if (r) { 286 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 287 idx, reg); 288 radeon_cs_dump_packet(p, pkt); 289 return r; 290 } 291 292 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 293 if (reloc->tiling_flags & RADEON_TILING_MACRO) 294 tile_flags |= RADEON_COLOR_TILE_ENABLE; 295 if (reloc->tiling_flags & RADEON_TILING_MICRO) 296 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 297 298 tmp = idx_value & ~(0x7 << 16); 299 tmp |= tile_flags; 300 ib[idx] = tmp; 301 } else 302 ib[idx] = idx_value; 303 304 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 305 track->cb_dirty = true; 306 break; 307 case RADEON_RB3D_DEPTHPITCH: 308 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 309 track->zb_dirty = true; 310 break; 311 case RADEON_RB3D_CNTL: 312 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 313 case 7: 314 case 8: 315 case 9: 316 case 11: 317 case 12: 318 track->cb[0].cpp = 1; 319 break; 320 case 3: 321 case 4: 322 case 15: 323 track->cb[0].cpp = 2; 324 break; 325 case 6: 326 track->cb[0].cpp = 4; 327 break; 328 default: 329 DRM_ERROR("Invalid color buffer format (%d) !\n", 330 ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); 331 return -EINVAL; 332 } 333 if (idx_value & RADEON_DEPTHXY_OFFSET_ENABLE) { 334 DRM_ERROR("No support for depth xy offset in kms\n"); 335 return -EINVAL; 336 } 337 338 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 339 track->cb_dirty = true; 340 track->zb_dirty = true; 341 break; 342 case RADEON_RB3D_ZSTENCILCNTL: 343 switch (idx_value & 0xf) { 344 case 0: 345 track->zb.cpp = 2; 346 break; 347 case 2: 348 case 3: 349 case 4: 350 case 5: 351 case 9: 352 case 11: 353 track->zb.cpp = 4; 354 break; 355 default: 356 break; 357 } 358 track->zb_dirty = true; 359 break; 360 case RADEON_RB3D_ZPASS_ADDR: 361 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 362 if (r) { 363 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 364 idx, reg); 365 radeon_cs_dump_packet(p, pkt); 366 return r; 367 } 368 ib[idx] = idx_value + ((u32)reloc->gpu_offset); 369 break; 370 case RADEON_PP_CNTL: 371 { 372 uint32_t temp = idx_value >> 4; 373 for (i = 0; i < track->num_texture; i++) 374 track->textures[i].enabled = !!(temp & (1 << i)); 375 track->tex_dirty = true; 376 } 377 break; 378 case RADEON_SE_VF_CNTL: 379 track->vap_vf_cntl = idx_value; 380 break; 381 case 0x210c: 382 /* VAP_VF_MAX_VTX_INDX */ 383 track->max_indx = idx_value & 0x00FFFFFFUL; 384 break; 385 case R200_SE_VTX_FMT_0: 386 track->vtx_size = r200_get_vtx_size_0(idx_value); 387 break; 388 case R200_SE_VTX_FMT_1: 389 track->vtx_size += r200_get_vtx_size_1(idx_value); 390 break; 391 case R200_PP_TXSIZE_0: 392 case R200_PP_TXSIZE_1: 393 case R200_PP_TXSIZE_2: 394 case R200_PP_TXSIZE_3: 395 case R200_PP_TXSIZE_4: 396 case R200_PP_TXSIZE_5: 397 i = (reg - R200_PP_TXSIZE_0) / 32; 398 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 399 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 400 track->tex_dirty = true; 401 break; 402 case R200_PP_TXPITCH_0: 403 case R200_PP_TXPITCH_1: 404 case R200_PP_TXPITCH_2: 405 case R200_PP_TXPITCH_3: 406 case R200_PP_TXPITCH_4: 407 case R200_PP_TXPITCH_5: 408 i = (reg - R200_PP_TXPITCH_0) / 32; 409 track->textures[i].pitch = idx_value + 32; 410 track->tex_dirty = true; 411 break; 412 case R200_PP_TXFILTER_0: 413 case R200_PP_TXFILTER_1: 414 case R200_PP_TXFILTER_2: 415 case R200_PP_TXFILTER_3: 416 case R200_PP_TXFILTER_4: 417 case R200_PP_TXFILTER_5: 418 i = (reg - R200_PP_TXFILTER_0) / 32; 419 track->textures[i].num_levels = ((idx_value & R200_MAX_MIP_LEVEL_MASK) 420 >> R200_MAX_MIP_LEVEL_SHIFT); 421 tmp = (idx_value >> 23) & 0x7; 422 if (tmp == 2 || tmp == 6) 423 track->textures[i].roundup_w = false; 424 tmp = (idx_value >> 27) & 0x7; 425 if (tmp == 2 || tmp == 6) 426 track->textures[i].roundup_h = false; 427 track->tex_dirty = true; 428 break; 429 case R200_PP_TXMULTI_CTL_0: 430 case R200_PP_TXMULTI_CTL_1: 431 case R200_PP_TXMULTI_CTL_2: 432 case R200_PP_TXMULTI_CTL_3: 433 case R200_PP_TXMULTI_CTL_4: 434 case R200_PP_TXMULTI_CTL_5: 435 i = (reg - R200_PP_TXMULTI_CTL_0) / 32; 436 break; 437 case R200_PP_TXFORMAT_X_0: 438 case R200_PP_TXFORMAT_X_1: 439 case R200_PP_TXFORMAT_X_2: 440 case R200_PP_TXFORMAT_X_3: 441 case R200_PP_TXFORMAT_X_4: 442 case R200_PP_TXFORMAT_X_5: 443 i = (reg - R200_PP_TXFORMAT_X_0) / 32; 444 track->textures[i].txdepth = idx_value & 0x7; 445 tmp = (idx_value >> 16) & 0x3; 446 /* 2D, 3D, CUBE */ 447 switch (tmp) { 448 case 0: 449 case 3: 450 case 4: 451 case 5: 452 case 6: 453 case 7: 454 /* 1D/2D */ 455 track->textures[i].tex_coord_type = 0; 456 break; 457 case 1: 458 /* CUBE */ 459 track->textures[i].tex_coord_type = 2; 460 break; 461 case 2: 462 /* 3D */ 463 track->textures[i].tex_coord_type = 1; 464 break; 465 } 466 track->tex_dirty = true; 467 break; 468 case R200_PP_TXFORMAT_0: 469 case R200_PP_TXFORMAT_1: 470 case R200_PP_TXFORMAT_2: 471 case R200_PP_TXFORMAT_3: 472 case R200_PP_TXFORMAT_4: 473 case R200_PP_TXFORMAT_5: 474 i = (reg - R200_PP_TXFORMAT_0) / 32; 475 if (idx_value & R200_TXFORMAT_NON_POWER2) { 476 track->textures[i].use_pitch = 1; 477 } else { 478 track->textures[i].use_pitch = 0; 479 track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); 480 track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); 481 } 482 if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) 483 track->textures[i].lookup_disable = true; 484 switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) { 485 case R200_TXFORMAT_I8: 486 case R200_TXFORMAT_RGB332: 487 case R200_TXFORMAT_Y8: 488 track->textures[i].cpp = 1; 489 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 490 break; 491 case R200_TXFORMAT_AI88: 492 case R200_TXFORMAT_ARGB1555: 493 case R200_TXFORMAT_RGB565: 494 case R200_TXFORMAT_ARGB4444: 495 case R200_TXFORMAT_VYUY422: 496 case R200_TXFORMAT_YVYU422: 497 case R200_TXFORMAT_LDVDU655: 498 case R200_TXFORMAT_DVDU88: 499 case R200_TXFORMAT_AVYU4444: 500 track->textures[i].cpp = 2; 501 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 502 break; 503 case R200_TXFORMAT_ARGB8888: 504 case R200_TXFORMAT_RGBA8888: 505 case R200_TXFORMAT_ABGR8888: 506 case R200_TXFORMAT_BGR111110: 507 case R200_TXFORMAT_LDVDU8888: 508 track->textures[i].cpp = 4; 509 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 510 break; 511 case R200_TXFORMAT_DXT1: 512 track->textures[i].cpp = 1; 513 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 514 break; 515 case R200_TXFORMAT_DXT23: 516 case R200_TXFORMAT_DXT45: 517 track->textures[i].cpp = 1; 518 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 519 break; 520 } 521 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 522 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 523 track->tex_dirty = true; 524 break; 525 case R200_PP_CUBIC_FACES_0: 526 case R200_PP_CUBIC_FACES_1: 527 case R200_PP_CUBIC_FACES_2: 528 case R200_PP_CUBIC_FACES_3: 529 case R200_PP_CUBIC_FACES_4: 530 case R200_PP_CUBIC_FACES_5: 531 tmp = idx_value; 532 i = (reg - R200_PP_CUBIC_FACES_0) / 32; 533 for (face = 0; face < 4; face++) { 534 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 535 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 536 } 537 track->tex_dirty = true; 538 break; 539 default: 540 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 541 reg, idx); 542 return -EINVAL; 543 } 544 return 0; 545} 546 547void r200_set_safe_registers(struct radeon_device *rdev) 548{ 549 rdev->config.r100.reg_safe_bm = r200_reg_safe_bm; 550 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r200_reg_safe_bm); 551} 552