1/* 2 * Copyright 2008-2009 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: 25 * Dave Airlie <airlied@redhat.com> 26 * Alex Deucher <alexander.deucher@amd.com> 27 */ 28 29#include <linux/module.h> 30 31#include "drmP.h" 32#include "drm.h" 33#include "radeon_drm.h" 34#include "radeon_drv.h" 35 36#define PFP_UCODE_SIZE 576 37#define PM4_UCODE_SIZE 1792 38#define R700_PFP_UCODE_SIZE 848 39#define R700_PM4_UCODE_SIZE 1360 40 41/* Firmware Names */ 42MODULE_FIRMWARE("radeon/R600_pfp.bin"); 43MODULE_FIRMWARE("radeon/R600_me.bin"); 44MODULE_FIRMWARE("radeon/RV610_pfp.bin"); 45MODULE_FIRMWARE("radeon/RV610_me.bin"); 46MODULE_FIRMWARE("radeon/RV630_pfp.bin"); 47MODULE_FIRMWARE("radeon/RV630_me.bin"); 48MODULE_FIRMWARE("radeon/RV620_pfp.bin"); 49MODULE_FIRMWARE("radeon/RV620_me.bin"); 50MODULE_FIRMWARE("radeon/RV635_pfp.bin"); 51MODULE_FIRMWARE("radeon/RV635_me.bin"); 52MODULE_FIRMWARE("radeon/RV670_pfp.bin"); 53MODULE_FIRMWARE("radeon/RV670_me.bin"); 54MODULE_FIRMWARE("radeon/RS780_pfp.bin"); 55MODULE_FIRMWARE("radeon/RS780_me.bin"); 56MODULE_FIRMWARE("radeon/RV770_pfp.bin"); 57MODULE_FIRMWARE("radeon/RV770_me.bin"); 58MODULE_FIRMWARE("radeon/RV730_pfp.bin"); 59MODULE_FIRMWARE("radeon/RV730_me.bin"); 60MODULE_FIRMWARE("radeon/RV710_pfp.bin"); 61MODULE_FIRMWARE("radeon/RV710_me.bin"); 62 63 64int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp, 65 unsigned family, u32 *ib, int *l); 66void r600_cs_legacy_init(void); 67 68 69# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */ 70# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) 71 72#define R600_PTE_VALID (1 << 0) 73#define R600_PTE_SYSTEM (1 << 1) 74#define R600_PTE_SNOOPED (1 << 2) 75#define R600_PTE_READABLE (1 << 5) 76#define R600_PTE_WRITEABLE (1 << 6) 77 78/* MAX values used for gfx init */ 79#define R6XX_MAX_SH_GPRS 256 80#define R6XX_MAX_TEMP_GPRS 16 81#define R6XX_MAX_SH_THREADS 256 82#define R6XX_MAX_SH_STACK_ENTRIES 4096 83#define R6XX_MAX_BACKENDS 8 84#define R6XX_MAX_BACKENDS_MASK 0xff 85#define R6XX_MAX_SIMDS 8 86#define 
R6XX_MAX_SIMDS_MASK 0xff 87#define R6XX_MAX_PIPES 8 88#define R6XX_MAX_PIPES_MASK 0xff 89 90#define R7XX_MAX_SH_GPRS 256 91#define R7XX_MAX_TEMP_GPRS 16 92#define R7XX_MAX_SH_THREADS 256 93#define R7XX_MAX_SH_STACK_ENTRIES 4096 94#define R7XX_MAX_BACKENDS 8 95#define R7XX_MAX_BACKENDS_MASK 0xff 96#define R7XX_MAX_SIMDS 16 97#define R7XX_MAX_SIMDS_MASK 0xffff 98#define R7XX_MAX_PIPES 8 99#define R7XX_MAX_PIPES_MASK 0xff 100 101static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries) 102{ 103 int i; 104 105 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; 106 107 for (i = 0; i < dev_priv->usec_timeout; i++) { 108 int slots; 109 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) 110 slots = (RADEON_READ(R600_GRBM_STATUS) 111 & R700_CMDFIFO_AVAIL_MASK); 112 else 113 slots = (RADEON_READ(R600_GRBM_STATUS) 114 & R600_CMDFIFO_AVAIL_MASK); 115 if (slots >= entries) 116 return 0; 117 DRM_UDELAY(1); 118 } 119 DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n", 120 RADEON_READ(R600_GRBM_STATUS), 121 RADEON_READ(R600_GRBM_STATUS2)); 122 123 return -EBUSY; 124} 125 126static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv) 127{ 128 int i, ret; 129 130 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; 131 132 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) 133 ret = r600_do_wait_for_fifo(dev_priv, 8); 134 else 135 ret = r600_do_wait_for_fifo(dev_priv, 16); 136 if (ret) 137 return ret; 138 for (i = 0; i < dev_priv->usec_timeout; i++) { 139 if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE)) 140 return 0; 141 DRM_UDELAY(1); 142 } 143 DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n", 144 RADEON_READ(R600_GRBM_STATUS), 145 RADEON_READ(R600_GRBM_STATUS2)); 146 147 return -EBUSY; 148} 149 150void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info) 151{ 152 struct drm_sg_mem *entry = dev->sg; 153 int max_pages; 154 int pages; 155 int i; 156 157 if (!entry) 158 return; 159 160 if 
(gart_info->bus_addr) { 161 max_pages = (gart_info->table_size / sizeof(u64)); 162 pages = (entry->pages <= max_pages) 163 ? entry->pages : max_pages; 164 165 for (i = 0; i < pages; i++) { 166 if (!entry->busaddr[i]) 167 break; 168 pci_unmap_page(dev->pdev, entry->busaddr[i], 169 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 170 } 171 if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) 172 gart_info->bus_addr = 0; 173 } 174} 175 176/* R600 has page table setup */ 177int r600_page_table_init(struct drm_device *dev) 178{ 179 drm_radeon_private_t *dev_priv = dev->dev_private; 180 struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info; 181 struct drm_local_map *map = &gart_info->mapping; 182 struct drm_sg_mem *entry = dev->sg; 183 int ret = 0; 184 int i, j; 185 int pages; 186 u64 page_base; 187 dma_addr_t entry_addr; 188 int max_ati_pages, max_real_pages, gart_idx; 189 190 /* okay page table is available - lets rock */ 191 max_ati_pages = (gart_info->table_size / sizeof(u64)); 192 max_real_pages = max_ati_pages / (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); 193 194 pages = (entry->pages <= max_real_pages) ? 
195 entry->pages : max_real_pages; 196 197 memset_io((void __iomem *)map->handle, 0, max_ati_pages * sizeof(u64)); 198 199 gart_idx = 0; 200 for (i = 0; i < pages; i++) { 201 entry->busaddr[i] = pci_map_page(dev->pdev, 202 entry->pagelist[i], 0, 203 PAGE_SIZE, 204 PCI_DMA_BIDIRECTIONAL); 205 if (pci_dma_mapping_error(dev->pdev, entry->busaddr[i])) { 206 DRM_ERROR("unable to map PCIGART pages!\n"); 207 r600_page_table_cleanup(dev, gart_info); 208 goto done; 209 } 210 entry_addr = entry->busaddr[i]; 211 for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) { 212 page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK; 213 page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; 214 page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE; 215 216 DRM_WRITE64(map, gart_idx * sizeof(u64), page_base); 217 218 gart_idx++; 219 220 if ((i % 128) == 0) 221 DRM_DEBUG("page entry %d: 0x%016llx\n", 222 i, (unsigned long long)page_base); 223 entry_addr += ATI_PCIGART_PAGE_SIZE; 224 } 225 } 226 ret = 1; 227done: 228 return ret; 229} 230 231static void r600_vm_flush_gart_range(struct drm_device *dev) 232{ 233 drm_radeon_private_t *dev_priv = dev->dev_private; 234 u32 resp, countdown = 1000; 235 RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12); 236 RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 237 RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2); 238 239 do { 240 resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE); 241 countdown--; 242 DRM_UDELAY(1); 243 } while (((resp & 0xf0) == 0) && countdown); 244} 245 246static void r600_vm_init(struct drm_device *dev) 247{ 248 drm_radeon_private_t *dev_priv = dev->dev_private; 249 /* initialise the VM to use the page table we constructed up there */ 250 u32 vm_c0, i; 251 u32 mc_rd_a; 252 u32 vm_l2_cntl, vm_l2_cntl3; 253 /* okay set up the PCIE aperture type thingo */ 254 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, 
dev_priv->gart_vm_start >> 12); 255 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 256 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 257 258 /* setup MC RD a */ 259 mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS | 260 R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) | 261 R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY; 262 263 RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a); 264 RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a); 265 266 RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a); 267 RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a); 268 269 RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a); 270 RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a); 271 272 RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a); 273 RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a); 274 275 RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING); 276 RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/); 277 278 RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a); 279 RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a); 280 281 RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE); 282 RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a); 283 284 vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W; 285 vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7); 286 RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl); 287 288 RADEON_WRITE(R600_VM_L2_CNTL2, 0); 289 vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) | 290 R600_VM_L2_CNTL3_BANK_SELECT_1(1) | 291 R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2)); 292 RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3); 293 294 vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT; 295 296 RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0); 297 298 vm_c0 &= ~R600_VM_ENABLE_CONTEXT; 299 300 /* disable all other contexts */ 301 for (i = 1; i < 8; i++) 302 RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), 
vm_c0); 303 304 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12); 305 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12); 306 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 307 308 r600_vm_flush_gart_range(dev); 309} 310 311static int r600_cp_init_microcode(drm_radeon_private_t *dev_priv) 312{ 313 struct platform_device *pdev; 314 const char *chip_name; 315 size_t pfp_req_size, me_req_size; 316 char fw_name[30]; 317 int err; 318 319 pdev = platform_device_register_simple("r600_cp", 0, NULL, 0); 320 err = IS_ERR(pdev); 321 if (err) { 322 printk(KERN_ERR "r600_cp: Failed to register firmware\n"); 323 return -EINVAL; 324 } 325 326 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 327 case CHIP_R600: chip_name = "R600"; break; 328 case CHIP_RV610: chip_name = "RV610"; break; 329 case CHIP_RV630: chip_name = "RV630"; break; 330 case CHIP_RV620: chip_name = "RV620"; break; 331 case CHIP_RV635: chip_name = "RV635"; break; 332 case CHIP_RV670: chip_name = "RV670"; break; 333 case CHIP_RS780: 334 case CHIP_RS880: chip_name = "RS780"; break; 335 case CHIP_RV770: chip_name = "RV770"; break; 336 case CHIP_RV730: 337 case CHIP_RV740: chip_name = "RV730"; break; 338 case CHIP_RV710: chip_name = "RV710"; break; 339 default: BUG(); 340 } 341 342 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { 343 pfp_req_size = R700_PFP_UCODE_SIZE * 4; 344 me_req_size = R700_PM4_UCODE_SIZE * 4; 345 } else { 346 pfp_req_size = PFP_UCODE_SIZE * 4; 347 me_req_size = PM4_UCODE_SIZE * 12; 348 } 349 350 DRM_INFO("Loading %s CP Microcode\n", chip_name); 351 352 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); 353 err = request_firmware(&dev_priv->pfp_fw, fw_name, &pdev->dev); 354 if (err) 355 goto out; 356 if (dev_priv->pfp_fw->size != pfp_req_size) { 357 printk(KERN_ERR 358 "r600_cp: Bogus length %zu in firmware \"%s\"\n", 359 
dev_priv->pfp_fw->size, fw_name); 360 err = -EINVAL; 361 goto out; 362 } 363 364 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); 365 err = request_firmware(&dev_priv->me_fw, fw_name, &pdev->dev); 366 if (err) 367 goto out; 368 if (dev_priv->me_fw->size != me_req_size) { 369 printk(KERN_ERR 370 "r600_cp: Bogus length %zu in firmware \"%s\"\n", 371 dev_priv->me_fw->size, fw_name); 372 err = -EINVAL; 373 } 374out: 375 platform_device_unregister(pdev); 376 377 if (err) { 378 if (err != -EINVAL) 379 printk(KERN_ERR 380 "r600_cp: Failed to load firmware \"%s\"\n", 381 fw_name); 382 release_firmware(dev_priv->pfp_fw); 383 dev_priv->pfp_fw = NULL; 384 release_firmware(dev_priv->me_fw); 385 dev_priv->me_fw = NULL; 386 } 387 return err; 388} 389 390static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) 391{ 392 const __be32 *fw_data; 393 int i; 394 395 if (!dev_priv->me_fw || !dev_priv->pfp_fw) 396 return; 397 398 r600_do_cp_stop(dev_priv); 399 400 RADEON_WRITE(R600_CP_RB_CNTL, 401#ifdef __BIG_ENDIAN 402 R600_BUF_SWAP_32BIT | 403#endif 404 R600_RB_NO_UPDATE | 405 R600_RB_BLKSZ(15) | 406 R600_RB_BUFSZ(3)); 407 408 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 409 RADEON_READ(R600_GRBM_SOFT_RESET); 410 mdelay(15); 411 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 412 413 fw_data = (const __be32 *)dev_priv->me_fw->data; 414 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 415 for (i = 0; i < PM4_UCODE_SIZE * 3; i++) 416 RADEON_WRITE(R600_CP_ME_RAM_DATA, 417 be32_to_cpup(fw_data++)); 418 419 fw_data = (const __be32 *)dev_priv->pfp_fw->data; 420 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 421 for (i = 0; i < PFP_UCODE_SIZE; i++) 422 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, 423 be32_to_cpup(fw_data++)); 424 425 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 426 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 427 RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0); 428 429} 430 431static void r700_vm_init(struct drm_device *dev) 432{ 433 drm_radeon_private_t *dev_priv = dev->dev_private; 434 
/* initialise the VM to use the page table we constructed up there */ 435 u32 vm_c0, i; 436 u32 mc_vm_md_l1; 437 u32 vm_l2_cntl, vm_l2_cntl3; 438 /* okay set up the PCIE aperture type thingo */ 439 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12); 440 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 441 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 442 443 mc_vm_md_l1 = R700_ENABLE_L1_TLB | 444 R700_ENABLE_L1_FRAGMENT_PROCESSING | 445 R700_SYSTEM_ACCESS_MODE_IN_SYS | 446 R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 447 R700_EFFECTIVE_L1_TLB_SIZE(5) | 448 R700_EFFECTIVE_L1_QUEUE_SIZE(5); 449 450 RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1); 451 RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1); 452 RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1); 453 RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1); 454 RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1); 455 RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1); 456 RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1); 457 458 vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W; 459 vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7); 460 RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl); 461 462 RADEON_WRITE(R600_VM_L2_CNTL2, 0); 463 vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2); 464 RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3); 465 466 vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT; 467 468 RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0); 469 470 vm_c0 &= ~R600_VM_ENABLE_CONTEXT; 471 472 /* disable all other contexts */ 473 for (i = 1; i < 8; i++) 474 RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0); 475 476 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12); 477 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12); 478 
RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 479 480 r600_vm_flush_gart_range(dev); 481} 482 483static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv) 484{ 485 const __be32 *fw_data; 486 int i; 487 488 if (!dev_priv->me_fw || !dev_priv->pfp_fw) 489 return; 490 491 r600_do_cp_stop(dev_priv); 492 493 RADEON_WRITE(R600_CP_RB_CNTL, 494#ifdef __BIG_ENDIAN 495 R600_BUF_SWAP_32BIT | 496#endif 497 R600_RB_NO_UPDATE | 498 R600_RB_BLKSZ(15) | 499 R600_RB_BUFSZ(3)); 500 501 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 502 RADEON_READ(R600_GRBM_SOFT_RESET); 503 mdelay(15); 504 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 505 506 fw_data = (const __be32 *)dev_priv->pfp_fw->data; 507 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 508 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 509 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 510 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 511 512 fw_data = (const __be32 *)dev_priv->me_fw->data; 513 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 514 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 515 RADEON_WRITE(R600_CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 516 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 517 518 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 519 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 520 RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0); 521 522} 523 524static void r600_test_writeback(drm_radeon_private_t *dev_priv) 525{ 526 u32 tmp; 527 528 /* Start with assuming that writeback doesn't work */ 529 dev_priv->writeback_works = 0; 530 531 /* Writeback doesn't seem to work everywhere, test it here and possibly 532 * enable it if it appears to work 533 */ 534 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0); 535 536 RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef); 537 538 for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) { 539 u32 val; 540 541 val = radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1)); 542 if (val == 0xdeadbeef) 543 break; 544 DRM_UDELAY(1); 545 } 546 547 if 
(tmp < dev_priv->usec_timeout) { 548 dev_priv->writeback_works = 1; 549 DRM_INFO("writeback test succeeded in %d usecs\n", tmp); 550 } else { 551 dev_priv->writeback_works = 0; 552 DRM_INFO("writeback test failed\n"); 553 } 554 if (radeon_no_wb == 1) { 555 dev_priv->writeback_works = 0; 556 DRM_INFO("writeback forced off\n"); 557 } 558 559 if (!dev_priv->writeback_works) { 560 /* Disable writeback to avoid unnecessary bus master transfer */ 561 RADEON_WRITE(R600_CP_RB_CNTL, 562#ifdef __BIG_ENDIAN 563 R600_BUF_SWAP_32BIT | 564#endif 565 RADEON_READ(R600_CP_RB_CNTL) | 566 R600_RB_NO_UPDATE); 567 RADEON_WRITE(R600_SCRATCH_UMSK, 0); 568 } 569} 570 571int r600_do_engine_reset(struct drm_device *dev) 572{ 573 drm_radeon_private_t *dev_priv = dev->dev_private; 574 u32 cp_ptr, cp_me_cntl, cp_rb_cntl; 575 576 DRM_INFO("Resetting GPU\n"); 577 578 cp_ptr = RADEON_READ(R600_CP_RB_WPTR); 579 cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL); 580 RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT); 581 582 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff); 583 RADEON_READ(R600_GRBM_SOFT_RESET); 584 DRM_UDELAY(50); 585 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 586 RADEON_READ(R600_GRBM_SOFT_RESET); 587 588 RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); 589 cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL); 590 RADEON_WRITE(R600_CP_RB_CNTL, 591#ifdef __BIG_ENDIAN 592 R600_BUF_SWAP_32BIT | 593#endif 594 R600_RB_RPTR_WR_ENA); 595 596 RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr); 597 RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr); 598 RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl); 599 RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl); 600 601 /* Reset the CP ring */ 602 r600_do_cp_reset(dev_priv); 603 604 /* The CP is no longer running after an engine reset */ 605 dev_priv->cp_running = 0; 606 607 /* Reset any pending vertex, indirect buffers */ 608 radeon_freelist_reset(dev); 609 610 return 0; 611 612} 613 614static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, 615 u32 num_backends, 616 u32 backend_disable_mask) 617{ 618 u32 
backend_map = 0; 619 u32 enabled_backends_mask; 620 u32 enabled_backends_count; 621 u32 cur_pipe; 622 u32 swizzle_pipe[R6XX_MAX_PIPES]; 623 u32 cur_backend; 624 u32 i; 625 626 if (num_tile_pipes > R6XX_MAX_PIPES) 627 num_tile_pipes = R6XX_MAX_PIPES; 628 if (num_tile_pipes < 1) 629 num_tile_pipes = 1; 630 if (num_backends > R6XX_MAX_BACKENDS) 631 num_backends = R6XX_MAX_BACKENDS; 632 if (num_backends < 1) 633 num_backends = 1; 634 635 enabled_backends_mask = 0; 636 enabled_backends_count = 0; 637 for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { 638 if (((backend_disable_mask >> i) & 1) == 0) { 639 enabled_backends_mask |= (1 << i); 640 ++enabled_backends_count; 641 } 642 if (enabled_backends_count == num_backends) 643 break; 644 } 645 646 if (enabled_backends_count == 0) { 647 enabled_backends_mask = 1; 648 enabled_backends_count = 1; 649 } 650 651 if (enabled_backends_count != num_backends) 652 num_backends = enabled_backends_count; 653 654 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); 655 switch (num_tile_pipes) { 656 case 1: 657 swizzle_pipe[0] = 0; 658 break; 659 case 2: 660 swizzle_pipe[0] = 0; 661 swizzle_pipe[1] = 1; 662 break; 663 case 3: 664 swizzle_pipe[0] = 0; 665 swizzle_pipe[1] = 1; 666 swizzle_pipe[2] = 2; 667 break; 668 case 4: 669 swizzle_pipe[0] = 0; 670 swizzle_pipe[1] = 1; 671 swizzle_pipe[2] = 2; 672 swizzle_pipe[3] = 3; 673 break; 674 case 5: 675 swizzle_pipe[0] = 0; 676 swizzle_pipe[1] = 1; 677 swizzle_pipe[2] = 2; 678 swizzle_pipe[3] = 3; 679 swizzle_pipe[4] = 4; 680 break; 681 case 6: 682 swizzle_pipe[0] = 0; 683 swizzle_pipe[1] = 2; 684 swizzle_pipe[2] = 4; 685 swizzle_pipe[3] = 5; 686 swizzle_pipe[4] = 1; 687 swizzle_pipe[5] = 3; 688 break; 689 case 7: 690 swizzle_pipe[0] = 0; 691 swizzle_pipe[1] = 2; 692 swizzle_pipe[2] = 4; 693 swizzle_pipe[3] = 6; 694 swizzle_pipe[4] = 1; 695 swizzle_pipe[5] = 3; 696 swizzle_pipe[6] = 5; 697 break; 698 case 8: 699 swizzle_pipe[0] = 0; 700 swizzle_pipe[1] = 2; 701 swizzle_pipe[2] = 4; 702 
swizzle_pipe[3] = 6; 703 swizzle_pipe[4] = 1; 704 swizzle_pipe[5] = 3; 705 swizzle_pipe[6] = 5; 706 swizzle_pipe[7] = 7; 707 break; 708 } 709 710 cur_backend = 0; 711 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 712 while (((1 << cur_backend) & enabled_backends_mask) == 0) 713 cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; 714 715 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); 716 717 cur_backend = (cur_backend + 1) % R6XX_MAX_BACKENDS; 718 } 719 720 return backend_map; 721} 722 723static int r600_count_pipe_bits(uint32_t val) 724{ 725 int i, ret = 0; 726 for (i = 0; i < 32; i++) { 727 ret += val & 1; 728 val >>= 1; 729 } 730 return ret; 731} 732 733static void r600_gfx_init(struct drm_device *dev, 734 drm_radeon_private_t *dev_priv) 735{ 736 int i, j, num_qd_pipes; 737 u32 sx_debug_1; 738 u32 tc_cntl; 739 u32 arb_pop; 740 u32 num_gs_verts_per_thread; 741 u32 vgt_gs_per_es; 742 u32 gs_prim_buffer_depth = 0; 743 u32 sq_ms_fifo_sizes; 744 u32 sq_config; 745 u32 sq_gpr_resource_mgmt_1 = 0; 746 u32 sq_gpr_resource_mgmt_2 = 0; 747 u32 sq_thread_resource_mgmt = 0; 748 u32 sq_stack_resource_mgmt_1 = 0; 749 u32 sq_stack_resource_mgmt_2 = 0; 750 u32 hdp_host_path_cntl; 751 u32 backend_map; 752 u32 gb_tiling_config = 0; 753 u32 cc_rb_backend_disable; 754 u32 cc_gc_shader_pipe_config; 755 u32 ramcfg; 756 757 /* setup chip specs */ 758 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 759 case CHIP_R600: 760 dev_priv->r600_max_pipes = 4; 761 dev_priv->r600_max_tile_pipes = 8; 762 dev_priv->r600_max_simds = 4; 763 dev_priv->r600_max_backends = 4; 764 dev_priv->r600_max_gprs = 256; 765 dev_priv->r600_max_threads = 192; 766 dev_priv->r600_max_stack_entries = 256; 767 dev_priv->r600_max_hw_contexts = 8; 768 dev_priv->r600_max_gs_threads = 16; 769 dev_priv->r600_sx_max_export_size = 128; 770 dev_priv->r600_sx_max_export_pos_size = 16; 771 dev_priv->r600_sx_max_export_smx_size = 128; 772 dev_priv->r600_sq_num_cf_insts = 2; 773 break; 
774 case CHIP_RV630: 775 case CHIP_RV635: 776 dev_priv->r600_max_pipes = 2; 777 dev_priv->r600_max_tile_pipes = 2; 778 dev_priv->r600_max_simds = 3; 779 dev_priv->r600_max_backends = 1; 780 dev_priv->r600_max_gprs = 128; 781 dev_priv->r600_max_threads = 192; 782 dev_priv->r600_max_stack_entries = 128; 783 dev_priv->r600_max_hw_contexts = 8; 784 dev_priv->r600_max_gs_threads = 4; 785 dev_priv->r600_sx_max_export_size = 128; 786 dev_priv->r600_sx_max_export_pos_size = 16; 787 dev_priv->r600_sx_max_export_smx_size = 128; 788 dev_priv->r600_sq_num_cf_insts = 2; 789 break; 790 case CHIP_RV610: 791 case CHIP_RS780: 792 case CHIP_RS880: 793 case CHIP_RV620: 794 dev_priv->r600_max_pipes = 1; 795 dev_priv->r600_max_tile_pipes = 1; 796 dev_priv->r600_max_simds = 2; 797 dev_priv->r600_max_backends = 1; 798 dev_priv->r600_max_gprs = 128; 799 dev_priv->r600_max_threads = 192; 800 dev_priv->r600_max_stack_entries = 128; 801 dev_priv->r600_max_hw_contexts = 4; 802 dev_priv->r600_max_gs_threads = 4; 803 dev_priv->r600_sx_max_export_size = 128; 804 dev_priv->r600_sx_max_export_pos_size = 16; 805 dev_priv->r600_sx_max_export_smx_size = 128; 806 dev_priv->r600_sq_num_cf_insts = 1; 807 break; 808 case CHIP_RV670: 809 dev_priv->r600_max_pipes = 4; 810 dev_priv->r600_max_tile_pipes = 4; 811 dev_priv->r600_max_simds = 4; 812 dev_priv->r600_max_backends = 4; 813 dev_priv->r600_max_gprs = 192; 814 dev_priv->r600_max_threads = 192; 815 dev_priv->r600_max_stack_entries = 256; 816 dev_priv->r600_max_hw_contexts = 8; 817 dev_priv->r600_max_gs_threads = 16; 818 dev_priv->r600_sx_max_export_size = 128; 819 dev_priv->r600_sx_max_export_pos_size = 16; 820 dev_priv->r600_sx_max_export_smx_size = 128; 821 dev_priv->r600_sq_num_cf_insts = 2; 822 break; 823 default: 824 break; 825 } 826 827 /* Initialize HDP */ 828 j = 0; 829 for (i = 0; i < 32; i++) { 830 RADEON_WRITE((0x2c14 + j), 0x00000000); 831 RADEON_WRITE((0x2c18 + j), 0x00000000); 832 RADEON_WRITE((0x2c1c + j), 0x00000000); 833 
RADEON_WRITE((0x2c20 + j), 0x00000000); 834 RADEON_WRITE((0x2c24 + j), 0x00000000); 835 j += 0x18; 836 } 837 838 RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff)); 839 840 /* setup tiling, simd, pipe config */ 841 ramcfg = RADEON_READ(R600_RAMCFG); 842 843 switch (dev_priv->r600_max_tile_pipes) { 844 case 1: 845 gb_tiling_config |= R600_PIPE_TILING(0); 846 break; 847 case 2: 848 gb_tiling_config |= R600_PIPE_TILING(1); 849 break; 850 case 4: 851 gb_tiling_config |= R600_PIPE_TILING(2); 852 break; 853 case 8: 854 gb_tiling_config |= R600_PIPE_TILING(3); 855 break; 856 default: 857 break; 858 } 859 860 gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK); 861 862 gb_tiling_config |= R600_GROUP_SIZE(0); 863 864 if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) { 865 gb_tiling_config |= R600_ROW_TILING(3); 866 gb_tiling_config |= R600_SAMPLE_SPLIT(3); 867 } else { 868 gb_tiling_config |= 869 R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK)); 870 gb_tiling_config |= 871 R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK)); 872 } 873 874 gb_tiling_config |= R600_BANK_SWAPS(1); 875 876 cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000; 877 cc_rb_backend_disable |= 878 R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK); 879 880 cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; 881 cc_gc_shader_pipe_config |= 882 R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK); 883 cc_gc_shader_pipe_config |= 884 R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK); 885 886 backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, 887 (R6XX_MAX_BACKENDS - 888 r600_count_pipe_bits((cc_rb_backend_disable & 889 R6XX_MAX_BACKENDS_MASK) >> 16)), 890 (cc_rb_backend_disable >> 
16)); 891 gb_tiling_config |= R600_BACKEND_MAP(backend_map); 892 893 RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config); 894 RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 895 RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 896 if (gb_tiling_config & 0xc0) { 897 dev_priv->r600_group_size = 512; 898 } else { 899 dev_priv->r600_group_size = 256; 900 } 901 dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7); 902 if (gb_tiling_config & 0x30) { 903 dev_priv->r600_nbanks = 8; 904 } else { 905 dev_priv->r600_nbanks = 4; 906 } 907 908 RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); 909 RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 910 RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 911 912 num_qd_pipes = 913 R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8); 914 RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK); 915 RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK); 916 917 /* set HW defaults for 3D engine */ 918 RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) | 919 R600_ROQ_IB2_START(0x2b))); 920 921 RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) | 922 R600_ROQ_END(0x40))); 923 924 RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO | 925 R600_SYNC_GRADIENT | 926 R600_SYNC_WALKER | 927 R600_SYNC_ALIGNER)); 928 929 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) 930 RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021); 931 932 sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1); 933 sx_debug_1 |= R600_SMX_EVENT_RELEASE; 934 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600)) 935 sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS; 936 RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1); 937 938 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) || 939 ((dev_priv->flags & RADEON_FAMILY_MASK) == 
CHIP_RV630) || 940 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 941 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 942 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 943 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) 944 RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE); 945 else 946 RADEON_WRITE(R600_DB_DEBUG, 0); 947 948 RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) | 949 R600_DEPTH_FLUSH(16) | 950 R600_DEPTH_PENDING_FREE(4) | 951 R600_DEPTH_CACHELINE_FREE(16))); 952 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 953 RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0); 954 955 RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0)); 956 RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0)); 957 958 sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES); 959 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 960 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 961 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 962 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { 963 sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) | 964 R600_FETCH_FIFO_HIWATER(0xa) | 965 R600_DONE_FIFO_HIWATER(0xe0) | 966 R600_ALU_UPDATE_FIFO_HIWATER(0x8)); 967 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) || 968 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) { 969 sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff); 970 sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4); 971 } 972 RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); 973 974 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 975 * should be adjusted as needed by the 2D/3D drivers. 
This just sets default values 976 */ 977 sq_config = RADEON_READ(R600_SQ_CONFIG); 978 sq_config &= ~(R600_PS_PRIO(3) | 979 R600_VS_PRIO(3) | 980 R600_GS_PRIO(3) | 981 R600_ES_PRIO(3)); 982 sq_config |= (R600_DX9_CONSTS | 983 R600_VC_ENABLE | 984 R600_PS_PRIO(0) | 985 R600_VS_PRIO(1) | 986 R600_GS_PRIO(2) | 987 R600_ES_PRIO(3)); 988 989 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) { 990 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) | 991 R600_NUM_VS_GPRS(124) | 992 R600_NUM_CLAUSE_TEMP_GPRS(4)); 993 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) | 994 R600_NUM_ES_GPRS(0)); 995 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) | 996 R600_NUM_VS_THREADS(48) | 997 R600_NUM_GS_THREADS(4) | 998 R600_NUM_ES_THREADS(4)); 999 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) | 1000 R600_NUM_VS_STACK_ENTRIES(128)); 1001 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) | 1002 R600_NUM_ES_STACK_ENTRIES(0)); 1003 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 1004 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 1005 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 1006 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { 1007 /* no vertex cache */ 1008 sq_config &= ~R600_VC_ENABLE; 1009 1010 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 1011 R600_NUM_VS_GPRS(44) | 1012 R600_NUM_CLAUSE_TEMP_GPRS(2)); 1013 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) | 1014 R600_NUM_ES_GPRS(17)); 1015 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 1016 R600_NUM_VS_THREADS(78) | 1017 R600_NUM_GS_THREADS(4) | 1018 R600_NUM_ES_THREADS(31)); 1019 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) | 1020 R600_NUM_VS_STACK_ENTRIES(40)); 1021 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) | 1022 R600_NUM_ES_STACK_ENTRIES(16)); 1023 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || 1024 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) { 1025 sq_gpr_resource_mgmt_1 = 
(R600_NUM_PS_GPRS(44) | 1026 R600_NUM_VS_GPRS(44) | 1027 R600_NUM_CLAUSE_TEMP_GPRS(2)); 1028 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) | 1029 R600_NUM_ES_GPRS(18)); 1030 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 1031 R600_NUM_VS_THREADS(78) | 1032 R600_NUM_GS_THREADS(4) | 1033 R600_NUM_ES_THREADS(31)); 1034 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) | 1035 R600_NUM_VS_STACK_ENTRIES(40)); 1036 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) | 1037 R600_NUM_ES_STACK_ENTRIES(16)); 1038 } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) { 1039 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 1040 R600_NUM_VS_GPRS(44) | 1041 R600_NUM_CLAUSE_TEMP_GPRS(2)); 1042 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) | 1043 R600_NUM_ES_GPRS(17)); 1044 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 1045 R600_NUM_VS_THREADS(78) | 1046 R600_NUM_GS_THREADS(4) | 1047 R600_NUM_ES_THREADS(31)); 1048 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) | 1049 R600_NUM_VS_STACK_ENTRIES(64)); 1050 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(64) | 1051 R600_NUM_ES_STACK_ENTRIES(64)); 1052 } 1053 1054 RADEON_WRITE(R600_SQ_CONFIG, sq_config); 1055 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1); 1056 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2); 1057 RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 1058 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1); 1059 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2); 1060 1061 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 1062 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 1063 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || 1064 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) 1065 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY)); 1066 else 1067 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, 
R600_CACHE_INVALIDATION(R600_VC_AND_TC)); 1068 1069 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) | 1070 R600_S0_Y(0x4) | 1071 R600_S1_X(0x4) | 1072 R600_S1_Y(0xc))); 1073 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) | 1074 R600_S0_Y(0xe) | 1075 R600_S1_X(0x2) | 1076 R600_S1_Y(0x2) | 1077 R600_S2_X(0xa) | 1078 R600_S2_Y(0x6) | 1079 R600_S3_X(0x6) | 1080 R600_S3_Y(0xa))); 1081 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) | 1082 R600_S0_Y(0xb) | 1083 R600_S1_X(0x4) | 1084 R600_S1_Y(0xc) | 1085 R600_S2_X(0x1) | 1086 R600_S2_Y(0x6) | 1087 R600_S3_X(0xa) | 1088 R600_S3_Y(0xe))); 1089 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) | 1090 R600_S4_Y(0x1) | 1091 R600_S5_X(0x0) | 1092 R600_S5_Y(0x0) | 1093 R600_S6_X(0xb) | 1094 R600_S6_Y(0x4) | 1095 R600_S7_X(0x7) | 1096 R600_S7_Y(0x8))); 1097 1098 1099 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1100 case CHIP_R600: 1101 case CHIP_RV630: 1102 case CHIP_RV635: 1103 gs_prim_buffer_depth = 0; 1104 break; 1105 case CHIP_RV610: 1106 case CHIP_RS780: 1107 case CHIP_RS880: 1108 case CHIP_RV620: 1109 gs_prim_buffer_depth = 32; 1110 break; 1111 case CHIP_RV670: 1112 gs_prim_buffer_depth = 128; 1113 break; 1114 default: 1115 break; 1116 } 1117 1118 num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16; 1119 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 1120 /* Max value for this is 256 */ 1121 if (vgt_gs_per_es > 256) 1122 vgt_gs_per_es = 256; 1123 1124 RADEON_WRITE(R600_VGT_ES_PER_GS, 128); 1125 RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es); 1126 RADEON_WRITE(R600_VGT_GS_PER_VS, 2); 1127 RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16); 1128 1129 /* more default values. 
2D/3D driver should adjust as needed */ 1130 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0); 1131 RADEON_WRITE(R600_VGT_STRMOUT_EN, 0); 1132 RADEON_WRITE(R600_SX_MISC, 0); 1133 RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0); 1134 RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0); 1135 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0); 1136 RADEON_WRITE(R600_SPI_INPUT_Z, 0); 1137 RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2)); 1138 RADEON_WRITE(R600_CB_COLOR7_FRAG, 0); 1139 1140 /* clear render buffer base addresses */ 1141 RADEON_WRITE(R600_CB_COLOR0_BASE, 0); 1142 RADEON_WRITE(R600_CB_COLOR1_BASE, 0); 1143 RADEON_WRITE(R600_CB_COLOR2_BASE, 0); 1144 RADEON_WRITE(R600_CB_COLOR3_BASE, 0); 1145 RADEON_WRITE(R600_CB_COLOR4_BASE, 0); 1146 RADEON_WRITE(R600_CB_COLOR5_BASE, 0); 1147 RADEON_WRITE(R600_CB_COLOR6_BASE, 0); 1148 RADEON_WRITE(R600_CB_COLOR7_BASE, 0); 1149 1150 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1151 case CHIP_RV610: 1152 case CHIP_RS780: 1153 case CHIP_RS880: 1154 case CHIP_RV620: 1155 tc_cntl = R600_TC_L2_SIZE(8); 1156 break; 1157 case CHIP_RV630: 1158 case CHIP_RV635: 1159 tc_cntl = R600_TC_L2_SIZE(4); 1160 break; 1161 case CHIP_R600: 1162 tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT; 1163 break; 1164 default: 1165 tc_cntl = R600_TC_L2_SIZE(0); 1166 break; 1167 } 1168 1169 RADEON_WRITE(R600_TC_CNTL, tc_cntl); 1170 1171 hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL); 1172 RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 1173 1174 arb_pop = RADEON_READ(R600_ARB_POP); 1175 arb_pop |= R600_ENABLE_TC128; 1176 RADEON_WRITE(R600_ARB_POP, arb_pop); 1177 1178 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1179 RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA | 1180 R600_NUM_CLIP_SEQ(3))); 1181 RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095)); 1182 1183} 1184 1185static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv, 1186 u32 num_tile_pipes, 1187 u32 num_backends, 1188 u32 
backend_disable_mask) 1189{ 1190 u32 backend_map = 0; 1191 u32 enabled_backends_mask; 1192 u32 enabled_backends_count; 1193 u32 cur_pipe; 1194 u32 swizzle_pipe[R7XX_MAX_PIPES]; 1195 u32 cur_backend; 1196 u32 i; 1197 bool force_no_swizzle; 1198 1199 if (num_tile_pipes > R7XX_MAX_PIPES) 1200 num_tile_pipes = R7XX_MAX_PIPES; 1201 if (num_tile_pipes < 1) 1202 num_tile_pipes = 1; 1203 if (num_backends > R7XX_MAX_BACKENDS) 1204 num_backends = R7XX_MAX_BACKENDS; 1205 if (num_backends < 1) 1206 num_backends = 1; 1207 1208 enabled_backends_mask = 0; 1209 enabled_backends_count = 0; 1210 for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { 1211 if (((backend_disable_mask >> i) & 1) == 0) { 1212 enabled_backends_mask |= (1 << i); 1213 ++enabled_backends_count; 1214 } 1215 if (enabled_backends_count == num_backends) 1216 break; 1217 } 1218 1219 if (enabled_backends_count == 0) { 1220 enabled_backends_mask = 1; 1221 enabled_backends_count = 1; 1222 } 1223 1224 if (enabled_backends_count != num_backends) 1225 num_backends = enabled_backends_count; 1226 1227 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1228 case CHIP_RV770: 1229 case CHIP_RV730: 1230 force_no_swizzle = false; 1231 break; 1232 case CHIP_RV710: 1233 case CHIP_RV740: 1234 default: 1235 force_no_swizzle = true; 1236 break; 1237 } 1238 1239 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); 1240 switch (num_tile_pipes) { 1241 case 1: 1242 swizzle_pipe[0] = 0; 1243 break; 1244 case 2: 1245 swizzle_pipe[0] = 0; 1246 swizzle_pipe[1] = 1; 1247 break; 1248 case 3: 1249 if (force_no_swizzle) { 1250 swizzle_pipe[0] = 0; 1251 swizzle_pipe[1] = 1; 1252 swizzle_pipe[2] = 2; 1253 } else { 1254 swizzle_pipe[0] = 0; 1255 swizzle_pipe[1] = 2; 1256 swizzle_pipe[2] = 1; 1257 } 1258 break; 1259 case 4: 1260 if (force_no_swizzle) { 1261 swizzle_pipe[0] = 0; 1262 swizzle_pipe[1] = 1; 1263 swizzle_pipe[2] = 2; 1264 swizzle_pipe[3] = 3; 1265 } else { 1266 swizzle_pipe[0] = 0; 1267 swizzle_pipe[1] = 2; 1268 swizzle_pipe[2] = 3; 
1269 swizzle_pipe[3] = 1; 1270 } 1271 break; 1272 case 5: 1273 if (force_no_swizzle) { 1274 swizzle_pipe[0] = 0; 1275 swizzle_pipe[1] = 1; 1276 swizzle_pipe[2] = 2; 1277 swizzle_pipe[3] = 3; 1278 swizzle_pipe[4] = 4; 1279 } else { 1280 swizzle_pipe[0] = 0; 1281 swizzle_pipe[1] = 2; 1282 swizzle_pipe[2] = 4; 1283 swizzle_pipe[3] = 1; 1284 swizzle_pipe[4] = 3; 1285 } 1286 break; 1287 case 6: 1288 if (force_no_swizzle) { 1289 swizzle_pipe[0] = 0; 1290 swizzle_pipe[1] = 1; 1291 swizzle_pipe[2] = 2; 1292 swizzle_pipe[3] = 3; 1293 swizzle_pipe[4] = 4; 1294 swizzle_pipe[5] = 5; 1295 } else { 1296 swizzle_pipe[0] = 0; 1297 swizzle_pipe[1] = 2; 1298 swizzle_pipe[2] = 4; 1299 swizzle_pipe[3] = 5; 1300 swizzle_pipe[4] = 3; 1301 swizzle_pipe[5] = 1; 1302 } 1303 break; 1304 case 7: 1305 if (force_no_swizzle) { 1306 swizzle_pipe[0] = 0; 1307 swizzle_pipe[1] = 1; 1308 swizzle_pipe[2] = 2; 1309 swizzle_pipe[3] = 3; 1310 swizzle_pipe[4] = 4; 1311 swizzle_pipe[5] = 5; 1312 swizzle_pipe[6] = 6; 1313 } else { 1314 swizzle_pipe[0] = 0; 1315 swizzle_pipe[1] = 2; 1316 swizzle_pipe[2] = 4; 1317 swizzle_pipe[3] = 6; 1318 swizzle_pipe[4] = 3; 1319 swizzle_pipe[5] = 1; 1320 swizzle_pipe[6] = 5; 1321 } 1322 break; 1323 case 8: 1324 if (force_no_swizzle) { 1325 swizzle_pipe[0] = 0; 1326 swizzle_pipe[1] = 1; 1327 swizzle_pipe[2] = 2; 1328 swizzle_pipe[3] = 3; 1329 swizzle_pipe[4] = 4; 1330 swizzle_pipe[5] = 5; 1331 swizzle_pipe[6] = 6; 1332 swizzle_pipe[7] = 7; 1333 } else { 1334 swizzle_pipe[0] = 0; 1335 swizzle_pipe[1] = 2; 1336 swizzle_pipe[2] = 4; 1337 swizzle_pipe[3] = 6; 1338 swizzle_pipe[4] = 3; 1339 swizzle_pipe[5] = 1; 1340 swizzle_pipe[6] = 7; 1341 swizzle_pipe[7] = 5; 1342 } 1343 break; 1344 } 1345 1346 cur_backend = 0; 1347 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 1348 while (((1 << cur_backend) & enabled_backends_mask) == 0) 1349 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 1350 1351 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 
2))); 1352 1353 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 1354 } 1355 1356 return backend_map; 1357} 1358 1359static void r700_gfx_init(struct drm_device *dev, 1360 drm_radeon_private_t *dev_priv) 1361{ 1362 int i, j, num_qd_pipes; 1363 u32 ta_aux_cntl; 1364 u32 sx_debug_1; 1365 u32 smx_dc_ctl0; 1366 u32 db_debug3; 1367 u32 num_gs_verts_per_thread; 1368 u32 vgt_gs_per_es; 1369 u32 gs_prim_buffer_depth = 0; 1370 u32 sq_ms_fifo_sizes; 1371 u32 sq_config; 1372 u32 sq_thread_resource_mgmt; 1373 u32 hdp_host_path_cntl; 1374 u32 sq_dyn_gpr_size_simd_ab_0; 1375 u32 backend_map; 1376 u32 gb_tiling_config = 0; 1377 u32 cc_rb_backend_disable; 1378 u32 cc_gc_shader_pipe_config; 1379 u32 mc_arb_ramcfg; 1380 u32 db_debug4; 1381 1382 /* setup chip specs */ 1383 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1384 case CHIP_RV770: 1385 dev_priv->r600_max_pipes = 4; 1386 dev_priv->r600_max_tile_pipes = 8; 1387 dev_priv->r600_max_simds = 10; 1388 dev_priv->r600_max_backends = 4; 1389 dev_priv->r600_max_gprs = 256; 1390 dev_priv->r600_max_threads = 248; 1391 dev_priv->r600_max_stack_entries = 512; 1392 dev_priv->r600_max_hw_contexts = 8; 1393 dev_priv->r600_max_gs_threads = 16 * 2; 1394 dev_priv->r600_sx_max_export_size = 128; 1395 dev_priv->r600_sx_max_export_pos_size = 16; 1396 dev_priv->r600_sx_max_export_smx_size = 112; 1397 dev_priv->r600_sq_num_cf_insts = 2; 1398 1399 dev_priv->r700_sx_num_of_sets = 7; 1400 dev_priv->r700_sc_prim_fifo_size = 0xF9; 1401 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1402 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1403 break; 1404 case CHIP_RV730: 1405 dev_priv->r600_max_pipes = 2; 1406 dev_priv->r600_max_tile_pipes = 4; 1407 dev_priv->r600_max_simds = 8; 1408 dev_priv->r600_max_backends = 2; 1409 dev_priv->r600_max_gprs = 128; 1410 dev_priv->r600_max_threads = 248; 1411 dev_priv->r600_max_stack_entries = 256; 1412 dev_priv->r600_max_hw_contexts = 8; 1413 dev_priv->r600_max_gs_threads = 16 * 2; 1414 
dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0xf9;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		/* move 16 entries from the position buffer to the SMX buffer */
		if (dev_priv->r600_sx_max_export_pos_size > 16) {
			dev_priv->r600_sx_max_export_pos_size -= 16;
			dev_priv->r600_sx_max_export_smx_size += 16;
		}
		break;
	case CHIP_RV710:
		dev_priv->r600_max_pipes = 2;
		dev_priv->r600_max_tile_pipes = 2;
		dev_priv->r600_max_simds = 2;
		dev_priv->r600_max_backends = 1;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 192;
		dev_priv->r600_max_stack_entries = 256;
		dev_priv->r600_max_hw_contexts = 4;
		dev_priv->r600_max_gs_threads = 8 * 2;
		dev_priv->r600_sx_max_export_size = 128;
		dev_priv->r600_sx_max_export_pos_size = 16;
		dev_priv->r600_sx_max_export_smx_size = 112;
		dev_priv->r600_sq_num_cf_insts = 1;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x40;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;
		break;
	case CHIP_RV740:
		dev_priv->r600_max_pipes = 4;
		dev_priv->r600_max_tile_pipes = 4;
		dev_priv->r600_max_simds = 8;
		dev_priv->r600_max_backends = 4;
		dev_priv->r600_max_gprs = 256;
		dev_priv->r600_max_threads = 248;
		dev_priv->r600_max_stack_entries = 512;
		dev_priv->r600_max_hw_contexts = 8;
		dev_priv->r600_max_gs_threads = 16 * 2;
		dev_priv->r600_sx_max_export_size = 256;
		dev_priv->r600_sx_max_export_pos_size = 32;
		dev_priv->r600_sx_max_export_smx_size = 224;
		dev_priv->r600_sq_num_cf_insts = 2;

		dev_priv->r700_sx_num_of_sets = 7;
		dev_priv->r700_sc_prim_fifo_size = 0x100;
		dev_priv->r700_sc_hiz_tile_fifo_size = 0x30;
		dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130;

		/* move 16 entries from the position buffer to the SMX buffer */
		if (dev_priv->r600_sx_max_export_pos_size > 16) {
			dev_priv->r600_sx_max_export_pos_size -= 16;
			dev_priv->r600_sx_max_export_smx_size += 16;
		}
		break;
	default:
		/* unknown family: the chip spec fields are left untouched */
		break;
	}

	/* Initialize HDP */
	/* zero 32 groups of five registers; groups start at 0x2c14, 0x18 apart */
	j = 0;
	for (i = 0; i < 32; i++) {
		RADEON_WRITE((0x2c14 + j), 0x00000000);
		RADEON_WRITE((0x2c18 + j), 0x00000000);
		RADEON_WRITE((0x2c1c + j), 0x00000000);
		RADEON_WRITE((0x2c20 + j), 0x00000000);
		RADEON_WRITE((0x2c24 + j), 0x00000000);
		j += 0x18;
	}

	RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff));

	/* setup tiling, simd, pipe config */
	mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG);

	/* the pipe tiling field is log2 of the tile pipe count (1/2/4/8) */
	switch (dev_priv->r600_max_tile_pipes) {
	case 1:
		gb_tiling_config |= R600_PIPE_TILING(0);
		break;
	case 2:
		gb_tiling_config |= R600_PIPE_TILING(1);
		break;
	case 4:
		gb_tiling_config |= R600_PIPE_TILING(2);
		break;
	case 8:
		gb_tiling_config |= R600_PIPE_TILING(3);
		break;
	default:
		break;
	}

	/* RV770 gets a fixed bank tiling value; others take it from MC_ARB_RAMCFG */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
		gb_tiling_config |= R600_BANK_TILING(1);
	else
		gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK);

	gb_tiling_config |= R600_GROUP_SIZE(0);

	/* row tiling and sample split both follow the NOOFROWS field, capped at 3 */
	if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) {
		gb_tiling_config |= R600_ROW_TILING(3);
		gb_tiling_config |= R600_SAMPLE_SPLIT(3);
	} else {
		gb_tiling_config |=
			R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
		gb_tiling_config |=
			R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK));
	}

	gb_tiling_config |= R600_BANK_SWAPS(1);

	/*
	 * Keep bits 16-23 as read from the hardware, then OR in a mask that
	 * is shifted left by the number of usable units (so the lowest
	 * "max" positions stay clear).  The same construction is used for
	 * the quad pipes and SIMDs below.
	 */
	cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
	cc_rb_backend_disable |=
		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);

	cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
	cc_gc_shader_pipe_config |=
		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
	cc_gc_shader_pipe_config |=
		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);

	/*
	 * RV740 uses a fixed backend map; the other families compute one.
	 *
	 * NOTE(review): the backend count masks with R7XX_MAX_BACKENDS_MASK
	 * and only then shifts right by 16, while the disable bits were kept
	 * in bits 16-23 above.  If that mask only covers low bits the count
	 * is always 0 and R7XX_MAX_BACKENDS is passed as num_backends; the
	 * callee re-derives the count from the disable mask, so the result
	 * may be unaffected -- confirm against the macro definition.
	 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
		backend_map = 0x28;
	else
		backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,
								dev_priv->r600_max_tile_pipes,
								(R7XX_MAX_BACKENDS -
								 r600_count_pipe_bits((cc_rb_backend_disable &
										       R7XX_MAX_BACKENDS_MASK) >> 16)),
								(cc_rb_backend_disable >> 16));
	gb_tiling_config |= R600_BACKEND_MAP(backend_map);

	/* DCP and HDP only receive the low 16 bits of the tiling config */
	RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config);
	RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
	/* cache values decoded from the config: bits 6-7, bits 1-3, bits 4-5 */
	if (gb_tiling_config & 0xc0) {
		dev_priv->r600_group_size = 512;
	} else {
		dev_priv->r600_group_size = 256;
	}
	dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
	if (gb_tiling_config & 0x30) {
		dev_priv->r600_nbanks = 8;
	} else {
		dev_priv->r600_nbanks = 4;
	}

	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);

	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);

	/* active quad pipes = max pipes minus the bits set in the inactive field */
	num_qd_pipes =
		R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);

	/* set HW defaults for 3D engine */
	RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) |
						R600_ROQ_IB2_START(0x2b)));

	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));

	ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);
	RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);

	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
	RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1);

	/* replace the cache depth field with (number of sets * 64) - 1 */
	smx_dc_ctl0 = RADEON_READ(R600_SMX_DC_CTL0);
	smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff);
	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);

	/* SMX_EVENT_CTL is programmed on every family except RV740 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)
		RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
						  R700_GS_FLUSH_CTL(4) |
						  R700_ACK_FLUSH_CTL(3) |
						  R700_SYNC_FLUSH_CTL));

	/* clock-off delay: 0x1f on RV770/RV740, 2 on everything else */
	db_debug3 = RADEON_READ(R700_DB_DEBUG3);
	db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV740:
		db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);
		break;
	case CHIP_RV710:
	case CHIP_RV730:
	default:
		db_debug3 |= R700_DB_CLK_OFF_DELAY(2);
		break;
	}
	RADEON_WRITE(R700_DB_DEBUG3, db_debug3);

	/* DB_DEBUG4 is adjusted on every family except RV770 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {
		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
	}

	/* each export buffer size is programmed as (size / 4) - 1 */
	RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) |
						   R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) |
						   R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1)));

	RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) |
						 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) |
						 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize)));

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1);

	RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0));

	RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4));

	RADEON_WRITE(R600_CP_PERFMON_CNTL, 0);

	/* fetch FIFO high-water mark: 0x1 on RV770/RV730/RV710, 0x4 otherwise */
	sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) |
			    R600_DONE_FIFO_HIWATER(0xe0) |
			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV710:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
		break;
	case CHIP_RV740:
	default:
		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
		break;
	}
	RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes);

	/* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
	 * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
	 */
	/* clear the four priority fields, then set PS=0, VS=1, GS=2, ES=3 */
	sq_config = RADEON_READ(R600_SQ_CONFIG);
	sq_config &= ~(R600_PS_PRIO(3) |
		       R600_VS_PRIO(3) |
		       R600_GS_PRIO(3) |
		       R600_ES_PRIO(3));
	sq_config |= (R600_DX9_CONSTS |
		      R600_VC_ENABLE |
		      R600_EXPORT_SRC_C |
		      R600_PS_PRIO(0) |
		      R600_VS_PRIO(1) |
		      R600_GS_PRIO(2) |
		      R600_ES_PRIO(3));
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		/* no vertex cache */
		sq_config &= ~R600_VC_ENABLE;

	RADEON_WRITE(R600_SQ_CONFIG, sq_config);

	/* default GPR split: 24/64 each for PS and VS, half of that for clause temps */
	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						   R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) |
						   R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2)));

	/* 7/64 of the GPRs each for GS and ES */
	RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) |
						   R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64)));

	/*
	 * default thread split: 4/8 PS, 2/8 VS, 1/8 ES.
	 *
	 * NOTE(review): the else branch below divides r600_max_gs_threads
	 * (not r600_max_threads) by 8 -- confirm that this is intended.
	 */
	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) |
				   R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) |
				   R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8));
	if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads)
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads);
	else
		sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8);
	RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);

	/* a quarter of the stack entries for each of PS, VS, GS and ES */
	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) |
						     R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4)));

	/* the same dynamic GPR size (38/64 of the GPRs) goes to all eight registers */
	sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) |
				     R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64));

	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0);
	RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0);

	RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) |
						     R700_FORCE_EOV_MAX_REZ_CNT(255)));

	/* RV710 (vertex cache disabled above) invalidates the TC only */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));
	else
		RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) |
							   R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO)));

	/* unknown families keep the initial depth of 0 */
	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
	case CHIP_RV770:
	case CHIP_RV730:
	case CHIP_RV740:
		gs_prim_buffer_depth = 384;
		break;
	case CHIP_RV710:
		gs_prim_buffer_depth = 128;
		break;
	default:
		break;
	}

	num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16;
	vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread;
	/* Max value for this is 256 */
	if (vgt_gs_per_es > 256)
		vgt_gs_per_es = 256;

	RADEON_WRITE(R600_VGT_ES_PER_GS, 128);
	RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es);
	RADEON_WRITE(R600_VGT_GS_PER_VS, 2);

	/* more default values.
2D/3D driver should adjust as needed */
	RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0);
	RADEON_WRITE(R600_VGT_STRMOUT_EN, 0);
	RADEON_WRITE(R600_SX_MISC, 0);
	RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0);
	RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa);
	RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0);
	RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff);
	RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0);
	RADEON_WRITE(R600_SPI_INPUT_Z, 0);
	RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2));
	RADEON_WRITE(R600_CB_COLOR7_FRAG, 0);

	/* clear render buffer base addresses */
	RADEON_WRITE(R600_CB_COLOR0_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR1_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR2_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR3_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR4_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR5_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR6_BASE, 0);
	RADEON_WRITE(R600_CB_COLOR7_BASE, 0);

	RADEON_WRITE(R700_TCP_CNTL, 0);

	hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL);
	RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0);

	RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA |
					  R600_NUM_CLIP_SEQ(3)));

}

/*
 * Bring up the command processor ring buffer.
 *
 * Runs the family-specific gfx init, pulses the CP soft reset, programs the
 * ring buffer control/pointer/base registers and the read-pointer and
 * scratch write-back addresses, enables bus mastering, zeroes the
 * frame/dispatch/clear counters (registers, write-back slots and the SAREA
 * copies) and finally waits for the engine to go idle.
 *
 * @dev:       DRM device
 * @dev_priv:  driver private data; ring, ring_rptr, cp_ring and GART fields
 *             are read here and must already be set up
 * @file_priv: file whose master's SAREA copies of the counters are reset
 */
static void r600_cp_init_ring_buffer(struct drm_device *dev,
				       drm_radeon_private_t *dev_priv,
				       struct drm_file *file_priv)
{
	struct drm_radeon_master_private *master_priv;
	u32 ring_start;
	u64 rptr_addr;

	/* families from RV770 upwards use the R7xx gfx init */
	if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770))
		r700_gfx_init(dev, dev_priv);
	else
		r600_gfx_init(dev, dev_priv);

	/* pulse the CP soft reset; the read-back is presumably a posting read */
	RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
	RADEON_READ(R600_GRBM_SOFT_RESET);
	mdelay(15);
	RADEON_WRITE(R600_GRBM_SOFT_RESET, 0);


	/* Set ring buffer size */
	/*
	 * NOTE(review): this #else branch uses RADEON_RB_NO_UPDATE while
	 * every other write in this function uses R600_RB_NO_UPDATE --
	 * confirm that both names expand to the same bit.
	 */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_BUF_SWAP_32BIT |
		     R600_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_RB_NO_UPDATE |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x0);

	/* Set the write pointer delay */
	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);

	/* same value as above plus RPTR_WR_ENA, ahead of the RPTR_WR write below */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_BUF_SWAP_32BIT |
		     R600_RB_NO_UPDATE |
		     R600_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     R600_RB_NO_UPDATE |
		     R600_RB_RPTR_WR_ENA |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* Initialize the ring buffer's read and write pointers */
	RADEON_WRITE(R600_CP_RB_RPTR_WR, 0);
	RADEON_WRITE(R600_CP_RB_WPTR, 0);
	SET_RING_HEAD(dev_priv, 0);
	dev_priv->ring.tail = 0;

	/*
	 * Rebase the read-pointer map offset onto gart_vm_start: subtract
	 * the AGP base (AGP) or the scatter/gather virtual base (otherwise).
	 *
	 * NOTE(review): dev->sg is dereferenced without a NULL check in the
	 * non-AGP path -- confirm that callers guarantee it is set.
	 */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		rptr_addr = dev_priv->ring_rptr->offset
			- dev->agp->base +
			dev_priv->gart_vm_start;
	} else
#endif
	{
		rptr_addr = dev_priv->ring_rptr->offset
			- ((unsigned long) dev->sg->virtual)
			+ dev_priv->gart_vm_start;
	}
	/* low word with bits 0-1 masked off, then the upper 32 bits */
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR, (rptr_addr & 0xfffffffc));
	RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, upper_32_bits(rptr_addr));

	/*
	 * Final RB_CNTL value: NO_UPDATE and RPTR_WR_ENA are no longer set.
	 *
	 * NOTE(review): the big-endian branch uses RADEON_BUF_SWAP_32BIT
	 * here but R600_BUF_SWAP_32BIT above -- confirm they are equal.
	 */
#ifdef __BIG_ENDIAN
	RADEON_WRITE(R600_CP_RB_CNTL,
		     RADEON_BUF_SWAP_32BIT |
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#else
	RADEON_WRITE(R600_CP_RB_CNTL,
		     (dev_priv->ring.rptr_update_l2qw << 8) |
		     dev_priv->ring.size_l2qw);
#endif

	/* ring base is rebased onto gart_vm_start the same way as rptr_addr */
#if __OS_HAS_AGP
	if (dev_priv->flags & RADEON_IS_AGP) {
		/* XXX */
		radeon_write_agp_base(dev_priv, dev->agp->base);

		/* XXX */
		radeon_write_agp_location(dev_priv,
			     (((dev_priv->gart_vm_start - 1 +
				dev_priv->gart_size) & 0xffff0000) |
			      (dev_priv->gart_vm_start >> 16)));

		ring_start = (dev_priv->cp_ring->offset
			      - dev->agp->base
			      + dev_priv->gart_vm_start);
	} else
#endif
		ring_start = (dev_priv->cp_ring->offset
			      - (unsigned long)dev->sg->virtual
			      + dev_priv->gart_vm_start);

	/* the base register takes the ring address shifted right by 8 */
	RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8);

	RADEON_WRITE(R600_CP_ME_CNTL, 0xff);

	RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28));

	/* Initialize the scratch register pointer.  This will cause
	 * the scratch register values to be written out to memory
	 * whenever they are updated.
	 *
	 * We simply put this behind the ring read pointer, this works
	 * with PCI GART as well as (whatever kind of) AGP GART
	 */
	{
		u64 scratch_addr;

		/* rebuild the 64-bit rptr address from the two registers */
		scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;
		scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
		scratch_addr += R600_SCRATCH_REG_OFFSET;
		/* the register takes the address shifted right by 8, 32 bits wide */
		scratch_addr >>= 8;
		scratch_addr &= 0xffffffff;

		RADEON_WRITE(R600_SCRATCH_ADDR, (uint32_t)scratch_addr);
	}

	/* presumably one enable bit per scratch register; 0-2 are used below */
	RADEON_WRITE(R600_SCRATCH_UMSK, 0x7);

	/* Turn on bus mastering */
	radeon_enable_bm(dev_priv);

	/* zero each counter's write-back slot and its register */
	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0);
	RADEON_WRITE(R600_LAST_FRAME_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0);
	RADEON_WRITE(R600_LAST_DISPATCH_REG, 0);

	radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0);
	RADEON_WRITE(R600_LAST_CLEAR_REG, 0);

	/* reset sarea copies of these */
	master_priv = file_priv->master->driver_priv;
	if (master_priv->sarea_priv) {
		master_priv->sarea_priv->last_frame = 0;
		master_priv->sarea_priv->last_dispatch = 0;
		master_priv->sarea_priv->last_clear = 0;
	}

r600_do_wait_for_idle(dev_priv); 1938 1939} 1940 1941int r600_do_cleanup_cp(struct drm_device *dev) 1942{ 1943 drm_radeon_private_t *dev_priv = dev->dev_private; 1944 DRM_DEBUG("\n"); 1945 1946 /* Make sure interrupts are disabled here because the uninstall ioctl 1947 * may not have been called from userspace and after dev_private 1948 * is freed, it's too late. 1949 */ 1950 if (dev->irq_enabled) 1951 drm_irq_uninstall(dev); 1952 1953#if __OS_HAS_AGP 1954 if (dev_priv->flags & RADEON_IS_AGP) { 1955 if (dev_priv->cp_ring != NULL) { 1956 drm_core_ioremapfree(dev_priv->cp_ring, dev); 1957 dev_priv->cp_ring = NULL; 1958 } 1959 if (dev_priv->ring_rptr != NULL) { 1960 drm_core_ioremapfree(dev_priv->ring_rptr, dev); 1961 dev_priv->ring_rptr = NULL; 1962 } 1963 if (dev->agp_buffer_map != NULL) { 1964 drm_core_ioremapfree(dev->agp_buffer_map, dev); 1965 dev->agp_buffer_map = NULL; 1966 } 1967 } else 1968#endif 1969 { 1970 1971 if (dev_priv->gart_info.bus_addr) 1972 r600_page_table_cleanup(dev, &dev_priv->gart_info); 1973 1974 if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) { 1975 drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev); 1976 dev_priv->gart_info.addr = NULL; 1977 } 1978 } 1979 /* only clear to the start of flags */ 1980 memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags)); 1981 1982 return 0; 1983} 1984 1985int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, 1986 struct drm_file *file_priv) 1987{ 1988 drm_radeon_private_t *dev_priv = dev->dev_private; 1989 struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv; 1990 1991 DRM_DEBUG("\n"); 1992 1993 mutex_init(&dev_priv->cs_mutex); 1994 r600_cs_legacy_init(); 1995 /* if we require new memory map but we don't have it fail */ 1996 if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { 1997 DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n"); 1998 r600_do_cleanup_cp(dev); 1999 return -EINVAL; 2000 
} 2001 2002 if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) { 2003 DRM_DEBUG("Forcing AGP card to PCI mode\n"); 2004 dev_priv->flags &= ~RADEON_IS_AGP; 2005 /* The writeback test succeeds, but when writeback is enabled, 2006 * the ring buffer read ptr update fails after first 128 bytes. 2007 */ 2008 radeon_no_wb = 1; 2009 } else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE)) 2010 && !init->is_pci) { 2011 DRM_DEBUG("Restoring AGP flag\n"); 2012 dev_priv->flags |= RADEON_IS_AGP; 2013 } 2014 2015 dev_priv->usec_timeout = init->usec_timeout; 2016 if (dev_priv->usec_timeout < 1 || 2017 dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) { 2018 DRM_DEBUG("TIMEOUT problem!\n"); 2019 r600_do_cleanup_cp(dev); 2020 return -EINVAL; 2021 } 2022 2023 /* Enable vblank on CRTC1 for older X servers 2024 */ 2025 dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; 2026 dev_priv->do_boxes = 0; 2027 dev_priv->cp_mode = init->cp_mode; 2028 2029 /* We don't support anything other than bus-mastering ring mode, 2030 * but the ring can be in either AGP or PCI space for the ring 2031 * read pointer. 
2032 */ 2033 if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) && 2034 (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) { 2035 DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode); 2036 r600_do_cleanup_cp(dev); 2037 return -EINVAL; 2038 } 2039 2040 switch (init->fb_bpp) { 2041 case 16: 2042 dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565; 2043 break; 2044 case 32: 2045 default: 2046 dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888; 2047 break; 2048 } 2049 dev_priv->front_offset = init->front_offset; 2050 dev_priv->front_pitch = init->front_pitch; 2051 dev_priv->back_offset = init->back_offset; 2052 dev_priv->back_pitch = init->back_pitch; 2053 2054 dev_priv->ring_offset = init->ring_offset; 2055 dev_priv->ring_rptr_offset = init->ring_rptr_offset; 2056 dev_priv->buffers_offset = init->buffers_offset; 2057 dev_priv->gart_textures_offset = init->gart_textures_offset; 2058 2059 master_priv->sarea = drm_getsarea(dev); 2060 if (!master_priv->sarea) { 2061 DRM_ERROR("could not find sarea!\n"); 2062 r600_do_cleanup_cp(dev); 2063 return -EINVAL; 2064 } 2065 2066 dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset); 2067 if (!dev_priv->cp_ring) { 2068 DRM_ERROR("could not find cp ring region!\n"); 2069 r600_do_cleanup_cp(dev); 2070 return -EINVAL; 2071 } 2072 dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset); 2073 if (!dev_priv->ring_rptr) { 2074 DRM_ERROR("could not find ring read pointer!\n"); 2075 r600_do_cleanup_cp(dev); 2076 return -EINVAL; 2077 } 2078 dev->agp_buffer_token = init->buffers_offset; 2079 dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); 2080 if (!dev->agp_buffer_map) { 2081 DRM_ERROR("could not find dma buffer region!\n"); 2082 r600_do_cleanup_cp(dev); 2083 return -EINVAL; 2084 } 2085 2086 if (init->gart_textures_offset) { 2087 dev_priv->gart_textures = 2088 drm_core_findmap(dev, init->gart_textures_offset); 2089 if (!dev_priv->gart_textures) { 2090 DRM_ERROR("could not find GART texture region!\n"); 2091 
r600_do_cleanup_cp(dev); 2092 return -EINVAL; 2093 } 2094 } 2095 2096#if __OS_HAS_AGP 2097 /* XXX */ 2098 if (dev_priv->flags & RADEON_IS_AGP) { 2099 drm_core_ioremap_wc(dev_priv->cp_ring, dev); 2100 drm_core_ioremap_wc(dev_priv->ring_rptr, dev); 2101 drm_core_ioremap_wc(dev->agp_buffer_map, dev); 2102 if (!dev_priv->cp_ring->handle || 2103 !dev_priv->ring_rptr->handle || 2104 !dev->agp_buffer_map->handle) { 2105 DRM_ERROR("could not find ioremap agp regions!\n"); 2106 r600_do_cleanup_cp(dev); 2107 return -EINVAL; 2108 } 2109 } else 2110#endif 2111 { 2112 dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset; 2113 dev_priv->ring_rptr->handle = 2114 (void *)(unsigned long)dev_priv->ring_rptr->offset; 2115 dev->agp_buffer_map->handle = 2116 (void *)(unsigned long)dev->agp_buffer_map->offset; 2117 2118 DRM_DEBUG("dev_priv->cp_ring->handle %p\n", 2119 dev_priv->cp_ring->handle); 2120 DRM_DEBUG("dev_priv->ring_rptr->handle %p\n", 2121 dev_priv->ring_rptr->handle); 2122 DRM_DEBUG("dev->agp_buffer_map->handle %p\n", 2123 dev->agp_buffer_map->handle); 2124 } 2125 2126 dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24; 2127 dev_priv->fb_size = 2128 (((radeon_read_fb_location(dev_priv) & 0xffff0000u) << 8) + 0x1000000) 2129 - dev_priv->fb_location; 2130 2131 dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) | 2132 ((dev_priv->front_offset 2133 + dev_priv->fb_location) >> 10)); 2134 2135 dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) | 2136 ((dev_priv->back_offset 2137 + dev_priv->fb_location) >> 10)); 2138 2139 dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) | 2140 ((dev_priv->depth_offset 2141 + dev_priv->fb_location) >> 10)); 2142 2143 dev_priv->gart_size = init->gart_size; 2144 2145 /* New let's set the memory map ... 
*/ 2146 if (dev_priv->new_memmap) { 2147 u32 base = 0; 2148 2149 DRM_INFO("Setting GART location based on new memory map\n"); 2150 2151 /* If using AGP, try to locate the AGP aperture at the same 2152 * location in the card and on the bus, though we have to 2153 * align it down. 2154 */ 2155#if __OS_HAS_AGP 2156 /* XXX */ 2157 if (dev_priv->flags & RADEON_IS_AGP) { 2158 base = dev->agp->base; 2159 /* Check if valid */ 2160 if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location && 2161 base < (dev_priv->fb_location + dev_priv->fb_size - 1)) { 2162 DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n", 2163 dev->agp->base); 2164 base = 0; 2165 } 2166 } 2167#endif 2168 /* If not or if AGP is at 0 (Macs), try to put it elsewhere */ 2169 if (base == 0) { 2170 base = dev_priv->fb_location + dev_priv->fb_size; 2171 if (base < dev_priv->fb_location || 2172 ((base + dev_priv->gart_size) & 0xfffffffful) < base) 2173 base = dev_priv->fb_location 2174 - dev_priv->gart_size; 2175 } 2176 dev_priv->gart_vm_start = base & 0xffc00000u; 2177 if (dev_priv->gart_vm_start != base) 2178 DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n", 2179 base, dev_priv->gart_vm_start); 2180 } 2181 2182#if __OS_HAS_AGP 2183 /* XXX */ 2184 if (dev_priv->flags & RADEON_IS_AGP) 2185 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset 2186 - dev->agp->base 2187 + dev_priv->gart_vm_start); 2188 else 2189#endif 2190 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset 2191 - (unsigned long)dev->sg->virtual 2192 + dev_priv->gart_vm_start); 2193 2194 DRM_DEBUG("fb 0x%08x size %d\n", 2195 (unsigned int) dev_priv->fb_location, 2196 (unsigned int) dev_priv->fb_size); 2197 DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size); 2198 DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n", 2199 (unsigned int) dev_priv->gart_vm_start); 2200 DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n", 2201 dev_priv->gart_buffers_offset); 2202 2203 dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle; 
2204 dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle 2205 + init->ring_size / sizeof(u32)); 2206 dev_priv->ring.size = init->ring_size; 2207 dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); 2208 2209 dev_priv->ring.rptr_update = /* init->rptr_update */ 4096; 2210 dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8); 2211 2212 dev_priv->ring.fetch_size = /* init->fetch_size */ 32; 2213 dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16); 2214 2215 dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1; 2216 2217 dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK; 2218 2219#if __OS_HAS_AGP 2220 if (dev_priv->flags & RADEON_IS_AGP) { 2221 /* XXX turn off pcie gart */ 2222 } else 2223#endif 2224 { 2225 dev_priv->gart_info.table_mask = DMA_BIT_MASK(32); 2226 /* if we have an offset set from userspace */ 2227 if (!dev_priv->pcigart_offset_set) { 2228 DRM_ERROR("Need gart offset from userspace\n"); 2229 r600_do_cleanup_cp(dev); 2230 return -EINVAL; 2231 } 2232 2233 DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset); 2234 2235 dev_priv->gart_info.bus_addr = 2236 dev_priv->pcigart_offset + dev_priv->fb_location; 2237 dev_priv->gart_info.mapping.offset = 2238 dev_priv->pcigart_offset + dev_priv->fb_aper_offset; 2239 dev_priv->gart_info.mapping.size = 2240 dev_priv->gart_info.table_size; 2241 2242 drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev); 2243 if (!dev_priv->gart_info.mapping.handle) { 2244 DRM_ERROR("ioremap failed.\n"); 2245 r600_do_cleanup_cp(dev); 2246 return -EINVAL; 2247 } 2248 2249 dev_priv->gart_info.addr = 2250 dev_priv->gart_info.mapping.handle; 2251 2252 DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n", 2253 dev_priv->gart_info.addr, 2254 dev_priv->pcigart_offset); 2255 2256 if (!r600_page_table_init(dev)) { 2257 DRM_ERROR("Failed to init GART table\n"); 2258 r600_do_cleanup_cp(dev); 2259 return -EINVAL; 2260 } 2261 2262 if (((dev_priv->flags & 
RADEON_FAMILY_MASK) >= CHIP_RV770)) 2263 r700_vm_init(dev); 2264 else 2265 r600_vm_init(dev); 2266 } 2267 2268 if (!dev_priv->me_fw || !dev_priv->pfp_fw) { 2269 int err = r600_cp_init_microcode(dev_priv); 2270 if (err) { 2271 DRM_ERROR("Failed to load firmware!\n"); 2272 r600_do_cleanup_cp(dev); 2273 return err; 2274 } 2275 } 2276 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 2277 r700_cp_load_microcode(dev_priv); 2278 else 2279 r600_cp_load_microcode(dev_priv); 2280 2281 r600_cp_init_ring_buffer(dev, dev_priv, file_priv); 2282 2283 dev_priv->last_buf = 0; 2284 2285 r600_do_engine_reset(dev); 2286 r600_test_writeback(dev_priv); 2287 2288 return 0; 2289} 2290 2291int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv) 2292{ 2293 drm_radeon_private_t *dev_priv = dev->dev_private; 2294 2295 DRM_DEBUG("\n"); 2296 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) { 2297 r700_vm_init(dev); 2298 r700_cp_load_microcode(dev_priv); 2299 } else { 2300 r600_vm_init(dev); 2301 r600_cp_load_microcode(dev_priv); 2302 } 2303 r600_cp_init_ring_buffer(dev, dev_priv, file_priv); 2304 r600_do_engine_reset(dev); 2305 2306 return 0; 2307} 2308 2309/* Wait for the CP to go idle. 2310 */ 2311int r600_do_cp_idle(drm_radeon_private_t *dev_priv) 2312{ 2313 RING_LOCALS; 2314 DRM_DEBUG("\n"); 2315 2316 BEGIN_RING(5); 2317 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); 2318 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); 2319 /* wait for 3D idle clean */ 2320 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); 2321 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); 2322 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); 2323 2324 ADVANCE_RING(); 2325 COMMIT_RING(); 2326 2327 return r600_do_wait_for_idle(dev_priv); 2328} 2329 2330/* Start the Command Processor. 
2331 */ 2332void r600_do_cp_start(drm_radeon_private_t *dev_priv) 2333{ 2334 u32 cp_me; 2335 RING_LOCALS; 2336 DRM_DEBUG("\n"); 2337 2338 BEGIN_RING(7); 2339 OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5)); 2340 OUT_RING(0x00000001); 2341 if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) 2342 OUT_RING(0x00000003); 2343 else 2344 OUT_RING(0x00000000); 2345 OUT_RING((dev_priv->r600_max_hw_contexts - 1)); 2346 OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1)); 2347 OUT_RING(0x00000000); 2348 OUT_RING(0x00000000); 2349 ADVANCE_RING(); 2350 COMMIT_RING(); 2351 2352 /* set the mux and reset the halt bit */ 2353 cp_me = 0xff; 2354 RADEON_WRITE(R600_CP_ME_CNTL, cp_me); 2355 2356 dev_priv->cp_running = 1; 2357 2358} 2359 2360void r600_do_cp_reset(drm_radeon_private_t *dev_priv) 2361{ 2362 u32 cur_read_ptr; 2363 DRM_DEBUG("\n"); 2364 2365 cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR); 2366 RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr); 2367 SET_RING_HEAD(dev_priv, cur_read_ptr); 2368 dev_priv->ring.tail = cur_read_ptr; 2369} 2370 2371void r600_do_cp_stop(drm_radeon_private_t *dev_priv) 2372{ 2373 uint32_t cp_me; 2374 2375 DRM_DEBUG("\n"); 2376 2377 cp_me = 0xff | R600_CP_ME_HALT; 2378 2379 RADEON_WRITE(R600_CP_ME_CNTL, cp_me); 2380 2381 dev_priv->cp_running = 0; 2382} 2383 2384int r600_cp_dispatch_indirect(struct drm_device *dev, 2385 struct drm_buf *buf, int start, int end) 2386{ 2387 drm_radeon_private_t *dev_priv = dev->dev_private; 2388 RING_LOCALS; 2389 2390 if (start != end) { 2391 unsigned long offset = (dev_priv->gart_buffers_offset 2392 + buf->offset + start); 2393 int dwords = (end - start + 3) / sizeof(u32); 2394 2395 DRM_DEBUG("dwords:%d\n", dwords); 2396 DRM_DEBUG("offset 0x%lx\n", offset); 2397 2398 2399 /* Indirect buffer data must be a multiple of 16 dwords. 2400 * pad the data with a Type-2 CP packet. 
2401 */ 2402 while (dwords & 0xf) { 2403 u32 *data = (u32 *) 2404 ((char *)dev->agp_buffer_map->handle 2405 + buf->offset + start); 2406 data[dwords++] = RADEON_CP_PACKET2; 2407 } 2408 2409 /* Fire off the indirect buffer */ 2410 BEGIN_RING(4); 2411 OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2)); 2412 OUT_RING((offset & 0xfffffffc)); 2413 OUT_RING((upper_32_bits(offset) & 0xff)); 2414 OUT_RING(dwords); 2415 ADVANCE_RING(); 2416 } 2417 2418 return 0; 2419} 2420 2421void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv) 2422{ 2423 drm_radeon_private_t *dev_priv = dev->dev_private; 2424 struct drm_master *master = file_priv->master; 2425 struct drm_radeon_master_private *master_priv = master->driver_priv; 2426 drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv; 2427 int nbox = sarea_priv->nbox; 2428 struct drm_clip_rect *pbox = sarea_priv->boxes; 2429 int i, cpp, src_pitch, dst_pitch; 2430 uint64_t src, dst; 2431 RING_LOCALS; 2432 DRM_DEBUG("\n"); 2433 2434 if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888) 2435 cpp = 4; 2436 else 2437 cpp = 2; 2438 2439 if (sarea_priv->pfCurrentPage == 0) { 2440 src_pitch = dev_priv->back_pitch; 2441 dst_pitch = dev_priv->front_pitch; 2442 src = dev_priv->back_offset + dev_priv->fb_location; 2443 dst = dev_priv->front_offset + dev_priv->fb_location; 2444 } else { 2445 src_pitch = dev_priv->front_pitch; 2446 dst_pitch = dev_priv->back_pitch; 2447 src = dev_priv->front_offset + dev_priv->fb_location; 2448 dst = dev_priv->back_offset + dev_priv->fb_location; 2449 } 2450 2451 if (r600_prepare_blit_copy(dev, file_priv)) { 2452 DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); 2453 return; 2454 } 2455 for (i = 0; i < nbox; i++) { 2456 int x = pbox[i].x1; 2457 int y = pbox[i].y1; 2458 int w = pbox[i].x2 - x; 2459 int h = pbox[i].y2 - y; 2460 2461 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h); 2462 2463 r600_blit_swap(dev, 2464 src, dst, 2465 x, y, x, y, w, h, 2466 src_pitch, dst_pitch, cpp); 
2467 } 2468 r600_done_blit_copy(dev); 2469 2470 /* Increment the frame counter. The client-side 3D driver must 2471 * throttle the framerate by waiting for this value before 2472 * performing the swapbuffer ioctl. 2473 */ 2474 sarea_priv->last_frame++; 2475 2476 BEGIN_RING(3); 2477 R600_FRAME_AGE(sarea_priv->last_frame); 2478 ADVANCE_RING(); 2479} 2480 2481int r600_cp_dispatch_texture(struct drm_device *dev, 2482 struct drm_file *file_priv, 2483 drm_radeon_texture_t *tex, 2484 drm_radeon_tex_image_t *image) 2485{ 2486 drm_radeon_private_t *dev_priv = dev->dev_private; 2487 struct drm_buf *buf; 2488 u32 *buffer; 2489 const u8 __user *data; 2490 int size, pass_size; 2491 u64 src_offset, dst_offset; 2492 2493 if (!radeon_check_offset(dev_priv, tex->offset)) { 2494 DRM_ERROR("Invalid destination offset\n"); 2495 return -EINVAL; 2496 } 2497 2498 /* this might fail for zero-sized uploads - are those illegal? */ 2499 if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) { 2500 DRM_ERROR("Invalid final destination offset\n"); 2501 return -EINVAL; 2502 } 2503 2504 size = tex->height * tex->pitch; 2505 2506 if (size == 0) 2507 return 0; 2508 2509 dst_offset = tex->offset; 2510 2511 if (r600_prepare_blit_copy(dev, file_priv)) { 2512 DRM_ERROR("unable to allocate vertex buffer for swap buffer\n"); 2513 return -EAGAIN; 2514 } 2515 do { 2516 data = (const u8 __user *)image->data; 2517 pass_size = size; 2518 2519 buf = radeon_freelist_get(dev); 2520 if (!buf) { 2521 DRM_DEBUG("EAGAIN\n"); 2522 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image))) 2523 return -EFAULT; 2524 return -EAGAIN; 2525 } 2526 2527 if (pass_size > buf->total) 2528 pass_size = buf->total; 2529 2530 /* Dispatch the indirect buffer. 
2531 */ 2532 buffer = 2533 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset); 2534 2535 if (DRM_COPY_FROM_USER(buffer, data, pass_size)) { 2536 DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size); 2537 return -EFAULT; 2538 } 2539 2540 buf->file_priv = file_priv; 2541 buf->used = pass_size; 2542 src_offset = dev_priv->gart_buffers_offset + buf->offset; 2543 2544 r600_blit_copy(dev, src_offset, dst_offset, pass_size); 2545 2546 radeon_cp_discard_buffer(dev, file_priv->master, buf); 2547 2548 /* Update the input parameters for next time */ 2549 image->data = (const u8 __user *)image->data + pass_size; 2550 dst_offset += pass_size; 2551 size -= pass_size; 2552 } while (size > 0); 2553 r600_done_blit_copy(dev); 2554 2555 return 0; 2556} 2557 2558/* 2559 * Legacy cs ioctl 2560 */ 2561static u32 radeon_cs_id_get(struct drm_radeon_private *radeon) 2562{ 2563 /* FIXME: check if wrap affect last reported wrap & sequence */ 2564 radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF; 2565 if (!radeon->cs_id_scnt) { 2566 /* increment wrap counter */ 2567 radeon->cs_id_wcnt += 0x01000000; 2568 /* valid sequence counter start at 1 */ 2569 radeon->cs_id_scnt = 1; 2570 } 2571 return (radeon->cs_id_scnt | radeon->cs_id_wcnt); 2572} 2573 2574static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id) 2575{ 2576 RING_LOCALS; 2577 2578 *id = radeon_cs_id_get(dev_priv); 2579 2580 /* SCRATCH 2 */ 2581 BEGIN_RING(3); 2582 R600_CLEAR_AGE(*id); 2583 ADVANCE_RING(); 2584 COMMIT_RING(); 2585} 2586 2587static int r600_ib_get(struct drm_device *dev, 2588 struct drm_file *fpriv, 2589 struct drm_buf **buffer) 2590{ 2591 struct drm_buf *buf; 2592 2593 *buffer = NULL; 2594 buf = radeon_freelist_get(dev); 2595 if (!buf) { 2596 return -EBUSY; 2597 } 2598 buf->file_priv = fpriv; 2599 *buffer = buf; 2600 return 0; 2601} 2602 2603static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf, 2604 struct drm_file *fpriv, int l, int r) 2605{ 2606 drm_radeon_private_t *dev_priv 
= dev->dev_private; 2607 2608 if (buf) { 2609 if (!r) 2610 r600_cp_dispatch_indirect(dev, buf, 0, l * 4); 2611 radeon_cp_discard_buffer(dev, fpriv->master, buf); 2612 COMMIT_RING(); 2613 } 2614} 2615 2616int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv) 2617{ 2618 struct drm_radeon_private *dev_priv = dev->dev_private; 2619 struct drm_radeon_cs *cs = data; 2620 struct drm_buf *buf; 2621 unsigned family; 2622 int l, r = 0; 2623 u32 *ib, cs_id = 0; 2624 2625 if (dev_priv == NULL) { 2626 DRM_ERROR("called with no initialization\n"); 2627 return -EINVAL; 2628 } 2629 family = dev_priv->flags & RADEON_FAMILY_MASK; 2630 if (family < CHIP_R600) { 2631 DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n"); 2632 return -EINVAL; 2633 } 2634 mutex_lock(&dev_priv->cs_mutex); 2635 /* get ib */ 2636 r = r600_ib_get(dev, fpriv, &buf); 2637 if (r) { 2638 DRM_ERROR("ib_get failed\n"); 2639 goto out; 2640 } 2641 ib = dev->agp_buffer_map->handle + buf->offset; 2642 /* now parse command stream */ 2643 r = r600_cs_legacy(dev, data, fpriv, family, ib, &l); 2644 if (r) { 2645 goto out; 2646 } 2647 2648out: 2649 r600_ib_free(dev, buf, fpriv, l, r); 2650 /* emit cs id sequence */ 2651 r600_cs_id_emit(dev_priv, &cs_id); 2652 cs->cs_id = cs_id; 2653 mutex_unlock(&dev_priv->cs_mutex); 2654 return r; 2655} 2656 2657void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size) 2658{ 2659 struct drm_radeon_private *dev_priv = dev->dev_private; 2660 2661 *npipes = dev_priv->r600_npipes; 2662 *nbanks = dev_priv->r600_nbanks; 2663 *group_size = dev_priv->r600_group_size; 2664} 2665