r600_cp.c revision c1556f71513f2e660fb2bbdc29344361b1ebff35
1/* 2 * Copyright 2008-2009 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: 25 * Dave Airlie <airlied@redhat.com> 26 * Alex Deucher <alexander.deucher@amd.com> 27 */ 28 29#include "drmP.h" 30#include "drm.h" 31#include "radeon_drm.h" 32#include "radeon_drv.h" 33 34#include "r600_microcode.h" 35 36# define ATI_PCIGART_PAGE_SIZE 4096 /**< PCI GART page size */ 37# define ATI_PCIGART_PAGE_MASK (~(ATI_PCIGART_PAGE_SIZE-1)) 38 39#define R600_PTE_VALID (1 << 0) 40#define R600_PTE_SYSTEM (1 << 1) 41#define R600_PTE_SNOOPED (1 << 2) 42#define R600_PTE_READABLE (1 << 5) 43#define R600_PTE_WRITEABLE (1 << 6) 44 45/* MAX values used for gfx init */ 46#define R6XX_MAX_SH_GPRS 256 47#define R6XX_MAX_TEMP_GPRS 16 48#define R6XX_MAX_SH_THREADS 256 49#define R6XX_MAX_SH_STACK_ENTRIES 4096 50#define R6XX_MAX_BACKENDS 8 51#define R6XX_MAX_BACKENDS_MASK 0xff 52#define R6XX_MAX_SIMDS 8 53#define R6XX_MAX_SIMDS_MASK 0xff 54#define R6XX_MAX_PIPES 8 55#define R6XX_MAX_PIPES_MASK 0xff 56 57#define R7XX_MAX_SH_GPRS 256 58#define R7XX_MAX_TEMP_GPRS 16 59#define R7XX_MAX_SH_THREADS 256 60#define R7XX_MAX_SH_STACK_ENTRIES 4096 61#define R7XX_MAX_BACKENDS 8 62#define R7XX_MAX_BACKENDS_MASK 0xff 63#define R7XX_MAX_SIMDS 16 64#define R7XX_MAX_SIMDS_MASK 0xffff 65#define R7XX_MAX_PIPES 8 66#define R7XX_MAX_PIPES_MASK 0xff 67 68static int r600_do_wait_for_fifo(drm_radeon_private_t *dev_priv, int entries) 69{ 70 int i; 71 72 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; 73 74 for (i = 0; i < dev_priv->usec_timeout; i++) { 75 int slots; 76 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) 77 slots = (RADEON_READ(R600_GRBM_STATUS) 78 & R700_CMDFIFO_AVAIL_MASK); 79 else 80 slots = (RADEON_READ(R600_GRBM_STATUS) 81 & R600_CMDFIFO_AVAIL_MASK); 82 if (slots >= entries) 83 return 0; 84 DRM_UDELAY(1); 85 } 86 DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n", 87 RADEON_READ(R600_GRBM_STATUS), 88 RADEON_READ(R600_GRBM_STATUS2)); 89 90 return -EBUSY; 91} 92 93static int r600_do_wait_for_idle(drm_radeon_private_t *dev_priv) 94{ 95 int i, ret; 96 97 
dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; 98 99 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) 100 ret = r600_do_wait_for_fifo(dev_priv, 8); 101 else 102 ret = r600_do_wait_for_fifo(dev_priv, 16); 103 if (ret) 104 return ret; 105 for (i = 0; i < dev_priv->usec_timeout; i++) { 106 if (!(RADEON_READ(R600_GRBM_STATUS) & R600_GUI_ACTIVE)) 107 return 0; 108 DRM_UDELAY(1); 109 } 110 DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n", 111 RADEON_READ(R600_GRBM_STATUS), 112 RADEON_READ(R600_GRBM_STATUS2)); 113 114 return -EBUSY; 115} 116 117void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info) 118{ 119 struct drm_sg_mem *entry = dev->sg; 120 int max_pages; 121 int pages; 122 int i; 123 124 if (gart_info->bus_addr) { 125 max_pages = (gart_info->table_size / sizeof(u32)); 126 pages = (entry->pages <= max_pages) 127 ? entry->pages : max_pages; 128 129 for (i = 0; i < pages; i++) { 130 if (!entry->busaddr[i]) 131 break; 132 pci_unmap_single(dev->pdev, entry->busaddr[i], 133 PAGE_SIZE, PCI_DMA_TODEVICE); 134 } 135 if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) 136 gart_info->bus_addr = 0; 137 } 138} 139 140/* R600 has page table setup */ 141int r600_page_table_init(struct drm_device *dev) 142{ 143 drm_radeon_private_t *dev_priv = dev->dev_private; 144 struct drm_ati_pcigart_info *gart_info = &dev_priv->gart_info; 145 struct drm_sg_mem *entry = dev->sg; 146 int ret = 0; 147 int i, j; 148 int max_pages, pages; 149 u64 *pci_gart, page_base; 150 dma_addr_t entry_addr; 151 152 /* okay page table is available - lets rock */ 153 154 /* PTEs are 64-bits */ 155 pci_gart = (u64 *)gart_info->addr; 156 157 max_pages = (gart_info->table_size / sizeof(u64)); 158 pages = (entry->pages <= max_pages) ? 
entry->pages : max_pages; 159 160 memset(pci_gart, 0, max_pages * sizeof(u64)); 161 162 for (i = 0; i < pages; i++) { 163 entry->busaddr[i] = pci_map_single(dev->pdev, 164 page_address(entry-> 165 pagelist[i]), 166 PAGE_SIZE, PCI_DMA_TODEVICE); 167 if (entry->busaddr[i] == 0) { 168 DRM_ERROR("unable to map PCIGART pages!\n"); 169 r600_page_table_cleanup(dev, gart_info); 170 ret = -EINVAL; 171 goto done; 172 } 173 entry_addr = entry->busaddr[i]; 174 for (j = 0; j < (PAGE_SIZE / ATI_PCIGART_PAGE_SIZE); j++) { 175 page_base = (u64) entry_addr & ATI_PCIGART_PAGE_MASK; 176 page_base |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; 177 page_base |= R600_PTE_READABLE | R600_PTE_WRITEABLE; 178 179 *pci_gart = page_base; 180 181 if ((i % 128) == 0) 182 DRM_DEBUG("page entry %d: 0x%016llx\n", 183 i, (unsigned long long)page_base); 184 pci_gart++; 185 entry_addr += ATI_PCIGART_PAGE_SIZE; 186 } 187 } 188done: 189 return ret; 190} 191 192static void r600_vm_flush_gart_range(struct drm_device *dev) 193{ 194 drm_radeon_private_t *dev_priv = dev->dev_private; 195 u32 resp, countdown = 1000; 196 RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_LOW_ADDR, dev_priv->gart_vm_start >> 12); 197 RADEON_WRITE(R600_VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 198 RADEON_WRITE(R600_VM_CONTEXT0_REQUEST_RESPONSE, 2); 199 200 do { 201 resp = RADEON_READ(R600_VM_CONTEXT0_REQUEST_RESPONSE); 202 countdown--; 203 DRM_UDELAY(1); 204 } while (((resp & 0xf0) == 0) && countdown); 205} 206 207static void r600_vm_init(struct drm_device *dev) 208{ 209 drm_radeon_private_t *dev_priv = dev->dev_private; 210 /* initialise the VM to use the page table we constructed up there */ 211 u32 vm_c0, i; 212 u32 mc_rd_a; 213 u32 vm_l2_cntl, vm_l2_cntl3; 214 /* okay set up the PCIE aperture type thingo */ 215 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12); 216 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start 
+ dev_priv->gart_size - 1) >> 12); 217 RADEON_WRITE(R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 218 219 /* setup MC RD a */ 220 mc_rd_a = R600_MCD_L1_TLB | R600_MCD_L1_FRAG_PROC | R600_MCD_SYSTEM_ACCESS_MODE_IN_SYS | 221 R600_MCD_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | R600_MCD_EFFECTIVE_L1_TLB_SIZE(5) | 222 R600_MCD_EFFECTIVE_L1_QUEUE_SIZE(5) | R600_MCD_WAIT_L2_QUERY; 223 224 RADEON_WRITE(R600_MCD_RD_A_CNTL, mc_rd_a); 225 RADEON_WRITE(R600_MCD_RD_B_CNTL, mc_rd_a); 226 227 RADEON_WRITE(R600_MCD_WR_A_CNTL, mc_rd_a); 228 RADEON_WRITE(R600_MCD_WR_B_CNTL, mc_rd_a); 229 230 RADEON_WRITE(R600_MCD_RD_GFX_CNTL, mc_rd_a); 231 RADEON_WRITE(R600_MCD_WR_GFX_CNTL, mc_rd_a); 232 233 RADEON_WRITE(R600_MCD_RD_SYS_CNTL, mc_rd_a); 234 RADEON_WRITE(R600_MCD_WR_SYS_CNTL, mc_rd_a); 235 236 RADEON_WRITE(R600_MCD_RD_HDP_CNTL, mc_rd_a | R600_MCD_L1_STRICT_ORDERING); 237 RADEON_WRITE(R600_MCD_WR_HDP_CNTL, mc_rd_a /*| R600_MCD_L1_STRICT_ORDERING*/); 238 239 RADEON_WRITE(R600_MCD_RD_PDMA_CNTL, mc_rd_a); 240 RADEON_WRITE(R600_MCD_WR_PDMA_CNTL, mc_rd_a); 241 242 RADEON_WRITE(R600_MCD_RD_SEM_CNTL, mc_rd_a | R600_MCD_SEMAPHORE_MODE); 243 RADEON_WRITE(R600_MCD_WR_SEM_CNTL, mc_rd_a); 244 245 vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W; 246 vm_l2_cntl |= R600_VM_L2_CNTL_QUEUE_SIZE(7); 247 RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl); 248 249 RADEON_WRITE(R600_VM_L2_CNTL2, 0); 250 vm_l2_cntl3 = (R600_VM_L2_CNTL3_BANK_SELECT_0(0) | 251 R600_VM_L2_CNTL3_BANK_SELECT_1(1) | 252 R600_VM_L2_CNTL3_CACHE_UPDATE_MODE(2)); 253 RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3); 254 255 vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT; 256 257 RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0); 258 259 vm_c0 &= ~R600_VM_ENABLE_CONTEXT; 260 261 /* disable all other contexts */ 262 for (i = 1; i < 8; i++) 263 RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0); 264 265 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12); 266 
RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12); 267 RADEON_WRITE(R600_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 268 269 r600_vm_flush_gart_range(dev); 270} 271 272/* load r600 microcode */ 273static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) 274{ 275 int i; 276 277 r600_do_cp_stop(dev_priv); 278 279 RADEON_WRITE(R600_CP_RB_CNTL, 280 R600_RB_NO_UPDATE | 281 R600_RB_BLKSZ(15) | 282 R600_RB_BUFSZ(3)); 283 284 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 285 RADEON_READ(R600_GRBM_SOFT_RESET); 286 DRM_UDELAY(15000); 287 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 288 289 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 290 291 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600)) { 292 DRM_INFO("Loading R600 CP Microcode\n"); 293 for (i = 0; i < PM4_UCODE_SIZE; i++) { 294 RADEON_WRITE(R600_CP_ME_RAM_DATA, 295 R600_cp_microcode[i][0]); 296 RADEON_WRITE(R600_CP_ME_RAM_DATA, 297 R600_cp_microcode[i][1]); 298 RADEON_WRITE(R600_CP_ME_RAM_DATA, 299 R600_cp_microcode[i][2]); 300 } 301 302 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 303 DRM_INFO("Loading R600 PFP Microcode\n"); 304 for (i = 0; i < PFP_UCODE_SIZE; i++) 305 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, R600_pfp_microcode[i]); 306 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610)) { 307 DRM_INFO("Loading RV610 CP Microcode\n"); 308 for (i = 0; i < PM4_UCODE_SIZE; i++) { 309 RADEON_WRITE(R600_CP_ME_RAM_DATA, 310 RV610_cp_microcode[i][0]); 311 RADEON_WRITE(R600_CP_ME_RAM_DATA, 312 RV610_cp_microcode[i][1]); 313 RADEON_WRITE(R600_CP_ME_RAM_DATA, 314 RV610_cp_microcode[i][2]); 315 } 316 317 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 318 DRM_INFO("Loading RV610 PFP Microcode\n"); 319 for (i = 0; i < PFP_UCODE_SIZE; i++) 320 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV610_pfp_microcode[i]); 321 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) { 322 DRM_INFO("Loading RV630 CP Microcode\n"); 323 for (i 
= 0; i < PM4_UCODE_SIZE; i++) { 324 RADEON_WRITE(R600_CP_ME_RAM_DATA, 325 RV630_cp_microcode[i][0]); 326 RADEON_WRITE(R600_CP_ME_RAM_DATA, 327 RV630_cp_microcode[i][1]); 328 RADEON_WRITE(R600_CP_ME_RAM_DATA, 329 RV630_cp_microcode[i][2]); 330 } 331 332 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 333 DRM_INFO("Loading RV630 PFP Microcode\n"); 334 for (i = 0; i < PFP_UCODE_SIZE; i++) 335 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV630_pfp_microcode[i]); 336 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620)) { 337 DRM_INFO("Loading RV620 CP Microcode\n"); 338 for (i = 0; i < PM4_UCODE_SIZE; i++) { 339 RADEON_WRITE(R600_CP_ME_RAM_DATA, 340 RV620_cp_microcode[i][0]); 341 RADEON_WRITE(R600_CP_ME_RAM_DATA, 342 RV620_cp_microcode[i][1]); 343 RADEON_WRITE(R600_CP_ME_RAM_DATA, 344 RV620_cp_microcode[i][2]); 345 } 346 347 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 348 DRM_INFO("Loading RV620 PFP Microcode\n"); 349 for (i = 0; i < PFP_UCODE_SIZE; i++) 350 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV620_pfp_microcode[i]); 351 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) { 352 DRM_INFO("Loading RV635 CP Microcode\n"); 353 for (i = 0; i < PM4_UCODE_SIZE; i++) { 354 RADEON_WRITE(R600_CP_ME_RAM_DATA, 355 RV635_cp_microcode[i][0]); 356 RADEON_WRITE(R600_CP_ME_RAM_DATA, 357 RV635_cp_microcode[i][1]); 358 RADEON_WRITE(R600_CP_ME_RAM_DATA, 359 RV635_cp_microcode[i][2]); 360 } 361 362 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 363 DRM_INFO("Loading RV635 PFP Microcode\n"); 364 for (i = 0; i < PFP_UCODE_SIZE; i++) 365 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV635_pfp_microcode[i]); 366 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670)) { 367 DRM_INFO("Loading RV670 CP Microcode\n"); 368 for (i = 0; i < PM4_UCODE_SIZE; i++) { 369 RADEON_WRITE(R600_CP_ME_RAM_DATA, 370 RV670_cp_microcode[i][0]); 371 RADEON_WRITE(R600_CP_ME_RAM_DATA, 372 RV670_cp_microcode[i][1]); 373 RADEON_WRITE(R600_CP_ME_RAM_DATA, 374 RV670_cp_microcode[i][2]); 375 } 376 377 
RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 378 DRM_INFO("Loading RV670 PFP Microcode\n"); 379 for (i = 0; i < PFP_UCODE_SIZE; i++) 380 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV670_pfp_microcode[i]); 381 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { 382 DRM_INFO("Loading RS780 CP Microcode\n"); 383 for (i = 0; i < PM4_UCODE_SIZE; i++) { 384 RADEON_WRITE(R600_CP_ME_RAM_DATA, 385 RV670_cp_microcode[i][0]); 386 RADEON_WRITE(R600_CP_ME_RAM_DATA, 387 RV670_cp_microcode[i][1]); 388 RADEON_WRITE(R600_CP_ME_RAM_DATA, 389 RV670_cp_microcode[i][2]); 390 } 391 392 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 393 DRM_INFO("Loading RS780 PFP Microcode\n"); 394 for (i = 0; i < PFP_UCODE_SIZE; i++) 395 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV670_pfp_microcode[i]); 396 } 397 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 398 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 399 RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0); 400 401} 402 403static void r700_vm_init(struct drm_device *dev) 404{ 405 drm_radeon_private_t *dev_priv = dev->dev_private; 406 /* initialise the VM to use the page table we constructed up there */ 407 u32 vm_c0, i; 408 u32 mc_vm_md_l1; 409 u32 vm_l2_cntl, vm_l2_cntl3; 410 /* okay set up the PCIE aperture type thingo */ 411 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_LOW_ADDR, dev_priv->gart_vm_start >> 12); 412 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 413 RADEON_WRITE(R700_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 414 415 mc_vm_md_l1 = R700_ENABLE_L1_TLB | 416 R700_ENABLE_L1_FRAGMENT_PROCESSING | 417 R700_SYSTEM_ACCESS_MODE_IN_SYS | 418 R700_SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 419 R700_EFFECTIVE_L1_TLB_SIZE(5) | 420 R700_EFFECTIVE_L1_QUEUE_SIZE(5); 421 422 RADEON_WRITE(R700_MC_VM_MD_L1_TLB0_CNTL, mc_vm_md_l1); 423 RADEON_WRITE(R700_MC_VM_MD_L1_TLB1_CNTL, mc_vm_md_l1); 424 RADEON_WRITE(R700_MC_VM_MD_L1_TLB2_CNTL, mc_vm_md_l1); 425 RADEON_WRITE(R700_MC_VM_MB_L1_TLB0_CNTL, mc_vm_md_l1); 426 
RADEON_WRITE(R700_MC_VM_MB_L1_TLB1_CNTL, mc_vm_md_l1); 427 RADEON_WRITE(R700_MC_VM_MB_L1_TLB2_CNTL, mc_vm_md_l1); 428 RADEON_WRITE(R700_MC_VM_MB_L1_TLB3_CNTL, mc_vm_md_l1); 429 430 vm_l2_cntl = R600_VM_L2_CACHE_EN | R600_VM_L2_FRAG_PROC | R600_VM_ENABLE_PTE_CACHE_LRU_W; 431 vm_l2_cntl |= R700_VM_L2_CNTL_QUEUE_SIZE(7); 432 RADEON_WRITE(R600_VM_L2_CNTL, vm_l2_cntl); 433 434 RADEON_WRITE(R600_VM_L2_CNTL2, 0); 435 vm_l2_cntl3 = R700_VM_L2_CNTL3_BANK_SELECT(0) | R700_VM_L2_CNTL3_CACHE_UPDATE_MODE(2); 436 RADEON_WRITE(R600_VM_L2_CNTL3, vm_l2_cntl3); 437 438 vm_c0 = R600_VM_ENABLE_CONTEXT | R600_VM_PAGE_TABLE_DEPTH_FLAT; 439 440 RADEON_WRITE(R600_VM_CONTEXT0_CNTL, vm_c0); 441 442 vm_c0 &= ~R600_VM_ENABLE_CONTEXT; 443 444 /* disable all other contexts */ 445 for (i = 1; i < 8; i++) 446 RADEON_WRITE(R600_VM_CONTEXT0_CNTL + (i * 4), vm_c0); 447 448 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, dev_priv->gart_info.bus_addr >> 12); 449 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_START_ADDR, dev_priv->gart_vm_start >> 12); 450 RADEON_WRITE(R700_VM_CONTEXT0_PAGE_TABLE_END_ADDR, (dev_priv->gart_vm_start + dev_priv->gart_size - 1) >> 12); 451 452 r600_vm_flush_gart_range(dev); 453} 454 455/* load r600 microcode */ 456static void r700_cp_load_microcode(drm_radeon_private_t *dev_priv) 457{ 458 int i; 459 460 r600_do_cp_stop(dev_priv); 461 462 RADEON_WRITE(R600_CP_RB_CNTL, 463 R600_RB_NO_UPDATE | 464 (15 << 8) | 465 (3 << 0)); 466 467 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 468 RADEON_READ(R600_GRBM_SOFT_RESET); 469 DRM_UDELAY(15000); 470 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 471 472 473 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)) { 474 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 475 DRM_INFO("Loading RV770 PFP Microcode\n"); 476 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 477 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV770_pfp_microcode[i]); 478 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 479 480 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 481 DRM_INFO("Loading 
RV770 CP Microcode\n"); 482 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 483 RADEON_WRITE(R600_CP_ME_RAM_DATA, RV770_cp_microcode[i]); 484 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 485 486 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV730)) { 487 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 488 DRM_INFO("Loading RV730 PFP Microcode\n"); 489 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 490 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV730_pfp_microcode[i]); 491 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 492 493 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 494 DRM_INFO("Loading RV730 CP Microcode\n"); 495 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 496 RADEON_WRITE(R600_CP_ME_RAM_DATA, RV730_cp_microcode[i]); 497 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 498 499 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) { 500 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 501 DRM_INFO("Loading RV710 PFP Microcode\n"); 502 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 503 RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV710_pfp_microcode[i]); 504 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 505 506 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 507 DRM_INFO("Loading RV710 CP Microcode\n"); 508 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 509 RADEON_WRITE(R600_CP_ME_RAM_DATA, RV710_cp_microcode[i]); 510 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 511 512 } 513 RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); 514 RADEON_WRITE(R600_CP_ME_RAM_WADDR, 0); 515 RADEON_WRITE(R600_CP_ME_RAM_RADDR, 0); 516 517} 518 519static void r600_test_writeback(drm_radeon_private_t *dev_priv) 520{ 521 u32 tmp; 522 523 /* Start with assuming that writeback doesn't work */ 524 dev_priv->writeback_works = 0; 525 526 /* Writeback doesn't seem to work everywhere, test it here and possibly 527 * enable it if it appears to work 528 */ 529 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0); 530 531 RADEON_WRITE(R600_SCRATCH_REG1, 0xdeadbeef); 532 533 for (tmp = 0; tmp < dev_priv->usec_timeout; tmp++) { 534 u32 val; 535 536 val = 
radeon_read_ring_rptr(dev_priv, R600_SCRATCHOFF(1)); 537 if (val == 0xdeadbeef) 538 break; 539 DRM_UDELAY(1); 540 } 541 542 if (tmp < dev_priv->usec_timeout) { 543 dev_priv->writeback_works = 1; 544 DRM_INFO("writeback test succeeded in %d usecs\n", tmp); 545 } else { 546 dev_priv->writeback_works = 0; 547 DRM_INFO("writeback test failed\n"); 548 } 549 if (radeon_no_wb == 1) { 550 dev_priv->writeback_works = 0; 551 DRM_INFO("writeback forced off\n"); 552 } 553 554 if (!dev_priv->writeback_works) { 555 /* Disable writeback to avoid unnecessary bus master transfer */ 556 RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) | 557 RADEON_RB_NO_UPDATE); 558 RADEON_WRITE(R600_SCRATCH_UMSK, 0); 559 } 560} 561 562int r600_do_engine_reset(struct drm_device *dev) 563{ 564 drm_radeon_private_t *dev_priv = dev->dev_private; 565 u32 cp_ptr, cp_me_cntl, cp_rb_cntl; 566 567 DRM_INFO("Resetting GPU\n"); 568 569 cp_ptr = RADEON_READ(R600_CP_RB_WPTR); 570 cp_me_cntl = RADEON_READ(R600_CP_ME_CNTL); 571 RADEON_WRITE(R600_CP_ME_CNTL, R600_CP_ME_HALT); 572 573 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0x7fff); 574 RADEON_READ(R600_GRBM_SOFT_RESET); 575 DRM_UDELAY(50); 576 RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 577 RADEON_READ(R600_GRBM_SOFT_RESET); 578 579 RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); 580 cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL); 581 RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA); 582 583 RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr); 584 RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr); 585 RADEON_WRITE(R600_CP_RB_CNTL, cp_rb_cntl); 586 RADEON_WRITE(R600_CP_ME_CNTL, cp_me_cntl); 587 588 /* Reset the CP ring */ 589 r600_do_cp_reset(dev_priv); 590 591 /* The CP is no longer running after an engine reset */ 592 dev_priv->cp_running = 0; 593 594 /* Reset any pending vertex, indirect buffers */ 595 radeon_freelist_reset(dev); 596 597 return 0; 598 599} 600 601static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes, 602 u32 num_backends, 603 u32 backend_disable_mask) 604{ 605 u32 
backend_map = 0; 606 u32 enabled_backends_mask; 607 u32 enabled_backends_count; 608 u32 cur_pipe; 609 u32 swizzle_pipe[R6XX_MAX_PIPES]; 610 u32 cur_backend; 611 u32 i; 612 613 if (num_tile_pipes > R6XX_MAX_PIPES) 614 num_tile_pipes = R6XX_MAX_PIPES; 615 if (num_tile_pipes < 1) 616 num_tile_pipes = 1; 617 if (num_backends > R6XX_MAX_BACKENDS) 618 num_backends = R6XX_MAX_BACKENDS; 619 if (num_backends < 1) 620 num_backends = 1; 621 622 enabled_backends_mask = 0; 623 enabled_backends_count = 0; 624 for (i = 0; i < R6XX_MAX_BACKENDS; ++i) { 625 if (((backend_disable_mask >> i) & 1) == 0) { 626 enabled_backends_mask |= (1 << i); 627 ++enabled_backends_count; 628 } 629 if (enabled_backends_count == num_backends) 630 break; 631 } 632 633 if (enabled_backends_count == 0) { 634 enabled_backends_mask = 1; 635 enabled_backends_count = 1; 636 } 637 638 if (enabled_backends_count != num_backends) 639 num_backends = enabled_backends_count; 640 641 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R6XX_MAX_PIPES); 642 switch (num_tile_pipes) { 643 case 1: 644 swizzle_pipe[0] = 0; 645 break; 646 case 2: 647 swizzle_pipe[0] = 0; 648 swizzle_pipe[1] = 1; 649 break; 650 case 3: 651 swizzle_pipe[0] = 0; 652 swizzle_pipe[1] = 1; 653 swizzle_pipe[2] = 2; 654 break; 655 case 4: 656 swizzle_pipe[0] = 0; 657 swizzle_pipe[1] = 1; 658 swizzle_pipe[2] = 2; 659 swizzle_pipe[3] = 3; 660 break; 661 case 5: 662 swizzle_pipe[0] = 0; 663 swizzle_pipe[1] = 1; 664 swizzle_pipe[2] = 2; 665 swizzle_pipe[3] = 3; 666 swizzle_pipe[4] = 4; 667 break; 668 case 6: 669 swizzle_pipe[0] = 0; 670 swizzle_pipe[1] = 2; 671 swizzle_pipe[2] = 4; 672 swizzle_pipe[3] = 5; 673 swizzle_pipe[4] = 1; 674 swizzle_pipe[5] = 3; 675 break; 676 case 7: 677 swizzle_pipe[0] = 0; 678 swizzle_pipe[1] = 2; 679 swizzle_pipe[2] = 4; 680 swizzle_pipe[3] = 6; 681 swizzle_pipe[4] = 1; 682 swizzle_pipe[5] = 3; 683 swizzle_pipe[6] = 5; 684 break; 685 case 8: 686 swizzle_pipe[0] = 0; 687 swizzle_pipe[1] = 2; 688 swizzle_pipe[2] = 4; 689 
/*
 * Count the bits that are set in @val.
 *
 * Returns the number of 1 bits, between 0 and 32.
 */
static int r600_count_pipe_bits(uint32_t val)
{
	int set_bits = 0;

	/* Once val is 0 no further bit can contribute, so stop early. */
	while (val != 0) {
		set_bits += (int)(val & 1);
		val >>= 1;
	}

	return set_bits;
}
break; 761 case CHIP_RV630: 762 case CHIP_RV635: 763 dev_priv->r600_max_pipes = 2; 764 dev_priv->r600_max_tile_pipes = 2; 765 dev_priv->r600_max_simds = 3; 766 dev_priv->r600_max_backends = 1; 767 dev_priv->r600_max_gprs = 128; 768 dev_priv->r600_max_threads = 192; 769 dev_priv->r600_max_stack_entries = 128; 770 dev_priv->r600_max_hw_contexts = 8; 771 dev_priv->r600_max_gs_threads = 4; 772 dev_priv->r600_sx_max_export_size = 128; 773 dev_priv->r600_sx_max_export_pos_size = 16; 774 dev_priv->r600_sx_max_export_smx_size = 128; 775 dev_priv->r600_sq_num_cf_insts = 2; 776 break; 777 case CHIP_RV610: 778 case CHIP_RS780: 779 case CHIP_RV620: 780 dev_priv->r600_max_pipes = 1; 781 dev_priv->r600_max_tile_pipes = 1; 782 dev_priv->r600_max_simds = 2; 783 dev_priv->r600_max_backends = 1; 784 dev_priv->r600_max_gprs = 128; 785 dev_priv->r600_max_threads = 192; 786 dev_priv->r600_max_stack_entries = 128; 787 dev_priv->r600_max_hw_contexts = 4; 788 dev_priv->r600_max_gs_threads = 4; 789 dev_priv->r600_sx_max_export_size = 128; 790 dev_priv->r600_sx_max_export_pos_size = 16; 791 dev_priv->r600_sx_max_export_smx_size = 128; 792 dev_priv->r600_sq_num_cf_insts = 1; 793 break; 794 case CHIP_RV670: 795 dev_priv->r600_max_pipes = 4; 796 dev_priv->r600_max_tile_pipes = 4; 797 dev_priv->r600_max_simds = 4; 798 dev_priv->r600_max_backends = 4; 799 dev_priv->r600_max_gprs = 192; 800 dev_priv->r600_max_threads = 192; 801 dev_priv->r600_max_stack_entries = 256; 802 dev_priv->r600_max_hw_contexts = 8; 803 dev_priv->r600_max_gs_threads = 16; 804 dev_priv->r600_sx_max_export_size = 128; 805 dev_priv->r600_sx_max_export_pos_size = 16; 806 dev_priv->r600_sx_max_export_smx_size = 128; 807 dev_priv->r600_sq_num_cf_insts = 2; 808 break; 809 default: 810 break; 811 } 812 813 /* Initialize HDP */ 814 j = 0; 815 for (i = 0; i < 32; i++) { 816 RADEON_WRITE((0x2c14 + j), 0x00000000); 817 RADEON_WRITE((0x2c18 + j), 0x00000000); 818 RADEON_WRITE((0x2c1c + j), 0x00000000); 819 RADEON_WRITE((0x2c20 + j), 
0x00000000); 820 RADEON_WRITE((0x2c24 + j), 0x00000000); 821 j += 0x18; 822 } 823 824 RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff)); 825 826 /* setup tiling, simd, pipe config */ 827 ramcfg = RADEON_READ(R600_RAMCFG); 828 829 switch (dev_priv->r600_max_tile_pipes) { 830 case 1: 831 gb_tiling_config |= R600_PIPE_TILING(0); 832 break; 833 case 2: 834 gb_tiling_config |= R600_PIPE_TILING(1); 835 break; 836 case 4: 837 gb_tiling_config |= R600_PIPE_TILING(2); 838 break; 839 case 8: 840 gb_tiling_config |= R600_PIPE_TILING(3); 841 break; 842 default: 843 break; 844 } 845 846 gb_tiling_config |= R600_BANK_TILING((ramcfg >> R600_NOOFBANK_SHIFT) & R600_NOOFBANK_MASK); 847 848 gb_tiling_config |= R600_GROUP_SIZE(0); 849 850 if (((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK) > 3) { 851 gb_tiling_config |= R600_ROW_TILING(3); 852 gb_tiling_config |= R600_SAMPLE_SPLIT(3); 853 } else { 854 gb_tiling_config |= 855 R600_ROW_TILING(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK)); 856 gb_tiling_config |= 857 R600_SAMPLE_SPLIT(((ramcfg >> R600_NOOFROWS_SHIFT) & R600_NOOFROWS_MASK)); 858 } 859 860 gb_tiling_config |= R600_BANK_SWAPS(1); 861 862 backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, 863 dev_priv->r600_max_backends, 864 (0xff << dev_priv->r600_max_backends) & 0xff); 865 gb_tiling_config |= R600_BACKEND_MAP(backend_map); 866 867 cc_gc_shader_pipe_config = 868 R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK); 869 cc_gc_shader_pipe_config |= 870 R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK); 871 872 cc_rb_backend_disable = 873 R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK); 874 875 RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config); 876 RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 877 RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 
0xffff)); 878 879 RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); 880 RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 881 RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 882 883 num_qd_pipes = 884 R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK); 885 RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK); 886 RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK); 887 888 /* set HW defaults for 3D engine */ 889 RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) | 890 R600_ROQ_IB2_START(0x2b))); 891 892 RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, (R600_MEQ_END(0x40) | 893 R600_ROQ_END(0x40))); 894 895 RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO | 896 R600_SYNC_GRADIENT | 897 R600_SYNC_WALKER | 898 R600_SYNC_ALIGNER)); 899 900 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) 901 RADEON_WRITE(R600_ARB_GDEC_RD_CNTL, 0x00000021); 902 903 sx_debug_1 = RADEON_READ(R600_SX_DEBUG_1); 904 sx_debug_1 |= R600_SMX_EVENT_RELEASE; 905 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600)) 906 sx_debug_1 |= R600_ENABLE_NEW_SMX_ADDRESS; 907 RADEON_WRITE(R600_SX_DEBUG_1, sx_debug_1); 908 909 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) || 910 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || 911 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 912 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 913 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) 914 RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE); 915 else 916 RADEON_WRITE(R600_DB_DEBUG, 0); 917 918 RADEON_WRITE(R600_DB_WATERMARKS, (R600_DEPTH_FREE(4) | 919 R600_DEPTH_FLUSH(16) | 920 R600_DEPTH_PENDING_FREE(4) | 921 R600_DEPTH_CACHELINE_FREE(16))); 922 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 923 RADEON_WRITE(R600_VGT_NUM_INSTANCES, 0); 924 925 
RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0)); 926 RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(0)); 927 928 sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES); 929 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 930 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 931 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { 932 sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) | 933 R600_FETCH_FIFO_HIWATER(0xa) | 934 R600_DONE_FIFO_HIWATER(0xe0) | 935 R600_ALU_UPDATE_FIFO_HIWATER(0x8)); 936 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) || 937 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630)) { 938 sq_ms_fifo_sizes &= ~R600_DONE_FIFO_HIWATER(0xff); 939 sq_ms_fifo_sizes |= R600_DONE_FIFO_HIWATER(0x4); 940 } 941 RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); 942 943 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 944 * should be adjusted as needed by the 2D/3D drivers. This just sets default values 945 */ 946 sq_config = RADEON_READ(R600_SQ_CONFIG); 947 sq_config &= ~(R600_PS_PRIO(3) | 948 R600_VS_PRIO(3) | 949 R600_GS_PRIO(3) | 950 R600_ES_PRIO(3)); 951 sq_config |= (R600_DX9_CONSTS | 952 R600_VC_ENABLE | 953 R600_PS_PRIO(0) | 954 R600_VS_PRIO(1) | 955 R600_GS_PRIO(2) | 956 R600_ES_PRIO(3)); 957 958 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_R600) { 959 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(124) | 960 R600_NUM_VS_GPRS(124) | 961 R600_NUM_CLAUSE_TEMP_GPRS(4)); 962 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(0) | 963 R600_NUM_ES_GPRS(0)); 964 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(136) | 965 R600_NUM_VS_THREADS(48) | 966 R600_NUM_GS_THREADS(4) | 967 R600_NUM_ES_THREADS(4)); 968 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(128) | 969 R600_NUM_VS_STACK_ENTRIES(128)); 970 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(0) | 971 R600_NUM_ES_STACK_ENTRIES(0)); 972 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == 
CHIP_RV610) || 973 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 974 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { 975 /* no vertex cache */ 976 sq_config &= ~R600_VC_ENABLE; 977 978 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 979 R600_NUM_VS_GPRS(44) | 980 R600_NUM_CLAUSE_TEMP_GPRS(2)); 981 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) | 982 R600_NUM_ES_GPRS(17)); 983 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 984 R600_NUM_VS_THREADS(78) | 985 R600_NUM_GS_THREADS(4) | 986 R600_NUM_ES_THREADS(31)); 987 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) | 988 R600_NUM_VS_STACK_ENTRIES(40)); 989 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) | 990 R600_NUM_ES_STACK_ENTRIES(16)); 991 } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || 992 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV635)) { 993 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 994 R600_NUM_VS_GPRS(44) | 995 R600_NUM_CLAUSE_TEMP_GPRS(2)); 996 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(18) | 997 R600_NUM_ES_GPRS(18)); 998 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 999 R600_NUM_VS_THREADS(78) | 1000 R600_NUM_GS_THREADS(4) | 1001 R600_NUM_ES_THREADS(31)); 1002 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(40) | 1003 R600_NUM_VS_STACK_ENTRIES(40)); 1004 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(32) | 1005 R600_NUM_ES_STACK_ENTRIES(16)); 1006 } else if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV670) { 1007 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(44) | 1008 R600_NUM_VS_GPRS(44) | 1009 R600_NUM_CLAUSE_TEMP_GPRS(2)); 1010 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(17) | 1011 R600_NUM_ES_GPRS(17)); 1012 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(79) | 1013 R600_NUM_VS_THREADS(78) | 1014 R600_NUM_GS_THREADS(4) | 1015 R600_NUM_ES_THREADS(31)); 1016 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(64) | 1017 R600_NUM_VS_STACK_ENTRIES(64)); 1018 sq_stack_resource_mgmt_2 = 
(R600_NUM_GS_STACK_ENTRIES(64) | 1019 R600_NUM_ES_STACK_ENTRIES(64)); 1020 } 1021 1022 RADEON_WRITE(R600_SQ_CONFIG, sq_config); 1023 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, sq_gpr_resource_mgmt_1); 1024 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, sq_gpr_resource_mgmt_2); 1025 RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 1026 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1); 1027 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2); 1028 1029 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || 1030 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || 1031 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) 1032 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY)); 1033 else 1034 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC)); 1035 1036 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_2S, (R600_S0_X(0xc) | 1037 R600_S0_Y(0x4) | 1038 R600_S1_X(0x4) | 1039 R600_S1_Y(0xc))); 1040 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_4S, (R600_S0_X(0xe) | 1041 R600_S0_Y(0xe) | 1042 R600_S1_X(0x2) | 1043 R600_S1_Y(0x2) | 1044 R600_S2_X(0xa) | 1045 R600_S2_Y(0x6) | 1046 R600_S3_X(0x6) | 1047 R600_S3_Y(0xa))); 1048 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD0, (R600_S0_X(0xe) | 1049 R600_S0_Y(0xb) | 1050 R600_S1_X(0x4) | 1051 R600_S1_Y(0xc) | 1052 R600_S2_X(0x1) | 1053 R600_S2_Y(0x6) | 1054 R600_S3_X(0xa) | 1055 R600_S3_Y(0xe))); 1056 RADEON_WRITE(R600_PA_SC_AA_SAMPLE_LOCS_8S_WD1, (R600_S4_X(0x6) | 1057 R600_S4_Y(0x1) | 1058 R600_S5_X(0x0) | 1059 R600_S5_Y(0x0) | 1060 R600_S6_X(0xb) | 1061 R600_S6_Y(0x4) | 1062 R600_S7_X(0x7) | 1063 R600_S7_Y(0x8))); 1064 1065 1066 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1067 case CHIP_R600: 1068 case CHIP_RV630: 1069 case CHIP_RV635: 1070 gs_prim_buffer_depth = 0; 1071 break; 1072 case CHIP_RV610: 1073 case CHIP_RS780: 1074 case CHIP_RV620: 1075 gs_prim_buffer_depth = 32; 1076 break; 1077 case CHIP_RV670: 1078 
gs_prim_buffer_depth = 128; 1079 break; 1080 default: 1081 break; 1082 } 1083 1084 num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16; 1085 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 1086 /* Max value for this is 256 */ 1087 if (vgt_gs_per_es > 256) 1088 vgt_gs_per_es = 256; 1089 1090 RADEON_WRITE(R600_VGT_ES_PER_GS, 128); 1091 RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es); 1092 RADEON_WRITE(R600_VGT_GS_PER_VS, 2); 1093 RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16); 1094 1095 /* more default values. 2D/3D driver should adjust as needed */ 1096 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0); 1097 RADEON_WRITE(R600_VGT_STRMOUT_EN, 0); 1098 RADEON_WRITE(R600_SX_MISC, 0); 1099 RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0); 1100 RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0); 1101 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0); 1102 RADEON_WRITE(R600_SPI_INPUT_Z, 0); 1103 RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2)); 1104 RADEON_WRITE(R600_CB_COLOR7_FRAG, 0); 1105 1106 /* clear render buffer base addresses */ 1107 RADEON_WRITE(R600_CB_COLOR0_BASE, 0); 1108 RADEON_WRITE(R600_CB_COLOR1_BASE, 0); 1109 RADEON_WRITE(R600_CB_COLOR2_BASE, 0); 1110 RADEON_WRITE(R600_CB_COLOR3_BASE, 0); 1111 RADEON_WRITE(R600_CB_COLOR4_BASE, 0); 1112 RADEON_WRITE(R600_CB_COLOR5_BASE, 0); 1113 RADEON_WRITE(R600_CB_COLOR6_BASE, 0); 1114 RADEON_WRITE(R600_CB_COLOR7_BASE, 0); 1115 1116 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1117 case CHIP_RV610: 1118 case CHIP_RS780: 1119 case CHIP_RV620: 1120 tc_cntl = R600_TC_L2_SIZE(8); 1121 break; 1122 case CHIP_RV630: 1123 case CHIP_RV635: 1124 tc_cntl = R600_TC_L2_SIZE(4); 1125 break; 1126 case CHIP_R600: 1127 tc_cntl = R600_TC_L2_SIZE(0) | R600_L2_DISABLE_LATE_HIT; 1128 break; 1129 default: 1130 tc_cntl = R600_TC_L2_SIZE(0); 1131 break; 1132 } 1133 1134 RADEON_WRITE(R600_TC_CNTL, tc_cntl); 1135 1136 hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL); 1137 RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 1138 1139 
arb_pop = RADEON_READ(R600_ARB_POP); 1140 arb_pop |= R600_ENABLE_TC128; 1141 RADEON_WRITE(R600_ARB_POP, arb_pop); 1142 1143 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1144 RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA | 1145 R600_NUM_CLIP_SEQ(3))); 1146 RADEON_WRITE(R600_PA_SC_ENHANCE, R600_FORCE_EOV_MAX_CLK_CNT(4095)); 1147 1148} 1149 1150static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes, 1151 u32 num_backends, 1152 u32 backend_disable_mask) 1153{ 1154 u32 backend_map = 0; 1155 u32 enabled_backends_mask; 1156 u32 enabled_backends_count; 1157 u32 cur_pipe; 1158 u32 swizzle_pipe[R7XX_MAX_PIPES]; 1159 u32 cur_backend; 1160 u32 i; 1161 1162 if (num_tile_pipes > R7XX_MAX_PIPES) 1163 num_tile_pipes = R7XX_MAX_PIPES; 1164 if (num_tile_pipes < 1) 1165 num_tile_pipes = 1; 1166 if (num_backends > R7XX_MAX_BACKENDS) 1167 num_backends = R7XX_MAX_BACKENDS; 1168 if (num_backends < 1) 1169 num_backends = 1; 1170 1171 enabled_backends_mask = 0; 1172 enabled_backends_count = 0; 1173 for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { 1174 if (((backend_disable_mask >> i) & 1) == 0) { 1175 enabled_backends_mask |= (1 << i); 1176 ++enabled_backends_count; 1177 } 1178 if (enabled_backends_count == num_backends) 1179 break; 1180 } 1181 1182 if (enabled_backends_count == 0) { 1183 enabled_backends_mask = 1; 1184 enabled_backends_count = 1; 1185 } 1186 1187 if (enabled_backends_count != num_backends) 1188 num_backends = enabled_backends_count; 1189 1190 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); 1191 switch (num_tile_pipes) { 1192 case 1: 1193 swizzle_pipe[0] = 0; 1194 break; 1195 case 2: 1196 swizzle_pipe[0] = 0; 1197 swizzle_pipe[1] = 1; 1198 break; 1199 case 3: 1200 swizzle_pipe[0] = 0; 1201 swizzle_pipe[1] = 2; 1202 swizzle_pipe[2] = 1; 1203 break; 1204 case 4: 1205 swizzle_pipe[0] = 0; 1206 swizzle_pipe[1] = 2; 1207 swizzle_pipe[2] = 3; 1208 swizzle_pipe[3] = 1; 1209 break; 1210 case 5: 1211 swizzle_pipe[0] = 0; 1212 swizzle_pipe[1] = 
2; 1213 swizzle_pipe[2] = 4; 1214 swizzle_pipe[3] = 1; 1215 swizzle_pipe[4] = 3; 1216 break; 1217 case 6: 1218 swizzle_pipe[0] = 0; 1219 swizzle_pipe[1] = 2; 1220 swizzle_pipe[2] = 4; 1221 swizzle_pipe[3] = 5; 1222 swizzle_pipe[4] = 3; 1223 swizzle_pipe[5] = 1; 1224 break; 1225 case 7: 1226 swizzle_pipe[0] = 0; 1227 swizzle_pipe[1] = 2; 1228 swizzle_pipe[2] = 4; 1229 swizzle_pipe[3] = 6; 1230 swizzle_pipe[4] = 3; 1231 swizzle_pipe[5] = 1; 1232 swizzle_pipe[6] = 5; 1233 break; 1234 case 8: 1235 swizzle_pipe[0] = 0; 1236 swizzle_pipe[1] = 2; 1237 swizzle_pipe[2] = 4; 1238 swizzle_pipe[3] = 6; 1239 swizzle_pipe[4] = 3; 1240 swizzle_pipe[5] = 1; 1241 swizzle_pipe[6] = 7; 1242 swizzle_pipe[7] = 5; 1243 break; 1244 } 1245 1246 cur_backend = 0; 1247 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 1248 while (((1 << cur_backend) & enabled_backends_mask) == 0) 1249 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 1250 1251 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); 1252 1253 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 1254 } 1255 1256 return backend_map; 1257} 1258 1259static void r700_gfx_init(struct drm_device *dev, 1260 drm_radeon_private_t *dev_priv) 1261{ 1262 int i, j, num_qd_pipes; 1263 u32 sx_debug_1; 1264 u32 smx_dc_ctl0; 1265 u32 num_gs_verts_per_thread; 1266 u32 vgt_gs_per_es; 1267 u32 gs_prim_buffer_depth = 0; 1268 u32 sq_ms_fifo_sizes; 1269 u32 sq_config; 1270 u32 sq_thread_resource_mgmt; 1271 u32 hdp_host_path_cntl; 1272 u32 sq_dyn_gpr_size_simd_ab_0; 1273 u32 backend_map; 1274 u32 gb_tiling_config = 0; 1275 u32 cc_rb_backend_disable = 0; 1276 u32 cc_gc_shader_pipe_config = 0; 1277 u32 mc_arb_ramcfg; 1278 u32 db_debug4; 1279 1280 /* setup chip specs */ 1281 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1282 case CHIP_RV770: 1283 dev_priv->r600_max_pipes = 4; 1284 dev_priv->r600_max_tile_pipes = 8; 1285 dev_priv->r600_max_simds = 10; 1286 dev_priv->r600_max_backends = 4; 1287 dev_priv->r600_max_gprs = 
256; 1288 dev_priv->r600_max_threads = 248; 1289 dev_priv->r600_max_stack_entries = 512; 1290 dev_priv->r600_max_hw_contexts = 8; 1291 dev_priv->r600_max_gs_threads = 16 * 2; 1292 dev_priv->r600_sx_max_export_size = 128; 1293 dev_priv->r600_sx_max_export_pos_size = 16; 1294 dev_priv->r600_sx_max_export_smx_size = 112; 1295 dev_priv->r600_sq_num_cf_insts = 2; 1296 1297 dev_priv->r700_sx_num_of_sets = 7; 1298 dev_priv->r700_sc_prim_fifo_size = 0xF9; 1299 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1300 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1301 break; 1302 case CHIP_RV730: 1303 dev_priv->r600_max_pipes = 2; 1304 dev_priv->r600_max_tile_pipes = 4; 1305 dev_priv->r600_max_simds = 8; 1306 dev_priv->r600_max_backends = 2; 1307 dev_priv->r600_max_gprs = 128; 1308 dev_priv->r600_max_threads = 248; 1309 dev_priv->r600_max_stack_entries = 256; 1310 dev_priv->r600_max_hw_contexts = 8; 1311 dev_priv->r600_max_gs_threads = 16 * 2; 1312 dev_priv->r600_sx_max_export_size = 256; 1313 dev_priv->r600_sx_max_export_pos_size = 32; 1314 dev_priv->r600_sx_max_export_smx_size = 224; 1315 dev_priv->r600_sq_num_cf_insts = 2; 1316 1317 dev_priv->r700_sx_num_of_sets = 7; 1318 dev_priv->r700_sc_prim_fifo_size = 0xf9; 1319 dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1320 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1321 break; 1322 case CHIP_RV710: 1323 dev_priv->r600_max_pipes = 2; 1324 dev_priv->r600_max_tile_pipes = 2; 1325 dev_priv->r600_max_simds = 2; 1326 dev_priv->r600_max_backends = 1; 1327 dev_priv->r600_max_gprs = 256; 1328 dev_priv->r600_max_threads = 192; 1329 dev_priv->r600_max_stack_entries = 256; 1330 dev_priv->r600_max_hw_contexts = 4; 1331 dev_priv->r600_max_gs_threads = 8 * 2; 1332 dev_priv->r600_sx_max_export_size = 128; 1333 dev_priv->r600_sx_max_export_pos_size = 16; 1334 dev_priv->r600_sx_max_export_smx_size = 112; 1335 dev_priv->r600_sq_num_cf_insts = 1; 1336 1337 dev_priv->r700_sx_num_of_sets = 7; 1338 dev_priv->r700_sc_prim_fifo_size = 0x40; 1339 
dev_priv->r700_sc_hiz_tile_fifo_size = 0x30; 1340 dev_priv->r700_sc_earlyz_tile_fifo_fize = 0x130; 1341 break; 1342 default: 1343 break; 1344 } 1345 1346 /* Initialize HDP */ 1347 j = 0; 1348 for (i = 0; i < 32; i++) { 1349 RADEON_WRITE((0x2c14 + j), 0x00000000); 1350 RADEON_WRITE((0x2c18 + j), 0x00000000); 1351 RADEON_WRITE((0x2c1c + j), 0x00000000); 1352 RADEON_WRITE((0x2c20 + j), 0x00000000); 1353 RADEON_WRITE((0x2c24 + j), 0x00000000); 1354 j += 0x18; 1355 } 1356 1357 RADEON_WRITE(R600_GRBM_CNTL, R600_GRBM_READ_TIMEOUT(0xff)); 1358 1359 /* setup tiling, simd, pipe config */ 1360 mc_arb_ramcfg = RADEON_READ(R700_MC_ARB_RAMCFG); 1361 1362 switch (dev_priv->r600_max_tile_pipes) { 1363 case 1: 1364 gb_tiling_config |= R600_PIPE_TILING(0); 1365 break; 1366 case 2: 1367 gb_tiling_config |= R600_PIPE_TILING(1); 1368 break; 1369 case 4: 1370 gb_tiling_config |= R600_PIPE_TILING(2); 1371 break; 1372 case 8: 1373 gb_tiling_config |= R600_PIPE_TILING(3); 1374 break; 1375 default: 1376 break; 1377 } 1378 1379 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770) 1380 gb_tiling_config |= R600_BANK_TILING(1); 1381 else 1382 gb_tiling_config |= R600_BANK_TILING((mc_arb_ramcfg >> R700_NOOFBANK_SHIFT) & R700_NOOFBANK_MASK); 1383 1384 gb_tiling_config |= R600_GROUP_SIZE(0); 1385 1386 if (((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK) > 3) { 1387 gb_tiling_config |= R600_ROW_TILING(3); 1388 gb_tiling_config |= R600_SAMPLE_SPLIT(3); 1389 } else { 1390 gb_tiling_config |= 1391 R600_ROW_TILING(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK)); 1392 gb_tiling_config |= 1393 R600_SAMPLE_SPLIT(((mc_arb_ramcfg >> R700_NOOFROWS_SHIFT) & R700_NOOFROWS_MASK)); 1394 } 1395 1396 gb_tiling_config |= R600_BANK_SWAPS(1); 1397 1398 backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes, 1399 dev_priv->r600_max_backends, 1400 (0xff << dev_priv->r600_max_backends) & 0xff); 1401 gb_tiling_config |= R600_BACKEND_MAP(backend_map); 1402 1403 
cc_gc_shader_pipe_config = 1404 R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK); 1405 cc_gc_shader_pipe_config |= 1406 R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK); 1407 1408 cc_rb_backend_disable = 1409 R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK); 1410 1411 RADEON_WRITE(R600_GB_TILING_CONFIG, gb_tiling_config); 1412 RADEON_WRITE(R600_DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 1413 RADEON_WRITE(R600_HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 1414 1415 RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); 1416 RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 1417 RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 1418 1419 RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); 1420 RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0); 1421 RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0); 1422 RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0); 1423 RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0); 1424 1425 num_qd_pipes = 1426 R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK); 1427 RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK); 1428 RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK); 1429 1430 /* set HW defaults for 3D engine */ 1431 RADEON_WRITE(R600_CP_QUEUE_THRESHOLDS, (R600_ROQ_IB1_START(0x16) | 1432 R600_ROQ_IB2_START(0x2b))); 1433 1434 RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30)); 1435 1436 RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO | 1437 R600_SYNC_GRADIENT | 1438 R600_SYNC_WALKER | 1439 R600_SYNC_ALIGNER)); 1440 1441 sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1); 1442 sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS; 1443 RADEON_WRITE(R700_SX_DEBUG_1, sx_debug_1); 1444 1445 smx_dc_ctl0 = 
RADEON_READ(R600_SMX_DC_CTL0); 1446 smx_dc_ctl0 &= ~R700_CACHE_DEPTH(0x1ff); 1447 smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1); 1448 RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0); 1449 1450 RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) | 1451 R700_GS_FLUSH_CTL(4) | 1452 R700_ACK_FLUSH_CTL(3) | 1453 R700_SYNC_FLUSH_CTL)); 1454 1455 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770) 1456 RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f)); 1457 else { 1458 db_debug4 = RADEON_READ(RV700_DB_DEBUG4); 1459 db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER; 1460 RADEON_WRITE(RV700_DB_DEBUG4, db_debug4); 1461 } 1462 1463 RADEON_WRITE(R600_SX_EXPORT_BUFFER_SIZES, (R600_COLOR_BUFFER_SIZE((dev_priv->r600_sx_max_export_size / 4) - 1) | 1464 R600_POSITION_BUFFER_SIZE((dev_priv->r600_sx_max_export_pos_size / 4) - 1) | 1465 R600_SMX_BUFFER_SIZE((dev_priv->r600_sx_max_export_smx_size / 4) - 1))); 1466 1467 RADEON_WRITE(R700_PA_SC_FIFO_SIZE_R7XX, (R700_SC_PRIM_FIFO_SIZE(dev_priv->r700_sc_prim_fifo_size) | 1468 R700_SC_HIZ_TILE_FIFO_SIZE(dev_priv->r700_sc_hiz_tile_fifo_size) | 1469 R700_SC_EARLYZ_TILE_FIFO_SIZE(dev_priv->r700_sc_earlyz_tile_fifo_fize))); 1470 1471 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1472 1473 RADEON_WRITE(R600_VGT_NUM_INSTANCES, 1); 1474 1475 RADEON_WRITE(R600_SPI_CONFIG_CNTL, R600_GPR_WRITE_PRIORITY(0)); 1476 1477 RADEON_WRITE(R600_SPI_CONFIG_CNTL_1, R600_VTX_DONE_DELAY(4)); 1478 1479 RADEON_WRITE(R600_CP_PERFMON_CNTL, 0); 1480 1481 sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(16 * dev_priv->r600_sq_num_cf_insts) | 1482 R600_DONE_FIFO_HIWATER(0xe0) | 1483 R600_ALU_UPDATE_FIFO_HIWATER(0x8)); 1484 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1485 case CHIP_RV770: 1486 sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1); 1487 break; 1488 case CHIP_RV730: 1489 case CHIP_RV710: 1490 default: 1491 sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4); 1492 break; 1493 } 1494 RADEON_WRITE(R600_SQ_MS_FIFO_SIZES, 
sq_ms_fifo_sizes); 1495 1496 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 1497 * should be adjusted as needed by the 2D/3D drivers. This just sets default values 1498 */ 1499 sq_config = RADEON_READ(R600_SQ_CONFIG); 1500 sq_config &= ~(R600_PS_PRIO(3) | 1501 R600_VS_PRIO(3) | 1502 R600_GS_PRIO(3) | 1503 R600_ES_PRIO(3)); 1504 sq_config |= (R600_DX9_CONSTS | 1505 R600_VC_ENABLE | 1506 R600_EXPORT_SRC_C | 1507 R600_PS_PRIO(0) | 1508 R600_VS_PRIO(1) | 1509 R600_GS_PRIO(2) | 1510 R600_ES_PRIO(3)); 1511 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710) 1512 /* no vertex cache */ 1513 sq_config &= ~R600_VC_ENABLE; 1514 1515 RADEON_WRITE(R600_SQ_CONFIG, sq_config); 1516 1517 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_1, (R600_NUM_PS_GPRS((dev_priv->r600_max_gprs * 24)/64) | 1518 R600_NUM_VS_GPRS((dev_priv->r600_max_gprs * 24)/64) | 1519 R600_NUM_CLAUSE_TEMP_GPRS(((dev_priv->r600_max_gprs * 24)/64)/2))); 1520 1521 RADEON_WRITE(R600_SQ_GPR_RESOURCE_MGMT_2, (R600_NUM_GS_GPRS((dev_priv->r600_max_gprs * 7)/64) | 1522 R600_NUM_ES_GPRS((dev_priv->r600_max_gprs * 7)/64))); 1523 1524 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS((dev_priv->r600_max_threads * 4)/8) | 1525 R600_NUM_VS_THREADS((dev_priv->r600_max_threads * 2)/8) | 1526 R600_NUM_ES_THREADS((dev_priv->r600_max_threads * 1)/8)); 1527 if (((dev_priv->r600_max_threads * 1) / 8) > dev_priv->r600_max_gs_threads) 1528 sq_thread_resource_mgmt |= R600_NUM_GS_THREADS(dev_priv->r600_max_gs_threads); 1529 else 1530 sq_thread_resource_mgmt |= R600_NUM_GS_THREADS((dev_priv->r600_max_gs_threads * 1)/8); 1531 RADEON_WRITE(R600_SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 1532 1533 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_1, (R600_NUM_PS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4) | 1534 R600_NUM_VS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4))); 1535 1536 RADEON_WRITE(R600_SQ_STACK_RESOURCE_MGMT_2, (R600_NUM_GS_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 
1)/4) | 1537 R600_NUM_ES_STACK_ENTRIES((dev_priv->r600_max_stack_entries * 1)/4))); 1538 1539 sq_dyn_gpr_size_simd_ab_0 = (R700_SIMDA_RING0((dev_priv->r600_max_gprs * 38)/64) | 1540 R700_SIMDA_RING1((dev_priv->r600_max_gprs * 38)/64) | 1541 R700_SIMDB_RING0((dev_priv->r600_max_gprs * 38)/64) | 1542 R700_SIMDB_RING1((dev_priv->r600_max_gprs * 38)/64)); 1543 1544 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); 1545 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); 1546 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); 1547 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); 1548 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); 1549 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); 1550 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); 1551 RADEON_WRITE(R700_SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); 1552 1553 RADEON_WRITE(R700_PA_SC_FORCE_EOV_MAX_CNTS, (R700_FORCE_EOV_MAX_CLK_CNT(4095) | 1554 R700_FORCE_EOV_MAX_REZ_CNT(255))); 1555 1556 if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710) 1557 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_TC_ONLY) | 1558 R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO))); 1559 else 1560 RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, (R600_CACHE_INVALIDATION(R600_VC_AND_TC) | 1561 R700_AUTO_INVLD_EN(R700_ES_AND_GS_AUTO))); 1562 1563 switch (dev_priv->flags & RADEON_FAMILY_MASK) { 1564 case CHIP_RV770: 1565 case CHIP_RV730: 1566 gs_prim_buffer_depth = 384; 1567 break; 1568 case CHIP_RV710: 1569 gs_prim_buffer_depth = 128; 1570 break; 1571 default: 1572 break; 1573 } 1574 1575 num_gs_verts_per_thread = dev_priv->r600_max_pipes * 16; 1576 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 1577 /* Max value for this is 256 */ 1578 if (vgt_gs_per_es > 256) 1579 vgt_gs_per_es = 256; 1580 1581 RADEON_WRITE(R600_VGT_ES_PER_GS, 128); 1582 
RADEON_WRITE(R600_VGT_GS_PER_ES, vgt_gs_per_es); 1583 RADEON_WRITE(R600_VGT_GS_PER_VS, 2); 1584 1585 /* more default values. 2D/3D driver should adjust as needed */ 1586 RADEON_WRITE(R600_VGT_GS_VERTEX_REUSE, 16); 1587 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE_STATE, 0); 1588 RADEON_WRITE(R600_VGT_STRMOUT_EN, 0); 1589 RADEON_WRITE(R600_SX_MISC, 0); 1590 RADEON_WRITE(R600_PA_SC_MODE_CNTL, 0); 1591 RADEON_WRITE(R700_PA_SC_EDGERULE, 0xaaaaaaaa); 1592 RADEON_WRITE(R600_PA_SC_AA_CONFIG, 0); 1593 RADEON_WRITE(R600_PA_SC_CLIPRECT_RULE, 0xffff); 1594 RADEON_WRITE(R600_PA_SC_LINE_STIPPLE, 0); 1595 RADEON_WRITE(R600_SPI_INPUT_Z, 0); 1596 RADEON_WRITE(R600_SPI_PS_IN_CONTROL_0, R600_NUM_INTERP(2)); 1597 RADEON_WRITE(R600_CB_COLOR7_FRAG, 0); 1598 1599 /* clear render buffer base addresses */ 1600 RADEON_WRITE(R600_CB_COLOR0_BASE, 0); 1601 RADEON_WRITE(R600_CB_COLOR1_BASE, 0); 1602 RADEON_WRITE(R600_CB_COLOR2_BASE, 0); 1603 RADEON_WRITE(R600_CB_COLOR3_BASE, 0); 1604 RADEON_WRITE(R600_CB_COLOR4_BASE, 0); 1605 RADEON_WRITE(R600_CB_COLOR5_BASE, 0); 1606 RADEON_WRITE(R600_CB_COLOR6_BASE, 0); 1607 RADEON_WRITE(R600_CB_COLOR7_BASE, 0); 1608 1609 RADEON_WRITE(R700_TCP_CNTL, 0); 1610 1611 hdp_host_path_cntl = RADEON_READ(R600_HDP_HOST_PATH_CNTL); 1612 RADEON_WRITE(R600_HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 1613 1614 RADEON_WRITE(R600_PA_SC_MULTI_CHIP_CNTL, 0); 1615 1616 RADEON_WRITE(R600_PA_CL_ENHANCE, (R600_CLIP_VTX_REORDER_ENA | 1617 R600_NUM_CLIP_SEQ(3))); 1618 1619} 1620 1621static void r600_cp_init_ring_buffer(struct drm_device *dev, 1622 drm_radeon_private_t *dev_priv, 1623 struct drm_file *file_priv) 1624{ 1625 struct drm_radeon_master_private *master_priv; 1626 u32 ring_start; 1627 1628 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 1629 r700_gfx_init(dev, dev_priv); 1630 else 1631 r600_gfx_init(dev, dev_priv); 1632 1633 RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP); 1634 RADEON_READ(R600_GRBM_SOFT_RESET); 1635 DRM_UDELAY(15000); 1636 
RADEON_WRITE(R600_GRBM_SOFT_RESET, 0); 1637 1638 1639 /* Set ring buffer size */ 1640#ifdef __BIG_ENDIAN 1641 RADEON_WRITE(R600_CP_RB_CNTL, 1642 RADEON_BUF_SWAP_32BIT | 1643 RADEON_RB_NO_UPDATE | 1644 (dev_priv->ring.rptr_update_l2qw << 8) | 1645 dev_priv->ring.size_l2qw); 1646#else 1647 RADEON_WRITE(R600_CP_RB_CNTL, 1648 RADEON_RB_NO_UPDATE | 1649 (dev_priv->ring.rptr_update_l2qw << 8) | 1650 dev_priv->ring.size_l2qw); 1651#endif 1652 1653 RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4); 1654 1655 /* Set the write pointer delay */ 1656 RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); 1657 1658#ifdef __BIG_ENDIAN 1659 RADEON_WRITE(R600_CP_RB_CNTL, 1660 RADEON_BUF_SWAP_32BIT | 1661 RADEON_RB_NO_UPDATE | 1662 RADEON_RB_RPTR_WR_ENA | 1663 (dev_priv->ring.rptr_update_l2qw << 8) | 1664 dev_priv->ring.size_l2qw); 1665#else 1666 RADEON_WRITE(R600_CP_RB_CNTL, 1667 RADEON_RB_NO_UPDATE | 1668 RADEON_RB_RPTR_WR_ENA | 1669 (dev_priv->ring.rptr_update_l2qw << 8) | 1670 dev_priv->ring.size_l2qw); 1671#endif 1672 1673 /* Initialize the ring buffer's read and write pointers */ 1674 RADEON_WRITE(R600_CP_RB_RPTR_WR, 0); 1675 RADEON_WRITE(R600_CP_RB_WPTR, 0); 1676 SET_RING_HEAD(dev_priv, 0); 1677 dev_priv->ring.tail = 0; 1678 1679#if __OS_HAS_AGP 1680 if (dev_priv->flags & RADEON_IS_AGP) { 1681 /* XXX */ 1682 RADEON_WRITE(R600_CP_RB_RPTR_ADDR, 1683 (dev_priv->ring_rptr->offset 1684 - dev->agp->base + dev_priv->gart_vm_start) >> 8); 1685 RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, 0); 1686 } else 1687#endif 1688 { 1689 struct drm_sg_mem *entry = dev->sg; 1690 unsigned long tmp_ofs, page_ofs; 1691 1692 tmp_ofs = dev_priv->ring_rptr->offset - 1693 (unsigned long)dev->sg->virtual; 1694 page_ofs = tmp_ofs >> PAGE_SHIFT; 1695 1696 RADEON_WRITE(R600_CP_RB_RPTR_ADDR, entry->busaddr[page_ofs] >> 8); 1697 RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, 0); 1698 DRM_DEBUG("ring rptr: offset=0x%08lx handle=0x%08lx\n", 1699 (unsigned long)entry->busaddr[page_ofs], 1700 entry->handle + tmp_ofs); 1701 } 1702 1703#ifdef 
__BIG_ENDIAN 1704 RADEON_WRITE(R600_CP_RB_CNTL, 1705 RADEON_BUF_SWAP_32BIT | 1706 (dev_priv->ring.rptr_update_l2qw << 8) | 1707 dev_priv->ring.size_l2qw); 1708#else 1709 RADEON_WRITE(R600_CP_RB_CNTL, 1710 (dev_priv->ring.rptr_update_l2qw << 8) | 1711 dev_priv->ring.size_l2qw); 1712#endif 1713 1714#if __OS_HAS_AGP 1715 if (dev_priv->flags & RADEON_IS_AGP) { 1716 /* XXX */ 1717 radeon_write_agp_base(dev_priv, dev->agp->base); 1718 1719 /* XXX */ 1720 radeon_write_agp_location(dev_priv, 1721 (((dev_priv->gart_vm_start - 1 + 1722 dev_priv->gart_size) & 0xffff0000) | 1723 (dev_priv->gart_vm_start >> 16))); 1724 1725 ring_start = (dev_priv->cp_ring->offset 1726 - dev->agp->base 1727 + dev_priv->gart_vm_start); 1728 } else 1729#endif 1730 ring_start = (dev_priv->cp_ring->offset 1731 - (unsigned long)dev->sg->virtual 1732 + dev_priv->gart_vm_start); 1733 1734 RADEON_WRITE(R600_CP_RB_BASE, ring_start >> 8); 1735 1736 RADEON_WRITE(R600_CP_ME_CNTL, 0xff); 1737 1738 RADEON_WRITE(R600_CP_DEBUG, (1 << 27) | (1 << 28)); 1739 1740 /* Start with assuming that writeback doesn't work */ 1741 dev_priv->writeback_works = 0; 1742 1743 /* Initialize the scratch register pointer. This will cause 1744 * the scratch register values to be written out to memory 1745 * whenever they are updated. 
1746 * 1747 * We simply put this behind the ring read pointer, this works 1748 * with PCI GART as well as (whatever kind of) AGP GART 1749 */ 1750 RADEON_WRITE(R600_SCRATCH_ADDR, ((RADEON_READ(R600_CP_RB_RPTR_ADDR) << 8) 1751 + R600_SCRATCH_REG_OFFSET) >> 8); 1752 1753 RADEON_WRITE(R600_SCRATCH_UMSK, 0x7); 1754 1755 /* Turn on bus mastering */ 1756 radeon_enable_bm(dev_priv); 1757 1758 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(0), 0); 1759 RADEON_WRITE(R600_LAST_FRAME_REG, 0); 1760 1761 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(1), 0); 1762 RADEON_WRITE(R600_LAST_DISPATCH_REG, 0); 1763 1764 radeon_write_ring_rptr(dev_priv, R600_SCRATCHOFF(2), 0); 1765 RADEON_WRITE(R600_LAST_CLEAR_REG, 0); 1766 1767 /* reset sarea copies of these */ 1768 master_priv = file_priv->master->driver_priv; 1769 if (master_priv->sarea_priv) { 1770 master_priv->sarea_priv->last_frame = 0; 1771 master_priv->sarea_priv->last_dispatch = 0; 1772 master_priv->sarea_priv->last_clear = 0; 1773 } 1774 1775 r600_do_wait_for_idle(dev_priv); 1776 1777} 1778 1779int r600_do_cleanup_cp(struct drm_device *dev) 1780{ 1781 drm_radeon_private_t *dev_priv = dev->dev_private; 1782 DRM_DEBUG("\n"); 1783 1784 /* Make sure interrupts are disabled here because the uninstall ioctl 1785 * may not have been called from userspace and after dev_private 1786 * is freed, it's too late. 
1787 */ 1788 if (dev->irq_enabled) 1789 drm_irq_uninstall(dev); 1790 1791#if __OS_HAS_AGP 1792 if (dev_priv->flags & RADEON_IS_AGP) { 1793 if (dev_priv->cp_ring != NULL) { 1794 drm_core_ioremapfree(dev_priv->cp_ring, dev); 1795 dev_priv->cp_ring = NULL; 1796 } 1797 if (dev_priv->ring_rptr != NULL) { 1798 drm_core_ioremapfree(dev_priv->ring_rptr, dev); 1799 dev_priv->ring_rptr = NULL; 1800 } 1801 if (dev->agp_buffer_map != NULL) { 1802 drm_core_ioremapfree(dev->agp_buffer_map, dev); 1803 dev->agp_buffer_map = NULL; 1804 } 1805 } else 1806#endif 1807 { 1808 1809 if (dev_priv->gart_info.bus_addr) 1810 r600_page_table_cleanup(dev, &dev_priv->gart_info); 1811 1812 if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) { 1813 drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev); 1814 dev_priv->gart_info.addr = 0; 1815 } 1816 } 1817 /* only clear to the start of flags */ 1818 memset(dev_priv, 0, offsetof(drm_radeon_private_t, flags)); 1819 1820 return 0; 1821} 1822 1823int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init, 1824 struct drm_file *file_priv) 1825{ 1826 drm_radeon_private_t *dev_priv = dev->dev_private; 1827 struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv; 1828 1829 DRM_DEBUG("\n"); 1830 1831 /* if we require new memory map but we don't have it fail */ 1832 if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) { 1833 DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n"); 1834 r600_do_cleanup_cp(dev); 1835 return -EINVAL; 1836 } 1837 1838 if (init->is_pci && (dev_priv->flags & RADEON_IS_AGP)) { 1839 DRM_DEBUG("Forcing AGP card to PCI mode\n"); 1840 dev_priv->flags &= ~RADEON_IS_AGP; 1841 /* The writeback test succeeds, but when writeback is enabled, 1842 * the ring buffer read ptr update fails after first 128 bytes. 
1843 */ 1844 radeon_no_wb = 1; 1845 } else if (!(dev_priv->flags & (RADEON_IS_AGP | RADEON_IS_PCI | RADEON_IS_PCIE)) 1846 && !init->is_pci) { 1847 DRM_DEBUG("Restoring AGP flag\n"); 1848 dev_priv->flags |= RADEON_IS_AGP; 1849 } 1850 1851 dev_priv->usec_timeout = init->usec_timeout; 1852 if (dev_priv->usec_timeout < 1 || 1853 dev_priv->usec_timeout > RADEON_MAX_USEC_TIMEOUT) { 1854 DRM_DEBUG("TIMEOUT problem!\n"); 1855 r600_do_cleanup_cp(dev); 1856 return -EINVAL; 1857 } 1858 1859 /* Enable vblank on CRTC1 for older X servers 1860 */ 1861 dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1; 1862 1863 dev_priv->cp_mode = init->cp_mode; 1864 1865 /* We don't support anything other than bus-mastering ring mode, 1866 * but the ring can be in either AGP or PCI space for the ring 1867 * read pointer. 1868 */ 1869 if ((init->cp_mode != RADEON_CSQ_PRIBM_INDDIS) && 1870 (init->cp_mode != RADEON_CSQ_PRIBM_INDBM)) { 1871 DRM_DEBUG("BAD cp_mode (%x)!\n", init->cp_mode); 1872 r600_do_cleanup_cp(dev); 1873 return -EINVAL; 1874 } 1875 1876 switch (init->fb_bpp) { 1877 case 16: 1878 dev_priv->color_fmt = RADEON_COLOR_FORMAT_RGB565; 1879 break; 1880 case 32: 1881 default: 1882 dev_priv->color_fmt = RADEON_COLOR_FORMAT_ARGB8888; 1883 break; 1884 } 1885 dev_priv->front_offset = init->front_offset; 1886 dev_priv->front_pitch = init->front_pitch; 1887 dev_priv->back_offset = init->back_offset; 1888 dev_priv->back_pitch = init->back_pitch; 1889 1890 dev_priv->ring_offset = init->ring_offset; 1891 dev_priv->ring_rptr_offset = init->ring_rptr_offset; 1892 dev_priv->buffers_offset = init->buffers_offset; 1893 dev_priv->gart_textures_offset = init->gart_textures_offset; 1894 1895 master_priv->sarea = drm_getsarea(dev); 1896 if (!master_priv->sarea) { 1897 DRM_ERROR("could not find sarea!\n"); 1898 r600_do_cleanup_cp(dev); 1899 return -EINVAL; 1900 } 1901 1902 dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset); 1903 if (!dev_priv->cp_ring) { 1904 DRM_ERROR("could not find cp ring 
region!\n"); 1905 r600_do_cleanup_cp(dev); 1906 return -EINVAL; 1907 } 1908 dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset); 1909 if (!dev_priv->ring_rptr) { 1910 DRM_ERROR("could not find ring read pointer!\n"); 1911 r600_do_cleanup_cp(dev); 1912 return -EINVAL; 1913 } 1914 dev->agp_buffer_token = init->buffers_offset; 1915 dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); 1916 if (!dev->agp_buffer_map) { 1917 DRM_ERROR("could not find dma buffer region!\n"); 1918 r600_do_cleanup_cp(dev); 1919 return -EINVAL; 1920 } 1921 1922 if (init->gart_textures_offset) { 1923 dev_priv->gart_textures = 1924 drm_core_findmap(dev, init->gart_textures_offset); 1925 if (!dev_priv->gart_textures) { 1926 DRM_ERROR("could not find GART texture region!\n"); 1927 r600_do_cleanup_cp(dev); 1928 return -EINVAL; 1929 } 1930 } 1931 1932#if __OS_HAS_AGP 1933 /* XXX */ 1934 if (dev_priv->flags & RADEON_IS_AGP) { 1935 drm_core_ioremap_wc(dev_priv->cp_ring, dev); 1936 drm_core_ioremap_wc(dev_priv->ring_rptr, dev); 1937 drm_core_ioremap_wc(dev->agp_buffer_map, dev); 1938 if (!dev_priv->cp_ring->handle || 1939 !dev_priv->ring_rptr->handle || 1940 !dev->agp_buffer_map->handle) { 1941 DRM_ERROR("could not find ioremap agp regions!\n"); 1942 r600_do_cleanup_cp(dev); 1943 return -EINVAL; 1944 } 1945 } else 1946#endif 1947 { 1948 dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset; 1949 dev_priv->ring_rptr->handle = 1950 (void *)dev_priv->ring_rptr->offset; 1951 dev->agp_buffer_map->handle = 1952 (void *)dev->agp_buffer_map->offset; 1953 1954 DRM_DEBUG("dev_priv->cp_ring->handle %p\n", 1955 dev_priv->cp_ring->handle); 1956 DRM_DEBUG("dev_priv->ring_rptr->handle %p\n", 1957 dev_priv->ring_rptr->handle); 1958 DRM_DEBUG("dev->agp_buffer_map->handle %p\n", 1959 dev->agp_buffer_map->handle); 1960 } 1961 1962 dev_priv->fb_location = (radeon_read_fb_location(dev_priv) & 0xffff) << 24; 1963 dev_priv->fb_size = 1964 (((radeon_read_fb_location(dev_priv) & 
0xffff0000u) << 8) + 0x1000000) 1965 - dev_priv->fb_location; 1966 1967 dev_priv->front_pitch_offset = (((dev_priv->front_pitch / 64) << 22) | 1968 ((dev_priv->front_offset 1969 + dev_priv->fb_location) >> 10)); 1970 1971 dev_priv->back_pitch_offset = (((dev_priv->back_pitch / 64) << 22) | 1972 ((dev_priv->back_offset 1973 + dev_priv->fb_location) >> 10)); 1974 1975 dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch / 64) << 22) | 1976 ((dev_priv->depth_offset 1977 + dev_priv->fb_location) >> 10)); 1978 1979 dev_priv->gart_size = init->gart_size; 1980 1981 /* New let's set the memory map ... */ 1982 if (dev_priv->new_memmap) { 1983 u32 base = 0; 1984 1985 DRM_INFO("Setting GART location based on new memory map\n"); 1986 1987 /* If using AGP, try to locate the AGP aperture at the same 1988 * location in the card and on the bus, though we have to 1989 * align it down. 1990 */ 1991#if __OS_HAS_AGP 1992 /* XXX */ 1993 if (dev_priv->flags & RADEON_IS_AGP) { 1994 base = dev->agp->base; 1995 /* Check if valid */ 1996 if ((base + dev_priv->gart_size - 1) >= dev_priv->fb_location && 1997 base < (dev_priv->fb_location + dev_priv->fb_size - 1)) { 1998 DRM_INFO("Can't use AGP base @0x%08lx, won't fit\n", 1999 dev->agp->base); 2000 base = 0; 2001 } 2002 } 2003#endif 2004 /* If not or if AGP is at 0 (Macs), try to put it elsewhere */ 2005 if (base == 0) { 2006 base = dev_priv->fb_location + dev_priv->fb_size; 2007 if (base < dev_priv->fb_location || 2008 ((base + dev_priv->gart_size) & 0xfffffffful) < base) 2009 base = dev_priv->fb_location 2010 - dev_priv->gart_size; 2011 } 2012 dev_priv->gart_vm_start = base & 0xffc00000u; 2013 if (dev_priv->gart_vm_start != base) 2014 DRM_INFO("GART aligned down from 0x%08x to 0x%08x\n", 2015 base, dev_priv->gart_vm_start); 2016 } 2017 2018#if __OS_HAS_AGP 2019 /* XXX */ 2020 if (dev_priv->flags & RADEON_IS_AGP) 2021 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset 2022 - dev->agp->base 2023 + dev_priv->gart_vm_start); 2024 
else 2025#endif 2026 dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset 2027 - (unsigned long)dev->sg->virtual 2028 + dev_priv->gart_vm_start); 2029 2030 DRM_DEBUG("fb 0x%08x size %d\n", 2031 (unsigned int) dev_priv->fb_location, 2032 (unsigned int) dev_priv->fb_size); 2033 DRM_DEBUG("dev_priv->gart_size %d\n", dev_priv->gart_size); 2034 DRM_DEBUG("dev_priv->gart_vm_start 0x%08x\n", 2035 (unsigned int) dev_priv->gart_vm_start); 2036 DRM_DEBUG("dev_priv->gart_buffers_offset 0x%08lx\n", 2037 dev_priv->gart_buffers_offset); 2038 2039 dev_priv->ring.start = (u32 *) dev_priv->cp_ring->handle; 2040 dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle 2041 + init->ring_size / sizeof(u32)); 2042 dev_priv->ring.size = init->ring_size; 2043 dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8); 2044 2045 dev_priv->ring.rptr_update = /* init->rptr_update */ 4096; 2046 dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8); 2047 2048 dev_priv->ring.fetch_size = /* init->fetch_size */ 32; 2049 dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16); 2050 2051 dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1; 2052 2053 dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK; 2054 2055#if __OS_HAS_AGP 2056 if (dev_priv->flags & RADEON_IS_AGP) { 2057 /* XXX turn off pcie gart */ 2058 } else 2059#endif 2060 { 2061 dev_priv->gart_info.table_mask = DMA_BIT_MASK(32); 2062 /* if we have an offset set from userspace */ 2063 if (!dev_priv->pcigart_offset_set) { 2064 DRM_ERROR("Need gart offset from userspace\n"); 2065 r600_do_cleanup_cp(dev); 2066 return -EINVAL; 2067 } 2068 2069 DRM_DEBUG("Using gart offset 0x%08lx\n", dev_priv->pcigart_offset); 2070 2071 dev_priv->gart_info.bus_addr = 2072 dev_priv->pcigart_offset + dev_priv->fb_location; 2073 dev_priv->gart_info.mapping.offset = 2074 dev_priv->pcigart_offset + dev_priv->fb_aper_offset; 2075 dev_priv->gart_info.mapping.size = 2076 dev_priv->gart_info.table_size; 2077 
2078 drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev); 2079 if (!dev_priv->gart_info.mapping.handle) { 2080 DRM_ERROR("ioremap failed.\n"); 2081 r600_do_cleanup_cp(dev); 2082 return -EINVAL; 2083 } 2084 2085 dev_priv->gart_info.addr = 2086 dev_priv->gart_info.mapping.handle; 2087 2088 DRM_DEBUG("Setting phys_pci_gart to %p %08lX\n", 2089 dev_priv->gart_info.addr, 2090 dev_priv->pcigart_offset); 2091 2092 if (r600_page_table_init(dev)) { 2093 DRM_ERROR("Failed to init GART table\n"); 2094 r600_do_cleanup_cp(dev); 2095 return -EINVAL; 2096 } 2097 2098 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 2099 r700_vm_init(dev); 2100 else 2101 r600_vm_init(dev); 2102 } 2103 2104 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) 2105 r700_cp_load_microcode(dev_priv); 2106 else 2107 r600_cp_load_microcode(dev_priv); 2108 2109 r600_cp_init_ring_buffer(dev, dev_priv, file_priv); 2110 2111 dev_priv->last_buf = 0; 2112 2113 r600_do_engine_reset(dev); 2114 r600_test_writeback(dev_priv); 2115 2116 return 0; 2117} 2118 2119int r600_do_resume_cp(struct drm_device *dev, struct drm_file *file_priv) 2120{ 2121 drm_radeon_private_t *dev_priv = dev->dev_private; 2122 2123 DRM_DEBUG("\n"); 2124 if (((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770)) { 2125 r700_vm_init(dev); 2126 r700_cp_load_microcode(dev_priv); 2127 } else { 2128 r600_vm_init(dev); 2129 r600_cp_load_microcode(dev_priv); 2130 } 2131 r600_cp_init_ring_buffer(dev, dev_priv, file_priv); 2132 r600_do_engine_reset(dev); 2133 2134 return 0; 2135} 2136 2137/* Wait for the CP to go idle. 
2138 */ 2139int r600_do_cp_idle(drm_radeon_private_t *dev_priv) 2140{ 2141 RING_LOCALS; 2142 DRM_DEBUG("\n"); 2143 2144 BEGIN_RING(5); 2145 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); 2146 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); 2147 /* wait for 3D idle clean */ 2148 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); 2149 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2); 2150 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN); 2151 2152 ADVANCE_RING(); 2153 COMMIT_RING(); 2154 2155 return r600_do_wait_for_idle(dev_priv); 2156} 2157 2158/* Start the Command Processor. 2159 */ 2160void r600_do_cp_start(drm_radeon_private_t *dev_priv) 2161{ 2162 u32 cp_me; 2163 RING_LOCALS; 2164 DRM_DEBUG("\n"); 2165 2166 BEGIN_RING(7); 2167 OUT_RING(CP_PACKET3(R600_IT_ME_INITIALIZE, 5)); 2168 OUT_RING(0x00000001); 2169 if (((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) 2170 OUT_RING(0x00000003); 2171 else 2172 OUT_RING(0x00000000); 2173 OUT_RING((dev_priv->r600_max_hw_contexts - 1)); 2174 OUT_RING(R600_ME_INITIALIZE_DEVICE_ID(1)); 2175 OUT_RING(0x00000000); 2176 OUT_RING(0x00000000); 2177 ADVANCE_RING(); 2178 COMMIT_RING(); 2179 2180 /* set the mux and reset the halt bit */ 2181 cp_me = 0xff; 2182 RADEON_WRITE(R600_CP_ME_CNTL, cp_me); 2183 2184 dev_priv->cp_running = 1; 2185 2186} 2187 2188void r600_do_cp_reset(drm_radeon_private_t *dev_priv) 2189{ 2190 u32 cur_read_ptr; 2191 DRM_DEBUG("\n"); 2192 2193 cur_read_ptr = RADEON_READ(R600_CP_RB_RPTR); 2194 RADEON_WRITE(R600_CP_RB_WPTR, cur_read_ptr); 2195 SET_RING_HEAD(dev_priv, cur_read_ptr); 2196 dev_priv->ring.tail = cur_read_ptr; 2197} 2198 2199void r600_do_cp_stop(drm_radeon_private_t *dev_priv) 2200{ 2201 uint32_t cp_me; 2202 2203 DRM_DEBUG("\n"); 2204 2205 cp_me = 0xff | R600_CP_ME_HALT; 2206 2207 RADEON_WRITE(R600_CP_ME_CNTL, cp_me); 2208 2209 dev_priv->cp_running = 0; 2210} 2211 2212int r600_cp_dispatch_indirect(struct drm_device *dev, 2213 struct drm_buf *buf, int start, int end) 2214{ 2215 
drm_radeon_private_t *dev_priv = dev->dev_private; 2216 RING_LOCALS; 2217 2218 if (start != end) { 2219 unsigned long offset = (dev_priv->gart_buffers_offset 2220 + buf->offset + start); 2221 int dwords = (end - start + 3) / sizeof(u32); 2222 2223 DRM_DEBUG("dwords:%d\n", dwords); 2224 DRM_DEBUG("offset 0x%lx\n", offset); 2225 2226 2227 /* Indirect buffer data must be a multiple of 16 dwords. 2228 * pad the data with a Type-2 CP packet. 2229 */ 2230 while (dwords & 0xf) { 2231 u32 *data = (u32 *) 2232 ((char *)dev->agp_buffer_map->handle 2233 + buf->offset + start); 2234 data[dwords++] = RADEON_CP_PACKET2; 2235 } 2236 2237 /* Fire off the indirect buffer */ 2238 BEGIN_RING(4); 2239 OUT_RING(CP_PACKET3(R600_IT_INDIRECT_BUFFER, 2)); 2240 OUT_RING((offset & 0xfffffffc)); 2241 OUT_RING((upper_32_bits(offset) & 0xff)); 2242 OUT_RING(dwords); 2243 ADVANCE_RING(); 2244 } 2245 2246 return 0; 2247} 2248