cache-sh4.c revision e76a0136a3cf1859fbc07f122e42293d22229558
1/* 2 * arch/sh/mm/cache-sh4.c 3 * 4 * Copyright (C) 1999, 2000, 2002 Niibe Yutaka 5 * Copyright (C) 2001 - 2007 Paul Mundt 6 * Copyright (C) 2003 Richard Curnow 7 * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. 8 * 9 * This file is subject to the terms and conditions of the GNU General Public 10 * License. See the file "COPYING" in the main directory of this archive 11 * for more details. 12 */ 13#include <linux/init.h> 14#include <linux/mm.h> 15#include <linux/io.h> 16#include <linux/mutex.h> 17#include <linux/fs.h> 18#include <asm/mmu_context.h> 19#include <asm/cacheflush.h> 20 21/* 22 * The maximum number of pages we support up to when doing ranged dcache 23 * flushing. Anything exceeding this will simply flush the dcache in its 24 * entirety. 25 */ 26#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ 27#define MAX_ICACHE_PAGES 32 28 29static void __flush_cache_4096(unsigned long addr, unsigned long phys, 30 unsigned long exec_offset); 31 32/* 33 * This is initialised here to ensure that it is not placed in the BSS. If 34 * that were to happen, note that cache_init gets called before the BSS is 35 * cleared, so this would get nulled out which would be hopeless. 36 */ 37static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = 38 (void (*)(unsigned long, unsigned long))0xdeadbeef; 39 40/* 41 * Write back the range of D-cache, and purge the I-cache. 42 * 43 * Called from kernel/module.c:sys_init_module and routine for a.out format, 44 * signal handler code and kprobes code 45 */ 46static void sh4_flush_icache_range(void *args) 47{ 48 struct flusher_data *data = args; 49 int icacheaddr; 50 unsigned long start, end; 51 unsigned long v; 52 int i; 53 54 start = data->addr1; 55 end = data->addr2; 56 57 /* If there are too many pages then just blow the caches */ 58 if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { 59 local_flush_cache_all(args); 60 } else { 61 /* selectively flush d-cache then invalidate the i-cache */ 62 /* this is inefficient, so only use for small ranges */ 63 start &= ~(L1_CACHE_BYTES-1); 64 end += L1_CACHE_BYTES-1; 65 end &= ~(L1_CACHE_BYTES-1); 66 67 jump_to_uncached(); 68 69 for (v = start; v < end; v+=L1_CACHE_BYTES) { 70 __ocbwb(v); 71 72 icacheaddr = CACHE_IC_ADDRESS_ARRAY | 73 (v & cpu_data->icache.entry_mask); 74 75 for (i = 0; i < cpu_data->icache.ways; 76 i++, icacheaddr += cpu_data->icache.way_incr) 77 /* Clear i-cache line valid-bit */ 78 ctrl_outl(0, icacheaddr); 79 } 80 81 back_to_cached(); 82 } 83} 84 85static inline void flush_cache_4096(unsigned long start, 86 unsigned long phys) 87{ 88 unsigned long exec_offset = 0; 89 90 /* 91 * All types of SH-4 require PC to be in P2 to operate on the I-cache. 92 * Some types of SH-4 require PC to be in P2 to operate on the D-cache. 93 */ 94 if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) || 95 (start < CACHE_OC_ADDRESS_ARRAY)) 96 exec_offset = 0x20000000; 97 98 __flush_cache_4096(start | SH_CACHE_ASSOC, 99 P1SEGADDR(phys), exec_offset); 100} 101 102/* 103 * Write back & invalidate the D-cache of the page. 104 * (To avoid "alias" issues) 105 */ 106static void sh4_flush_dcache_page(void *arg) 107{ 108 struct page *page = arg; 109#ifndef CONFIG_SMP 110 struct address_space *mapping = page_mapping(page); 111 112 if (mapping && !mapping_mapped(mapping)) 113 set_bit(PG_dcache_dirty, &page->flags); 114 else 115#endif 116 { 117 unsigned long phys = PHYSADDR(page_address(page)); 118 unsigned long addr = CACHE_OC_ADDRESS_ARRAY; 119 int i, n; 120 121 /* Loop all the D-cache */ 122 n = boot_cpu_data.dcache.n_aliases; 123 for (i = 0; i < n; i++, addr += 4096) 124 flush_cache_4096(addr, phys); 125 } 126 127 wmb(); 128} 129 130/* TODO: Selective icache invalidation through IC address array.. */ 131static void __uses_jump_to_uncached flush_icache_all(void) 132{ 133 unsigned long ccr; 134 135 jump_to_uncached(); 136 137 /* Flush I-cache */ 138 ccr = ctrl_inl(CCR); 139 ccr |= CCR_CACHE_ICI; 140 ctrl_outl(ccr, CCR); 141 142 /* 143 * back_to_cached() will take care of the barrier for us, don't add 144 * another one! 145 */ 146 back_to_cached(); 147} 148 149static inline void flush_dcache_all(void) 150{ 151 (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size); 152 wmb(); 153} 154 155static void sh4_flush_cache_all(void *unused) 156{ 157 flush_dcache_all(); 158 flush_icache_all(); 159} 160 161static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, 162 unsigned long end) 163{ 164 unsigned long d = 0, p = start & PAGE_MASK; 165 unsigned long alias_mask = boot_cpu_data.dcache.alias_mask; 166 unsigned long n_aliases = boot_cpu_data.dcache.n_aliases; 167 unsigned long select_bit; 168 unsigned long all_aliases_mask; 169 unsigned long addr_offset; 170 pgd_t *dir; 171 pmd_t *pmd; 172 pud_t *pud; 173 pte_t *pte; 174 int i; 175 176 dir = pgd_offset(mm, p); 177 pud = pud_offset(dir, p); 178 pmd = pmd_offset(pud, p); 179 end = PAGE_ALIGN(end); 180 181 all_aliases_mask = (1 << n_aliases) - 1; 182 183 do { 184 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { 185 p &= PMD_MASK; 186 p += PMD_SIZE; 187 pmd++; 188 189 continue; 190 } 191 192 pte = pte_offset_kernel(pmd, p); 193 194 do { 195 unsigned long phys; 196 pte_t entry = *pte; 197 198 if (!(pte_val(entry) & _PAGE_PRESENT)) { 199 pte++; 200 p += PAGE_SIZE; 201 continue; 202 } 203 204 phys = pte_val(entry) & PTE_PHYS_MASK; 205 206 if ((p ^ phys) & alias_mask) { 207 d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); 208 d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); 209 210 if (d == all_aliases_mask) 211 goto loop_exit; 212 } 213 214 pte++; 215 p += PAGE_SIZE; 216 } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); 217 pmd++; 218 } while (p < end); 219 220loop_exit: 221 addr_offset = 0; 222 select_bit = 1; 223 224 for (i = 0; i < n_aliases; i++) { 225 if (d & select_bit) { 226 (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); 227 wmb(); 228 } 229 230 select_bit <<= 1; 231 addr_offset += PAGE_SIZE; 232 } 233} 234 235/* 236 * Note : (RPC) since the caches are physically tagged, the only point 237 * of flush_cache_mm for SH-4 is to get rid of aliases from the 238 * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that 239 * lines can stay resident so long as the virtual address they were 240 * accessed with (hence cache set) is in accord with the physical 241 * address (i.e. tag). It's no different here. So I reckon we don't 242 * need to flush the I-cache, since aliases don't matter for that. We 243 * should try that. 244 * 245 * Caller takes mm->mmap_sem. 246 */ 247static void sh4_flush_cache_mm(void *arg) 248{ 249 struct mm_struct *mm = arg; 250 251 if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT) 252 return; 253 254 /* 255 * If cache is only 4k-per-way, there are never any 'aliases'. Since 256 * the cache is physically tagged, the data can just be left in there. 257 */ 258 if (boot_cpu_data.dcache.n_aliases == 0) 259 return; 260 261 /* 262 * Don't bother groveling around the dcache for the VMA ranges 263 * if there are too many PTEs to make it worthwhile. 264 */ 265 if (mm->nr_ptes >= MAX_DCACHE_PAGES) 266 flush_dcache_all(); 267 else { 268 struct vm_area_struct *vma; 269 270 /* 271 * In this case there are reasonably sized ranges to flush, 272 * iterate through the VMA list and take care of any aliases. 273 */ 274 for (vma = mm->mmap; vma; vma = vma->vm_next) 275 __flush_cache_mm(mm, vma->vm_start, vma->vm_end); 276 } 277 278 /* Only touch the icache if one of the VMAs has VM_EXEC set. */ 279 if (mm->exec_vm) 280 flush_icache_all(); 281} 282 283/* 284 * Write back and invalidate I/D-caches for the page. 285 * 286 * ADDR: Virtual Address (U0 address) 287 * PFN: Physical page number 288 */ 289static void sh4_flush_cache_page(void *args) 290{ 291 struct flusher_data *data = args; 292 struct vm_area_struct *vma; 293 unsigned long address, pfn, phys; 294 unsigned int alias_mask; 295 296 vma = data->vma; 297 address = data->addr1; 298 pfn = data->addr2; 299 phys = pfn << PAGE_SHIFT; 300 301 if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) 302 return; 303 304 alias_mask = boot_cpu_data.dcache.alias_mask; 305 306 /* We only need to flush D-cache when we have alias */ 307 if ((address^phys) & alias_mask) { 308 /* Loop 4K of the D-cache */ 309 flush_cache_4096( 310 CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), 311 phys); 312 /* Loop another 4K of the D-cache */ 313 flush_cache_4096( 314 CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), 315 phys); 316 } 317 318 alias_mask = boot_cpu_data.icache.alias_mask; 319 if (vma->vm_flags & VM_EXEC) { 320 /* 321 * Evict entries from the portion of the cache from which code 322 * may have been executed at this address (virtual). There's 323 * no need to evict from the portion corresponding to the 324 * physical address as for the D-cache, because we know the 325 * kernel has never executed the code through its identity 326 * translation. 327 */ 328 flush_cache_4096( 329 CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), 330 phys); 331 } 332} 333 334/* 335 * Write back and invalidate D-caches. 336 * 337 * START, END: Virtual Address (U0 address) 338 * 339 * NOTE: We need to flush the _physical_ page entry. 340 * Flushing the cache lines for U0 only isn't enough. 341 * We need to flush for P1 too, which may contain aliases. 342 */ 343static void sh4_flush_cache_range(void *args) 344{ 345 struct flusher_data *data = args; 346 struct vm_area_struct *vma; 347 unsigned long start, end; 348 349 vma = data->vma; 350 start = data->addr1; 351 end = data->addr2; 352 353 if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) 354 return; 355 356 /* 357 * If cache is only 4k-per-way, there are never any 'aliases'. Since 358 * the cache is physically tagged, the data can just be left in there. 359 */ 360 if (boot_cpu_data.dcache.n_aliases == 0) 361 return; 362 363 /* 364 * Don't bother with the lookup and alias check if we have a 365 * wide range to cover, just blow away the dcache in its 366 * entirety instead. -- PFM. 367 */ 368 if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) 369 flush_dcache_all(); 370 else 371 __flush_cache_mm(vma->vm_mm, start, end); 372 373 if (vma->vm_flags & VM_EXEC) { 374 /* 375 * TODO: Is this required??? Need to look at how I-cache 376 * coherency is assured when new programs are loaded to see if 377 * this matters. 378 */ 379 flush_icache_all(); 380 } 381} 382 383/** 384 * __flush_cache_4096 385 * 386 * @addr: address in memory mapped cache array 387 * @phys: P1 address to flush (has to match tags if addr has 'A' bit 388 * set i.e. associative write) 389 * @exec_offset: set to 0x20000000 if flush has to be executed from P2 390 * region else 0x0 391 * 392 * The offset into the cache array implied by 'addr' selects the 393 * 'colour' of the virtual address range that will be flushed. The 394 * operation (purge/write-back) is selected by the lower 2 bits of 395 * 'phys'. 396 */ 397static void __flush_cache_4096(unsigned long addr, unsigned long phys, 398 unsigned long exec_offset) 399{ 400 int way_count; 401 unsigned long base_addr = addr; 402 struct cache_info *dcache; 403 unsigned long way_incr; 404 unsigned long a, ea, p; 405 unsigned long temp_pc; 406 407 dcache = &boot_cpu_data.dcache; 408 /* Write this way for better assembly. */ 409 way_count = dcache->ways; 410 way_incr = dcache->way_incr; 411 412 /* 413 * Apply exec_offset (i.e. branch to P2 if required.). 414 * 415 * FIXME: 416 * 417 * If I write "=r" for the (temp_pc), it puts this in r6 hence 418 * trashing exec_offset before it's been added on - why? Hence 419 * "=&r" as a 'workaround' 420 */ 421 asm volatile("mov.l 1f, %0\n\t" 422 "add %1, %0\n\t" 423 "jmp @%0\n\t" 424 "nop\n\t" 425 ".balign 4\n\t" 426 "1: .long 2f\n\t" 427 "2:\n" : "=&r" (temp_pc) : "r" (exec_offset)); 428 429 /* 430 * We know there will be >=1 iteration, so write as do-while to avoid 431 * pointless nead-of-loop check for 0 iterations. 432 */ 433 do { 434 ea = base_addr + PAGE_SIZE; 435 a = base_addr; 436 p = phys; 437 438 do { 439 *(volatile unsigned long *)a = p; 440 /* 441 * Next line: intentionally not p+32, saves an add, p 442 * will do since only the cache tag bits need to 443 * match. 444 */ 445 *(volatile unsigned long *)(a+32) = p; 446 a += 64; 447 p += 64; 448 } while (a < ea); 449 450 base_addr += way_incr; 451 } while (--way_count != 0); 452} 453 454/* 455 * Break the 1, 2 and 4 way variants of this out into separate functions to 456 * avoid nearly all the overhead of having the conditional stuff in the function 457 * bodies (+ the 1 and 2 way cases avoid saving any registers too). 458 */ 459static void __flush_dcache_segment_1way(unsigned long start, 460 unsigned long extent_per_way) 461{ 462 unsigned long orig_sr, sr_with_bl; 463 unsigned long base_addr; 464 unsigned long way_incr, linesz, way_size; 465 struct cache_info *dcache; 466 register unsigned long a0, a0e; 467 468 asm volatile("stc sr, %0" : "=r" (orig_sr)); 469 sr_with_bl = orig_sr | (1<<28); 470 base_addr = ((unsigned long)&empty_zero_page[0]); 471 472 /* 473 * The previous code aligned base_addr to 16k, i.e. the way_size of all 474 * existing SH-4 D-caches. Whilst I don't see a need to have this 475 * aligned to any better than the cache line size (which it will be 476 * anyway by construction), let's align it to at least the way_size of 477 * any existing or conceivable SH-4 D-cache. -- RPC 478 */ 479 base_addr = ((base_addr >> 16) << 16); 480 base_addr |= start; 481 482 dcache = &boot_cpu_data.dcache; 483 linesz = dcache->linesz; 484 way_incr = dcache->way_incr; 485 way_size = dcache->way_size; 486 487 a0 = base_addr; 488 a0e = base_addr + extent_per_way; 489 do { 490 asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); 491 asm volatile("movca.l r0, @%0\n\t" 492 "ocbi @%0" : : "r" (a0)); 493 a0 += linesz; 494 asm volatile("movca.l r0, @%0\n\t" 495 "ocbi @%0" : : "r" (a0)); 496 a0 += linesz; 497 asm volatile("movca.l r0, @%0\n\t" 498 "ocbi @%0" : : "r" (a0)); 499 a0 += linesz; 500 asm volatile("movca.l r0, @%0\n\t" 501 "ocbi @%0" : : "r" (a0)); 502 asm volatile("ldc %0, sr" : : "r" (orig_sr)); 503 a0 += linesz; 504 } while (a0 < a0e); 505} 506 507static void __flush_dcache_segment_2way(unsigned long start, 508 unsigned long extent_per_way) 509{ 510 unsigned long orig_sr, sr_with_bl; 511 unsigned long base_addr; 512 unsigned long way_incr, linesz, way_size; 513 struct cache_info *dcache; 514 register unsigned long a0, a1, a0e; 515 516 asm volatile("stc sr, %0" : "=r" (orig_sr)); 517 sr_with_bl = orig_sr | (1<<28); 518 base_addr = ((unsigned long)&empty_zero_page[0]); 519 520 /* See comment under 1-way above */ 521 base_addr = ((base_addr >> 16) << 16); 522 base_addr |= start; 523 524 dcache = &boot_cpu_data.dcache; 525 linesz = dcache->linesz; 526 way_incr = dcache->way_incr; 527 way_size = dcache->way_size; 528 529 a0 = base_addr; 530 a1 = a0 + way_incr; 531 a0e = base_addr + extent_per_way; 532 do { 533 asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); 534 asm volatile("movca.l r0, @%0\n\t" 535 "movca.l r0, @%1\n\t" 536 "ocbi @%0\n\t" 537 "ocbi @%1" : : 538 "r" (a0), "r" (a1)); 539 a0 += linesz; 540 a1 += linesz; 541 asm volatile("movca.l r0, @%0\n\t" 542 "movca.l r0, @%1\n\t" 543 "ocbi @%0\n\t" 544 "ocbi @%1" : : 545 "r" (a0), "r" (a1)); 546 a0 += linesz; 547 a1 += linesz; 548 asm volatile("movca.l r0, @%0\n\t" 549 "movca.l r0, @%1\n\t" 550 "ocbi @%0\n\t" 551 "ocbi @%1" : : 552 "r" (a0), "r" (a1)); 553 a0 += linesz; 554 a1 += linesz; 555 asm volatile("movca.l r0, @%0\n\t" 556 "movca.l r0, @%1\n\t" 557 "ocbi @%0\n\t" 558 "ocbi @%1" : : 559 "r" (a0), "r" (a1)); 560 asm volatile("ldc %0, sr" : : "r" (orig_sr)); 561 a0 += linesz; 562 a1 += linesz; 563 } while (a0 < a0e); 564} 565 566static void __flush_dcache_segment_4way(unsigned long start, 567 unsigned long extent_per_way) 568{ 569 unsigned long orig_sr, sr_with_bl; 570 unsigned long base_addr; 571 unsigned long way_incr, linesz, way_size; 572 struct cache_info *dcache; 573 register unsigned long a0, a1, a2, a3, a0e; 574 575 asm volatile("stc sr, %0" : "=r" (orig_sr)); 576 sr_with_bl = orig_sr | (1<<28); 577 base_addr = ((unsigned long)&empty_zero_page[0]); 578 579 /* See comment under 1-way above */ 580 base_addr = ((base_addr >> 16) << 16); 581 base_addr |= start; 582 583 dcache = &boot_cpu_data.dcache; 584 linesz = dcache->linesz; 585 way_incr = dcache->way_incr; 586 way_size = dcache->way_size; 587 588 a0 = base_addr; 589 a1 = a0 + way_incr; 590 a2 = a1 + way_incr; 591 a3 = a2 + way_incr; 592 a0e = base_addr + extent_per_way; 593 do { 594 asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); 595 asm volatile("movca.l r0, @%0\n\t" 596 "movca.l r0, @%1\n\t" 597 "movca.l r0, @%2\n\t" 598 "movca.l r0, @%3\n\t" 599 "ocbi @%0\n\t" 600 "ocbi @%1\n\t" 601 "ocbi @%2\n\t" 602 "ocbi @%3\n\t" : : 603 "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 604 a0 += linesz; 605 a1 += linesz; 606 a2 += linesz; 607 a3 += linesz; 608 asm volatile("movca.l r0, @%0\n\t" 609 "movca.l r0, @%1\n\t" 610 "movca.l r0, @%2\n\t" 611 "movca.l r0, @%3\n\t" 612 "ocbi @%0\n\t" 613 "ocbi @%1\n\t" 614 "ocbi @%2\n\t" 615 "ocbi @%3\n\t" : : 616 "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 617 a0 += linesz; 618 a1 += linesz; 619 a2 += linesz; 620 a3 += linesz; 621 asm volatile("movca.l r0, @%0\n\t" 622 "movca.l r0, @%1\n\t" 623 "movca.l r0, @%2\n\t" 624 "movca.l r0, @%3\n\t" 625 "ocbi @%0\n\t" 626 "ocbi @%1\n\t" 627 "ocbi @%2\n\t" 628 "ocbi @%3\n\t" : : 629 "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 630 a0 += linesz; 631 a1 += linesz; 632 a2 += linesz; 633 a3 += linesz; 634 asm volatile("movca.l r0, @%0\n\t" 635 "movca.l r0, @%1\n\t" 636 "movca.l r0, @%2\n\t" 637 "movca.l r0, @%3\n\t" 638 "ocbi @%0\n\t" 639 "ocbi @%1\n\t" 640 "ocbi @%2\n\t" 641 "ocbi @%3\n\t" : : 642 "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 643 asm volatile("ldc %0, sr" : : "r" (orig_sr)); 644 a0 += linesz; 645 a1 += linesz; 646 a2 += linesz; 647 a3 += linesz; 648 } while (a0 < a0e); 649} 650 651extern void __weak sh4__flush_region_init(void); 652 653/* 654 * SH-4 has virtually indexed and physically tagged cache. 655 */ 656void __init sh4_cache_init(void) 657{ 658 printk("PVR=%08x CVR=%08x PRR=%08x\n", 659 ctrl_inl(CCN_PVR), 660 ctrl_inl(CCN_CVR), 661 ctrl_inl(CCN_PRR)); 662 663 switch (boot_cpu_data.dcache.ways) { 664 case 1: 665 __flush_dcache_segment_fn = __flush_dcache_segment_1way; 666 break; 667 case 2: 668 __flush_dcache_segment_fn = __flush_dcache_segment_2way; 669 break; 670 case 4: 671 __flush_dcache_segment_fn = __flush_dcache_segment_4way; 672 break; 673 default: 674 panic("unknown number of cache ways\n"); 675 break; 676 } 677 678 local_flush_icache_range = sh4_flush_icache_range; 679 local_flush_dcache_page = sh4_flush_dcache_page; 680 local_flush_cache_all = sh4_flush_cache_all; 681 local_flush_cache_mm = sh4_flush_cache_mm; 682 local_flush_cache_dup_mm = sh4_flush_cache_mm; 683 local_flush_cache_page = sh4_flush_cache_page; 684 local_flush_cache_range = sh4_flush_cache_range; 685 686 sh4__flush_region_init(); 687} 688