cache-sh5.c revision a23ba43573a24c351640bc19c06c701798fe6e25
/*
 * arch/sh/mm/cache-sh5.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
	/*
	 * First, setup some sane values for the I-cache.
	 */
	cpu_data->icache.ways		= 4;
	cpu_data->icache.sets		= 256;
	cpu_data->icache.linesz		= L1_CACHE_BYTES;

	/*
	 * FIXME: This can probably be cleaned up a bit as well.. for example,
	 * do we really need the way shift _and_ the way_step_shift ?? Judging
	 * by the existing code, I would guess no.. is there any valid reason
	 * why we need to be tracking this around?
	 */
	cpu_data->icache.way_shift	= 13;
	cpu_data->icache.entry_shift	= 5;
	cpu_data->icache.set_shift	= 4;
	cpu_data->icache.way_step_shift	= 16;
	cpu_data->icache.asid_shift	= 2;

	/*
	 * way offset = cache size / associativity, so just don't factor in
	 * associativity in the first place..
	 */
	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
					  cpu_data->icache.linesz;

	cpu_data->icache.asid_mask	= 0x3fc;
	cpu_data->icache.idx_mask	= 0x1fe0;
	cpu_data->icache.epn_mask	= 0xffffe000;
	cpu_data->icache.flags		= 0;

	/*
	 * Next, setup some sane values for the D-cache.
	 *
	 * On the SH5, these are pretty consistent with the I-cache settings,
	 * so we just copy over the existing definitions.. these can be fixed
	 * up later, especially if we add runtime CPU probing.
	 *
	 * Though in the meantime it saves us from having to duplicate all of
	 * the above definitions..
	 */
	cpu_data->dcache		= cpu_data->icache;

	/*
	 * Setup any cache-related flags here
	 */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

	/*
	 * We also need to reserve a slot for the D-cache in the DTLB, so we
	 * do this now ..
	 */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	return 0;
}
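
/*
 * For reference: taken together, the constants above describe a 4-way,
 * 256-set cache (entry_shift == 5 implies 32-byte lines), i.e. 8 KiB per
 * way and 32 KiB in total.  Assuming those figures, the derived fields
 * work out as:
 *
 *	way_ofs  = sets * linesz = 256 * 32 = 0x2000  (the span of one way)
 *	idx_mask = 0x1fe0       -> address bits [12:5] select one of 256 sets
 *	epn_mask = 0xffffe000   -> bits [31:13] form the virtual tag
 */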

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are;
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.

   */

/*##########################################################################
			  SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deal with mapping and unmapping a temporary
   page into the DTLB slot that has been set aside for our exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh/mm/tlb-sh5.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
	local_irq_save(slot_own_flags);
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_restore(slot_own_flags);
}

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned int flags;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Make this a critical section for safety (probably not strictly necessary.) */
	local_irq_save(flags);

	/* Without %1 it gets inexplicably wrong */
	asm volatile("getcfg	%3, 0, %0\n\t"
		     "or	%0, %2, %0\n\t"
		     "putcfg	%3, 0, %0\n\t"
		     "synci"
		     : "=&r" (data)
		     : "0" (data), "r" (flag), "r" (addr));

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	 * the addresses lie in the kernel superpage.
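	 * The casts below sign-extend the 32-bit kernel addresses into the
	 * 64-bit effective-address form the icbi loop works on; only the
	 * NEFF == 32 case is handled.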
	 */

	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */

	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little */
		asm __volatile__("icbi %0,  0" : : "r" (addr));
		asm __volatile__("icbi %0, 32" : : "r" (addr));
		asm __volatile__("icbi %0, 64" : : "r" (addr));
		asm __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, option (2) isn't available for
	   the I-cache because there are no physical tags, so aliases can't be
	   resolved; the icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, the
	   selective code remains worthwhile for a larger range than it does
	   for the D-cache.  Just assume 64 for now as a working figure.
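
	   (As a rough worked figure, assuming 4 KiB pages and the 32-byte
	   cache lines used by the icbi loops above, 64 pages corresponds to
	   128 icbi issues per page, i.e. 8192 in all, which is the point at
	   which the code falls back to a global invalidate instead.)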
	   */

	int n_pages;

	if (!mm) return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long long flags = 0ULL;

		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start <= vma->vm_end)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}
		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{

	/* Invalidate a small range of user context I-cache, not necessarily
	   page (or even cache-line) aligned. */

	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long long flags;
	unsigned long long epage_start;

	/* Since this is used inside ptrace, the ASID in the mm context
	   typically won't match current_asid.  We'll have to switch ASID to do
	   this.  For safety, and given that the range will be small, do all
	   this under cli.

	   Note, there is a hazard that the ASID in mm->context is no longer
	   actually associated with mm, i.e. if the mm->context has started a
	   new cycle since mm was last active.  However, this is just a
	   performance issue: all that happens is that we invalidate lines
	   belonging to another mm, so the owning process has to refill them
	   when that mm goes live again.  mm itself can't have any cache
	   entries because there will have been a flush_cache_all when the new
	   mm->context cycle started. */

	/* Align to start of cache line.  Otherwise, suppose len==8 and start
	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
	eaddr = start & L1_CACHE_ALIGN_MASK;
	eaddr_end = start + len;

	local_irq_save(flags);
	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end)
	{
		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup.
	   */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries : no worries, just a
	   performance drop for him. */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		asm __volatile__ ("nop");
		asm __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j = 0; j < n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

		/* Do one alloco which hits the required set per cache way.  For
		   write-back mode, this will purge the #ways resident lines.  There's
		   little point unrolling this loop because the allocos stall more if
		   they're too close together. */
		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			asm __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
			/* Load from each address.  Required because alloco is a NOP if
			   the cache is write-through.  Write-through is a config option. */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				*(volatile unsigned char *)(int)eaddr;
		}
	}

	/* Don't use OCBI to invalidate the lines.  That costs cycles directly.
	   If the dummy block is just left resident, it will naturally get
	   evicted as required. */

	return;
}

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
	/* Purge the entire contents of the dcache.
	   The most efficient way to achieve this is to use alloco instructions
	   on a region of unused memory equal in size to the cache, thereby
	   causing the current contents to be discarded by natural eviction.
	   The alternative, namely reading every tag, setting up a mapping for
	   the corresponding page and doing an OCBP for the line, would be much
	   more expensive.
	   */

	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

	return;

}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
	/* Purge the range of addresses [start,end] from the D-cache.  The
	   addresses lie in the superpage mapping.  There's no harm if we
	   overpurge at either end - just a small performance loss. */
	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
	return;
}

/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
   anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
	/* Purge the physical page 'paddr' from the cache.  It's known that any
	   cache lines requiring attention have the same page colour as the
	   address 'eaddr'.

	   This relies on the fact that the D-cache matches on physical tags
	   when no virtual tag matches.  So we create an alias for the original
	   page and purge through that.  (Alternatively, we could have done
	   this by switching ASID to match the original mapping and purged
	   through that, but that involves ASID switching cost + probably a
	   TLBMISS + refill anyway.)
	   */

	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	/* Purge a page given its physical start address, by creating a
	   temporary 1 page mapping and purging across that.  Even if we know
	   the virtual address (& vma or mm) of the page, the method here is
	   more elegant because it avoids issues of coping with page faults on
	   the purge instructions (i.e. no special-case code required in the
	   critical path in the TLB miss handling).
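
	   A physical page may be resident under any of the
	   (1 << CACHE_OC_N_SYNBITS) possible cache colours, so the loop below
	   sets up the temporary mapping and purges the page once per colour.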
	   */

	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	eaddr_start = MAGIC_PAGE0_START;
	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* There are at least 5 choices for the implementation of this, with
	   pros (+), cons(-), comments(*):

	   1. ocbp each line in the range through the original user's ASID
	      + no lines spuriously evicted
	      - tlbmiss handling (must either handle faults on demand => extra
		special-case code in tlbmiss critical path), or map the page in
		advance (=> flush_tlb_range in advance to avoid multiple hits)
	      - ASID switching
	      - expensive for large ranges

	   2. temporarily map each page in the range to a special effective
	      address and ocbp through the temporary mapping; relies on the
	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
	      never look at the etags)
	      + no spurious evictions
	      - expensive for large ranges
	      * surely cheaper than (1)

	   3. walk all the lines in the cache, check the tags, if a match
	      occurs create a page mapping to ocbp the line through
	      + no spurious evictions
	      - tag inspection overhead
	      - (especially for small ranges)
	      - potential cost of setting up/tearing down page mapping for
		every line that matches the range
	      * cost partly independent of range size

	   4. walk all the lines in the cache, check the tags, if a match
	      occurs use 4 * alloco to purge the line (+3 other probably
	      innocent victims) by natural eviction
	      + no tlb mapping overheads
	      - spurious evictions
	      - tag inspection overhead

	   5. implement like flush_cache_all
	      + no tag inspection overhead
	      - spurious evictions
	      - bad for small ranges

	   (1) can be ruled out as more expensive than (2).  (2) appears best
	   for small ranges.  The choice between (3), (4) and (5) for large
	   ranges and the range size for the large/small boundary need
	   benchmarking to determine.

	   For now use approach (2) for small ranges and (5) for large ones.
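
	   (The "small" case below additionally requires the whole range to sit
	   within a single page-table page - hence the test against PMD_MASK -
	   so that sh64_dcache_purge_user_pages() can walk one PTE page under a
	   single pte_offset_map_lock().)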

	   */

	int n_pages;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
		sh64_dcache_purge_all();
#else
		unsigned long long set, way;
		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		for (set = 0; set < cpu_data->dcache.sets; set++) {
			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
			for (way = 0; way < cpu_data->dcache.ways; way++) {
				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				unsigned long long tag0;
				unsigned long line_valid;

				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				line_valid = tag0 & SH_CACHE_VALID;
				if (line_valid) {
					unsigned long cache_asid;
					unsigned long epn;

					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
					/* The next line needs some
					   explanation.  The virtual tags
					   encode bits [31:13] of the virtual
					   address, bit [12] of the 'tag' being
					   implied by the cache set index. */
					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
						/* TODO : could optimise this
						   call by batching multiple
						   adjacent sets together. */
						sh64_dcache_purge_sets(set, 1);
						break; /* Don't waste time inspecting other ways for this set */
					}
				}
			}
		}
#endif
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
	return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just wback over the range using the natural addresses.  TLB miss
	   handling will be OK (TBC) : the range has just been written to by
	   the signal frame setup code, so the PTEs must exist.

	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
	   it doesn't matter, even if the pid->ASID mapping changes whilst
	   we're away.  In that case the cache will have been flushed when the
	   mapping was renewed.  So the writebacks below will be nugatory (and
	   we'll doubtless have to fault the TLB entry/ies in again with the
	   new ASID), but it's a rare case.
	   */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
	/* Given a physical address paddr, and a user virtual address
	   user_eaddr which will eventually be mapped to it, create a one-off
	   kernel-private eaddr mapped to the same paddr.
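	   The chosen eaddr shares its cache colour (the CACHE_OC_SYN_MASK
	   bits) with user_eaddr, so lines written through it land in the same
	   cache sets that the user mapping will later hit.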
	   This is used for creating special destination pages for
	   copy_user_page and clear_user_page. */

	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing cache entries of the wrong colour.  These are
	   often present if the kernel has recently used the page internally,
	   then given it up, and it has then been allocated to the user.
	   */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing kernel-originated lines of the wrong colour (as
	   above) */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
			EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
	/* Invalidate the entire contents of both caches, after writing back to
	   memory any dirty data from the D-cache. */
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
	/* Invalidate an entire user-address space from both caches, after
	   writing back dirty data (e.g. for shared mmap etc). */

	/* This could be coded selectively by inspecting all the tags then
	   doing 4*alloco on any set containing a match (as for
	   flush_cache_range), but fork/exit/execve (where this is called from)
	   are expensive anyway. */

	/* Have to do a purge here, despite the comments re I-cache below.
	   There could be odd-coloured dirty data associated with the mm still
	   in the cache - if this gets written out through natural eviction
	   after the kernel has reused the page there will be chaos.
	   */

	sh64_dcache_purge_all();

	/* The mm being torn down won't ever be active again, so any Icache
	   lines tagged with its ASID won't be visible for the rest of the
	   lifetime of this ASID cycle.  Before the ASID gets reused, there
	   will be a flush_cache_all.  Hence we don't need to touch the
	   I-cache.  This is similar to the lack of action needed in
	   flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Invalidate (from both caches) the range [start,end) of virtual
	   addresses from the user address space specified by mm, after writing
	   back any dirty data.

	   Note, 'end' is 1 byte beyond the end of the range to flush. */

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
	/* Invalidate any entries in either cache for the vma within the user
	   address space vma->vm_mm for the page starting at virtual address
	   'eaddr'.  This seems to be used primarily in breaking COW.  Note,
	   the I-cache must be searched too in case the page in question is
	   both writable and being executed from (e.g. stack trampolines.)

	   Note, this is called with pte lock held.
	   */

	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_page(vma, eaddr);
	}
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
	/* 'from' and 'to' are kernel virtual addresses (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the copy 'to' will be mapped after.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.  Note however : the call to flush_page_to_ram in
	   (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
	   very important case!

	   TBD : can we guarantee that on every call, any cache entries for
	   'from' are in the same colour sets as 'address' also?  i.e. is this
	   always used just to deal with COW?  (I suspect not). */

	/* There are two possibilities here for when the page 'from' was last accessed:
	   * by the kernel : this is OK, no purge required.
	   * by the/a user (e.g. for break_COW) : need to purge.

	   If the potential user mapping at 'address' is the same colour as
	   'from' there is no need to purge any cache lines from the 'from'
	   page mapped into cache sets of colour 'address'.  (The copy will be
	   accessing the page through 'from').
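
	   (Here 'colour' means the CACHE_OC_SYN_MASK bits of a virtual
	   address, i.e. the D-cache index bits that lie above the page offset;
	   two mappings of the same physical page only share cache sets when
	   those bits agree.)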
926 */ 927 928 if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) { 929 sh64_dcache_purge_coloured_phy_page(__pa(from), address); 930 } 931 932 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { 933 /* No synonym problem on destination */ 934 sh64_page_copy(from, to); 935 } else { 936 sh64_copy_user_page_coloured(to, from, address); 937 } 938 939 /* Note, don't need to flush 'from' page from the cache again - it's 940 done anyway by the generic code */ 941} 942 943void clear_user_page(void *to, unsigned long address, struct page *page) 944{ 945 /* 'to' is a kernel virtual address (within the superpage 946 mapping of the physical RAM). 'address' is the user virtual address 947 where the 'to' page will be mapped after. This allows a custom 948 mapping to be used to ensure that the new copy is placed in the 949 right cache sets for the user to see it without having to bounce it 950 out via memory. 951 */ 952 953 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) { 954 /* No synonym problem on destination */ 955 sh64_page_clear(to); 956 } else { 957 sh64_clear_user_page_coloured(to, address); 958 } 959} 960 961#endif /* !CONFIG_DCACHE_DISABLED */ 962 963/****************************************************************************/ 964 965void flush_dcache_page(struct page *page) 966{ 967 sh64_dcache_purge_phy_page(page_to_phys(page)); 968 wmb(); 969} 970 971/****************************************************************************/ 972 973void flush_icache_range(unsigned long start, unsigned long end) 974{ 975 /* Flush the range [start,end] of kernel virtual adddress space from 976 the I-cache. The corresponding range must be purged from the 977 D-cache also because the SH-5 doesn't have cache snooping between 978 the caches. The addresses will be visible through the superpage 979 mapping, therefore it's guaranteed that there no cache entries for 980 the range in cache sets of the wrong colour. 981 982 Primarily used for cohering the I-cache after a module has 983 been loaded. */ 984 985 /* We also make sure to purge the same range from the D-cache since 986 flush_page_to_ram() won't be doing this for us! */ 987 988 sh64_dcache_purge_kernel_range(start, end); 989 wmb(); 990 sh64_icache_inv_kernel_range(start, end); 991} 992 993/****************************************************************************/ 994 995void flush_icache_user_range(struct vm_area_struct *vma, 996 struct page *page, unsigned long addr, int len) 997{ 998 /* Flush the range of user (defined by vma->vm_mm) address space 999 starting at 'addr' for 'len' bytes from the cache. The range does 1000 not straddle a page boundary, the unique physical page containing 1001 the range is 'page'. This seems to be used mainly for invalidating 1002 an address range following a poke into the program text through the 1003 ptrace() call from another process (e.g. for BRK instruction 1004 insertion). */ 1005 1006 sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr); 1007 mb(); 1008 1009 if (vma->vm_flags & VM_EXEC) { 1010 sh64_icache_inv_user_small_range(vma->vm_mm, addr, len); 1011 } 1012} 1013 1014/*########################################################################## 1015 ARCH/SH64 PRIVATE CALLABLE API. 
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* For the address range [start,end), write back the data from the
	   D-cache and invalidate the corresponding region of the I-cache for
	   the current process.  Used to flush signal trampolines on the stack
	   to make them executable. */

	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}