cache-sh5.c revision 60b2249d45d44bd3494d55f5ea4bccd25c7f8281
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
	/*
	 * First, setup some sane values for the I-cache.
	 */
	cpu_data->icache.ways		= 4;
	cpu_data->icache.sets		= 256;
	cpu_data->icache.linesz		= L1_CACHE_BYTES;

	/*
	 * FIXME: This can probably be cleaned up a bit as well.. for example,
	 * do we really need the way shift _and_ the way_step_shift ?? Judging
	 * by the existing code, I would guess no.. is there any valid reason
	 * why we need to be tracking this around?
	 */
	cpu_data->icache.way_shift	= 13;
	cpu_data->icache.entry_shift	= 5;
	cpu_data->icache.set_shift	= 4;
	cpu_data->icache.way_step_shift	= 16;
	cpu_data->icache.asid_shift	= 2;

	/*
	 * way offset = cache size / associativity, so just don't factor in
	 * associativity in the first place..
	 */
	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
					  cpu_data->icache.linesz;

	cpu_data->icache.asid_mask	= 0x3fc;
	cpu_data->icache.idx_mask	= 0x1fe0;
	cpu_data->icache.epn_mask	= 0xffffe000;
	cpu_data->icache.flags		= 0;

	/*
	 * Next, setup some sane values for the D-cache.
	 *
	 * On the SH5, these are pretty consistent with the I-cache settings,
	 * so we just copy over the existing definitions.. these can be fixed
	 * up later, especially if we add runtime CPU probing.
	 *
	 * Though in the meantime it saves us from having to duplicate all of
	 * the above definitions..
	 */
	cpu_data->dcache		= cpu_data->icache;

	/*
	 * Setup any cache-related flags here
	 */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

	/*
	 * We also need to reserve a slot for the D-cache in the DTLB, so we
	 * do this now ..
	 */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	return 0;
}
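
/*
 * A worked example of the geometry set up above (derived from the values
 * written to cpu_data, assuming the usual sh64 value of L1_CACHE_BYTES == 32,
 * which the 128-byte icbi unrolling further down also relies on): 4 ways x
 * 256 sets x 32-byte lines gives a 32KB cache, way_ofs = 256 * 32 = 8192 =
 * 1 << way_shift, and idx_mask 0x1fe0 selects address bits [12:5], i.e. the
 * set index.  A minimal sketch of how an effective address decomposes under
 * this geometry (hypothetical helper, not used by the code below):
 */
#if 0
static inline unsigned int sh64_icache_set_of(unsigned long eaddr)
{
	/* Bits [12:5] of the effective address select the set. */
	return (eaddr & cpu_data->icache.idx_mask) >> cpu_data->icache.entry_shift;
}
#endif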

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.

   */

/*##########################################################################
			       SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deals with mapping and unmapping a
   temporary page into the DTLB slot that has been set aside for our
   exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
	local_irq_save(slot_own_flags);
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_restore(slot_own_flags);
}
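
/*
 * These two helpers must be used as a strictly nested pair: the setup routine
 * disables interrupts (saving the flags in slot_own_flags) and the teardown
 * routine restores them.  A minimal sketch of the intended usage pattern (the
 * real callers are further down, e.g. sh64_dcache_purge_coloured_phy_page()):
 */
#if 0
	sh64_setup_dtlb_cache_slot(eaddr, get_asid(), paddr);
	/* ... issue ocbp/ocbwb through the temporary mapping ... */
	sh64_teardown_dtlb_cache_slot();
#endif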

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned long flags;

	addr=ICCR0;
	flag=ICCR0_ICI;
	data=0;

	/* Make this a critical section for safety (probably not strictly necessary.) */
	local_irq_save(flags);

	/* Without %1 it gets inexplicably wrong */
	asm volatile("getcfg	%3, 0, %0\n\t"
			"or	%0, %2, %0\n\t"
			"putcfg	%3, 0, %0\n\t"
			"synci"
			: "=&r" (data)
			: "0" (data), "r" (flag), "r" (addr));

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	 * the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}
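
/*
 * A note on the cast chains above: with NEFF == 32 the usable effective
 * address space is the sign-extended 32-bit range, so a kernel address with
 * bit 31 set has to be presented to icbi in its sign-extended 64-bit form
 * (e.g. 0xc0000000 becomes 0xffffffffc0000000).  Casting through
 * (signed long) and then (signed long long) performs that sign extension;
 * a direct cast to unsigned long long would zero-extend instead and yield
 * an address outside the mapped range.
 */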

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */

	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little */
		asm __volatile__("icbi %0,  0" : : "r" (addr));
		asm __volatile__("icbi %0, 32" : : "r" (addr));
		asm __volatile__("icbi %0, 64" : : "r" (addr));
		asm __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache, option (2) isn't
	   available because there are no physical tags, so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, the
	   selective code remains worthwhile up to a larger range than it does
	   for the D-cache.  Just assume 64 pages for now as a working
	   figure.
	   */

	int n_pages;

	if (!mm) return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long long flags = 0ULL;

		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start <= vma->vm_end)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}
		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{

	/* Invalidate a small range of user context I-cache, not necessarily
	   page (or even cache-line) aligned. */

	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long long flags;
	unsigned long long epage_start;

	/* Since this is used inside ptrace, the ASID in the mm context
	   typically won't match current_asid.  We'll have to switch ASID to do
	   this.  For safety, and given that the range will be small, do all
	   this under cli.

	   Note, there is a hazard that the ASID in mm->context is no longer
	   actually associated with mm, i.e. if the mm->context has started a
	   new cycle since mm was last active.  However, this is just a
	   performance issue: all that happens is that we invalidate lines
	   belonging to another mm, so the owning process has to refill them
	   when that mm goes live again.  mm itself can't have any cache
	   entries because there will have been a flush_cache_all when the new
	   mm->context cycle started. */

	/* Align to start of cache line.  Otherwise, suppose len==8 and start
	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
	eaddr = start & L1_CACHE_ALIGN_MASK;
	eaddr_end = start + len;

	local_irq_save(flags);
	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end)
	{
		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup. */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries : no worries, just a
	   performance drop for him. */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		asm __volatile__ ("nop");
		asm __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed.  */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j=0; j<n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

		/* Do one alloco which hits the required set per cache way.  For
		   write-back mode, this will purge the #ways resident lines.   There's
		   little point unrolling this loop because the allocos stall more if
		   they're too close together. */
		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			asm __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
			/* Load from each address.  Required because alloco is a NOP if
			   the cache is write-through.  Write-through is a config option. */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				*(volatile unsigned char *)(int)eaddr;
		}
	}

	/* Don't use OCBI to invalidate the lines.  That costs cycles directly.
	   If the dummy block is just left resident, it will naturally get
	   evicted as required.  */

	return;
}
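
/*
 * In concrete terms, with the geometry established in sh64_cache_init()
 * (way_ofs = 8KB, 4 ways), the first inner loop above issues allocos at
 * eaddr0, eaddr0 + 8KB, eaddr0 + 16KB and eaddr0 + 24KB.  All four addresses
 * index the same set, so allocating all four displaces every line currently
 * resident in that set by natural replacement.
 */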

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
	/* Purge the entire contents of the dcache.  The most efficient way to
	   achieve this is to use alloco instructions on a region of unused
	   memory equal in size to the cache, thereby causing the current
	   contents to be discarded by natural eviction.  The alternative,
	   namely reading every tag, setting up a mapping for the corresponding
	   page and doing an OCBP for the line, would be much more expensive.
	   */

	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

	return;

}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
	/* Purge the range of addresses [start,end] from the D-cache.  The
	   addresses lie in the superpage mapping.  There's no harm if we
	   overpurge at either end - just a small performance loss. */
	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
	return;
}

/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
   anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
	/* Purge the physical page 'paddr' from the cache.  It's known that any
	   cache lines requiring attention have the same page colour as the
	   address 'eaddr'.

	   This relies on the fact that the D-cache matches on physical tags
	   when no virtual tag matches.  So we create an alias for the original
	   page and purge through that.  (Alternatively, we could have done
	   this by switching ASID to match the original mapping and purged
	   through that, but that involves ASID switching cost + probably a
	   TLBMISS + refill anyway.)
	   */

	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	/* Purge a page given its physical start address, by creating a
	   temporary 1 page mapping and purging across that.  Even if we know
	   the virtual address (& vma or mm) of the page, the method here is
	   more elegant because it avoids issues of coping with page faults on
	   the purge instructions (i.e. no special-case code required in the
	   critical path in the TLB miss handling). */

	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	eaddr_start = MAGIC_PAGE0_START;
	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}
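
/*
 * The colouring arithmetic in the two functions above, in concrete terms
 * (assuming the usual SH-5 operand cache of 32KB, 4 ways, 32-byte lines with
 * 4KB pages): each 8KB way spans two pages, so there is a single synonym
 * bit, bit [12], and CACHE_OC_SYN_MASK selects it.
 * sh64_dcache_purge_coloured_phy_page() maps the physical page at
 * MAGIC_PAGE0_START + (eaddr & 0x1000) so that the alias lands in the same
 * cache sets as the user's own mapping, while sh64_dcache_purge_phy_page()
 * simply walks all 1 << CACHE_OC_N_SYNBITS (here 2) colours because the
 * colour actually in use isn't known.
 */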

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* There are at least 5 choices for the implementation of this, with
	   pros (+), cons(-), comments(*):

	   1. ocbp each line in the range through the original user's ASID
	      + no lines spuriously evicted
	      - tlbmiss handling (must either handle faults on demand => extra
		special-case code in tlbmiss critical path), or map the page in
		advance (=> flush_tlb_range in advance to avoid multiple hits)
	      - ASID switching
	      - expensive for large ranges

	   2. temporarily map each page in the range to a special effective
	      address and ocbp through the temporary mapping; relies on the
	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
	      never look at the etags)
	      + no spurious evictions
	      - expensive for large ranges
	      * surely cheaper than (1)

	   3. walk all the lines in the cache, check the tags, if a match
	      occurs create a page mapping to ocbp the line through
	      + no spurious evictions
	      - tag inspection overhead
	      - (especially for small ranges)
	      - potential cost of setting up/tearing down page mapping for
		every line that matches the range
	      * cost partly independent of range size

	   4. walk all the lines in the cache, check the tags, if a match
	      occurs use 4 * alloco to purge the line (+3 other probably
	      innocent victims) by natural eviction
	      + no tlb mapping overheads
	      - spurious evictions
	      - tag inspection overhead

	   5. implement like flush_cache_all
	      + no tag inspection overhead
	      - spurious evictions
	      - bad for small ranges

	   (1) can be ruled out as more expensive than (2).  (2) appears best
	   for small ranges.  The choice between (3), (4) and (5) for large
	   ranges and the range size for the large/small boundary need
	   benchmarking to determine.

	   For now use approach (2) for small ranges and (5) for large ones.

	   */

	int n_pages;

	n_pages = ((end - start) >> PAGE_SHIFT);
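	/* The PMD_MASK test below catches ranges that straddle a page-table
	   page even when they contain fewer than 64 pages:
	   sh64_dcache_purge_user_pages() only walks a single PTE page, so
	   such ranges have to take the purge-everything path instead. */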
	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
		sh64_dcache_purge_all();
#else
		unsigned long long set, way;
		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		for (set = 0; set < cpu_data->dcache.sets; set++) {
			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
			for (way = 0; way < cpu_data->dcache.ways; way++) {
				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				unsigned long long tag0;
				unsigned long line_valid;

				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				line_valid = tag0 & SH_CACHE_VALID;
				if (line_valid) {
					unsigned long cache_asid;
					unsigned long epn;

					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
					/* The next line needs some
					   explanation.  The virtual tags
					   encode bits [31:13] of the virtual
					   address, bit [12] of the 'tag' being
					   implied by the cache set index. */
					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
						/* TODO : could optimise this
						   call by batching multiple
						   adjacent sets together. */
						sh64_dcache_purge_sets(set, 1);
						break; /* Don't waste time inspecting other ways for this set */
					}
				}
			}
		}
#endif
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
	return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just wback over the range using the natural addresses.  TLB miss
	   handling will be OK (TBC) : the range has just been written to by
	   the signal frame setup code, so the PTEs must exist.

	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
	   it doesn't matter, even if the pid->ASID mapping changes whilst
	   we're away.  In that case the cache will have been flushed when the
	   mapping was renewed.  So the writebacks below will be nugatory (and
	   we'll doubtless have to fault the TLB entry/ies in again with the
	   new ASID), but it's a rare case.
	   */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
	/* Given a physical address paddr, and a user virtual address
	   user_eaddr which will eventually be mapped to it, create a one-off
	   kernel-private eaddr mapped to the same paddr.  This is used for
	   creating special destination pages for copy_user_page and
	   clear_user_page */

	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}
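
/*
 * Again in concrete terms (with the single synonym bit assumed above):
 * coloured_pointer takes its colour bit from user_eaddr, and current_pointer
 * advances by PAGE_SIZE << CACHE_OC_N_SYNBITS so that successive calls hand
 * out disjoint groups of colour aliases, whichever colour each caller asked
 * for.  Once the window [UNIQUE_EADDR_START, UNIQUE_EADDR_END) is exhausted,
 * the whole D-cache is purged and the pointer wraps, so stale lines from an
 * earlier use of the same alias address can't survive.
 */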

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing cache entries of the wrong colour.  These are
	   present quite often, if the kernel has recently used the page
	   internally, then given it up, then it's been allocated to the user.
	   */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing kernel-originated lines of the wrong colour (as
	   above) */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
			    EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
	/* Invalidate the entire contents of both caches, after writing back to
	   memory any dirty data from the D-cache. */
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
	/* Invalidate an entire user-address space from both caches, after
	   writing back dirty data (e.g. for shared mmap etc). */

	/* This could be coded selectively by inspecting all the tags then
	   doing 4*alloco on any set containing a match (as for
	   flush_cache_range), but fork/exit/execve (where this is called from)
	   are expensive anyway. */

	/* Have to do a purge here, despite the comments re I-cache below.
	   There could be odd-coloured dirty data associated with the mm still
	   in the cache - if this gets written out through natural eviction
	   after the kernel has reused the page there will be chaos.
	   */

	sh64_dcache_purge_all();

	/* The mm being torn down won't ever be active again, so any Icache
	   lines tagged with its ASID won't be visible for the rest of the
	   lifetime of this ASID cycle.  Before the ASID gets reused, there
	   will be a flush_cache_all.  Hence we don't need to touch the
	   I-cache.  This is similar to the lack of action needed in
	   flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Invalidate (from both caches) the range [start,end) of virtual
	   addresses from the user address space specified by mm, after writing
	   back any dirty data.

	   Note, 'end' is 1 byte beyond the end of the range to flush. */

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
	/* Invalidate any entries in either cache for the vma within the user
	   address space vma->vm_mm for the page starting at virtual address
	   'eaddr'.   This seems to be used primarily in breaking COW.  Note,
	   the I-cache must be searched too in case the page in question is
	   both writable and being executed from (e.g. stack trampolines.)

	   Note, this is called with pte lock held.
	   */

	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_page(vma, eaddr);
	}
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
	/* 'from' and 'to' are kernel virtual addresses (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the copy 'to' will be mapped after.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.  Note however : the call to flush_page_to_ram in
	   (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
	   very important case!

	   TBD : can we guarantee that on every call, any cache entries for
	   'from' are in the same colour sets as 'address' also?  i.e. is this
	   always used just to deal with COW?  (I suspect not). */

	/* There are two possibilities here for when the page 'from' was last accessed:
	   * by the kernel : this is OK, no purge required.
	   * by the/a user (e.g. for break_COW) : need to purge.

	   If the potential user mapping at 'address' is the same colour as
	   'from' there is no need to purge any cache lines from the 'from'
	   page mapped into cache sets of colour 'address'.  (The copy will be
	   accessing the page through 'from').
	   */

	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
	}

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_copy(from, to);
	} else {
		sh64_copy_user_page_coloured(to, from, address);
	}

	/* Note, don't need to flush 'from' page from the cache again - it's
	   done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address, struct page *page)
{
	/* 'to' is a kernel virtual address (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the 'to' page will be mapped after.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.
	*/

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_clear(to);
	} else {
		sh64_clear_user_page_coloured(to, address);
	}
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
	/* Flush the range [start,end] of kernel virtual address space from
	   the I-cache.  The corresponding range must be purged from the
	   D-cache also because the SH-5 doesn't have cache snooping between
	   the caches.  The addresses will be visible through the superpage
	   mapping, therefore it's guaranteed that there are no cache entries
	   for the range in cache sets of the wrong colour.

	   Primarily used for cohering the I-cache after a module has
	   been loaded.  */

	/* We also make sure to purge the same range from the D-cache since
	   flush_page_to_ram() won't be doing this for us! */

	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
			struct page *page, unsigned long addr, int len)
{
	/* Flush the range of user (defined by vma->vm_mm) address space
	   starting at 'addr' for 'len' bytes from the cache.  The range does
	   not straddle a page boundary, the unique physical page containing
	   the range is 'page'.  This seems to be used mainly for invalidating
	   an address range following a poke into the program text through the
	   ptrace() call from another process (e.g. for BRK instruction
	   insertion). */

	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
	mb();

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
	}
}

/*##########################################################################
			ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* For the address range [start,end), write back the data from the
	   D-cache and invalidate the corresponding region of the I-cache for
	   the current process.  Used to flush signal trampolines on the stack
	   to make them executable. */

	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}