/*
 * Page fault handler for SH with an MMU.
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (C) 2003 - 2009  Paul Mundt
 *
 * Based on linux/arch/i386/mm/fault.c:
 *  Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
        int ret = 0;

        if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, trap))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}
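/*
 * Copy the kernel mapping for a vmalloc/module address from the
 * reference page table (init_mm.pgd) into the given page table.
 * Returns the matching kernel pmd entry, or NULL if there is nothing
 * to synchronise and the fault has to be handled elsewhere.
 */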
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        pgd += index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k))
                return NULL;

        pud = pud_offset(pgd, address);
        pud_k = pud_offset(pgd_k, address);
        if (!pud_present(*pud_k))
                return NULL;

        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
                return NULL;

        if (!pmd_present(*pmd))
                set_pmd(pmd, *pmd_k);
        else {
                /*
                 * The page tables are fully synchronised so there must
                 * be another reason for the fault. Return NULL here to
                 * signal that we have not taken care of the fault.
                 */
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
                return NULL;
        }

        return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
        pgd_t *pgd_k;
        pmd_t *pmd_k;
        pte_t *pte_k;

        /* Make sure we are in vmalloc/module/P3 area: */
        if (!(address >= P3SEG && address < P3_ADDR_MAX))
                return -1;

        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
         *
         * Do _not_ use "current" here. We might be inside
         * an interrupt in the middle of a task switch..
         */
        pgd_k = get_TTB();
        pmd_k = vmalloc_sync_one(pgd_k, address);
        if (!pmd_k)
                return -1;

        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;

        return 0;
}

static int fault_in_kernel_space(unsigned long address)
{
        return address >= TASK_SIZE;
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
                                        unsigned long writeaccess,
                                        unsigned long address)
{
        unsigned long vec;
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        int si_code;
        int fault;
        siginfo_t info;

        tsk = current;
        mm = tsk->mm;
        si_code = SEGV_MAPERR;
        vec = lookup_exception_vector();

        /*
         * We fault-in kernel-space virtual memory on-demand. The
         * 'reference' page table is init_mm.pgd.
         *
         * NOTE! We MUST NOT take any locks for this case. We may
         * be in an interrupt or a critical region, and should
         * only copy the information from the master page table,
         * nothing more.
         */
        if (unlikely(fault_in_kernel_space(address))) {
                if (vmalloc_fault(address) >= 0)
                        return;
                if (notify_page_fault(regs, vec))
                        return;

                goto bad_area_nosemaphore;
        }

        if (unlikely(notify_page_fault(regs, vec)))
                return;

        /* Only enable interrupts if they were on before the fault */
        if ((regs->sr & SR_IMASK) != SR_IMASK)
                local_irq_enable();

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

        /*
         * If we're in an interrupt, have no user context or are running
         * in an atomic region then we must not take the fault:
         */
        if (in_atomic() || !mm)
                goto no_context;

        down_read(&mm->mmap_sem);

        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
        if (vma->vm_start <= address)
                goto good_area;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (expand_stack(vma, address))
                goto bad_area;

        /*
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
good_area:
        si_code = SEGV_ACCERR;
        if (writeaccess) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                        goto bad_area;
        }

        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto do_sigbus;
                BUG();
        }
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                              regs, address);
        } else {
                tsk->min_flt++;
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                              regs, address);
        }

        up_read(&mm->mmap_sem);
        return;

        /*
         * Something tried to access memory that isn't in our memory map..
         * Fix it, but check if it's kernel or user first..
         */
bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        if (user_mode(regs)) {
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                info.si_code = si_code;
                info.si_addr = (void *)address;
                force_sig_info(SIGSEGV, &info, tsk);
                return;
        }

no_context:
        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;

        if (handle_trapped_io(regs, address))
                return;
/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

        bust_spinlocks(1);

        if (oops_may_print()) {
                unsigned long page;

                if (address < PAGE_SIZE)
                        printk(KERN_ALERT "Unable to handle kernel NULL "
                                          "pointer dereference");
                else
                        printk(KERN_ALERT "Unable to handle kernel paging "
                                          "request");
                printk(" at virtual address %08lx\n", address);
                printk(KERN_ALERT "pc = %08lx\n", regs->pc);
                page = (unsigned long)get_TTB();
                if (page) {
                        page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
                        printk(KERN_ALERT "*pde = %08lx\n", page);
                        if (page & _PAGE_PRESENT) {
                                page &= PAGE_MASK;
                                address &= 0x003ff000;
                                page = ((__typeof__(page) *)
                                                __va(page))[address >>
                                                            PAGE_SHIFT];
                                printk(KERN_ALERT "*pte = %08lx\n", page);
                        }
                }
        }

        die("Oops", regs, writeaccess);
        bust_spinlocks(0);
        do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
        up_read(&mm->mmap_sem);
        if (!user_mode(regs))
                goto no_context;
        pagefault_out_of_memory();
        return;

do_sigbus:
        up_read(&mm->mmap_sem);

        /*
         * Send a sigbus, regardless of whether we were in kernel
         * or user mode.
         */
        info.si_signo = SIGBUS;
        info.si_errno = 0;
        info.si_code = BUS_ADRERR;
        info.si_addr = (void *)address;
        force_sig_info(SIGBUS, &info, tsk);

        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs))
                goto no_context;
}

/*
 * Called with interrupts disabled.
 */
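/*
 * Fast path for TLB misses: walk the page tables for the faulting
 * address and, if a suitable PTE is present, mark it young (and dirty
 * for a write) and load it. Returns 0 if the miss was handled here,
 * non-zero if the fault has to be resolved by the full page fault
 * handler instead.
 */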
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
               unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        pte_t entry;

        /*
         * We don't take page faults for P1, P2, and parts of P4, these
         * are always mapped, whether it be due to legacy behaviour in
         * 29-bit mode, or due to PMB configuration in 32-bit mode.
         */
        if (address >= P3SEG && address < P3_ADDR_MAX) {
                pgd = pgd_offset_k(address);
        } else {
                if (unlikely(address >= TASK_SIZE || !current->mm))
                        return 1;

                pgd = pgd_offset(current->mm, address);
        }

        pud = pud_offset(pgd, address);
        if (pud_none_or_clear_bad(pud))
                return 1;
        pmd = pmd_offset(pud, address);
        if (pmd_none_or_clear_bad(pmd))
                return 1;
        pte = pte_offset_kernel(pmd, address);
        entry = *pte;
        if (unlikely(pte_none(entry) || pte_not_present(entry)))
                return 1;
        if (unlikely(writeaccess && !pte_write(entry)))
                return 1;

        if (writeaccess)
                entry = pte_mkdirty(entry);
        entry = pte_mkyoung(entry);

        set_pte(pte, entry);

#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
        /*
         * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
         * the case of an initial page write exception, so we need to
         * flush it in order to avoid potential TLB entry duplication.
         */
        if (writeaccess == 2)
                local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif

        update_mmu_cache(NULL, address, pte);

        return 0;
}