1/* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Copyright (C) 1995 - 2000 by Ralf Baechle 7 */ 8#include <linux/context_tracking.h> 9#include <linux/signal.h> 10#include <linux/sched.h> 11#include <linux/interrupt.h> 12#include <linux/kernel.h> 13#include <linux/errno.h> 14#include <linux/string.h> 15#include <linux/types.h> 16#include <linux/ptrace.h> 17#include <linux/mman.h> 18#include <linux/mm.h> 19#include <linux/smp.h> 20#include <linux/module.h> 21#include <linux/kprobes.h> 22#include <linux/perf_event.h> 23 24#include <asm/branch.h> 25#include <asm/mmu_context.h> 26#include <asm/uaccess.h> 27#include <asm/ptrace.h> 28#include <asm/highmem.h> /* For VMALLOC_END */ 29#include <linux/kdebug.h> 30 31/* 32 * This routine handles page faults. It determines the address, 33 * and the problem, and then passes it off to one of the appropriate 34 * routines. 35 */ 36static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, 37 unsigned long address) 38{ 39 struct vm_area_struct * vma = NULL; 40 struct task_struct *tsk = current; 41 struct mm_struct *mm = tsk->mm; 42 const int field = sizeof(unsigned long) * 2; 43 siginfo_t info; 44 int fault; 45 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 46 47#if 0 48 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), 49 current->comm, current->pid, field, address, write, 50 field, regs->cp0_epc); 51#endif 52 53#ifdef CONFIG_KPROBES 54 /* 55 * This is to notify the fault handler of the kprobes. The 56 * exception code is redundant as it is also carried in REGS, 57 * but we pass it anyhow. 58 */ 59 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, 60 (regs->cp0_cause >> 2) & 0x1f, SIGSEGV) == NOTIFY_STOP) 61 return; 62#endif 63 64 info.si_code = SEGV_MAPERR; 65 66 /* 67 * We fault-in kernel-space virtual memory on-demand. The 68 * 'reference' page table is init_mm.pgd. 69 * 70 * NOTE! We MUST NOT take any locks for this case. We may 71 * be in an interrupt or a critical region, and should 72 * only copy the information from the master page table, 73 * nothing more. 74 */ 75#ifdef CONFIG_64BIT 76# define VMALLOC_FAULT_TARGET no_context 77#else 78# define VMALLOC_FAULT_TARGET vmalloc_fault 79#endif 80 81 if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) 82 goto VMALLOC_FAULT_TARGET; 83#ifdef MODULE_START 84 if (unlikely(address >= MODULE_START && address < MODULE_END)) 85 goto VMALLOC_FAULT_TARGET; 86#endif 87 88 /* 89 * If we're in an interrupt or have no user 90 * context, we must not take the fault.. 91 */ 92 if (in_atomic() || !mm) 93 goto bad_area_nosemaphore; 94 95 if (user_mode(regs)) 96 flags |= FAULT_FLAG_USER; 97retry: 98 down_read(&mm->mmap_sem); 99 vma = find_vma(mm, address); 100 if (!vma) 101 goto bad_area; 102 if (vma->vm_start <= address) 103 goto good_area; 104 if (!(vma->vm_flags & VM_GROWSDOWN)) 105 goto bad_area; 106 if (expand_stack(vma, address)) 107 goto bad_area; 108/* 109 * Ok, we have a good vm_area for this memory access, so 110 * we can handle it.. 111 */ 112good_area: 113 info.si_code = SEGV_ACCERR; 114 115 if (write) { 116 if (!(vma->vm_flags & VM_WRITE)) 117 goto bad_area; 118 flags |= FAULT_FLAG_WRITE; 119 } else { 120 if (cpu_has_rixi) { 121 if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { 122#if 0 123 pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n", 124 raw_smp_processor_id(), 125 current->comm, current->pid, 126 field, address, write, 127 field, regs->cp0_epc); 128#endif 129 goto bad_area; 130 } 131 if (!(vma->vm_flags & VM_READ)) { 132#if 0 133 pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n", 134 raw_smp_processor_id(), 135 current->comm, current->pid, 136 field, address, write, 137 field, regs->cp0_epc); 138#endif 139 goto bad_area; 140 } 141 } else { 142 if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) 143 goto bad_area; 144 } 145 } 146 147 /* 148 * If for any reason at all we couldn't handle the fault, 149 * make sure we exit gracefully rather than endlessly redo 150 * the fault. 151 */ 152 fault = handle_mm_fault(mm, vma, address, flags); 153 154 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) 155 return; 156 157 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 158 if (unlikely(fault & VM_FAULT_ERROR)) { 159 if (fault & VM_FAULT_OOM) 160 goto out_of_memory; 161 else if (fault & VM_FAULT_SIGBUS) 162 goto do_sigbus; 163 BUG(); 164 } 165 if (flags & FAULT_FLAG_ALLOW_RETRY) { 166 if (fault & VM_FAULT_MAJOR) { 167 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 168 regs, address); 169 tsk->maj_flt++; 170 } else { 171 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 172 regs, address); 173 tsk->min_flt++; 174 } 175 if (fault & VM_FAULT_RETRY) { 176 flags &= ~FAULT_FLAG_ALLOW_RETRY; 177 flags |= FAULT_FLAG_TRIED; 178 179 /* 180 * No need to up_read(&mm->mmap_sem) as we would 181 * have already released it in __lock_page_or_retry 182 * in mm/filemap.c. 183 */ 184 185 goto retry; 186 } 187 } 188 189 up_read(&mm->mmap_sem); 190 return; 191 192/* 193 * Something tried to access memory that isn't in our memory map.. 194 * Fix it, but check if it's kernel or user first.. 195 */ 196bad_area: 197 up_read(&mm->mmap_sem); 198 199bad_area_nosemaphore: 200 /* User mode accesses just cause a SIGSEGV */ 201 if (user_mode(regs)) { 202 tsk->thread.cp0_badvaddr = address; 203 tsk->thread.error_code = write; 204#if 0 205 printk("do_page_fault() #2: sending SIGSEGV to %s for " 206 "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", 207 tsk->comm, 208 write ? "write access to" : "read access from", 209 field, address, 210 field, (unsigned long) regs->cp0_epc, 211 field, (unsigned long) regs->regs[31]); 212#endif 213 info.si_signo = SIGSEGV; 214 info.si_errno = 0; 215 /* info.si_code has been set above */ 216 info.si_addr = (void __user *) address; 217 force_sig_info(SIGSEGV, &info, tsk); 218 return; 219 } 220 221no_context: 222 /* Are we prepared to handle this kernel fault? */ 223 if (fixup_exception(regs)) { 224 current->thread.cp0_baduaddr = address; 225 return; 226 } 227 228 /* 229 * Oops. The kernel tried to access some bad page. We'll have to 230 * terminate things with extreme prejudice. 231 */ 232 bust_spinlocks(1); 233 234 printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " 235 "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", 236 raw_smp_processor_id(), field, address, field, regs->cp0_epc, 237 field, regs->regs[31]); 238 die("Oops", regs); 239 240out_of_memory: 241 /* 242 * We ran out of memory, call the OOM killer, and return the userspace 243 * (which will retry the fault, or kill us if we got oom-killed). 244 */ 245 up_read(&mm->mmap_sem); 246 if (!user_mode(regs)) 247 goto no_context; 248 pagefault_out_of_memory(); 249 return; 250 251do_sigbus: 252 up_read(&mm->mmap_sem); 253 254 /* Kernel mode? Handle exceptions or die */ 255 if (!user_mode(regs)) 256 goto no_context; 257 else 258 /* 259 * Send a sigbus, regardless of whether we were in kernel 260 * or user mode. 261 */ 262#if 0 263 printk("do_page_fault() #3: sending SIGBUS to %s for " 264 "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", 265 tsk->comm, 266 write ? "write access to" : "read access from", 267 field, address, 268 field, (unsigned long) regs->cp0_epc, 269 field, (unsigned long) regs->regs[31]); 270#endif 271 tsk->thread.cp0_badvaddr = address; 272 info.si_signo = SIGBUS; 273 info.si_errno = 0; 274 info.si_code = BUS_ADRERR; 275 info.si_addr = (void __user *) address; 276 force_sig_info(SIGBUS, &info, tsk); 277 278 return; 279#ifndef CONFIG_64BIT 280vmalloc_fault: 281 { 282 /* 283 * Synchronize this task's top level page-table 284 * with the 'reference' page table. 285 * 286 * Do _not_ use "tsk" here. We might be inside 287 * an interrupt in the middle of a task switch.. 288 */ 289 int offset = __pgd_offset(address); 290 pgd_t *pgd, *pgd_k; 291 pud_t *pud, *pud_k; 292 pmd_t *pmd, *pmd_k; 293 pte_t *pte_k; 294 295 pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset; 296 pgd_k = init_mm.pgd + offset; 297 298 if (!pgd_present(*pgd_k)) 299 goto no_context; 300 set_pgd(pgd, *pgd_k); 301 302 pud = pud_offset(pgd, address); 303 pud_k = pud_offset(pgd_k, address); 304 if (!pud_present(*pud_k)) 305 goto no_context; 306 307 pmd = pmd_offset(pud, address); 308 pmd_k = pmd_offset(pud_k, address); 309 if (!pmd_present(*pmd_k)) 310 goto no_context; 311 set_pmd(pmd, *pmd_k); 312 313 pte_k = pte_offset_kernel(pmd_k, address); 314 if (!pte_present(*pte_k)) 315 goto no_context; 316 return; 317 } 318#endif 319} 320 321asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, 322 unsigned long write, unsigned long address) 323{ 324 enum ctx_state prev_state; 325 326 prev_state = exception_enter(); 327 __do_page_fault(regs, write, address); 328 exception_exit(prev_state); 329} 330