kprobes.c revision 49ad2fd76c97133fb396edc24ded7fe26093a578
/*
 *  Kernel Probes (KProbes)
 *  kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes suggestions from
 *		Rusty Russell).
 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
 *		hlists and exceptions notifier as suggested by Andi Kleen.
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
 *		exceptions notifier to be first on the priority list.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */
#include <linux/kprobes.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/kallsyms.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kdebug.h>

#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/uaccess.h>

#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)


/*
 * Some oddball architectures like 64bit powerpc have function descriptors
 * so this must be overridable.
 */
#ifndef kprobe_lookup_name
#define kprobe_lookup_name(name, addr) \
	addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
#endif
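/*
 * On such architectures the address kallsyms returns for a symbol points
 * at a function descriptor rather than at the first instruction, so the
 * arch override must resolve the descriptor to the real entry point,
 * conceptually along these lines (illustrative sketch only, not the
 * actual powerpc macro, which also handles the "." symbol convention):
 *
 *	#define kprobe_lookup_name(name, addr)				\
 *	do {								\
 *		void *desc = (void *)kallsyms_lookup_name(name);	\
 *		addr = desc ? *(kprobe_opcode_t **)desc : NULL;		\
 *	} while (0)
 */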
static int kprobes_initialized;
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with kprobe_mutex held */
static bool kprobe_enabled;

static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
	spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];

static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
{
	return &(kretprobe_table_locks[hash].lock);
}

/*
 * Normally, functions that we'd want to prohibit kprobes in are marked
 * __kprobes. But there are cases where such functions already belong to
 * a different section (__sched for preempt_schedule).
 *
 * For such cases, we now have a blacklist.
 */
static struct kprobe_blackpoint kprobe_blacklist[] = {
	{"preempt_schedule",},
	{NULL}	/* Terminator */
};

#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * kprobe->ainsn.insn points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster.
 */
#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))

struct kprobe_insn_page {
	struct hlist_node hlist;
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	char slot_used[INSNS_PER_PAGE];
	int nused;
	int ngarbage;
};

enum kprobe_slot_state {
	SLOT_CLEAN = 0,
	SLOT_DIRTY = 1,
	SLOT_USED = 2,
};

static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
static struct hlist_head kprobe_insn_pages;
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);

static int __kprobes check_safety(void)
{
	int ret = 0;
#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
	ret = freeze_processes();
	if (ret == 0) {
		struct task_struct *p, *q;
		do_each_thread(p, q) {
			if (p != current && p->state == TASK_RUNNING &&
			    p->pid != 0) {
				printk("Check failed: %s is running\n", p->comm);
				ret = -1;
				goto loop_end;
			}
		} while_each_thread(p, q);
	}
loop_end:
	thaw_processes();
#else
	synchronize_sched();
#endif
	return ret;
}

/**
 * __get_insn_slot() - Find a slot on an executable page for an instruction.
 * We allocate an executable page if there's no room on existing ones.
 */
static kprobe_opcode_t __kprobes *__get_insn_slot(void)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos;

 retry:
	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
		if (kip->nused < INSNS_PER_PAGE) {
			int i;
			for (i = 0; i < INSNS_PER_PAGE; i++) {
				if (kip->slot_used[i] == SLOT_CLEAN) {
					kip->slot_used[i] = SLOT_USED;
					kip->nused++;
					return kip->insns + (i * MAX_INSN_SIZE);
				}
			}
			/* Surprise!  No unused slots.  Fix kip->nused. */
			kip->nused = INSNS_PER_PAGE;
		}
	}

	/* If there are any garbage slots, collect them and try again. */
	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
		goto retry;
	}
	/* All out of space.  Need to allocate a new page. Use slot 0. */
	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
	if (!kip)
		return NULL;

	/*
	 * Use module_alloc so this page is within +/- 2GB of where the
	 * kernel image and loaded module images reside. This is required
	 * so x86_64 can correctly handle the %rip-relative fixups.
	 */
	kip->insns = module_alloc(PAGE_SIZE);
	if (!kip->insns) {
		kfree(kip);
		return NULL;
	}
	INIT_HLIST_NODE(&kip->hlist);
	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
	memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
	kip->slot_used[0] = SLOT_USED;
	kip->nused = 1;
	kip->ngarbage = 0;
	return kip->insns;
}

kprobe_opcode_t __kprobes *get_insn_slot(void)
{
	kprobe_opcode_t *ret;
	mutex_lock(&kprobe_insn_mutex);
	ret = __get_insn_slot();
	mutex_unlock(&kprobe_insn_mutex);
	return ret;
}
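/*
 * A slot's lifecycle is CLEAN -> USED (get_insn_slot) -> DIRTY
 * (free_insn_slot with dirty set, meaning some preempted task may still
 * be single-stepping on the copy) -> CLEAN again, once
 * collect_garbage_slots() has verified via check_safety() that freeing
 * is safe.  Typical arch-side usage looks roughly like this
 * (illustrative sketch modelled on the x86 code of this era; pass 1 as
 * the dirty argument if the slot may still be live):
 *
 *	int __kprobes arch_prepare_kprobe(struct kprobe *p)
 *	{
 *		p->ainsn.insn = get_insn_slot();
 *		if (!p->ainsn.insn)
 *			return -ENOMEM;
 *		memcpy(p->ainsn.insn, p->addr,
 *		       MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
 *		return 0;
 *	}
 *
 *	void __kprobes arch_remove_kprobe(struct kprobe *p)
 *	{
 *		if (p->ainsn.insn) {
 *			free_insn_slot(p->ainsn.insn, 0);
 *			p->ainsn.insn = NULL;
 *		}
 *	}
 */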
/* Return 1 if all garbage slots on the page were collected, otherwise 0. */
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
	kip->slot_used[idx] = SLOT_CLEAN;
	kip->nused--;
	if (kip->nused == 0) {
		/*
		 * Page is no longer in use.  Free it unless
		 * it's the last one.  We keep the last one
		 * so as not to have to set it up again the
		 * next time somebody inserts a probe.
		 */
		hlist_del(&kip->hlist);
		if (hlist_empty(&kprobe_insn_pages)) {
			INIT_HLIST_NODE(&kip->hlist);
			hlist_add_head(&kip->hlist,
				       &kprobe_insn_pages);
		} else {
			module_free(NULL, kip->insns);
			kfree(kip);
		}
		return 1;
	}
	return 0;
}

static int __kprobes collect_garbage_slots(void)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos, *next;
	int safety;

	/* Ensure no one is preempted on the garbage slots */
	mutex_unlock(&kprobe_insn_mutex);
	safety = check_safety();
	mutex_lock(&kprobe_insn_mutex);
	if (safety != 0)
		return -EAGAIN;

	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
		int i;
		if (kip->ngarbage == 0)
			continue;
		kip->ngarbage = 0;	/* we will collect all garbage slots */
		for (i = 0; i < INSNS_PER_PAGE; i++) {
			if (kip->slot_used[i] == SLOT_DIRTY &&
			    collect_one_slot(kip, i))
				break;
		}
	}
	kprobe_garbage_slots = 0;
	return 0;
}

void __kprobes free_insn_slot(kprobe_opcode_t *slot, int dirty)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos;

	mutex_lock(&kprobe_insn_mutex);
	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
		if (kip->insns <= slot &&
		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
			int i = (slot - kip->insns) / MAX_INSN_SIZE;
			if (dirty) {
				kip->slot_used[i] = SLOT_DIRTY;
				kip->ngarbage++;
			} else {
				collect_one_slot(kip, i);
			}
			break;
		}
	}

	if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
		collect_garbage_slots();

	mutex_unlock(&kprobe_insn_mutex);
}
#endif

/* We have preemption disabled.. so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	__get_cpu_var(kprobe_instance) = kp;
}

static inline void reset_kprobe_instance(void)
{
	__get_cpu_var(kprobe_instance) = NULL;
}

/*
 * This routine is called either:
 *	- under the kprobe_mutex - during kprobe_[un]register()
 *				OR
 *	- with preemption disabled - from arch/xxx/kernel/kprobes.c
 */
struct kprobe __kprobes *get_kprobe(void *addr)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;

	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(p, node, head, hlist) {
		if (p->addr == addr)
			return p;
	}
	return NULL;
}
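/*
 * The "preemption disabled" caller above is the arch breakpoint handler;
 * the calling pattern there looks roughly like this (illustrative,
 * condensed x86-flavoured sketch):
 *
 *	kprobe_opcode_t *addr;
 *	struct kprobe *p;
 *
 *	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
 *	preempt_disable();
 *	p = get_kprobe(addr);	// lockless RCU hlist walk
 *	if (p && p->pre_handler && p->pre_handler(p, regs))
 *		return;		// handler took over
 */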
/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list
 */
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && !kprobe_gone(kp)) {
			set_kprobe_instance(kp);
			if (kp->pre_handler(kp, regs))
				return 1;
		}
		reset_kprobe_instance();
	}
	return 0;
}

static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
					unsigned long flags)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->post_handler && !kprobe_gone(kp)) {
			set_kprobe_instance(kp);
			kp->post_handler(kp, regs, flags);
			reset_kprobe_instance();
		}
	}
}

static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
					int trapnr)
{
	struct kprobe *cur = __get_cpu_var(kprobe_instance);

	/*
	 * if we faulted "during" the execution of a user specified
	 * probe handler, invoke just that probe's fault handler
	 */
	if (cur && cur->fault_handler) {
		if (cur->fault_handler(cur, regs, trapnr))
			return 1;
	}
	return 0;
}

static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *cur = __get_cpu_var(kprobe_instance);
	int ret = 0;

	if (cur && cur->break_handler) {
		if (cur->break_handler(cur, regs))
			ret = 1;
	}
	reset_kprobe_instance();
	return ret;
}

/* Walks the list and increments nmissed count for multiprobe case */
void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
{
	struct kprobe *kp;
	if (p->pre_handler != aggr_pre_handler) {
		p->nmissed++;
	} else {
		list_for_each_entry_rcu(kp, &p->list, list)
			kp->nmissed++;
	}
	return;
}

void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
			       struct hlist_head *head)
{
	struct kretprobe *rp = ri->rp;

	/* remove rp inst off the kretprobe_inst_table */
	hlist_del(&ri->hlist);
	INIT_HLIST_NODE(&ri->hlist);
	if (likely(rp)) {
		spin_lock(&rp->lock);
		hlist_add_head(&ri->hlist, &rp->free_instances);
		spin_unlock(&rp->lock);
	} else
		/* Unregistering */
		hlist_add_head(&ri->hlist, head);
}

void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
				   struct hlist_head **head,
				   unsigned long *flags)
{
	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
	spinlock_t *hlist_lock;

	*head = &kretprobe_inst_table[hash];
	hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_lock_irqsave(hlist_lock, *flags);
}

static void __kprobes kretprobe_table_lock(unsigned long hash,
					   unsigned long *flags)
{
	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_lock_irqsave(hlist_lock, *flags);
}

void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
				     unsigned long *flags)
{
	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
	spinlock_t *hlist_lock;

	hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_unlock_irqrestore(hlist_lock, *flags);
}

void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
{
	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_unlock_irqrestore(hlist_lock, *flags);
}
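/*
 * These lock/unlock pairs are what the arch return-trampoline handler is
 * expected to use when draining the current task's instances, roughly as
 * follows (illustrative sketch, condensed from arch trampoline code):
 *
 *	struct kretprobe_instance *ri;
 *	struct hlist_head *head, empty_rp;
 *	struct hlist_node *node, *tmp;
 *	unsigned long flags;
 *
 *	INIT_HLIST_HEAD(&empty_rp);
 *	kretprobe_hash_lock(current, &head, &flags);
 *	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 *		if (ri->task != current)
 *			continue;
 *		if (ri->rp && ri->rp->handler)
 *			ri->rp->handler(ri, regs);
 *		recycle_rp_inst(ri, &empty_rp);
 *	}
 *	kretprobe_hash_unlock(current, &flags);
 *	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 *		hlist_del(&ri->hlist);
 *		kfree(ri);
 *	}
 */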
/*
 * This function is called from finish_task_switch when task tk becomes dead,
 * so that we can recycle any function-return probe instances associated
 * with this task. These left over instances represent probed functions
 * that have been called but will never return.
 */
void __kprobes kprobe_flush_task(struct task_struct *tk)
{
	struct kretprobe_instance *ri;
	struct hlist_head *head, empty_rp;
	struct hlist_node *node, *tmp;
	unsigned long hash, flags = 0;

	if (unlikely(!kprobes_initialized))
		/* Early boot.  kretprobe_table_locks not yet initialized. */
		return;

	/* empty_rp must be initialized before recycle_rp_inst() chains onto it */
	INIT_HLIST_HEAD(&empty_rp);
	hash = hash_ptr(tk, KPROBE_HASH_BITS);
	head = &kretprobe_inst_table[hash];
	kretprobe_table_lock(hash, &flags);
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
		if (ri->task == tk)
			recycle_rp_inst(ri, &empty_rp);
	}
	kretprobe_table_unlock(hash, &flags);
	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}
}

static inline void free_rp_inst(struct kretprobe *rp)
{
	struct kretprobe_instance *ri;
	struct hlist_node *pos, *next;

	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}
}

static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
{
	unsigned long flags, hash;
	struct kretprobe_instance *ri;
	struct hlist_node *pos, *next;
	struct hlist_head *head;

	/* No race here */
	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
		kretprobe_table_lock(hash, &flags);
		head = &kretprobe_inst_table[hash];
		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
			if (ri->rp == rp)
				ri->rp = NULL;
		}
		kretprobe_table_unlock(hash, &flags);
	}
	free_rp_inst(rp);
}

/*
 * Keep all fields in the kprobe consistent
 */
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
{
	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
}

/*
 * Add the new probe to old_p->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist.
 */
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
	if (p->break_handler) {
		if (old_p->break_handler)
			return -EEXIST;
		list_add_tail_rcu(&p->list, &old_p->list);
		old_p->break_handler = aggr_break_handler;
	} else
		list_add_rcu(&p->list, &old_p->list);
	if (p->post_handler && !old_p->post_handler)
		old_p->post_handler = aggr_post_handler;
	return 0;
}

/*
 * Fill in the required fields of the "manager kprobe". Replace the
 * earlier kprobe in the hlist with the manager kprobe.
 */
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
	copy_kprobe(p, ap);
	flush_insn_slot(ap);
	ap->addr = p->addr;
	ap->pre_handler = aggr_pre_handler;
	ap->fault_handler = aggr_fault_handler;
	/* We don't care about a kprobe which has gone. */
	if (p->post_handler && !kprobe_gone(p))
		ap->post_handler = aggr_post_handler;
	if (p->break_handler && !kprobe_gone(p))
		ap->break_handler = aggr_break_handler;

	INIT_LIST_HEAD(&ap->list);
	list_add_rcu(&p->list, &ap->list);

	hlist_replace_rcu(&p->hlist, &ap->hlist);
}
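/*
 * After aggregation the hash bucket points at the manager kprobe instead
 * of the user's kprobe, and every user kprobe at that address hangs off
 * the manager's ->list:
 *
 *	kprobe_table[hash] --> ap (pre_handler == aggr_pre_handler)
 *	                           ap->list: p1 <-> p2 <-> ...
 *
 * The aggr_*_handler functions above then fan out to p1, p2, ... on
 * each hit.
 */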
/*
 * This is the second or subsequent kprobe at the address - handle
 * the intricacies
 */
static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
					  struct kprobe *p)
{
	int ret = 0;
	struct kprobe *ap;

	if (kprobe_gone(old_p)) {
		/*
		 * Attempting to insert a new probe at the same location as
		 * a probe in a module vaddr area which has already been
		 * freed. The instruction slot has therefore been released,
		 * and we need a new slot for the new probe.
		 */
		ret = arch_prepare_kprobe(old_p);
		if (ret)
			return ret;
	}
	if (old_p->pre_handler == aggr_pre_handler) {
		copy_kprobe(old_p, p);
		ret = add_new_kprobe(old_p, p);
		ap = old_p;
	} else {
		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
		if (!ap) {
			if (kprobe_gone(old_p))
				arch_remove_kprobe(old_p);
			return -ENOMEM;
		}
		add_aggr_kprobe(ap, old_p);
		copy_kprobe(ap, p);
		ret = add_new_kprobe(ap, p);
	}
	if (kprobe_gone(old_p)) {
		/*
		 * If old_p has gone, its breakpoint has been disarmed.
		 * We have to arm it again after preparing real kprobes.
		 */
		ap->flags &= ~KPROBE_FLAG_GONE;
		if (kprobe_enabled)
			arch_arm_kprobe(ap);
	}
	return ret;
}

static int __kprobes in_kprobes_functions(unsigned long addr)
{
	struct kprobe_blackpoint *kb;

	if (addr >= (unsigned long)__kprobes_text_start &&
	    addr < (unsigned long)__kprobes_text_end)
		return -EINVAL;
	/*
	 * If there exists a kprobe_blacklist, verify and
	 * fail any probe registration in the prohibited area
	 */
	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
		if (kb->start_addr) {
			if (addr >= kb->start_addr &&
			    addr < (kb->start_addr + kb->range))
				return -EINVAL;
		}
	}
	return 0;
}

/*
 * If we have a symbol_name argument, look it up and add the offset field
 * to it. This way, we can specify a relative address to a symbol.
 */
static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
{
	kprobe_opcode_t *addr = p->addr;
	if (p->symbol_name) {
		if (addr)
			return NULL;
		kprobe_lookup_name(p->symbol_name, addr);
	}

	if (!addr)
		return NULL;
	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}

int __kprobes register_kprobe(struct kprobe *p)
{
	int ret = 0;
	struct kprobe *old_p;
	struct module *probed_mod;
	kprobe_opcode_t *addr;

	addr = kprobe_addr(p);
	if (!addr)
		return -EINVAL;
	p->addr = addr;

	preempt_disable();
	if (!__kernel_text_address((unsigned long) p->addr) ||
	    in_kprobes_functions((unsigned long) p->addr)) {
		preempt_enable();
		return -EINVAL;
	}

	p->flags = 0;
	/*
	 * Check if we are probing a module.
	 */
	probed_mod = __module_text_address((unsigned long) p->addr);
	if (probed_mod) {
		/*
		 * We must hold a refcount of the probed module while updating
		 * its code to prohibit unexpected unloading.
		 */
		if (unlikely(!try_module_get(probed_mod))) {
			preempt_enable();
			return -EINVAL;
		}
	}
	preempt_enable();

	p->nmissed = 0;
	INIT_LIST_HEAD(&p->list);
	mutex_lock(&kprobe_mutex);
	old_p = get_kprobe(p->addr);
	if (old_p) {
		ret = register_aggr_kprobe(old_p, p);
		goto out;
	}

	ret = arch_prepare_kprobe(p);
	if (ret)
		goto out;

	INIT_HLIST_NODE(&p->hlist);
	hlist_add_head_rcu(&p->hlist,
			   &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	if (kprobe_enabled)
		arch_arm_kprobe(p);

out:
	mutex_unlock(&kprobe_mutex);

	if (probed_mod)
		module_put(probed_mod);

	return ret;
}
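/*
 * A minimal module-side usage sketch (not part of this file; the handler
 * name and the probed symbol "do_fork" are illustrative only):
 *
 *	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "pre_handler: p->addr = %p\n", p->addr);
 *		return 0;	// 0 means: go on and single-step the copy
 *	}
 *
 *	static struct kprobe kp = {
 *		.symbol_name	= "do_fork",
 *		.pre_handler	= handler_pre,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return register_kprobe(&kp);
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		unregister_kprobe(&kp);
 *	}
 */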
/*
 * Unregister a kprobe without a scheduler synchronization.
 */
static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
	struct kprobe *old_p, *list_p;

	old_p = get_kprobe(p->addr);
	if (unlikely(!old_p))
		return -EINVAL;

	if (p != old_p) {
		list_for_each_entry_rcu(list_p, &old_p->list, list)
			if (list_p == p)
			/* kprobe p is a valid probe */
				goto valid_p;
		return -EINVAL;
	}
valid_p:
	if (old_p == p ||
	    (old_p->pre_handler == aggr_pre_handler &&
	     list_is_singular(&old_p->list))) {
		/*
		 * Only probe on the hash list. Disarm only if kprobes are
		 * enabled and not gone - otherwise, the breakpoint would
		 * already have been removed. We save on flushing icache.
		 */
		if (kprobe_enabled && !kprobe_gone(old_p))
			arch_disarm_kprobe(p);
		hlist_del_rcu(&old_p->hlist);
	} else {
		if (p->break_handler && !kprobe_gone(p))
			old_p->break_handler = NULL;
		if (p->post_handler && !kprobe_gone(p)) {
			list_for_each_entry_rcu(list_p, &old_p->list, list) {
				if ((list_p != p) && (list_p->post_handler))
					goto noclean;
			}
			old_p->post_handler = NULL;
		}
noclean:
		list_del_rcu(&p->list);
	}
	return 0;
}

static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
{
	struct kprobe *old_p;

	if (list_empty(&p->list))
		arch_remove_kprobe(p);
	else if (list_is_singular(&p->list)) {
		/* "p" is the last child of an aggr_kprobe */
		old_p = list_entry(p->list.next, struct kprobe, list);
		list_del(&p->list);
		arch_remove_kprobe(old_p);
		kfree(old_p);
	}
}

int __kprobes register_kprobes(struct kprobe **kps, int num)
{
	int i, ret = 0;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		ret = register_kprobe(kps[i]);
		if (ret < 0) {
			if (i > 0)
				unregister_kprobes(kps, i);
			break;
		}
	}
	return ret;
}

void __kprobes unregister_kprobe(struct kprobe *p)
{
	unregister_kprobes(&p, 1);
}

void __kprobes unregister_kprobes(struct kprobe **kps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(kps[i]) < 0)
			kps[i]->addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_sched();
	for (i = 0; i < num; i++)
		if (kps[i]->addr)
			__unregister_kprobe_bottom(kps[i]);
}

static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};

unsigned long __weak arch_deref_entry_point(void *entry)
{
	return (unsigned long)entry;
}

int __kprobes register_jprobes(struct jprobe **jps, int num)
{
	struct jprobe *jp;
	int ret = 0, i;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		unsigned long addr;
		jp = jps[i];
		addr = arch_deref_entry_point(jp->entry);

		if (!kernel_text_address(addr))
			ret = -EINVAL;
		else {
			/* TODO: Verify probepoint is a function entry point */
			jp->kp.pre_handler = setjmp_pre_handler;
			jp->kp.break_handler = longjmp_break_handler;
			ret = register_kprobe(&jp->kp);
		}
		if (ret < 0) {
			if (i > 0)
				unregister_jprobes(jps, i);
			break;
		}
	}
	return ret;
}

int __kprobes register_jprobe(struct jprobe *jp)
{
	return register_jprobes(&jp, 1);
}
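/*
 * Jprobe usage sketch (illustrative, modelled on the jprobe sample of
 * this era): the handler mirrors the probed function's signature, sees
 * its arguments, and must finish with jprobe_return():
 *
 *	static long jdo_fork(unsigned long clone_flags,
 *			     unsigned long stack_start, struct pt_regs *regs,
 *			     unsigned long stack_size,
 *			     int __user *parent_tidptr,
 *			     int __user *child_tidptr)
 *	{
 *		printk(KERN_INFO "jprobe: clone_flags = 0x%lx\n", clone_flags);
 *		jprobe_return();	// never returns past this point
 *		return 0;
 *	}
 *
 *	static struct jprobe my_jprobe = {
 *		.entry = jdo_fork,
 *		.kp = {
 *			.symbol_name = "do_fork",
 *		},
 *	};
 *
 *	ret = register_jprobe(&my_jprobe);
 */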
void __kprobes unregister_jprobe(struct jprobe *jp)
{
	unregister_jprobes(&jp, 1);
}

void __kprobes unregister_jprobes(struct jprobe **jps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
			jps[i]->kp.addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_sched();
	for (i = 0; i < num; i++) {
		if (jps[i]->kp.addr)
			__unregister_kprobe_bottom(&jps[i]->kp);
	}
}

#ifdef CONFIG_KRETPROBES
/*
 * This kprobe pre_handler is registered with every kretprobe. When a
 * probe hits, it will set up the return probe.
 */
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
					   struct pt_regs *regs)
{
	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
	unsigned long hash, flags = 0;
	struct kretprobe_instance *ri;

	/* TODO: consider swapping the RA only after the last pre_handler fired */
	hash = hash_ptr(current, KPROBE_HASH_BITS);
	spin_lock_irqsave(&rp->lock, flags);
	if (!hlist_empty(&rp->free_instances)) {
		ri = hlist_entry(rp->free_instances.first,
				 struct kretprobe_instance, hlist);
		hlist_del(&ri->hlist);
		spin_unlock_irqrestore(&rp->lock, flags);

		ri->rp = rp;
		ri->task = current;

		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
			/*
			 * The entry handler vetoed this hit; the lock was
			 * already dropped above, so re-take it and hand the
			 * instance straight back to the free list.
			 */
			spin_lock_irqsave(&rp->lock, flags);
			hlist_add_head(&ri->hlist, &rp->free_instances);
			spin_unlock_irqrestore(&rp->lock, flags);
			return 0;
		}

		arch_prepare_kretprobe(ri, regs);

		/* XXX(hch): why is there no hlist_move_head? */
		INIT_HLIST_NODE(&ri->hlist);
		kretprobe_table_lock(hash, &flags);
		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
		kretprobe_table_unlock(hash, &flags);
	} else {
		rp->nmissed++;
		spin_unlock_irqrestore(&rp->lock, flags);
	}
	return 0;
}

int __kprobes register_kretprobe(struct kretprobe *rp)
{
	int ret = 0;
	struct kretprobe_instance *inst;
	int i;
	void *addr;

	if (kretprobe_blacklist_size) {
		addr = kprobe_addr(&rp->kp);
		if (!addr)
			return -EINVAL;

		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			if (kretprobe_blacklist[i].addr == addr)
				return -EINVAL;
		}
	}

	rp->kp.pre_handler = pre_handler_kretprobe;
	rp->kp.post_handler = NULL;
	rp->kp.fault_handler = NULL;
	rp->kp.break_handler = NULL;

	/* Pre-allocate memory for max kretprobe instances */
	if (rp->maxactive <= 0) {
#ifdef CONFIG_PREEMPT
		rp->maxactive = max(10, 2 * NR_CPUS);
#else
		rp->maxactive = NR_CPUS;
#endif
	}
	spin_lock_init(&rp->lock);
	INIT_HLIST_HEAD(&rp->free_instances);
	for (i = 0; i < rp->maxactive; i++) {
		inst = kmalloc(sizeof(struct kretprobe_instance) +
			       rp->data_size, GFP_KERNEL);
		if (inst == NULL) {
			free_rp_inst(rp);
			return -ENOMEM;
		}
		INIT_HLIST_NODE(&inst->hlist);
		hlist_add_head(&inst->hlist, &rp->free_instances);
	}

	rp->nmissed = 0;
	/* Establish function entry probe point */
	ret = register_kprobe(&rp->kp);
	if (ret != 0)
		free_rp_inst(rp);
	return ret;
}

int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
	int ret = 0, i;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		ret = register_kretprobe(rps[i]);
		if (ret < 0) {
			if (i > 0)
				unregister_kretprobes(rps, i);
			break;
		}
	}
	return ret;
}
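/*
 * Kretprobe usage sketch (illustrative; "sys_open" is just an example
 * target):
 *
 *	static int ret_handler(struct kretprobe_instance *ri,
 *			       struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "%s returned %ld\n",
 *		       ri->rp->kp.symbol_name,
 *		       (long)regs_return_value(regs));
 *		return 0;
 *	}
 *
 *	static struct kretprobe my_kretprobe = {
 *		.handler	= ret_handler,
 *		.maxactive	= 20,	// concurrent probed calls to allow
 *		.kp = {
 *			.symbol_name = "sys_open",
 *		},
 *	};
 *
 *	ret = register_kretprobe(&my_kretprobe);
 */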
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
	unregister_kretprobes(&rp, 1);
}

void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
			rps[i]->kp.addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_sched();
	for (i = 0; i < num; i++) {
		if (rps[i]->kp.addr) {
			__unregister_kprobe_bottom(&rps[i]->kp);
			cleanup_rp_inst(rps[i]);
		}
	}
}

#else /* CONFIG_KRETPROBES */
int __kprobes register_kretprobe(struct kretprobe *rp)
{
	return -ENOSYS;
}

int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
	return -ENOSYS;
}

void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
}

void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
}

static int __kprobes pre_handler_kretprobe(struct kprobe *p,
					   struct pt_regs *regs)
{
	return 0;
}

#endif /* CONFIG_KRETPROBES */
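/*
 * With CONFIG_KRETPROBES disabled, the registration stubs above fail
 * cleanly with -ENOSYS, so callers can degrade gracefully, e.g.:
 *
 *	ret = register_kretprobe(&my_kretprobe);
 *	if (ret == -ENOSYS)
 *		printk(KERN_INFO "kretprobes not supported here\n");
 */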
/* Set the kprobe gone and remove its instruction buffer. */
static void __kprobes kill_kprobe(struct kprobe *p)
{
	struct kprobe *kp;
	p->flags |= KPROBE_FLAG_GONE;
	if (p->pre_handler == aggr_pre_handler) {
		/*
		 * If this is an aggr_kprobe, we have to list all the
		 * chained probes and mark them GONE.
		 */
		list_for_each_entry_rcu(kp, &p->list, list)
			kp->flags |= KPROBE_FLAG_GONE;
		p->post_handler = NULL;
		p->break_handler = NULL;
	}
	/*
	 * Here, we can remove insn_slot safely, because no thread calls
	 * the original probed function (which will be freed soon) any more.
	 */
	arch_remove_kprobe(p);
}

/* Module notifier call back, checking kprobes on the module */
static int __kprobes kprobes_module_callback(struct notifier_block *nb,
					     unsigned long val, void *data)
{
	struct module *mod = data;
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;
	unsigned int i;

	if (val != MODULE_STATE_GOING)
		return NOTIFY_DONE;

	/*
	 * The module's .text section will be freed. We need to disable
	 * the kprobes which have been inserted in that section.
	 */
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, node, head, hlist)
			if (within_module_core((unsigned long)p->addr, mod)) {
				/*
				 * The vaddr at which this probe is installed
				 * will soon be vfreed, but not synced to
				 * disk. Hence, disarming the breakpoint
				 * isn't needed.
				 */
				kill_kprobe(p);
			}
	}
	mutex_unlock(&kprobe_mutex);
	return NOTIFY_DONE;
}

static struct notifier_block kprobe_module_nb = {
	.notifier_call = kprobes_module_callback,
	.priority = 0
};

static int __init init_kprobes(void)
{
	int i, err = 0;
	unsigned long offset = 0, size = 0;
	char *modname, namebuf[128];
	const char *symbol_name;
	void *addr;
	struct kprobe_blackpoint *kb;

	/* FIXME allocate the probe table, currently defined statically */
	/* initialize all list heads */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		INIT_HLIST_HEAD(&kprobe_table[i]);
		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
		spin_lock_init(&(kretprobe_table_locks[i].lock));
	}

	/*
	 * Lookup and populate the kprobe_blacklist.
	 *
	 * Unlike the kretprobe blacklist, we'll need to determine
	 * the range of addresses that belong to the said functions,
	 * since a kprobe need not necessarily be at the beginning
	 * of a function.
	 */
	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
		kprobe_lookup_name(kb->name, addr);
		if (!addr)
			continue;

		kb->start_addr = (unsigned long)addr;
		symbol_name = kallsyms_lookup(kb->start_addr,
				&size, &offset, &modname, namebuf);
		if (!symbol_name)
			kb->range = 0;
		else
			kb->range = size;
	}

	if (kretprobe_blacklist_size) {
		/* lookup the function address from its name */
		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			kprobe_lookup_name(kretprobe_blacklist[i].name,
					   kretprobe_blacklist[i].addr);
			if (!kretprobe_blacklist[i].addr)
				printk("kretprobe: lookup failed: %s\n",
				       kretprobe_blacklist[i].name);
		}
	}

	/* By default, kprobes are enabled */
	kprobe_enabled = true;

	err = arch_init_kprobes();
	if (!err)
		err = register_die_notifier(&kprobe_exceptions_nb);
	if (!err)
		err = register_module_notifier(&kprobe_module_nb);

	kprobes_initialized = (err == 0);

	if (!err)
		init_test_probes();
	return err;
}

#ifdef CONFIG_DEBUG_FS
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
		const char *sym, int offset, char *modname)
{
	char *kprobe_type;

	if (p->pre_handler == pre_handler_kretprobe)
		kprobe_type = "r";
	else if (p->pre_handler == setjmp_pre_handler)
		kprobe_type = "j";
	else
		kprobe_type = "k";
	if (sym)
		seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type,
			sym, offset, (modname ? modname : " "),
			(kprobe_gone(p) ? "[GONE]" : ""));
	else
		seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr,
			(kprobe_gone(p) ? "[GONE]" : ""));
}
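/*
 * With the format strings above, lines in /sys/kernel/debug/kprobes/list
 * come out like this (addresses and symbols illustrative):
 *
 *	c015d71a k vfs_read+0x0
 *	c011a316 j do_fork+0x0
 *	c03dedc5 r tcp_v4_rcv+0x0
 *
 * First column: probed address; second: k = kprobe, j = jprobe,
 * r = kretprobe; then symbol+offset, the module name if any, and
 * "[GONE]" if the probed code has been unloaded.
 */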
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
{
	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
}

static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
	(*pos)++;
	if (*pos >= KPROBE_TABLE_SIZE)
		return NULL;
	return pos;
}

static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}

static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p, *kp;
	const char *sym = NULL;
	unsigned int i = *(loff_t *) v;
	unsigned long offset = 0;
	char *modname, namebuf[128];

	head = &kprobe_table[i];
	preempt_disable();
	hlist_for_each_entry_rcu(p, node, head, hlist) {
		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
					&offset, &modname, namebuf);
		if (p->pre_handler == aggr_pre_handler) {
			list_for_each_entry_rcu(kp, &p->list, list)
				report_probe(pi, kp, sym, offset, modname);
		} else
			report_probe(pi, p, sym, offset, modname);
	}
	preempt_enable();
	return 0;
}

static struct seq_operations kprobes_seq_ops = {
	.start = kprobe_seq_start,
	.next  = kprobe_seq_next,
	.stop  = kprobe_seq_stop,
	.show  = show_kprobe_addr
};

static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
{
	return seq_open(filp, &kprobes_seq_ops);
}

static struct file_operations debugfs_kprobes_operations = {
	.open		= kprobes_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void __kprobes enable_all_kprobes(void)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);

	/* If kprobes are already enabled, just return */
	if (kprobe_enabled)
		goto already_enabled;

	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, node, head, hlist)
			if (!kprobe_gone(p))
				arch_arm_kprobe(p);
	}

	kprobe_enabled = true;
	printk(KERN_INFO "Kprobes globally enabled\n");

already_enabled:
	mutex_unlock(&kprobe_mutex);
	return;
}

static void __kprobes disable_all_kprobes(void)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);

	/* If kprobes are already disabled, just return */
	if (!kprobe_enabled)
		goto already_disabled;

	kprobe_enabled = false;
	printk(KERN_INFO "Kprobes globally disabled\n");
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, node, head, hlist) {
			if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
				arch_disarm_kprobe(p);
		}
	}

	mutex_unlock(&kprobe_mutex);
	/* Allow all currently running kprobes to complete */
	synchronize_sched();
	return;

already_disabled:
	mutex_unlock(&kprobe_mutex);
	return;
}
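/*
 * These two are driven from debugfs; assuming debugfs is mounted at
 * /sys/kernel/debug, all probes can be switched off and back on with:
 *
 *	echo 0 > /sys/kernel/debug/kprobes/enabled
 *	echo 1 > /sys/kernel/debug/kprobes/enabled
 */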
/*
 * XXX: The debugfs bool file interface doesn't allow for callbacks
 * when the bool state is switched. We can reuse that facility when
 * available.
 */
static ssize_t read_enabled_file_bool(struct file *file,
	       char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[3];

	if (kprobe_enabled)
		buf[0] = '1';
	else
		buf[0] = '0';
	buf[1] = '\n';
	buf[2] = 0x00;
	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}

static ssize_t write_enabled_file_bool(struct file *file,
	       const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	int buf_size;

	buf_size = min(count, (sizeof(buf)-1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	switch (buf[0]) {
	case 'y':
	case 'Y':
	case '1':
		enable_all_kprobes();
		break;
	case 'n':
	case 'N':
	case '0':
		disable_all_kprobes();
		break;
	}

	return count;
}

static struct file_operations fops_kp = {
	.read =		read_enabled_file_bool,
	.write =	write_enabled_file_bool,
};

static int __kprobes debugfs_kprobe_init(void)
{
	struct dentry *dir, *file;
	unsigned int value = 1;

	dir = debugfs_create_dir("kprobes", NULL);
	if (!dir)
		return -ENOMEM;

	file = debugfs_create_file("list", 0444, dir, NULL,
				   &debugfs_kprobes_operations);
	if (!file) {
		debugfs_remove(dir);
		return -ENOMEM;
	}

	file = debugfs_create_file("enabled", 0600, dir,
				   &value, &fops_kp);
	if (!file) {
		debugfs_remove(dir);
		return -ENOMEM;
	}

	return 0;
}

late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */

module_init(init_kprobes);

EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_kprobes);
EXPORT_SYMBOL_GPL(unregister_kprobes);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
EXPORT_SYMBOL_GPL(jprobe_return);
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);