kprobes.c revision 5a4ccaf37ffece09ef33f1cfec67efa8ee56f967
1/* 2 * Kernel Probes (KProbes) 3 * kernel/kprobes.c 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * 19 * Copyright (C) IBM Corporation, 2002, 2004 20 * 21 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel 22 * Probes initial implementation (includes suggestions from 23 * Rusty Russell). 24 * 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with 25 * hlists and exceptions notifier as suggested by Andi Kleen. 26 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes 27 * interface to access function arguments. 28 * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes 29 * exceptions notifier to be first on the priority list. 30 * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston 31 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi 32 * <prasanna@in.ibm.com> added function-return probes. 
33 */ 34#include <linux/kprobes.h> 35#include <linux/hash.h> 36#include <linux/init.h> 37#include <linux/slab.h> 38#include <linux/stddef.h> 39#include <linux/module.h> 40#include <linux/moduleloader.h> 41#include <linux/kallsyms.h> 42#include <linux/freezer.h> 43#include <linux/seq_file.h> 44#include <linux/debugfs.h> 45#include <linux/kdebug.h> 46 47#include <asm-generic/sections.h> 48#include <asm/cacheflush.h> 49#include <asm/errno.h> 50#include <asm/uaccess.h> 51 52#define KPROBE_HASH_BITS 6 53#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) 54 55 56/* 57 * Some oddball architectures like 64bit powerpc have function descriptors 58 * so this must be overridable. 59 */ 60#ifndef kprobe_lookup_name 61#define kprobe_lookup_name(name, addr) \ 62 addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) 63#endif 64 65static int kprobes_initialized; 66static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 67static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 68 69/* NOTE: change this value only with kprobe_mutex held */ 70static bool kprobe_enabled; 71 72static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 74static struct { 75 spinlock_t lock ____cacheline_aligned_in_smp; 76} kretprobe_table_locks[KPROBE_TABLE_SIZE]; 77 78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) 79{ 80 return &(kretprobe_table_locks[hash].lock); 81} 82 83/* 84 * Normally, functions that we'd want to prohibit kprobes in, are marked 85 * __kprobes. But, there are cases where such functions already belong to 86 * a different section (__sched for preempt_schedule) 87 * 88 * For such cases, we now have a blacklist 89 */ 90static struct kprobe_blackpoint kprobe_blacklist[] = { 91 {"preempt_schedule",}, 92 {NULL} /* Terminator */ 93}; 94 95#ifdef __ARCH_WANT_KPROBES_INSN_SLOT 96/* 97 * kprobe->ainsn.insn points to the copy of the instruction to be 98 * single-stepped. 
x86_64, POWER4 and above have no-exec support and 99 * stepping on the instruction on a vmalloced/kmalloced/data page 100 * is a recipe for disaster 101 */ 102#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) 103 104struct kprobe_insn_page { 105 struct hlist_node hlist; 106 kprobe_opcode_t *insns; /* Page of instruction slots */ 107 char slot_used[INSNS_PER_PAGE]; 108 int nused; 109 int ngarbage; 110}; 111 112enum kprobe_slot_state { 113 SLOT_CLEAN = 0, 114 SLOT_DIRTY = 1, 115 SLOT_USED = 2, 116}; 117 118static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */ 119static struct hlist_head kprobe_insn_pages; 120static int kprobe_garbage_slots; 121static int collect_garbage_slots(void); 122 123static int __kprobes check_safety(void) 124{ 125 int ret = 0; 126#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) 127 ret = freeze_processes(); 128 if (ret == 0) { 129 struct task_struct *p, *q; 130 do_each_thread(p, q) { 131 if (p != current && p->state == TASK_RUNNING && 132 p->pid != 0) { 133 printk("Check failed: %s is running\n",p->comm); 134 ret = -1; 135 goto loop_end; 136 } 137 } while_each_thread(p, q); 138 } 139loop_end: 140 thaw_processes(); 141#else 142 synchronize_sched(); 143#endif 144 return ret; 145} 146 147/** 148 * __get_insn_slot() - Find a slot on an executable page for an instruction. 149 * We allocate an executable page if there's no room on existing ones. 150 */ 151static kprobe_opcode_t __kprobes *__get_insn_slot(void) 152{ 153 struct kprobe_insn_page *kip; 154 struct hlist_node *pos; 155 156 retry: 157 hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { 158 if (kip->nused < INSNS_PER_PAGE) { 159 int i; 160 for (i = 0; i < INSNS_PER_PAGE; i++) { 161 if (kip->slot_used[i] == SLOT_CLEAN) { 162 kip->slot_used[i] = SLOT_USED; 163 kip->nused++; 164 return kip->insns + (i * MAX_INSN_SIZE); 165 } 166 } 167 /* Surprise! No unused slots. Fix kip->nused. 
*/ 168 kip->nused = INSNS_PER_PAGE; 169 } 170 } 171 172 /* If there are any garbage slots, collect it and try again. */ 173 if (kprobe_garbage_slots && collect_garbage_slots() == 0) { 174 goto retry; 175 } 176 /* All out of space. Need to allocate a new page. Use slot 0. */ 177 kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); 178 if (!kip) 179 return NULL; 180 181 /* 182 * Use module_alloc so this page is within +/- 2GB of where the 183 * kernel image and loaded module images reside. This is required 184 * so x86_64 can correctly handle the %rip-relative fixups. 185 */ 186 kip->insns = module_alloc(PAGE_SIZE); 187 if (!kip->insns) { 188 kfree(kip); 189 return NULL; 190 } 191 INIT_HLIST_NODE(&kip->hlist); 192 hlist_add_head(&kip->hlist, &kprobe_insn_pages); 193 memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE); 194 kip->slot_used[0] = SLOT_USED; 195 kip->nused = 1; 196 kip->ngarbage = 0; 197 return kip->insns; 198} 199 200kprobe_opcode_t __kprobes *get_insn_slot(void) 201{ 202 kprobe_opcode_t *ret; 203 mutex_lock(&kprobe_insn_mutex); 204 ret = __get_insn_slot(); 205 mutex_unlock(&kprobe_insn_mutex); 206 return ret; 207} 208 209/* Return 1 if all garbages are collected, otherwise 0. */ 210static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) 211{ 212 kip->slot_used[idx] = SLOT_CLEAN; 213 kip->nused--; 214 if (kip->nused == 0) { 215 /* 216 * Page is no longer in use. Free it unless 217 * it's the last one. We keep the last one 218 * so as not to have to set it up again the 219 * next time somebody inserts a probe. 
220 */ 221 hlist_del(&kip->hlist); 222 if (hlist_empty(&kprobe_insn_pages)) { 223 INIT_HLIST_NODE(&kip->hlist); 224 hlist_add_head(&kip->hlist, 225 &kprobe_insn_pages); 226 } else { 227 module_free(NULL, kip->insns); 228 kfree(kip); 229 } 230 return 1; 231 } 232 return 0; 233} 234 235static int __kprobes collect_garbage_slots(void) 236{ 237 struct kprobe_insn_page *kip; 238 struct hlist_node *pos, *next; 239 int safety; 240 241 /* Ensure no-one is preepmted on the garbages */ 242 mutex_unlock(&kprobe_insn_mutex); 243 safety = check_safety(); 244 mutex_lock(&kprobe_insn_mutex); 245 if (safety != 0) 246 return -EAGAIN; 247 248 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { 249 int i; 250 if (kip->ngarbage == 0) 251 continue; 252 kip->ngarbage = 0; /* we will collect all garbages */ 253 for (i = 0; i < INSNS_PER_PAGE; i++) { 254 if (kip->slot_used[i] == SLOT_DIRTY && 255 collect_one_slot(kip, i)) 256 break; 257 } 258 } 259 kprobe_garbage_slots = 0; 260 return 0; 261} 262 263void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) 264{ 265 struct kprobe_insn_page *kip; 266 struct hlist_node *pos; 267 268 mutex_lock(&kprobe_insn_mutex); 269 hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { 270 if (kip->insns <= slot && 271 slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { 272 int i = (slot - kip->insns) / MAX_INSN_SIZE; 273 if (dirty) { 274 kip->slot_used[i] = SLOT_DIRTY; 275 kip->ngarbage++; 276 } else { 277 collect_one_slot(kip, i); 278 } 279 break; 280 } 281 } 282 283 if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE) 284 collect_garbage_slots(); 285 286 mutex_unlock(&kprobe_insn_mutex); 287} 288#endif 289 290/* We have preemption disabled.. 
so it is safe to use __ versions */ 291static inline void set_kprobe_instance(struct kprobe *kp) 292{ 293 __get_cpu_var(kprobe_instance) = kp; 294} 295 296static inline void reset_kprobe_instance(void) 297{ 298 __get_cpu_var(kprobe_instance) = NULL; 299} 300 301/* 302 * This routine is called either: 303 * - under the kprobe_mutex - during kprobe_[un]register() 304 * OR 305 * - with preemption disabled - from arch/xxx/kernel/kprobes.c 306 */ 307struct kprobe __kprobes *get_kprobe(void *addr) 308{ 309 struct hlist_head *head; 310 struct hlist_node *node; 311 struct kprobe *p; 312 313 head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; 314 hlist_for_each_entry_rcu(p, node, head, hlist) { 315 if (p->addr == addr) 316 return p; 317 } 318 return NULL; 319} 320 321/* 322 * Aggregate handlers for multiple kprobes support - these handlers 323 * take care of invoking the individual kprobe handlers on p->list 324 */ 325static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 326{ 327 struct kprobe *kp; 328 329 list_for_each_entry_rcu(kp, &p->list, list) { 330 if (kp->pre_handler && !kprobe_gone(kp)) { 331 set_kprobe_instance(kp); 332 if (kp->pre_handler(kp, regs)) 333 return 1; 334 } 335 reset_kprobe_instance(); 336 } 337 return 0; 338} 339 340static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 341 unsigned long flags) 342{ 343 struct kprobe *kp; 344 345 list_for_each_entry_rcu(kp, &p->list, list) { 346 if (kp->post_handler && !kprobe_gone(kp)) { 347 set_kprobe_instance(kp); 348 kp->post_handler(kp, regs, flags); 349 reset_kprobe_instance(); 350 } 351 } 352} 353 354static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 355 int trapnr) 356{ 357 struct kprobe *cur = __get_cpu_var(kprobe_instance); 358 359 /* 360 * if we faulted "during" the execution of a user specified 361 * probe handler, invoke just that probe's fault handler 362 */ 363 if (cur && cur->fault_handler) { 364 if 
(cur->fault_handler(cur, regs, trapnr)) 365 return 1; 366 } 367 return 0; 368} 369 370static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 371{ 372 struct kprobe *cur = __get_cpu_var(kprobe_instance); 373 int ret = 0; 374 375 if (cur && cur->break_handler) { 376 if (cur->break_handler(cur, regs)) 377 ret = 1; 378 } 379 reset_kprobe_instance(); 380 return ret; 381} 382 383/* Walks the list and increments nmissed count for multiprobe case */ 384void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) 385{ 386 struct kprobe *kp; 387 if (p->pre_handler != aggr_pre_handler) { 388 p->nmissed++; 389 } else { 390 list_for_each_entry_rcu(kp, &p->list, list) 391 kp->nmissed++; 392 } 393 return; 394} 395 396void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, 397 struct hlist_head *head) 398{ 399 struct kretprobe *rp = ri->rp; 400 401 /* remove rp inst off the rprobe_inst_table */ 402 hlist_del(&ri->hlist); 403 INIT_HLIST_NODE(&ri->hlist); 404 if (likely(rp)) { 405 spin_lock(&rp->lock); 406 hlist_add_head(&ri->hlist, &rp->free_instances); 407 spin_unlock(&rp->lock); 408 } else 409 /* Unregistering */ 410 hlist_add_head(&ri->hlist, head); 411} 412 413void __kprobes kretprobe_hash_lock(struct task_struct *tsk, 414 struct hlist_head **head, unsigned long *flags) 415{ 416 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 417 spinlock_t *hlist_lock; 418 419 *head = &kretprobe_inst_table[hash]; 420 hlist_lock = kretprobe_table_lock_ptr(hash); 421 spin_lock_irqsave(hlist_lock, *flags); 422} 423 424static void __kprobes kretprobe_table_lock(unsigned long hash, 425 unsigned long *flags) 426{ 427 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 428 spin_lock_irqsave(hlist_lock, *flags); 429} 430 431void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, 432 unsigned long *flags) 433{ 434 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); 435 spinlock_t *hlist_lock; 436 437 hlist_lock = kretprobe_table_lock_ptr(hash); 438 
spin_unlock_irqrestore(hlist_lock, *flags); 439} 440 441void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) 442{ 443 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 444 spin_unlock_irqrestore(hlist_lock, *flags); 445} 446 447/* 448 * This function is called from finish_task_switch when task tk becomes dead, 449 * so that we can recycle any function-return probe instances associated 450 * with this task. These left over instances represent probed functions 451 * that have been called but will never return. 452 */ 453void __kprobes kprobe_flush_task(struct task_struct *tk) 454{ 455 struct kretprobe_instance *ri; 456 struct hlist_head *head, empty_rp; 457 struct hlist_node *node, *tmp; 458 unsigned long hash, flags = 0; 459 460 if (unlikely(!kprobes_initialized)) 461 /* Early boot. kretprobe_table_locks not yet initialized. */ 462 return; 463 464 hash = hash_ptr(tk, KPROBE_HASH_BITS); 465 head = &kretprobe_inst_table[hash]; 466 kretprobe_table_lock(hash, &flags); 467 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 468 if (ri->task == tk) 469 recycle_rp_inst(ri, &empty_rp); 470 } 471 kretprobe_table_unlock(hash, &flags); 472 INIT_HLIST_HEAD(&empty_rp); 473 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 474 hlist_del(&ri->hlist); 475 kfree(ri); 476 } 477} 478 479static inline void free_rp_inst(struct kretprobe *rp) 480{ 481 struct kretprobe_instance *ri; 482 struct hlist_node *pos, *next; 483 484 hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { 485 hlist_del(&ri->hlist); 486 kfree(ri); 487 } 488} 489 490static void __kprobes cleanup_rp_inst(struct kretprobe *rp) 491{ 492 unsigned long flags, hash; 493 struct kretprobe_instance *ri; 494 struct hlist_node *pos, *next; 495 struct hlist_head *head; 496 497 /* No race here */ 498 for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { 499 kretprobe_table_lock(hash, &flags); 500 head = &kretprobe_inst_table[hash]; 501 hlist_for_each_entry_safe(ri, 
pos, next, head, hlist) { 502 if (ri->rp == rp) 503 ri->rp = NULL; 504 } 505 kretprobe_table_unlock(hash, &flags); 506 } 507 free_rp_inst(rp); 508} 509 510/* 511 * Keep all fields in the kprobe consistent 512 */ 513static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) 514{ 515 memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); 516 memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); 517} 518 519/* 520* Add the new probe to old_p->list. Fail if this is the 521* second jprobe at the address - two jprobes can't coexist 522*/ 523static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 524{ 525 if (p->break_handler) { 526 if (old_p->break_handler) 527 return -EEXIST; 528 list_add_tail_rcu(&p->list, &old_p->list); 529 old_p->break_handler = aggr_break_handler; 530 } else 531 list_add_rcu(&p->list, &old_p->list); 532 if (p->post_handler && !old_p->post_handler) 533 old_p->post_handler = aggr_post_handler; 534 return 0; 535} 536 537/* 538 * Fill in the required fields of the "manager kprobe". Replace the 539 * earlier kprobe in the hlist with the manager kprobe 540 */ 541static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) 542{ 543 copy_kprobe(p, ap); 544 flush_insn_slot(ap); 545 ap->addr = p->addr; 546 ap->pre_handler = aggr_pre_handler; 547 ap->fault_handler = aggr_fault_handler; 548 /* We don't care the kprobe which has gone. 
*/ 549 if (p->post_handler && !kprobe_gone(p)) 550 ap->post_handler = aggr_post_handler; 551 if (p->break_handler && !kprobe_gone(p)) 552 ap->break_handler = aggr_break_handler; 553 554 INIT_LIST_HEAD(&ap->list); 555 list_add_rcu(&p->list, &ap->list); 556 557 hlist_replace_rcu(&p->hlist, &ap->hlist); 558} 559 560/* 561 * This is the second or subsequent kprobe at the address - handle 562 * the intricacies 563 */ 564static int __kprobes register_aggr_kprobe(struct kprobe *old_p, 565 struct kprobe *p) 566{ 567 int ret = 0; 568 struct kprobe *ap; 569 570 if (kprobe_gone(old_p)) { 571 /* 572 * Attempting to insert new probe at the same location that 573 * had a probe in the module vaddr area which already 574 * freed. So, the instruction slot has already been 575 * released. We need a new slot for the new probe. 576 */ 577 ret = arch_prepare_kprobe(old_p); 578 if (ret) 579 return ret; 580 } 581 if (old_p->pre_handler == aggr_pre_handler) { 582 copy_kprobe(old_p, p); 583 ret = add_new_kprobe(old_p, p); 584 ap = old_p; 585 } else { 586 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); 587 if (!ap) { 588 if (kprobe_gone(old_p)) 589 arch_remove_kprobe(old_p); 590 return -ENOMEM; 591 } 592 add_aggr_kprobe(ap, old_p); 593 copy_kprobe(ap, p); 594 ret = add_new_kprobe(ap, p); 595 } 596 if (kprobe_gone(old_p)) { 597 /* 598 * If the old_p has gone, its breakpoint has been disarmed. 599 * We have to arm it again after preparing real kprobes. 
600 */ 601 ap->flags &= ~KPROBE_FLAG_GONE; 602 if (kprobe_enabled) 603 arch_arm_kprobe(ap); 604 } 605 return ret; 606} 607 608static int __kprobes in_kprobes_functions(unsigned long addr) 609{ 610 struct kprobe_blackpoint *kb; 611 612 if (addr >= (unsigned long)__kprobes_text_start && 613 addr < (unsigned long)__kprobes_text_end) 614 return -EINVAL; 615 /* 616 * If there exists a kprobe_blacklist, verify and 617 * fail any probe registration in the prohibited area 618 */ 619 for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 620 if (kb->start_addr) { 621 if (addr >= kb->start_addr && 622 addr < (kb->start_addr + kb->range)) 623 return -EINVAL; 624 } 625 } 626 return 0; 627} 628 629/* 630 * If we have a symbol_name argument, look it up and add the offset field 631 * to it. This way, we can specify a relative address to a symbol. 632 */ 633static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) 634{ 635 kprobe_opcode_t *addr = p->addr; 636 if (p->symbol_name) { 637 if (addr) 638 return NULL; 639 kprobe_lookup_name(p->symbol_name, addr); 640 } 641 642 if (!addr) 643 return NULL; 644 return (kprobe_opcode_t *)(((char *)addr) + p->offset); 645} 646 647int __kprobes register_kprobe(struct kprobe *p) 648{ 649 int ret = 0; 650 struct kprobe *old_p; 651 struct module *probed_mod; 652 kprobe_opcode_t *addr; 653 654 addr = kprobe_addr(p); 655 if (!addr) 656 return -EINVAL; 657 p->addr = addr; 658 659 preempt_disable(); 660 if (!__kernel_text_address((unsigned long) p->addr) || 661 in_kprobes_functions((unsigned long) p->addr)) { 662 preempt_enable(); 663 return -EINVAL; 664 } 665 666 p->flags = 0; 667 /* 668 * Check if are we probing a module. 669 */ 670 probed_mod = __module_text_address((unsigned long) p->addr); 671 if (probed_mod) { 672 /* 673 * We must hold a refcount of the probed module while updating 674 * its code to prohibit unexpected unloading. 
675 */ 676 if (unlikely(!try_module_get(probed_mod))) { 677 preempt_enable(); 678 return -EINVAL; 679 } 680 /* 681 * If the module freed .init.text, we couldn't insert 682 * kprobes in there. 683 */ 684 if (within_module_init((unsigned long)p->addr, probed_mod) && 685 probed_mod->state != MODULE_STATE_COMING) { 686 module_put(probed_mod); 687 preempt_enable(); 688 return -EINVAL; 689 } 690 } 691 preempt_enable(); 692 693 p->nmissed = 0; 694 INIT_LIST_HEAD(&p->list); 695 mutex_lock(&kprobe_mutex); 696 old_p = get_kprobe(p->addr); 697 if (old_p) { 698 ret = register_aggr_kprobe(old_p, p); 699 goto out; 700 } 701 702 ret = arch_prepare_kprobe(p); 703 if (ret) 704 goto out; 705 706 INIT_HLIST_NODE(&p->hlist); 707 hlist_add_head_rcu(&p->hlist, 708 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 709 710 if (kprobe_enabled) 711 arch_arm_kprobe(p); 712 713out: 714 mutex_unlock(&kprobe_mutex); 715 716 if (probed_mod) 717 module_put(probed_mod); 718 719 return ret; 720} 721 722/* 723 * Unregister a kprobe without a scheduler synchronization. 724 */ 725static int __kprobes __unregister_kprobe_top(struct kprobe *p) 726{ 727 struct kprobe *old_p, *list_p; 728 729 old_p = get_kprobe(p->addr); 730 if (unlikely(!old_p)) 731 return -EINVAL; 732 733 if (p != old_p) { 734 list_for_each_entry_rcu(list_p, &old_p->list, list) 735 if (list_p == p) 736 /* kprobe p is a valid probe */ 737 goto valid_p; 738 return -EINVAL; 739 } 740valid_p: 741 if (old_p == p || 742 (old_p->pre_handler == aggr_pre_handler && 743 list_is_singular(&old_p->list))) { 744 /* 745 * Only probe on the hash list. Disarm only if kprobes are 746 * enabled and not gone - otherwise, the breakpoint would 747 * already have been removed. We save on flushing icache. 
748 */ 749 if (kprobe_enabled && !kprobe_gone(old_p)) 750 arch_disarm_kprobe(p); 751 hlist_del_rcu(&old_p->hlist); 752 } else { 753 if (p->break_handler && !kprobe_gone(p)) 754 old_p->break_handler = NULL; 755 if (p->post_handler && !kprobe_gone(p)) { 756 list_for_each_entry_rcu(list_p, &old_p->list, list) { 757 if ((list_p != p) && (list_p->post_handler)) 758 goto noclean; 759 } 760 old_p->post_handler = NULL; 761 } 762noclean: 763 list_del_rcu(&p->list); 764 } 765 return 0; 766} 767 768static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 769{ 770 struct kprobe *old_p; 771 772 if (list_empty(&p->list)) 773 arch_remove_kprobe(p); 774 else if (list_is_singular(&p->list)) { 775 /* "p" is the last child of an aggr_kprobe */ 776 old_p = list_entry(p->list.next, struct kprobe, list); 777 list_del(&p->list); 778 arch_remove_kprobe(old_p); 779 kfree(old_p); 780 } 781} 782 783int __kprobes register_kprobes(struct kprobe **kps, int num) 784{ 785 int i, ret = 0; 786 787 if (num <= 0) 788 return -EINVAL; 789 for (i = 0; i < num; i++) { 790 ret = register_kprobe(kps[i]); 791 if (ret < 0) { 792 if (i > 0) 793 unregister_kprobes(kps, i); 794 break; 795 } 796 } 797 return ret; 798} 799 800void __kprobes unregister_kprobe(struct kprobe *p) 801{ 802 unregister_kprobes(&p, 1); 803} 804 805void __kprobes unregister_kprobes(struct kprobe **kps, int num) 806{ 807 int i; 808 809 if (num <= 0) 810 return; 811 mutex_lock(&kprobe_mutex); 812 for (i = 0; i < num; i++) 813 if (__unregister_kprobe_top(kps[i]) < 0) 814 kps[i]->addr = NULL; 815 mutex_unlock(&kprobe_mutex); 816 817 synchronize_sched(); 818 for (i = 0; i < num; i++) 819 if (kps[i]->addr) 820 __unregister_kprobe_bottom(kps[i]); 821} 822 823static struct notifier_block kprobe_exceptions_nb = { 824 .notifier_call = kprobe_exceptions_notify, 825 .priority = 0x7fffffff /* we need to be notified first */ 826}; 827 828unsigned long __weak arch_deref_entry_point(void *entry) 829{ 830 return (unsigned long)entry; 831} 832 
833int __kprobes register_jprobes(struct jprobe **jps, int num) 834{ 835 struct jprobe *jp; 836 int ret = 0, i; 837 838 if (num <= 0) 839 return -EINVAL; 840 for (i = 0; i < num; i++) { 841 unsigned long addr; 842 jp = jps[i]; 843 addr = arch_deref_entry_point(jp->entry); 844 845 if (!kernel_text_address(addr)) 846 ret = -EINVAL; 847 else { 848 /* Todo: Verify probepoint is a function entry point */ 849 jp->kp.pre_handler = setjmp_pre_handler; 850 jp->kp.break_handler = longjmp_break_handler; 851 ret = register_kprobe(&jp->kp); 852 } 853 if (ret < 0) { 854 if (i > 0) 855 unregister_jprobes(jps, i); 856 break; 857 } 858 } 859 return ret; 860} 861 862int __kprobes register_jprobe(struct jprobe *jp) 863{ 864 return register_jprobes(&jp, 1); 865} 866 867void __kprobes unregister_jprobe(struct jprobe *jp) 868{ 869 unregister_jprobes(&jp, 1); 870} 871 872void __kprobes unregister_jprobes(struct jprobe **jps, int num) 873{ 874 int i; 875 876 if (num <= 0) 877 return; 878 mutex_lock(&kprobe_mutex); 879 for (i = 0; i < num; i++) 880 if (__unregister_kprobe_top(&jps[i]->kp) < 0) 881 jps[i]->kp.addr = NULL; 882 mutex_unlock(&kprobe_mutex); 883 884 synchronize_sched(); 885 for (i = 0; i < num; i++) { 886 if (jps[i]->kp.addr) 887 __unregister_kprobe_bottom(&jps[i]->kp); 888 } 889} 890 891#ifdef CONFIG_KRETPROBES 892/* 893 * This kprobe pre_handler is registered with every kretprobe. When probe 894 * hits it will set up the return probe. 
895 */ 896static int __kprobes pre_handler_kretprobe(struct kprobe *p, 897 struct pt_regs *regs) 898{ 899 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 900 unsigned long hash, flags = 0; 901 struct kretprobe_instance *ri; 902 903 /*TODO: consider to only swap the RA after the last pre_handler fired */ 904 hash = hash_ptr(current, KPROBE_HASH_BITS); 905 spin_lock_irqsave(&rp->lock, flags); 906 if (!hlist_empty(&rp->free_instances)) { 907 ri = hlist_entry(rp->free_instances.first, 908 struct kretprobe_instance, hlist); 909 hlist_del(&ri->hlist); 910 spin_unlock_irqrestore(&rp->lock, flags); 911 912 ri->rp = rp; 913 ri->task = current; 914 915 if (rp->entry_handler && rp->entry_handler(ri, regs)) { 916 spin_unlock_irqrestore(&rp->lock, flags); 917 return 0; 918 } 919 920 arch_prepare_kretprobe(ri, regs); 921 922 /* XXX(hch): why is there no hlist_move_head? */ 923 INIT_HLIST_NODE(&ri->hlist); 924 kretprobe_table_lock(hash, &flags); 925 hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]); 926 kretprobe_table_unlock(hash, &flags); 927 } else { 928 rp->nmissed++; 929 spin_unlock_irqrestore(&rp->lock, flags); 930 } 931 return 0; 932} 933 934int __kprobes register_kretprobe(struct kretprobe *rp) 935{ 936 int ret = 0; 937 struct kretprobe_instance *inst; 938 int i; 939 void *addr; 940 941 if (kretprobe_blacklist_size) { 942 addr = kprobe_addr(&rp->kp); 943 if (!addr) 944 return -EINVAL; 945 946 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { 947 if (kretprobe_blacklist[i].addr == addr) 948 return -EINVAL; 949 } 950 } 951 952 rp->kp.pre_handler = pre_handler_kretprobe; 953 rp->kp.post_handler = NULL; 954 rp->kp.fault_handler = NULL; 955 rp->kp.break_handler = NULL; 956 957 /* Pre-allocate memory for max kretprobe instances */ 958 if (rp->maxactive <= 0) { 959#ifdef CONFIG_PREEMPT 960 rp->maxactive = max(10, 2 * NR_CPUS); 961#else 962 rp->maxactive = NR_CPUS; 963#endif 964 } 965 spin_lock_init(&rp->lock); 966 INIT_HLIST_HEAD(&rp->free_instances); 
967 for (i = 0; i < rp->maxactive; i++) { 968 inst = kmalloc(sizeof(struct kretprobe_instance) + 969 rp->data_size, GFP_KERNEL); 970 if (inst == NULL) { 971 free_rp_inst(rp); 972 return -ENOMEM; 973 } 974 INIT_HLIST_NODE(&inst->hlist); 975 hlist_add_head(&inst->hlist, &rp->free_instances); 976 } 977 978 rp->nmissed = 0; 979 /* Establish function entry probe point */ 980 ret = register_kprobe(&rp->kp); 981 if (ret != 0) 982 free_rp_inst(rp); 983 return ret; 984} 985 986int __kprobes register_kretprobes(struct kretprobe **rps, int num) 987{ 988 int ret = 0, i; 989 990 if (num <= 0) 991 return -EINVAL; 992 for (i = 0; i < num; i++) { 993 ret = register_kretprobe(rps[i]); 994 if (ret < 0) { 995 if (i > 0) 996 unregister_kretprobes(rps, i); 997 break; 998 } 999 } 1000 return ret; 1001} 1002 1003void __kprobes unregister_kretprobe(struct kretprobe *rp) 1004{ 1005 unregister_kretprobes(&rp, 1); 1006} 1007 1008void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1009{ 1010 int i; 1011 1012 if (num <= 0) 1013 return; 1014 mutex_lock(&kprobe_mutex); 1015 for (i = 0; i < num; i++) 1016 if (__unregister_kprobe_top(&rps[i]->kp) < 0) 1017 rps[i]->kp.addr = NULL; 1018 mutex_unlock(&kprobe_mutex); 1019 1020 synchronize_sched(); 1021 for (i = 0; i < num; i++) { 1022 if (rps[i]->kp.addr) { 1023 __unregister_kprobe_bottom(&rps[i]->kp); 1024 cleanup_rp_inst(rps[i]); 1025 } 1026 } 1027} 1028 1029#else /* CONFIG_KRETPROBES */ 1030int __kprobes register_kretprobe(struct kretprobe *rp) 1031{ 1032 return -ENOSYS; 1033} 1034 1035int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1036{ 1037 return -ENOSYS; 1038} 1039void __kprobes unregister_kretprobe(struct kretprobe *rp) 1040{ 1041} 1042 1043void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1044{ 1045} 1046 1047static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1048 struct pt_regs *regs) 1049{ 1050 return 0; 1051} 1052 1053#endif /* CONFIG_KRETPROBES */ 1054 1055/* Set the 
kprobe gone and remove its instruction buffer. */ 1056static void __kprobes kill_kprobe(struct kprobe *p) 1057{ 1058 struct kprobe *kp; 1059 p->flags |= KPROBE_FLAG_GONE; 1060 if (p->pre_handler == aggr_pre_handler) { 1061 /* 1062 * If this is an aggr_kprobe, we have to list all the 1063 * chained probes and mark them GONE. 1064 */ 1065 list_for_each_entry_rcu(kp, &p->list, list) 1066 kp->flags |= KPROBE_FLAG_GONE; 1067 p->post_handler = NULL; 1068 p->break_handler = NULL; 1069 } 1070 /* 1071 * Here, we can remove insn_slot safely, because no thread calls 1072 * the original probed function (which will be freed soon) any more. 1073 */ 1074 arch_remove_kprobe(p); 1075} 1076 1077/* Module notifier call back, checking kprobes on the module */ 1078static int __kprobes kprobes_module_callback(struct notifier_block *nb, 1079 unsigned long val, void *data) 1080{ 1081 struct module *mod = data; 1082 struct hlist_head *head; 1083 struct hlist_node *node; 1084 struct kprobe *p; 1085 unsigned int i; 1086 int checkcore = (val == MODULE_STATE_GOING); 1087 1088 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) 1089 return NOTIFY_DONE; 1090 1091 /* 1092 * When MODULE_STATE_GOING was notified, both of module .text and 1093 * .init.text sections would be freed. When MODULE_STATE_LIVE was 1094 * notified, only .init.text section would be freed. We need to 1095 * disable kprobes which have been inserted in the sections. 1096 */ 1097 mutex_lock(&kprobe_mutex); 1098 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1099 head = &kprobe_table[i]; 1100 hlist_for_each_entry_rcu(p, node, head, hlist) 1101 if (within_module_init((unsigned long)p->addr, mod) || 1102 (checkcore && 1103 within_module_core((unsigned long)p->addr, mod))) { 1104 /* 1105 * The vaddr this probe is installed will soon 1106 * be vfreed buy not synced to disk. Hence, 1107 * disarming the breakpoint isn't needed. 
1108 */ 1109 kill_kprobe(p); 1110 } 1111 } 1112 mutex_unlock(&kprobe_mutex); 1113 return NOTIFY_DONE; 1114} 1115 1116static struct notifier_block kprobe_module_nb = { 1117 .notifier_call = kprobes_module_callback, 1118 .priority = 0 1119}; 1120 1121static int __init init_kprobes(void) 1122{ 1123 int i, err = 0; 1124 unsigned long offset = 0, size = 0; 1125 char *modname, namebuf[128]; 1126 const char *symbol_name; 1127 void *addr; 1128 struct kprobe_blackpoint *kb; 1129 1130 /* FIXME allocate the probe table, currently defined statically */ 1131 /* initialize all list heads */ 1132 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1133 INIT_HLIST_HEAD(&kprobe_table[i]); 1134 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 1135 spin_lock_init(&(kretprobe_table_locks[i].lock)); 1136 } 1137 1138 /* 1139 * Lookup and populate the kprobe_blacklist. 1140 * 1141 * Unlike the kretprobe blacklist, we'll need to determine 1142 * the range of addresses that belong to the said functions, 1143 * since a kprobe need not necessarily be at the beginning 1144 * of a function. 
1145 */ 1146 for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 1147 kprobe_lookup_name(kb->name, addr); 1148 if (!addr) 1149 continue; 1150 1151 kb->start_addr = (unsigned long)addr; 1152 symbol_name = kallsyms_lookup(kb->start_addr, 1153 &size, &offset, &modname, namebuf); 1154 if (!symbol_name) 1155 kb->range = 0; 1156 else 1157 kb->range = size; 1158 } 1159 1160 if (kretprobe_blacklist_size) { 1161 /* lookup the function address from its name */ 1162 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { 1163 kprobe_lookup_name(kretprobe_blacklist[i].name, 1164 kretprobe_blacklist[i].addr); 1165 if (!kretprobe_blacklist[i].addr) 1166 printk("kretprobe: lookup failed: %s\n", 1167 kretprobe_blacklist[i].name); 1168 } 1169 } 1170 1171 /* By default, kprobes are enabled */ 1172 kprobe_enabled = true; 1173 1174 err = arch_init_kprobes(); 1175 if (!err) 1176 err = register_die_notifier(&kprobe_exceptions_nb); 1177 if (!err) 1178 err = register_module_notifier(&kprobe_module_nb); 1179 1180 kprobes_initialized = (err == 0); 1181 1182 if (!err) 1183 init_test_probes(); 1184 return err; 1185} 1186 1187#ifdef CONFIG_DEBUG_FS 1188static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, 1189 const char *sym, int offset,char *modname) 1190{ 1191 char *kprobe_type; 1192 1193 if (p->pre_handler == pre_handler_kretprobe) 1194 kprobe_type = "r"; 1195 else if (p->pre_handler == setjmp_pre_handler) 1196 kprobe_type = "j"; 1197 else 1198 kprobe_type = "k"; 1199 if (sym) 1200 seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, 1201 sym, offset, (modname ? modname : " "), 1202 (kprobe_gone(p) ? "[GONE]" : "")); 1203 else 1204 seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, 1205 (kprobe_gone(p) ? "[GONE]" : "")); 1206} 1207 1208static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 1209{ 1210 return (*pos < KPROBE_TABLE_SIZE) ? 
pos : NULL; 1211} 1212 1213static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) 1214{ 1215 (*pos)++; 1216 if (*pos >= KPROBE_TABLE_SIZE) 1217 return NULL; 1218 return pos; 1219} 1220 1221static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) 1222{ 1223 /* Nothing to do */ 1224} 1225 1226static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) 1227{ 1228 struct hlist_head *head; 1229 struct hlist_node *node; 1230 struct kprobe *p, *kp; 1231 const char *sym = NULL; 1232 unsigned int i = *(loff_t *) v; 1233 unsigned long offset = 0; 1234 char *modname, namebuf[128]; 1235 1236 head = &kprobe_table[i]; 1237 preempt_disable(); 1238 hlist_for_each_entry_rcu(p, node, head, hlist) { 1239 sym = kallsyms_lookup((unsigned long)p->addr, NULL, 1240 &offset, &modname, namebuf); 1241 if (p->pre_handler == aggr_pre_handler) { 1242 list_for_each_entry_rcu(kp, &p->list, list) 1243 report_probe(pi, kp, sym, offset, modname); 1244 } else 1245 report_probe(pi, p, sym, offset, modname); 1246 } 1247 preempt_enable(); 1248 return 0; 1249} 1250 1251static struct seq_operations kprobes_seq_ops = { 1252 .start = kprobe_seq_start, 1253 .next = kprobe_seq_next, 1254 .stop = kprobe_seq_stop, 1255 .show = show_kprobe_addr 1256}; 1257 1258static int __kprobes kprobes_open(struct inode *inode, struct file *filp) 1259{ 1260 return seq_open(filp, &kprobes_seq_ops); 1261} 1262 1263static struct file_operations debugfs_kprobes_operations = { 1264 .open = kprobes_open, 1265 .read = seq_read, 1266 .llseek = seq_lseek, 1267 .release = seq_release, 1268}; 1269 1270static void __kprobes enable_all_kprobes(void) 1271{ 1272 struct hlist_head *head; 1273 struct hlist_node *node; 1274 struct kprobe *p; 1275 unsigned int i; 1276 1277 mutex_lock(&kprobe_mutex); 1278 1279 /* If kprobes are already enabled, just return */ 1280 if (kprobe_enabled) 1281 goto already_enabled; 1282 1283 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1284 head = &kprobe_table[i]; 1285 
hlist_for_each_entry_rcu(p, node, head, hlist) 1286 if (!kprobe_gone(p)) 1287 arch_arm_kprobe(p); 1288 } 1289 1290 kprobe_enabled = true; 1291 printk(KERN_INFO "Kprobes globally enabled\n"); 1292 1293already_enabled: 1294 mutex_unlock(&kprobe_mutex); 1295 return; 1296} 1297 1298static void __kprobes disable_all_kprobes(void) 1299{ 1300 struct hlist_head *head; 1301 struct hlist_node *node; 1302 struct kprobe *p; 1303 unsigned int i; 1304 1305 mutex_lock(&kprobe_mutex); 1306 1307 /* If kprobes are already disabled, just return */ 1308 if (!kprobe_enabled) 1309 goto already_disabled; 1310 1311 kprobe_enabled = false; 1312 printk(KERN_INFO "Kprobes globally disabled\n"); 1313 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1314 head = &kprobe_table[i]; 1315 hlist_for_each_entry_rcu(p, node, head, hlist) { 1316 if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) 1317 arch_disarm_kprobe(p); 1318 } 1319 } 1320 1321 mutex_unlock(&kprobe_mutex); 1322 /* Allow all currently running kprobes to complete */ 1323 synchronize_sched(); 1324 return; 1325 1326already_disabled: 1327 mutex_unlock(&kprobe_mutex); 1328 return; 1329} 1330 1331/* 1332 * XXX: The debugfs bool file interface doesn't allow for callbacks 1333 * when the bool state is switched. 
We can reuse that facility when 1334 * available 1335 */ 1336static ssize_t read_enabled_file_bool(struct file *file, 1337 char __user *user_buf, size_t count, loff_t *ppos) 1338{ 1339 char buf[3]; 1340 1341 if (kprobe_enabled) 1342 buf[0] = '1'; 1343 else 1344 buf[0] = '0'; 1345 buf[1] = '\n'; 1346 buf[2] = 0x00; 1347 return simple_read_from_buffer(user_buf, count, ppos, buf, 2); 1348} 1349 1350static ssize_t write_enabled_file_bool(struct file *file, 1351 const char __user *user_buf, size_t count, loff_t *ppos) 1352{ 1353 char buf[32]; 1354 int buf_size; 1355 1356 buf_size = min(count, (sizeof(buf)-1)); 1357 if (copy_from_user(buf, user_buf, buf_size)) 1358 return -EFAULT; 1359 1360 switch (buf[0]) { 1361 case 'y': 1362 case 'Y': 1363 case '1': 1364 enable_all_kprobes(); 1365 break; 1366 case 'n': 1367 case 'N': 1368 case '0': 1369 disable_all_kprobes(); 1370 break; 1371 } 1372 1373 return count; 1374} 1375 1376static struct file_operations fops_kp = { 1377 .read = read_enabled_file_bool, 1378 .write = write_enabled_file_bool, 1379}; 1380 1381static int __kprobes debugfs_kprobe_init(void) 1382{ 1383 struct dentry *dir, *file; 1384 unsigned int value = 1; 1385 1386 dir = debugfs_create_dir("kprobes", NULL); 1387 if (!dir) 1388 return -ENOMEM; 1389 1390 file = debugfs_create_file("list", 0444, dir, NULL, 1391 &debugfs_kprobes_operations); 1392 if (!file) { 1393 debugfs_remove(dir); 1394 return -ENOMEM; 1395 } 1396 1397 file = debugfs_create_file("enabled", 0600, dir, 1398 &value, &fops_kp); 1399 if (!file) { 1400 debugfs_remove(dir); 1401 return -ENOMEM; 1402 } 1403 1404 return 0; 1405} 1406 1407late_initcall(debugfs_kprobe_init); 1408#endif /* CONFIG_DEBUG_FS */ 1409 1410module_init(init_kprobes); 1411 1412EXPORT_SYMBOL_GPL(register_kprobe); 1413EXPORT_SYMBOL_GPL(unregister_kprobe); 1414EXPORT_SYMBOL_GPL(register_kprobes); 1415EXPORT_SYMBOL_GPL(unregister_kprobes); 1416EXPORT_SYMBOL_GPL(register_jprobe); 1417EXPORT_SYMBOL_GPL(unregister_jprobe); 
/* GPL-only exports: batch jprobe API, jprobe_return and the kretprobe API. */
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
EXPORT_SYMBOL_GPL(jprobe_return);
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);