kprobes.c revision 017c39bdb1b3ac1da6db339474a77b528043c05a
/*
 *  Kernel Probes (KProbes)
 *  kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes suggestions from
 *		Rusty Russell).
 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
 *		hlists and exceptions notifier as suggested by Andi Kleen.
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
 *		exceptions notifier to be first on the priority list.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */
#include <linux/kprobes.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/kallsyms.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kdebug.h>

#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/uaccess.h>

#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)


/*
 * Some oddball architectures like 64bit powerpc have function descriptors
 * so this must be overridable.
 */
#ifndef kprobe_lookup_name
#define kprobe_lookup_name(name, addr) \
	addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
#endif

static int kprobes_initialized;
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with kprobe_mutex held */
static bool kprobe_enabled;

static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
	spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];

static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
{
	return &(kretprobe_table_locks[hash].lock);
}

/*
 * Normally, functions that we'd want to prohibit kprobes in, are marked
 * __kprobes. But, there are cases where such functions already belong to
 * a different section (__sched for preempt_schedule)
 *
 * For such cases, we now have a blacklist
 */
static struct kprobe_blackpoint kprobe_blacklist[] = {
	{"preempt_schedule",},
	{NULL}    /* Terminator */
};

#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * kprobe->ainsn.insn points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster
 */
#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
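/*
 * For a sense of scale (illustrative numbers, not asserted by this file):
 * on an architecture with PAGE_SIZE 4096, MAX_INSN_SIZE 16 and a one-byte
 * kprobe_opcode_t, INSNS_PER_PAGE works out to 4096 / (16 * 1) = 256
 * instruction slots per executable page.
 */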

struct kprobe_insn_page {
	struct hlist_node hlist;
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	char slot_used[INSNS_PER_PAGE];
	int nused;
	int ngarbage;
};

enum kprobe_slot_state {
	SLOT_CLEAN = 0,
	SLOT_DIRTY = 1,
	SLOT_USED = 2,
};

static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
static struct hlist_head kprobe_insn_pages;
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);

static int __kprobes check_safety(void)
{
	int ret = 0;
#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
	ret = freeze_processes();
	if (ret == 0) {
		struct task_struct *p, *q;
		do_each_thread(p, q) {
			if (p != current && p->state == TASK_RUNNING &&
			    p->pid != 0) {
				printk("Check failed: %s is running\n", p->comm);
				ret = -1;
				goto loop_end;
			}
		} while_each_thread(p, q);
	}
loop_end:
	thaw_processes();
#else
	synchronize_sched();
#endif
	return ret;
}

/**
 * __get_insn_slot() - Find a slot on an executable page for an instruction.
 * We allocate an executable page if there's no room on existing ones.
 */
static kprobe_opcode_t __kprobes *__get_insn_slot(void)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos;

 retry:
	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
		if (kip->nused < INSNS_PER_PAGE) {
			int i;
			for (i = 0; i < INSNS_PER_PAGE; i++) {
				if (kip->slot_used[i] == SLOT_CLEAN) {
					kip->slot_used[i] = SLOT_USED;
					kip->nused++;
					return kip->insns + (i * MAX_INSN_SIZE);
				}
			}
			/* Surprise!  No unused slots.  Fix kip->nused. */
			kip->nused = INSNS_PER_PAGE;
		}
	}

	/* If there are any garbage slots, collect them and try again. */
	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
		goto retry;
	}
	/* All out of space.  Need to allocate a new page.  Use slot 0. */
	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
	if (!kip)
		return NULL;

	/*
	 * Use module_alloc so this page is within +/- 2GB of where the
	 * kernel image and loaded module images reside. This is required
	 * so x86_64 can correctly handle the %rip-relative fixups.
	 */
	kip->insns = module_alloc(PAGE_SIZE);
	if (!kip->insns) {
		kfree(kip);
		return NULL;
	}
	INIT_HLIST_NODE(&kip->hlist);
	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
	memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
	kip->slot_used[0] = SLOT_USED;
	kip->nused = 1;
	kip->ngarbage = 0;
	return kip->insns;
}

kprobe_opcode_t __kprobes *get_insn_slot(void)
{
	kprobe_opcode_t *ret;
	mutex_lock(&kprobe_insn_mutex);
	ret = __get_insn_slot();
	mutex_unlock(&kprobe_insn_mutex);
	return ret;
}
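/*
 * A sketch of the slot lifecycle, as arch code typically drives it
 * (illustrative; the real call sites live in arch/<arch>/kernel/kprobes.c):
 *
 *	p->ainsn.insn = get_insn_slot();	(arch_prepare_kprobe)
 *	... single-step p->ainsn.insn out of line ...
 *	free_insn_slot(p->ainsn.insn, 0);	(arch_remove_kprobe)
 *
 * Passing dirty == 1 instead defers the actual release until
 * collect_garbage_slots() has checked that no task can still be
 * single-stepping in the slot.
 */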

/* Return 1 if the slot's page was freed or recycled, otherwise 0. */
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
	kip->slot_used[idx] = SLOT_CLEAN;
	kip->nused--;
	if (kip->nused == 0) {
		/*
		 * Page is no longer in use.  Free it unless
		 * it's the last one.  We keep the last one
		 * so as not to have to set it up again the
		 * next time somebody inserts a probe.
		 */
		hlist_del(&kip->hlist);
		if (hlist_empty(&kprobe_insn_pages)) {
			INIT_HLIST_NODE(&kip->hlist);
			hlist_add_head(&kip->hlist,
				       &kprobe_insn_pages);
		} else {
			module_free(NULL, kip->insns);
			kfree(kip);
		}
		return 1;
	}
	return 0;
}

static int __kprobes collect_garbage_slots(void)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos, *next;
	int safety;

	/* Ensure no-one is preempted on the garbage slots */
	mutex_unlock(&kprobe_insn_mutex);
	safety = check_safety();
	mutex_lock(&kprobe_insn_mutex);
	if (safety != 0)
		return -EAGAIN;

	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
		int i;
		if (kip->ngarbage == 0)
			continue;
		kip->ngarbage = 0;	/* we will collect all garbage slots */
		for (i = 0; i < INSNS_PER_PAGE; i++) {
			if (kip->slot_used[i] == SLOT_DIRTY &&
			    collect_one_slot(kip, i))
				break;
		}
	}
	kprobe_garbage_slots = 0;
	return 0;
}

void __kprobes free_insn_slot(kprobe_opcode_t *slot, int dirty)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos;

	mutex_lock(&kprobe_insn_mutex);
	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
		if (kip->insns <= slot &&
		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
			int i = (slot - kip->insns) / MAX_INSN_SIZE;
			if (dirty) {
				kip->slot_used[i] = SLOT_DIRTY;
				kip->ngarbage++;
			} else {
				collect_one_slot(kip, i);
			}
			break;
		}
	}

	if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
		collect_garbage_slots();

	mutex_unlock(&kprobe_insn_mutex);
}
#endif

/* We have preemption disabled.. so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	__get_cpu_var(kprobe_instance) = kp;
}

static inline void reset_kprobe_instance(void)
{
	__get_cpu_var(kprobe_instance) = NULL;
}

/*
 * This routine is called either:
 *	- under the kprobe_mutex - during kprobe_[un]register()
 *				OR
 *	- with preemption disabled - from arch/xxx/kernel/kprobes.c
 */
struct kprobe __kprobes *get_kprobe(void *addr)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;

	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(p, node, head, hlist) {
		if (p->addr == addr)
			return p;
	}
	return NULL;
}
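/*
 * A minimal sketch of the second calling pattern above (illustrative only;
 * real arch breakpoint handlers do more bookkeeping):
 *
 *	preempt_disable();
 *	p = get_kprobe((void *)instruction_pointer(regs));
 *	if (p)
 *		... dispatch to p->pre_handler and friends ...
 *	preempt_enable();
 *
 * RCU list traversal plus disabled preemption is what makes this lockless
 * lookup safe against concurrent (un)registration.
 */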

/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list
 */
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler) {
			set_kprobe_instance(kp);
			if (kp->pre_handler(kp, regs))
				return 1;
		}
		reset_kprobe_instance();
	}
	return 0;
}

static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
					unsigned long flags)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->post_handler) {
			set_kprobe_instance(kp);
			kp->post_handler(kp, regs, flags);
			reset_kprobe_instance();
		}
	}
}

static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
					int trapnr)
{
	struct kprobe *cur = __get_cpu_var(kprobe_instance);

	/*
	 * if we faulted "during" the execution of a user specified
	 * probe handler, invoke just that probe's fault handler
	 */
	if (cur && cur->fault_handler) {
		if (cur->fault_handler(cur, regs, trapnr))
			return 1;
	}
	return 0;
}

static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *cur = __get_cpu_var(kprobe_instance);
	int ret = 0;

	if (cur && cur->break_handler) {
		if (cur->break_handler(cur, regs))
			ret = 1;
	}
	reset_kprobe_instance();
	return ret;
}

/* Walks the list and increments nmissed count for multiprobe case */
void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
{
	struct kprobe *kp;
	if (p->pre_handler != aggr_pre_handler) {
		p->nmissed++;
	} else {
		list_for_each_entry_rcu(kp, &p->list, list)
			kp->nmissed++;
	}
	return;
}

void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
			       struct hlist_head *head)
{
	struct kretprobe *rp = ri->rp;

	/* remove rp inst off the kretprobe_inst_table */
	hlist_del(&ri->hlist);
	INIT_HLIST_NODE(&ri->hlist);
	if (likely(rp)) {
		spin_lock(&rp->lock);
		hlist_add_head(&ri->hlist, &rp->free_instances);
		spin_unlock(&rp->lock);
	} else
		/* Unregistering */
		hlist_add_head(&ri->hlist, head);
}

void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
			 struct hlist_head **head, unsigned long *flags)
{
	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
	spinlock_t *hlist_lock;

	*head = &kretprobe_inst_table[hash];
	hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_lock_irqsave(hlist_lock, *flags);
}

static void __kprobes kretprobe_table_lock(unsigned long hash,
	unsigned long *flags)
{
	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_lock_irqsave(hlist_lock, *flags);
}

void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
	unsigned long *flags)
{
	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
	spinlock_t *hlist_lock;

	hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_unlock_irqrestore(hlist_lock, *flags);
}

void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
{
	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
	spin_unlock_irqrestore(hlist_lock, *flags);
}
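/*
 * Typical pairing, as used by arch kretprobe trampoline handlers (a
 * sketch; the real handler also fixes up the task's return address):
 *
 *	kretprobe_hash_lock(current, &head, &flags);
 *	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 *		if (ri->task != current)
 *			continue;
 *		... run ri->rp->handler, then recycle_rp_inst(ri, ...) ...
 *	}
 *	kretprobe_hash_unlock(current, &flags);
 */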

/*
 * This function is called from finish_task_switch when task tk becomes dead,
 * so that we can recycle any function-return probe instances associated
 * with this task. These left over instances represent probed functions
 * that have been called but will never return.
 */
void __kprobes kprobe_flush_task(struct task_struct *tk)
{
	struct kretprobe_instance *ri;
	struct hlist_head *head, empty_rp;
	struct hlist_node *node, *tmp;
	unsigned long hash, flags = 0;

	if (unlikely(!kprobes_initialized))
		/* Early boot.  kretprobe_table_locks not yet initialized. */
		return;

	/* empty_rp must be initialized before recycle_rp_inst() adds to it */
	INIT_HLIST_HEAD(&empty_rp);
	hash = hash_ptr(tk, KPROBE_HASH_BITS);
	head = &kretprobe_inst_table[hash];
	kretprobe_table_lock(hash, &flags);
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
		if (ri->task == tk)
			recycle_rp_inst(ri, &empty_rp);
	}
	kretprobe_table_unlock(hash, &flags);
	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}
}

static inline void free_rp_inst(struct kretprobe *rp)
{
	struct kretprobe_instance *ri;
	struct hlist_node *pos, *next;

	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}
}

static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
{
	unsigned long flags, hash;
	struct kretprobe_instance *ri;
	struct hlist_node *pos, *next;
	struct hlist_head *head;

	/* No race here */
	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
		kretprobe_table_lock(hash, &flags);
		head = &kretprobe_inst_table[hash];
		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
			if (ri->rp == rp)
				ri->rp = NULL;
		}
		kretprobe_table_unlock(hash, &flags);
	}
	free_rp_inst(rp);
}

/*
 * Keep all fields in the kprobe consistent
 */
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
{
	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
}

/*
 * Add the new probe to old_p->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
	if (p->break_handler) {
		if (old_p->break_handler)
			return -EEXIST;
		list_add_tail_rcu(&p->list, &old_p->list);
		old_p->break_handler = aggr_break_handler;
	} else
		list_add_rcu(&p->list, &old_p->list);
	if (p->post_handler && !old_p->post_handler)
		old_p->post_handler = aggr_post_handler;
	return 0;
}

/*
 * Fill in the required fields of the "manager kprobe". Replace the
 * earlier kprobe in the hlist with the manager kprobe
 */
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
	copy_kprobe(p, ap);
	flush_insn_slot(ap);
	ap->addr = p->addr;
	ap->pre_handler = aggr_pre_handler;
	ap->fault_handler = aggr_fault_handler;
	if (p->post_handler)
		ap->post_handler = aggr_post_handler;
	if (p->break_handler)
		ap->break_handler = aggr_break_handler;

	INIT_LIST_HEAD(&ap->list);
	list_add_rcu(&p->list, &ap->list);

	hlist_replace_rcu(&p->hlist, &ap->hlist);
}

/*
 * This is the second or subsequent kprobe at the address - handle
 * the intricacies
 */
static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
					  struct kprobe *p)
{
	int ret = 0;
	struct kprobe *ap;

	if (old_p->pre_handler == aggr_pre_handler) {
		copy_kprobe(old_p, p);
		ret = add_new_kprobe(old_p, p);
	} else {
		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
		if (!ap)
			return -ENOMEM;
		add_aggr_kprobe(ap, old_p);
		copy_kprobe(ap, p);
		ret = add_new_kprobe(ap, p);
	}
	return ret;
}
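/*
 * Net effect, sketched for two independent probes on one address
 * (names are illustrative):
 *
 *	register_kprobe(&kp1);	first probe lands in kprobe_table as-is
 *	register_kprobe(&kp2);	an aggregate "manager" kprobe replaces
 *				kp1 in the hash; kp1 and kp2 both hang
 *				off its ->list and fire via aggr_pre_handler
 */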

static int __kprobes in_kprobes_functions(unsigned long addr)
{
	struct kprobe_blackpoint *kb;

	if (addr >= (unsigned long)__kprobes_text_start &&
	    addr < (unsigned long)__kprobes_text_end)
		return -EINVAL;
	/*
	 * If there exists a kprobe_blacklist, verify and
	 * fail any probe registration in the prohibited area
	 */
	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
		if (kb->start_addr) {
			if (addr >= kb->start_addr &&
			    addr < (kb->start_addr + kb->range))
				return -EINVAL;
		}
	}
	return 0;
}

/*
 * If we have a symbol_name argument, look it up and add the offset field
 * to it. This way, we can specify a relative address to a symbol.
 */
static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
{
	kprobe_opcode_t *addr = p->addr;
	if (p->symbol_name) {
		if (addr)
			return NULL;
		kprobe_lookup_name(p->symbol_name, addr);
	}

	if (!addr)
		return NULL;
	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
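/*
 * Callers therefore have two mutually exclusive ways to name a probe
 * point (a sketch; "do_fork" and the offset are only examples):
 *
 *	struct kprobe kp = { .symbol_name = "do_fork", .offset = 0x10 };
 * or
 *	struct kprobe kp = { .addr = some_kernel_address };
 *
 * Setting both .symbol_name and .addr makes kprobe_addr() return NULL,
 * so the registration fails with -EINVAL.
 */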

static int __kprobes __register_kprobe(struct kprobe *p,
	unsigned long called_from)
{
	int ret = 0;
	struct kprobe *old_p;
	struct module *probed_mod;
	kprobe_opcode_t *addr;

	addr = kprobe_addr(p);
	if (!addr)
		return -EINVAL;
	p->addr = addr;

	preempt_disable();
	if (!__kernel_text_address((unsigned long) p->addr) ||
	    in_kprobes_functions((unsigned long) p->addr)) {
		preempt_enable();
		return -EINVAL;
	}

	p->mod_refcounted = 0;

	/*
	 * Check if we are probing a module.
	 */
	probed_mod = __module_text_address((unsigned long) p->addr);
	if (probed_mod) {
		struct module *calling_mod;
		calling_mod = __module_text_address(called_from);
		/*
		 * We must allow modules to probe themselves and in this case
		 * avoid incrementing the module refcount, so as to allow
		 * unloading of self probing modules.
		 */
		if (calling_mod != probed_mod) {
			if (unlikely(!try_module_get(probed_mod))) {
				preempt_enable();
				return -EINVAL;
			}
			p->mod_refcounted = 1;
		} else
			probed_mod = NULL;
	}
	preempt_enable();

	p->nmissed = 0;
	INIT_LIST_HEAD(&p->list);
	mutex_lock(&kprobe_mutex);
	old_p = get_kprobe(p->addr);
	if (old_p) {
		ret = register_aggr_kprobe(old_p, p);
		goto out;
	}

	ret = arch_prepare_kprobe(p);
	if (ret)
		goto out;

	INIT_HLIST_NODE(&p->hlist);
	hlist_add_head_rcu(&p->hlist,
		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	if (kprobe_enabled)
		arch_arm_kprobe(p);

out:
	mutex_unlock(&kprobe_mutex);

	if (ret && probed_mod)
		module_put(probed_mod);
	return ret;
}

/*
 * Unregister a kprobe without a scheduler synchronization.
 */
static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
	struct kprobe *old_p, *list_p;

	old_p = get_kprobe(p->addr);
	if (unlikely(!old_p))
		return -EINVAL;

	if (p != old_p) {
		list_for_each_entry_rcu(list_p, &old_p->list, list)
			if (list_p == p)
			/* kprobe p is a valid probe */
				goto valid_p;
		return -EINVAL;
	}
valid_p:
	if (old_p == p ||
	    (old_p->pre_handler == aggr_pre_handler &&
	     list_is_singular(&old_p->list))) {
		/*
		 * Only probe on the hash list. Disarm only if kprobes are
		 * enabled - otherwise, the breakpoint would already have
		 * been removed. We save on flushing icache.
		 */
		if (kprobe_enabled)
			arch_disarm_kprobe(p);
		hlist_del_rcu(&old_p->hlist);
	} else {
		if (p->break_handler)
			old_p->break_handler = NULL;
		if (p->post_handler) {
			list_for_each_entry_rcu(list_p, &old_p->list, list) {
				if ((list_p != p) && (list_p->post_handler))
					goto noclean;
			}
			old_p->post_handler = NULL;
		}
noclean:
		list_del_rcu(&p->list);
	}
	return 0;
}

static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
{
	struct module *mod;
	struct kprobe *old_p;

	if (p->mod_refcounted) {
		/*
		 * Since we've already incremented refcount,
		 * we don't need to disable preemption.
		 */
		mod = module_text_address((unsigned long)p->addr);
		if (mod)
			module_put(mod);
	}

	if (list_empty(&p->list) || list_is_singular(&p->list)) {
		if (!list_empty(&p->list)) {
			/* "p" is the last child of an aggr_kprobe */
			old_p = list_entry(p->list.next, struct kprobe, list);
			list_del(&p->list);
			kfree(old_p);
		}
		arch_remove_kprobe(p);
	}
}

static int __kprobes __register_kprobes(struct kprobe **kps, int num,
	unsigned long called_from)
{
	int i, ret = 0;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		ret = __register_kprobe(kps[i], called_from);
		if (ret < 0) {
			if (i > 0)
				unregister_kprobes(kps, i);
			break;
		}
	}
	return ret;
}

/*
 * Registration and unregistration functions for kprobe.
 */
int __kprobes register_kprobe(struct kprobe *p)
{
	return __register_kprobes(&p, 1,
				  (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kprobe(struct kprobe *p)
{
	unregister_kprobes(&p, 1);
}

int __kprobes register_kprobes(struct kprobe **kps, int num)
{
	return __register_kprobes(kps, num,
				  (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kprobes(struct kprobe **kps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(kps[i]) < 0)
			kps[i]->addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_sched();
	for (i = 0; i < num; i++)
		if (kps[i]->addr)
			__unregister_kprobe_bottom(kps[i]);
}
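/*
 * A minimal client, sketched as it would appear in a separate module
 * (illustrative names; see Documentation/kprobes.txt for the full API):
 *
 *	static int my_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "kprobe hit at %p\n", p->addr);
 *		return 0;	returning nonzero skips the single-step
 *	}
 *
 *	static struct kprobe my_kp = {
 *		.symbol_name	= "do_fork",
 *		.pre_handler	= my_pre,
 *	};
 *
 *	register_kprobe(&my_kp);  ...  unregister_kprobe(&my_kp);
 */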

static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};

unsigned long __weak arch_deref_entry_point(void *entry)
{
	return (unsigned long)entry;
}

static int __kprobes __register_jprobes(struct jprobe **jps, int num,
	unsigned long called_from)
{
	struct jprobe *jp;
	int ret = 0, i;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		unsigned long addr;
		jp = jps[i];
		addr = arch_deref_entry_point(jp->entry);

		if (!kernel_text_address(addr))
			ret = -EINVAL;
		else {
			/* Todo: Verify probepoint is a function entry point */
			jp->kp.pre_handler = setjmp_pre_handler;
			jp->kp.break_handler = longjmp_break_handler;
			ret = __register_kprobe(&jp->kp, called_from);
		}
		if (ret < 0) {
			if (i > 0)
				unregister_jprobes(jps, i);
			break;
		}
	}
	return ret;
}

int __kprobes register_jprobe(struct jprobe *jp)
{
	return __register_jprobes(&jp, 1,
		(unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_jprobe(struct jprobe *jp)
{
	unregister_jprobes(&jp, 1);
}

int __kprobes register_jprobes(struct jprobe **jps, int num)
{
	return __register_jprobes(jps, num,
		(unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_jprobes(struct jprobe **jps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
			jps[i]->kp.addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_sched();
	for (i = 0; i < num; i++) {
		if (jps[i]->kp.addr)
			__unregister_kprobe_bottom(&jps[i]->kp);
	}
}
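/*
 * A jprobe client, sketched (illustrative; the handler must mirror the
 * probed function's signature and must end in jprobe_return()):
 *
 *	static long my_do_fork(unsigned long clone_flags,
 *			       unsigned long stack_start, ...)
 *	{
 *		printk(KERN_INFO "clone_flags = 0x%lx\n", clone_flags);
 *		jprobe_return();	control never falls through here
 *		return 0;
 *	}
 *
 *	static struct jprobe my_jp = {
 *		.entry		= my_do_fork,
 *		.kp.symbol_name	= "do_fork",
 *	};
 *
 *	register_jprobe(&my_jp);  ...  unregister_jprobe(&my_jp);
 */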

#ifdef CONFIG_KRETPROBES
/*
 * This kprobe pre_handler is registered with every kretprobe. When probe
 * hits it will set up the return probe.
 */
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
					   struct pt_regs *regs)
{
	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
	unsigned long hash, flags = 0;
	struct kretprobe_instance *ri;

	/* TODO: consider only swapping the RA after the last pre_handler has fired */
	hash = hash_ptr(current, KPROBE_HASH_BITS);
	spin_lock_irqsave(&rp->lock, flags);
	if (!hlist_empty(&rp->free_instances)) {
		ri = hlist_entry(rp->free_instances.first,
				struct kretprobe_instance, hlist);
		hlist_del(&ri->hlist);
		spin_unlock_irqrestore(&rp->lock, flags);

		ri->rp = rp;
		ri->task = current;

		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
			/*
			 * The entry handler vetoed this hit; the lock was
			 * dropped above, so re-take it and return the
			 * instance to the free list instead of leaking it.
			 */
			spin_lock_irqsave(&rp->lock, flags);
			hlist_add_head(&ri->hlist, &rp->free_instances);
			spin_unlock_irqrestore(&rp->lock, flags);
			return 0;
		}

		arch_prepare_kretprobe(ri, regs);

		/* XXX(hch): why is there no hlist_move_head? */
		INIT_HLIST_NODE(&ri->hlist);
		kretprobe_table_lock(hash, &flags);
		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
		kretprobe_table_unlock(hash, &flags);
	} else {
		rp->nmissed++;
		spin_unlock_irqrestore(&rp->lock, flags);
	}
	return 0;
}

static int __kprobes __register_kretprobe(struct kretprobe *rp,
					  unsigned long called_from)
{
	int ret = 0;
	struct kretprobe_instance *inst;
	int i;
	void *addr;

	if (kretprobe_blacklist_size) {
		addr = kprobe_addr(&rp->kp);
		if (!addr)
			return -EINVAL;

		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			if (kretprobe_blacklist[i].addr == addr)
				return -EINVAL;
		}
	}

	rp->kp.pre_handler = pre_handler_kretprobe;
	rp->kp.post_handler = NULL;
	rp->kp.fault_handler = NULL;
	rp->kp.break_handler = NULL;

	/* Pre-allocate memory for max kretprobe instances */
	if (rp->maxactive <= 0) {
#ifdef CONFIG_PREEMPT
		rp->maxactive = max(10, 2 * NR_CPUS);
#else
		rp->maxactive = NR_CPUS;
#endif
	}
	spin_lock_init(&rp->lock);
	INIT_HLIST_HEAD(&rp->free_instances);
	for (i = 0; i < rp->maxactive; i++) {
		inst = kmalloc(sizeof(struct kretprobe_instance) +
			       rp->data_size, GFP_KERNEL);
		if (inst == NULL) {
			free_rp_inst(rp);
			return -ENOMEM;
		}
		INIT_HLIST_NODE(&inst->hlist);
		hlist_add_head(&inst->hlist, &rp->free_instances);
	}

	rp->nmissed = 0;
	/* Establish function entry probe point */
	ret = __register_kprobe(&rp->kp, called_from);
	if (ret != 0)
		free_rp_inst(rp);
	return ret;
}

static int __kprobes __register_kretprobes(struct kretprobe **rps, int num,
	unsigned long called_from)
{
	int ret = 0, i;

	if (num <= 0)
		return -EINVAL;
	for (i = 0; i < num; i++) {
		ret = __register_kretprobe(rps[i], called_from);
		if (ret < 0) {
			if (i > 0)
				unregister_kretprobes(rps, i);
			break;
		}
	}
	return ret;
}

int __kprobes register_kretprobe(struct kretprobe *rp)
{
	return __register_kretprobes(&rp, 1,
			(unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
	unregister_kretprobes(&rp, 1);
}

int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
	return __register_kretprobes(rps, num,
			(unsigned long)__builtin_return_address(0));
}
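/*
 * A kretprobe client, sketched (illustrative names; ->data_size bytes of
 * per-instance scratch space ride along in ri->data):
 *
 *	static int my_ret_handler(struct kretprobe_instance *ri,
 *				  struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "%s returned 0x%lx\n",
 *		       ri->rp->kp.symbol_name, regs_return_value(regs));
 *		return 0;
 *	}
 *
 *	static struct kretprobe my_rp = {
 *		.kp.symbol_name	= "do_fork",
 *		.handler	= my_ret_handler,
 *		.maxactive	= 20,
 *	};
 *
 *	register_kretprobe(&my_rp);  ...  unregister_kretprobe(&my_rp);
 */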

void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
	int i;

	if (num <= 0)
		return;
	mutex_lock(&kprobe_mutex);
	for (i = 0; i < num; i++)
		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
			rps[i]->kp.addr = NULL;
	mutex_unlock(&kprobe_mutex);

	synchronize_sched();
	for (i = 0; i < num; i++) {
		if (rps[i]->kp.addr) {
			__unregister_kprobe_bottom(&rps[i]->kp);
			cleanup_rp_inst(rps[i]);
		}
	}
}

#else /* CONFIG_KRETPROBES */
int __kprobes register_kretprobe(struct kretprobe *rp)
{
	return -ENOSYS;
}

int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
	return -ENOSYS;
}

void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
}

void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
}

static int __kprobes pre_handler_kretprobe(struct kprobe *p,
					   struct pt_regs *regs)
{
	return 0;
}

#endif /* CONFIG_KRETPROBES */

static int __init init_kprobes(void)
{
	int i, err = 0;
	unsigned long offset = 0, size = 0;
	char *modname, namebuf[128];
	const char *symbol_name;
	void *addr;
	struct kprobe_blackpoint *kb;

	/* FIXME allocate the probe table, currently defined statically */
	/* initialize all list heads */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		INIT_HLIST_HEAD(&kprobe_table[i]);
		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
		spin_lock_init(&(kretprobe_table_locks[i].lock));
	}

	/*
	 * Lookup and populate the kprobe_blacklist.
	 *
	 * Unlike the kretprobe blacklist, we'll need to determine
	 * the range of addresses that belong to the said functions,
	 * since a kprobe need not necessarily be at the beginning
	 * of a function.
	 */
	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
		kprobe_lookup_name(kb->name, addr);
		if (!addr)
			continue;

		kb->start_addr = (unsigned long)addr;
		symbol_name = kallsyms_lookup(kb->start_addr,
				&size, &offset, &modname, namebuf);
		if (!symbol_name)
			kb->range = 0;
		else
			kb->range = size;
	}

	if (kretprobe_blacklist_size) {
		/* lookup the function address from its name */
		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			kprobe_lookup_name(kretprobe_blacklist[i].name,
					   kretprobe_blacklist[i].addr);
			if (!kretprobe_blacklist[i].addr)
				printk("kretprobe: lookup failed: %s\n",
				       kretprobe_blacklist[i].name);
		}
	}

	/* By default, kprobes are enabled */
	kprobe_enabled = true;

	err = arch_init_kprobes();
	if (!err)
		err = register_die_notifier(&kprobe_exceptions_nb);
	kprobes_initialized = (err == 0);

	if (!err)
		init_test_probes();
	return err;
}

#ifdef CONFIG_DEBUG_FS
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
		const char *sym, int offset, char *modname)
{
	char *kprobe_type;

	if (p->pre_handler == pre_handler_kretprobe)
		kprobe_type = "r";
	else if (p->pre_handler == setjmp_pre_handler)
		kprobe_type = "j";
	else
		kprobe_type = "k";
	if (sym)
		seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type,
			sym, offset, (modname ? modname : " "));
	else
		seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr);
}
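/*
 * Example of the resulting /sys/kernel/debug/kprobes/list lines
 * (addresses and symbols are illustrative):
 *
 *	c05e8024 r do_fork+0x0
 *	c014fb48 k schedule+0x10
 *
 * where the middle column is "k" for a plain kprobe, "r" for a
 * kretprobe and "j" for a jprobe.
 */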

static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
{
	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
}

static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
	(*pos)++;
	if (*pos >= KPROBE_TABLE_SIZE)
		return NULL;
	return pos;
}

static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}

static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p, *kp;
	const char *sym = NULL;
	unsigned int i = *(loff_t *) v;
	unsigned long offset = 0;
	char *modname, namebuf[128];

	head = &kprobe_table[i];
	preempt_disable();
	hlist_for_each_entry_rcu(p, node, head, hlist) {
		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
					&offset, &modname, namebuf);
		if (p->pre_handler == aggr_pre_handler) {
			list_for_each_entry_rcu(kp, &p->list, list)
				report_probe(pi, kp, sym, offset, modname);
		} else
			report_probe(pi, p, sym, offset, modname);
	}
	preempt_enable();
	return 0;
}

static struct seq_operations kprobes_seq_ops = {
	.start = kprobe_seq_start,
	.next  = kprobe_seq_next,
	.stop  = kprobe_seq_stop,
	.show  = show_kprobe_addr
};

static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
{
	return seq_open(filp, &kprobes_seq_ops);
}

static struct file_operations debugfs_kprobes_operations = {
	.open		= kprobes_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void __kprobes enable_all_kprobes(void)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);

	/* If kprobes are already enabled, just return */
	if (kprobe_enabled)
		goto already_enabled;

	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, node, head, hlist)
			arch_arm_kprobe(p);
	}

	kprobe_enabled = true;
	printk(KERN_INFO "Kprobes globally enabled\n");

already_enabled:
	mutex_unlock(&kprobe_mutex);
	return;
}

static void __kprobes disable_all_kprobes(void)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);

	/* If kprobes are already disabled, just return */
	if (!kprobe_enabled)
		goto already_disabled;

	kprobe_enabled = false;
	printk(KERN_INFO "Kprobes globally disabled\n");
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, node, head, hlist) {
			if (!arch_trampoline_kprobe(p))
				arch_disarm_kprobe(p);
		}
	}

	mutex_unlock(&kprobe_mutex);
	/* Allow all currently running kprobes to complete */
	synchronize_sched();
	return;

already_disabled:
	mutex_unlock(&kprobe_mutex);
	return;
}
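/*
 * From userspace this is driven through debugfs, e.g. (assuming debugfs
 * is mounted at /sys/kernel/debug):
 *
 *	cat /sys/kernel/debug/kprobes/list
 *	echo 0 > /sys/kernel/debug/kprobes/enabled	(disarm everything)
 *	echo 1 > /sys/kernel/debug/kprobes/enabled	(re-arm)
 */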

/*
 * XXX: The debugfs bool file interface doesn't allow for callbacks
 * when the bool state is switched. We can reuse that facility when
 * available
 */
static ssize_t read_enabled_file_bool(struct file *file,
	       char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[3];

	if (kprobe_enabled)
		buf[0] = '1';
	else
		buf[0] = '0';
	buf[1] = '\n';
	buf[2] = 0x00;
	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}

static ssize_t write_enabled_file_bool(struct file *file,
	       const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	int buf_size;

	buf_size = min(count, (sizeof(buf)-1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	switch (buf[0]) {
	case 'y':
	case 'Y':
	case '1':
		enable_all_kprobes();
		break;
	case 'n':
	case 'N':
	case '0':
		disable_all_kprobes();
		break;
	}

	return count;
}

static struct file_operations fops_kp = {
	.read	= read_enabled_file_bool,
	.write	= write_enabled_file_bool,
};

static int __kprobes debugfs_kprobe_init(void)
{
	struct dentry *dir, *file;
	unsigned int value = 1;

	dir = debugfs_create_dir("kprobes", NULL);
	if (!dir)
		return -ENOMEM;

	file = debugfs_create_file("list", 0444, dir, NULL,
				&debugfs_kprobes_operations);
	if (!file) {
		debugfs_remove(dir);
		return -ENOMEM;
	}

	file = debugfs_create_file("enabled", 0600, dir,
					&value, &fops_kp);
	if (!file) {
		debugfs_remove(dir);
		return -ENOMEM;
	}

	return 0;
}

late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */

module_init(init_kprobes);

EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_kprobes);
EXPORT_SYMBOL_GPL(unregister_kprobes);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
EXPORT_SYMBOL_GPL(jprobe_return);
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);