kprobes.c revision 129415607845d4daea11ddcba706005c69dcb942

/*
 *  Kernel Probes (KProbes)
 *  kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes suggestions from
 *		Rusty Russell).
 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
 *		hlists and exceptions notifier as suggested by Andi Kleen.
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
 *		exceptions notifier to be first on the priority list.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */
#include <linux/kprobes.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/kallsyms.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kdebug.h>

#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/uaccess.h>

#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)


/*
 * Some oddball architectures like 64bit powerpc have function descriptors
 * so this must be overridable.
 */
#ifndef kprobe_lookup_name
#define kprobe_lookup_name(name, addr) \
        addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
#endif

static int kprobes_initialized;
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with kprobe_mutex held */
static bool kprobe_enabled;

static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
        spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];

static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
{
        return &(kretprobe_table_locks[hash].lock);
}

/*
 * Normally, functions that we'd want to prohibit kprobes in are marked
 * __kprobes. But there are cases where such functions already belong to
 * a different section (__sched for preempt_schedule).
 *
 * For such cases, we now have a blacklist.
 */
static struct kprobe_blackpoint kprobe_blacklist[] = {
        {"preempt_schedule",},
        {NULL}    /* Terminator */
};

#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * kprobe->ainsn.insn points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster.
 */
#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))

struct kprobe_insn_page {
        struct hlist_node hlist;
        kprobe_opcode_t *insns;		/* Page of instruction slots */
        char slot_used[INSNS_PER_PAGE];
        int nused;
        int ngarbage;
};

enum kprobe_slot_state {
        SLOT_CLEAN = 0,
        SLOT_DIRTY = 1,
        SLOT_USED = 2,
};

static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
static struct hlist_head kprobe_insn_pages;
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);

static int __kprobes check_safety(void)
{
        int ret = 0;
#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
        ret = freeze_processes();
        if (ret == 0) {
                struct task_struct *p, *q;
                do_each_thread(p, q) {
                        if (p != current && p->state == TASK_RUNNING &&
                            p->pid != 0) {
                                printk("Check failed: %s is running\n", p->comm);
                                ret = -1;
                                goto loop_end;
                        }
                } while_each_thread(p, q);
        }
loop_end:
        thaw_processes();
#else
        synchronize_sched();
#endif
        return ret;
}

/**
 * __get_insn_slot() - Find a slot on an executable page for an instruction.
 * We allocate an executable page if there's no room on existing ones.
 */
static kprobe_opcode_t __kprobes *__get_insn_slot(void)
{
        struct kprobe_insn_page *kip;
        struct hlist_node *pos;

 retry:
        hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
                if (kip->nused < INSNS_PER_PAGE) {
                        int i;
                        for (i = 0; i < INSNS_PER_PAGE; i++) {
                                if (kip->slot_used[i] == SLOT_CLEAN) {
                                        kip->slot_used[i] = SLOT_USED;
                                        kip->nused++;
                                        return kip->insns + (i * MAX_INSN_SIZE);
                                }
                        }
                        /* Surprise!  No unused slots.  Fix kip->nused. */
                        kip->nused = INSNS_PER_PAGE;
                }
        }

        /* If there are any garbage slots, collect them and try again. */
        if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
                goto retry;
        }
        /* All out of space.  Need to allocate a new page.  Use slot 0. */
        kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
        if (!kip)
                return NULL;

        /*
         * Use module_alloc so this page is within +/- 2GB of where the
         * kernel image and loaded module images reside. This is required
         * so x86_64 can correctly handle the %rip-relative fixups.
         */
        kip->insns = module_alloc(PAGE_SIZE);
        if (!kip->insns) {
                kfree(kip);
                return NULL;
        }
        INIT_HLIST_NODE(&kip->hlist);
        hlist_add_head(&kip->hlist, &kprobe_insn_pages);
        memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
        kip->slot_used[0] = SLOT_USED;
        kip->nused = 1;
        kip->ngarbage = 0;
        return kip->insns;
}

kprobe_opcode_t __kprobes *get_insn_slot(void)
{
        kprobe_opcode_t *ret;
        mutex_lock(&kprobe_insn_mutex);
        ret = __get_insn_slot();
        mutex_unlock(&kprobe_insn_mutex);
        return ret;
}
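
/*
 * For illustration only (not part of this file's logic): arch code is
 * the consumer of these slots.  A typical arch_prepare_kprobe() does,
 * roughly:
 *
 *	p->ainsn.insn = get_insn_slot();
 *	if (!p->ainsn.insn)
 *		return -ENOMEM;
 *	memcpy(p->ainsn.insn, p->addr,
 *	       MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
 *
 * and the matching arch_remove_kprobe() hands the slot back through
 * free_insn_slot().  Details vary per architecture.
 */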

/* Return 1 if the whole page containing the slot was collected, otherwise 0. */
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
        kip->slot_used[idx] = SLOT_CLEAN;
        kip->nused--;
        if (kip->nused == 0) {
                /*
                 * Page is no longer in use.  Free it unless
                 * it's the last one.  We keep the last one
                 * so as not to have to set it up again the
                 * next time somebody inserts a probe.
                 */
                hlist_del(&kip->hlist);
                if (hlist_empty(&kprobe_insn_pages)) {
                        INIT_HLIST_NODE(&kip->hlist);
                        hlist_add_head(&kip->hlist,
                                       &kprobe_insn_pages);
                } else {
                        module_free(NULL, kip->insns);
                        kfree(kip);
                }
                return 1;
        }
        return 0;
}

static int __kprobes collect_garbage_slots(void)
{
        struct kprobe_insn_page *kip;
        struct hlist_node *pos, *next;
        int safety;

        /* Ensure no one is preempted on the garbage slots */
        mutex_unlock(&kprobe_insn_mutex);
        safety = check_safety();
        mutex_lock(&kprobe_insn_mutex);
        if (safety != 0)
                return -EAGAIN;

        hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
                int i;
                if (kip->ngarbage == 0)
                        continue;
                kip->ngarbage = 0;	/* we will collect all garbage slots */
                for (i = 0; i < INSNS_PER_PAGE; i++) {
                        if (kip->slot_used[i] == SLOT_DIRTY &&
                            collect_one_slot(kip, i))
                                break;
                }
        }
        kprobe_garbage_slots = 0;
        return 0;
}

void __kprobes free_insn_slot(kprobe_opcode_t *slot, int dirty)
{
        struct kprobe_insn_page *kip;
        struct hlist_node *pos;

        mutex_lock(&kprobe_insn_mutex);
        hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
                if (kip->insns <= slot &&
                    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
                        int i = (slot - kip->insns) / MAX_INSN_SIZE;
                        if (dirty) {
                                kip->slot_used[i] = SLOT_DIRTY;
                                kip->ngarbage++;
                        } else {
                                collect_one_slot(kip, i);
                        }
                        break;
                }
        }

        if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
                collect_garbage_slots();

        mutex_unlock(&kprobe_insn_mutex);
}
#endif

/* We have preemption disabled.. so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
        __get_cpu_var(kprobe_instance) = kp;
}

static inline void reset_kprobe_instance(void)
{
        __get_cpu_var(kprobe_instance) = NULL;
}

/*
 * This routine is called either:
 *	- under the kprobe_mutex - during kprobe_[un]register()
 *				OR
 *	- with preemption disabled - from arch/xxx/kernel/kprobes.c
 */
struct kprobe __kprobes *get_kprobe(void *addr)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct kprobe *p;

        head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
        hlist_for_each_entry_rcu(p, node, head, hlist) {
                if (p->addr == addr)
                        return p;
        }
        return NULL;
}

/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list
 */
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
        struct kprobe *kp;

        list_for_each_entry_rcu(kp, &p->list, list) {
                if (kp->pre_handler) {
                        set_kprobe_instance(kp);
                        if (kp->pre_handler(kp, regs))
                                return 1;
                }
                reset_kprobe_instance();
        }
        return 0;
}

static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
                                        unsigned long flags)
{
        struct kprobe *kp;

        list_for_each_entry_rcu(kp, &p->list, list) {
                if (kp->post_handler) {
                        set_kprobe_instance(kp);
                        kp->post_handler(kp, regs, flags);
                        reset_kprobe_instance();
                }
        }
}

static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
                                        int trapnr)
{
        struct kprobe *cur = __get_cpu_var(kprobe_instance);

        /*
         * If we faulted "during" the execution of a user specified
         * probe handler, invoke just that probe's fault handler
         */
        if (cur && cur->fault_handler) {
                if (cur->fault_handler(cur, regs, trapnr))
                        return 1;
        }
        return 0;
}

static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
        struct kprobe *cur = __get_cpu_var(kprobe_instance);
        int ret = 0;

        if (cur && cur->break_handler) {
                if (cur->break_handler(cur, regs))
                        ret = 1;
        }
        reset_kprobe_instance();
        return ret;
}
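
/*
 * Illustration (not code in this file): when two or more kprobes are
 * registered at the same address, the hash table entry is an
 * "aggregate" kprobe whose handlers are the aggr_* functions above,
 * with the user kprobes chained on its ->list:
 *
 *	kprobe_table[hash] --> aggr kprobe (pre = aggr_pre_handler)
 *	                            |
 *	                            +--> user kprobe #1 --> user kprobe #2
 *
 * See register_aggr_kprobe() below for how this structure is built.
 */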

/* Walks the list and increments nmissed count for multiprobe case */
void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
{
        struct kprobe *kp;
        if (p->pre_handler != aggr_pre_handler) {
                p->nmissed++;
        } else {
                list_for_each_entry_rcu(kp, &p->list, list)
                        kp->nmissed++;
        }
        return;
}

void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
                               struct hlist_head *head)
{
        struct kretprobe *rp = ri->rp;

        /* remove rp inst off the kretprobe_inst_table */
        hlist_del(&ri->hlist);
        INIT_HLIST_NODE(&ri->hlist);
        if (likely(rp)) {
                spin_lock(&rp->lock);
                hlist_add_head(&ri->hlist, &rp->free_instances);
                spin_unlock(&rp->lock);
        } else
                /* Unregistering */
                hlist_add_head(&ri->hlist, head);
}

void kretprobe_hash_lock(struct task_struct *tsk,
                         struct hlist_head **head, unsigned long *flags)
{
        unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
        spinlock_t *hlist_lock;

        *head = &kretprobe_inst_table[hash];
        hlist_lock = kretprobe_table_lock_ptr(hash);
        spin_lock_irqsave(hlist_lock, *flags);
}

static void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
{
        spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
        spin_lock_irqsave(hlist_lock, *flags);
}

void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
{
        unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
        spinlock_t *hlist_lock;

        hlist_lock = kretprobe_table_lock_ptr(hash);
        spin_unlock_irqrestore(hlist_lock, *flags);
}

void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
{
        spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
        spin_unlock_irqrestore(hlist_lock, *flags);
}
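
/*
 * Sketch (illustrative, modeled on the arch trampoline handlers that
 * consume this API): the per-task hash lock brackets the walk of the
 * task's pending return-probe instances, e.g.
 *
 *	kretprobe_hash_lock(current, &head, &flags);
 *	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
 *		if (ri->task != current)
 *			continue;
 *		... run ri->rp->handler(ri, regs), then
 *		recycle_rp_inst(ri, ...);
 *	}
 *	kretprobe_hash_unlock(current, &flags);
 */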

/*
 * This function is called from finish_task_switch when task tk becomes dead,
 * so that we can recycle any function-return probe instances associated
 * with this task. These left over instances represent probed functions
 * that have been called but will never return.
 */
void __kprobes kprobe_flush_task(struct task_struct *tk)
{
        struct kretprobe_instance *ri;
        struct hlist_head *head, empty_rp;
        struct hlist_node *node, *tmp;
        unsigned long hash, flags = 0;

        if (unlikely(!kprobes_initialized))
                /* Early boot.  kretprobe_table_locks not yet initialized. */
                return;

        /* empty_rp must be initialized before recycle_rp_inst() may use it */
        INIT_HLIST_HEAD(&empty_rp);
        hash = hash_ptr(tk, KPROBE_HASH_BITS);
        head = &kretprobe_inst_table[hash];
        kretprobe_table_lock(hash, &flags);
        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
                if (ri->task == tk)
                        recycle_rp_inst(ri, &empty_rp);
        }
        kretprobe_table_unlock(hash, &flags);
        hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
                hlist_del(&ri->hlist);
                kfree(ri);
        }
}

static inline void free_rp_inst(struct kretprobe *rp)
{
        struct kretprobe_instance *ri;
        struct hlist_node *pos, *next;

        hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
                hlist_del(&ri->hlist);
                kfree(ri);
        }
}

static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
{
        unsigned long flags, hash;
        struct kretprobe_instance *ri;
        struct hlist_node *pos, *next;
        struct hlist_head *head;

        /* No race here */
        for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
                kretprobe_table_lock(hash, &flags);
                head = &kretprobe_inst_table[hash];
                hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
                        if (ri->rp == rp)
                                ri->rp = NULL;
                }
                kretprobe_table_unlock(hash, &flags);
        }
        free_rp_inst(rp);
}

/*
 * Keep all fields in the kprobe consistent
 */
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
{
        memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
        memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
}

/*
 * Add the new probe to old_p->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
        if (p->break_handler) {
                if (old_p->break_handler)
                        return -EEXIST;
                list_add_tail_rcu(&p->list, &old_p->list);
                old_p->break_handler = aggr_break_handler;
        } else
                list_add_rcu(&p->list, &old_p->list);
        if (p->post_handler && !old_p->post_handler)
                old_p->post_handler = aggr_post_handler;
        return 0;
}

/*
 * Fill in the required fields of the "manager kprobe". Replace the
 * earlier kprobe in the hlist with the manager kprobe
 */
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
        copy_kprobe(p, ap);
        flush_insn_slot(ap);
        ap->addr = p->addr;
        ap->pre_handler = aggr_pre_handler;
        ap->fault_handler = aggr_fault_handler;
        if (p->post_handler)
                ap->post_handler = aggr_post_handler;
        if (p->break_handler)
                ap->break_handler = aggr_break_handler;

        INIT_LIST_HEAD(&ap->list);
        list_add_rcu(&p->list, &ap->list);

        hlist_replace_rcu(&p->hlist, &ap->hlist);
}

/*
 * This is the second or subsequent kprobe at the address - handle
 * the intricacies
 */
static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
                                          struct kprobe *p)
{
        int ret = 0;
        struct kprobe *ap;

        if (old_p->pre_handler == aggr_pre_handler) {
                copy_kprobe(old_p, p);
                ret = add_new_kprobe(old_p, p);
        } else {
                ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
                if (!ap)
                        return -ENOMEM;
                add_aggr_kprobe(ap, old_p);
                copy_kprobe(ap, p);
                ret = add_new_kprobe(ap, p);
        }
        return ret;
}

static int __kprobes in_kprobes_functions(unsigned long addr)
{
        struct kprobe_blackpoint *kb;

        if (addr >= (unsigned long)__kprobes_text_start &&
            addr < (unsigned long)__kprobes_text_end)
                return -EINVAL;
        /*
         * If there exists a kprobe_blacklist, verify and
         * fail any probe registration in the prohibited area
         */
        for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
                if (kb->start_addr) {
                        if (addr >= kb->start_addr &&
                            addr < (kb->start_addr + kb->range))
                                return -EINVAL;
                }
        }
        return 0;
}

/*
 * If we have a symbol_name argument, look it up and add the offset field
 * to it. This way, we can specify a relative address to a symbol.
 */
static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
{
        kprobe_opcode_t *addr = p->addr;
        if (p->symbol_name) {
                if (addr)
                        return NULL;
                kprobe_lookup_name(p->symbol_name, addr);
        }

        if (!addr)
                return NULL;
        return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
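
/*
 * For example (a sketch; "do_fork" is only illustrative), a caller may
 * specify either an absolute address or symbol + offset, but not both:
 *
 *	struct kprobe kp = {
 *		.symbol_name	= "do_fork",
 *		.offset		= 0x10,		-- probe 16 bytes in
 *	};
 *
 * kprobe_addr() then resolves kp.addr to &do_fork + 0x10; setting both
 * .addr and .symbol_name is rejected.
 */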

static int __kprobes __register_kprobe(struct kprobe *p,
                                       unsigned long called_from)
{
        int ret = 0;
        struct kprobe *old_p;
        struct module *probed_mod;
        kprobe_opcode_t *addr;

        addr = kprobe_addr(p);
        if (!addr)
                return -EINVAL;
        p->addr = addr;

        preempt_disable();
        if (!__kernel_text_address((unsigned long) p->addr) ||
            in_kprobes_functions((unsigned long) p->addr)) {
                preempt_enable();
                return -EINVAL;
        }

        p->mod_refcounted = 0;

        /*
         * Check if we are probing a module.
         */
        probed_mod = __module_text_address((unsigned long) p->addr);
        if (probed_mod) {
                struct module *calling_mod;
                calling_mod = __module_text_address(called_from);
                /*
                 * We must allow modules to probe themselves and in this
                 * case avoid incrementing the module refcount, so as to
                 * allow unloading of self-probing modules.
                 */
                if (calling_mod != probed_mod) {
                        if (unlikely(!try_module_get(probed_mod))) {
                                preempt_enable();
                                return -EINVAL;
                        }
                        p->mod_refcounted = 1;
                } else
                        probed_mod = NULL;
        }
        preempt_enable();

        p->nmissed = 0;
        INIT_LIST_HEAD(&p->list);
        mutex_lock(&kprobe_mutex);
        old_p = get_kprobe(p->addr);
        if (old_p) {
                ret = register_aggr_kprobe(old_p, p);
                goto out;
        }

        ret = arch_prepare_kprobe(p);
        if (ret)
                goto out;

        INIT_HLIST_NODE(&p->hlist);
        hlist_add_head_rcu(&p->hlist,
                           &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

        if (kprobe_enabled)
                arch_arm_kprobe(p);

out:
        mutex_unlock(&kprobe_mutex);

        if (ret && probed_mod)
                module_put(probed_mod);
        return ret;
}

/*
 * Unregister a kprobe without a scheduler synchronization.
 */
static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
        struct kprobe *old_p, *list_p;

        old_p = get_kprobe(p->addr);
        if (unlikely(!old_p))
                return -EINVAL;

        if (p != old_p) {
                list_for_each_entry_rcu(list_p, &old_p->list, list)
                        if (list_p == p)
                                /* kprobe p is a valid probe */
                                goto valid_p;
                return -EINVAL;
        }
valid_p:
        if (old_p == p ||
            (old_p->pre_handler == aggr_pre_handler &&
             list_is_singular(&old_p->list))) {
                /*
                 * Only probe on the hash list. Disarm only if kprobes are
                 * enabled - otherwise, the breakpoint would already have
                 * been removed. We save on flushing icache.
                 */
                if (kprobe_enabled)
                        arch_disarm_kprobe(p);
                hlist_del_rcu(&old_p->hlist);
        } else {
                if (p->break_handler)
                        old_p->break_handler = NULL;
                if (p->post_handler) {
                        list_for_each_entry_rcu(list_p, &old_p->list, list) {
                                if ((list_p != p) && (list_p->post_handler))
                                        goto noclean;
                        }
                        old_p->post_handler = NULL;
                }
noclean:
                list_del_rcu(&p->list);
        }
        return 0;
}

static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
{
        struct module *mod;
        struct kprobe *old_p;

        if (p->mod_refcounted) {
                /*
                 * Since we've already incremented refcount,
                 * we don't need to disable preemption.
                 */
                mod = module_text_address((unsigned long)p->addr);
                if (mod)
                        module_put(mod);
        }

        if (list_empty(&p->list) || list_is_singular(&p->list)) {
                if (!list_empty(&p->list)) {
                        /* "p" is the last child of an aggr_kprobe */
                        old_p = list_entry(p->list.next, struct kprobe, list);
                        list_del(&p->list);
                        kfree(old_p);
                }
                arch_remove_kprobe(p);
        }
}

static int __register_kprobes(struct kprobe **kps, int num,
                              unsigned long called_from)
{
        int i, ret = 0;

        if (num <= 0)
                return -EINVAL;
        for (i = 0; i < num; i++) {
                ret = __register_kprobe(kps[i], called_from);
                if (ret < 0) {
                        if (i > 0)
                                unregister_kprobes(kps, i);
                        break;
                }
        }
        return ret;
}

/*
 * Registration and unregistration functions for kprobe.
 */
int __kprobes register_kprobe(struct kprobe *p)
{
        return __register_kprobes(&p, 1,
                                  (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kprobe(struct kprobe *p)
{
        unregister_kprobes(&p, 1);
}

int __kprobes register_kprobes(struct kprobe **kps, int num)
{
        return __register_kprobes(kps, num,
                                  (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kprobes(struct kprobe **kps, int num)
{
        int i;

        if (num <= 0)
                return;
        mutex_lock(&kprobe_mutex);
        for (i = 0; i < num; i++)
                if (__unregister_kprobe_top(kps[i]) < 0)
                        kps[i]->addr = NULL;
        mutex_unlock(&kprobe_mutex);

        synchronize_sched();
        for (i = 0; i < num; i++)
                if (kps[i]->addr)
                        __unregister_kprobe_bottom(kps[i]);
}
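
/*
 * Typical usage, as a sketch (handler name and the probed symbol are
 * illustrative only; cf. Documentation/kprobes.txt):
 *
 *	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "pre_handler: addr=%p\n", p->addr);
 *		return 0;
 *	}
 *
 *	static struct kprobe kp = {
 *		.symbol_name	= "do_fork",
 *		.pre_handler	= handler_pre,
 *	};
 *
 *	ret = register_kprobe(&kp);
 *	...
 *	unregister_kprobe(&kp);
 */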

static struct notifier_block kprobe_exceptions_nb = {
        .notifier_call = kprobe_exceptions_notify,
        .priority = 0x7fffffff /* we need to be notified first */
};

unsigned long __weak arch_deref_entry_point(void *entry)
{
        return (unsigned long)entry;
}

static int __register_jprobes(struct jprobe **jps, int num,
                              unsigned long called_from)
{
        struct jprobe *jp;
        int ret = 0, i;

        if (num <= 0)
                return -EINVAL;
        for (i = 0; i < num; i++) {
                unsigned long addr;
                jp = jps[i];
                addr = arch_deref_entry_point(jp->entry);

                if (!kernel_text_address(addr))
                        ret = -EINVAL;
                else {
                        /* Todo: Verify probepoint is a function entry point */
                        jp->kp.pre_handler = setjmp_pre_handler;
                        jp->kp.break_handler = longjmp_break_handler;
                        ret = __register_kprobe(&jp->kp, called_from);
                }
                if (ret < 0) {
                        if (i > 0)
                                unregister_jprobes(jps, i);
                        break;
                }
        }
        return ret;
}

int __kprobes register_jprobe(struct jprobe *jp)
{
        return __register_jprobes(&jp, 1,
                                  (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_jprobe(struct jprobe *jp)
{
        unregister_jprobes(&jp, 1);
}

int __kprobes register_jprobes(struct jprobe **jps, int num)
{
        return __register_jprobes(jps, num,
                                  (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_jprobes(struct jprobe **jps, int num)
{
        int i;

        if (num <= 0)
                return;
        mutex_lock(&kprobe_mutex);
        for (i = 0; i < num; i++)
                if (__unregister_kprobe_top(&jps[i]->kp) < 0)
                        jps[i]->kp.addr = NULL;
        mutex_unlock(&kprobe_mutex);

        synchronize_sched();
        for (i = 0; i < num; i++) {
                if (jps[i]->kp.addr)
                        __unregister_kprobe_bottom(&jps[i]->kp);
        }
}
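
/*
 * Sketch of jprobe usage (illustrative; the mirrored signature must
 * match the probed function exactly, and the handler must end with
 * jprobe_return() - see Documentation/kprobes.txt):
 *
 *	static long jdo_fork(unsigned long clone_flags,
 *			     unsigned long stack_start, struct pt_regs *regs,
 *			     unsigned long stack_size,
 *			     int __user *parent_tidptr,
 *			     int __user *child_tidptr)
 *	{
 *		printk(KERN_INFO "jprobe: clone_flags=0x%lx\n", clone_flags);
 *		jprobe_return();
 *		return 0;
 *	}
 *
 *	static struct jprobe my_jprobe = {
 *		.entry = jdo_fork,
 *		.kp = { .symbol_name = "do_fork" },
 *	};
 */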

#ifdef CONFIG_KRETPROBES
/*
 * This kprobe pre_handler is registered with every kretprobe. When the
 * probe hits, it will set up the return probe.
 */
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
                                           struct pt_regs *regs)
{
        struct kretprobe *rp = container_of(p, struct kretprobe, kp);
        unsigned long hash, flags = 0;
        struct kretprobe_instance *ri;

        /* TODO: consider swapping the RA only after the last pre_handler fires */
        hash = hash_ptr(current, KPROBE_HASH_BITS);
        spin_lock_irqsave(&rp->lock, flags);
        if (!hlist_empty(&rp->free_instances)) {
                ri = hlist_entry(rp->free_instances.first,
                                 struct kretprobe_instance, hlist);
                hlist_del(&ri->hlist);
                spin_unlock_irqrestore(&rp->lock, flags);

                ri->rp = rp;
                ri->task = current;

                if (rp->entry_handler && rp->entry_handler(ri, regs)) {
                        /*
                         * The entry handler declined this instance.
                         * rp->lock was already dropped above, so re-take
                         * it and put the instance back on the free list
                         * instead of unlocking a second time.
                         */
                        spin_lock_irqsave(&rp->lock, flags);
                        hlist_add_head(&ri->hlist, &rp->free_instances);
                        spin_unlock_irqrestore(&rp->lock, flags);
                        return 0;
                }

                arch_prepare_kretprobe(ri, regs);

                /* XXX(hch): why is there no hlist_move_head? */
                INIT_HLIST_NODE(&ri->hlist);
                kretprobe_table_lock(hash, &flags);
                hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
                kretprobe_table_unlock(hash, &flags);
        } else {
                rp->nmissed++;
                spin_unlock_irqrestore(&rp->lock, flags);
        }
        return 0;
}

static int __kprobes __register_kretprobe(struct kretprobe *rp,
                                          unsigned long called_from)
{
        int ret = 0;
        struct kretprobe_instance *inst;
        int i;
        void *addr;

        if (kretprobe_blacklist_size) {
                addr = kprobe_addr(&rp->kp);
                if (!addr)
                        return -EINVAL;

                for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
                        if (kretprobe_blacklist[i].addr == addr)
                                return -EINVAL;
                }
        }

        rp->kp.pre_handler = pre_handler_kretprobe;
        rp->kp.post_handler = NULL;
        rp->kp.fault_handler = NULL;
        rp->kp.break_handler = NULL;

        /* Pre-allocate memory for max kretprobe instances */
        if (rp->maxactive <= 0) {
#ifdef CONFIG_PREEMPT
                rp->maxactive = max(10, 2 * NR_CPUS);
#else
                rp->maxactive = NR_CPUS;
#endif
        }
        spin_lock_init(&rp->lock);
        INIT_HLIST_HEAD(&rp->free_instances);
        for (i = 0; i < rp->maxactive; i++) {
                inst = kmalloc(sizeof(struct kretprobe_instance) +
                               rp->data_size, GFP_KERNEL);
                if (inst == NULL) {
                        free_rp_inst(rp);
                        return -ENOMEM;
                }
                INIT_HLIST_NODE(&inst->hlist);
                hlist_add_head(&inst->hlist, &rp->free_instances);
        }

        rp->nmissed = 0;
        /* Establish function entry probe point */
        ret = __register_kprobe(&rp->kp, called_from);
        if (ret != 0)
                free_rp_inst(rp);
        return ret;
}

static int __register_kretprobes(struct kretprobe **rps, int num,
                                 unsigned long called_from)
{
        int ret = 0, i;

        if (num <= 0)
                return -EINVAL;
        for (i = 0; i < num; i++) {
                ret = __register_kretprobe(rps[i], called_from);
                if (ret < 0) {
                        if (i > 0)
                                unregister_kretprobes(rps, i);
                        break;
                }
        }
        return ret;
}

int __kprobes register_kretprobe(struct kretprobe *rp)
{
        return __register_kretprobes(&rp, 1,
                                     (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
        unregister_kretprobes(&rp, 1);
}

int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
        return __register_kretprobes(rps, num,
                                     (unsigned long)__builtin_return_address(0));
}

void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
        int i;

        if (num <= 0)
                return;
        mutex_lock(&kprobe_mutex);
        for (i = 0; i < num; i++)
                if (__unregister_kprobe_top(&rps[i]->kp) < 0)
                        rps[i]->kp.addr = NULL;
        mutex_unlock(&kprobe_mutex);

        synchronize_sched();
        for (i = 0; i < num; i++) {
                if (rps[i]->kp.addr) {
                        __unregister_kprobe_bottom(&rps[i]->kp);
                        cleanup_rp_inst(rps[i]);
                }
        }
}
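
/*
 * Sketch of kretprobe usage (illustrative names; cf.
 * Documentation/kprobes.txt):
 *
 *	static int ret_handler(struct kretprobe_instance *ri,
 *			       struct pt_regs *regs)
 *	{
 *		printk(KERN_INFO "do_fork returned %lu\n",
 *		       regs_return_value(regs));
 *		return 0;
 *	}
 *
 *	static struct kretprobe my_kretprobe = {
 *		.handler	= ret_handler,
 *		.maxactive	= 20,
 *		.kp		= { .symbol_name = "do_fork" },
 *	};
 *
 *	register_kretprobe(&my_kretprobe);
 */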

#else /* CONFIG_KRETPROBES */
int __kprobes register_kretprobe(struct kretprobe *rp)
{
        return -ENOSYS;
}

int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
        return -ENOSYS;
}

void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
}

void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
}

static int __kprobes pre_handler_kretprobe(struct kprobe *p,
                                           struct pt_regs *regs)
{
        return 0;
}

#endif /* CONFIG_KRETPROBES */

static int __init init_kprobes(void)
{
        int i, err = 0;
        unsigned long offset = 0, size = 0;
        char *modname, namebuf[128];
        const char *symbol_name;
        void *addr;
        struct kprobe_blackpoint *kb;

        /* FIXME allocate the probe table, currently defined statically */
        /* initialize all list heads */
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                INIT_HLIST_HEAD(&kprobe_table[i]);
                INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
                spin_lock_init(&(kretprobe_table_locks[i].lock));
        }

        /*
         * Lookup and populate the kprobe_blacklist.
         *
         * Unlike the kretprobe blacklist, we'll need to determine
         * the range of addresses that belong to the said functions,
         * since a kprobe need not necessarily be at the beginning
         * of a function.
         */
        for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
                kprobe_lookup_name(kb->name, addr);
                if (!addr)
                        continue;

                kb->start_addr = (unsigned long)addr;
                symbol_name = kallsyms_lookup(kb->start_addr,
                                              &size, &offset, &modname, namebuf);
                if (!symbol_name)
                        kb->range = 0;
                else
                        kb->range = size;
        }

        if (kretprobe_blacklist_size) {
                /* lookup the function address from its name */
                for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
                        kprobe_lookup_name(kretprobe_blacklist[i].name,
                                           kretprobe_blacklist[i].addr);
                        if (!kretprobe_blacklist[i].addr)
                                printk("kretprobe: lookup failed: %s\n",
                                       kretprobe_blacklist[i].name);
                }
        }

        /* By default, kprobes are enabled */
        kprobe_enabled = true;

        err = arch_init_kprobes();
        if (!err)
                err = register_die_notifier(&kprobe_exceptions_nb);
        kprobes_initialized = (err == 0);

        if (!err)
                init_test_probes();
        return err;
}

#ifdef CONFIG_DEBUG_FS
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
                                   const char *sym, int offset, char *modname)
{
        char *kprobe_type;

        if (p->pre_handler == pre_handler_kretprobe)
                kprobe_type = "r";
        else if (p->pre_handler == setjmp_pre_handler)
                kprobe_type = "j";
        else
                kprobe_type = "k";
        if (sym)
                seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type,
                           sym, offset, (modname ? modname : " "));
        else
                seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr);
}
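
/*
 * With the format above, lines in the debugfs "list" file come out
 * roughly as (addresses and symbols illustrative):
 *
 *	c015d71a k vfs_read+0x0
 *	c011a316 j do_fork+0x0
 *	c03dedc5 r tcp_v4_rcv+0x0
 *
 * where k/j/r mark a kprobe, a jprobe and a kretprobe respectively.
 */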

static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
{
        return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
}

static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
        (*pos)++;
        if (*pos >= KPROBE_TABLE_SIZE)
                return NULL;
        return pos;
}

static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
{
        /* Nothing to do */
}

static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct kprobe *p, *kp;
        const char *sym = NULL;
        unsigned int i = *(loff_t *) v;
        unsigned long offset = 0;
        char *modname, namebuf[128];

        head = &kprobe_table[i];
        preempt_disable();
        hlist_for_each_entry_rcu(p, node, head, hlist) {
                sym = kallsyms_lookup((unsigned long)p->addr, NULL,
                                      &offset, &modname, namebuf);
                if (p->pre_handler == aggr_pre_handler) {
                        list_for_each_entry_rcu(kp, &p->list, list)
                                report_probe(pi, kp, sym, offset, modname);
                } else
                        report_probe(pi, p, sym, offset, modname);
        }
        preempt_enable();
        return 0;
}

static struct seq_operations kprobes_seq_ops = {
        .start = kprobe_seq_start,
        .next  = kprobe_seq_next,
        .stop  = kprobe_seq_stop,
        .show  = show_kprobe_addr
};

static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
{
        return seq_open(filp, &kprobes_seq_ops);
}

static struct file_operations debugfs_kprobes_operations = {
        .open		= kprobes_open,
        .read		= seq_read,
        .llseek		= seq_lseek,
        .release	= seq_release,
};

static void __kprobes enable_all_kprobes(void)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct kprobe *p;
        unsigned int i;

        mutex_lock(&kprobe_mutex);

        /* If kprobes are already enabled, just return */
        if (kprobe_enabled)
                goto already_enabled;

        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist)
                        arch_arm_kprobe(p);
        }

        kprobe_enabled = true;
        printk(KERN_INFO "Kprobes globally enabled\n");

already_enabled:
        mutex_unlock(&kprobe_mutex);
        return;
}

static void __kprobes disable_all_kprobes(void)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct kprobe *p;
        unsigned int i;

        mutex_lock(&kprobe_mutex);

        /* If kprobes are already disabled, just return */
        if (!kprobe_enabled)
                goto already_disabled;

        kprobe_enabled = false;
        printk(KERN_INFO "Kprobes globally disabled\n");
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!arch_trampoline_kprobe(p))
                                arch_disarm_kprobe(p);
                }
        }

        mutex_unlock(&kprobe_mutex);
        /* Allow all currently running kprobes to complete */
        synchronize_sched();
        return;

already_disabled:
        mutex_unlock(&kprobe_mutex);
        return;
}

/*
 * XXX: The debugfs bool file interface doesn't allow for callbacks
 * when the bool state is switched. We can reuse that facility when
 * available
 */
static ssize_t read_enabled_file_bool(struct file *file,
               char __user *user_buf, size_t count, loff_t *ppos)
{
        char buf[3];

        if (kprobe_enabled)
                buf[0] = '1';
        else
                buf[0] = '0';
        buf[1] = '\n';
        buf[2] = 0x00;
        return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}

static ssize_t write_enabled_file_bool(struct file *file,
               const char __user *user_buf, size_t count, loff_t *ppos)
{
        char buf[32];
        int buf_size;

        buf_size = min(count, (sizeof(buf)-1));
        if (copy_from_user(buf, user_buf, buf_size))
                return -EFAULT;

        switch (buf[0]) {
        case 'y':
        case 'Y':
        case '1':
                enable_all_kprobes();
                break;
        case 'n':
        case 'N':
        case '0':
                disable_all_kprobes();
                break;
        }

        return count;
}

static struct file_operations fops_kp = {
        .read	= read_enabled_file_bool,
        .write	= write_enabled_file_bool,
};
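
/*
 * Usage sketch for these debugfs knobs (paths assume debugfs is
 * mounted at /sys/kernel/debug):
 *
 *	echo 0 > /sys/kernel/debug/kprobes/enabled    # disarm all probes
 *	echo 1 > /sys/kernel/debug/kprobes/enabled    # re-arm them
 *	cat /sys/kernel/debug/kprobes/list            # dump registered probes
 */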

static int __kprobes debugfs_kprobe_init(void)
{
        struct dentry *dir, *file;
        unsigned int value = 1;

        dir = debugfs_create_dir("kprobes", NULL);
        if (!dir)
                return -ENOMEM;

        file = debugfs_create_file("list", 0444, dir, NULL,
                                   &debugfs_kprobes_operations);
        if (!file) {
                debugfs_remove(dir);
                return -ENOMEM;
        }

        file = debugfs_create_file("enabled", 0600, dir,
                                   &value, &fops_kp);
        if (!file) {
                debugfs_remove(dir);
                return -ENOMEM;
        }

        return 0;
}

late_initcall(debugfs_kprobe_init);
#endif /* CONFIG_DEBUG_FS */

module_init(init_kprobes);

EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_kprobes);
EXPORT_SYMBOL_GPL(unregister_kprobes);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
EXPORT_SYMBOL_GPL(jprobe_return);
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);