kprobes.c revision e65845235c8120be63001fc1a4ac00c819194bbe
/*
 *  Kernel Probes (KProbes)
 *  kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes suggestions from
 *		Rusty Russell).
 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
 *		hlists and exceptions notifier as suggested by Andi Kleen.
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
 *		exceptions notifier to be first on the priority list.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <asm-generic/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/kdebug.h>

#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)

static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];

unsigned int kprobe_cpu = NR_CPUS;
static DEFINE_SPINLOCK(kprobe_lock);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;

/*
 * kprobe->ainsn.insn points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster.
 */
#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))

struct kprobe_insn_page {
	struct hlist_node hlist;
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	char slot_used[INSNS_PER_PAGE];
	int nused;
};

static struct hlist_head kprobe_insn_pages;

/**
 * get_insn_slot() - Find a slot on an executable page for an instruction.
 * We allocate an executable page if there's no room on existing ones.
 */
kprobe_opcode_t __kprobes *get_insn_slot(void)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos;

	hlist_for_each(pos, &kprobe_insn_pages) {
		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
		if (kip->nused < INSNS_PER_PAGE) {
			int i;
			for (i = 0; i < INSNS_PER_PAGE; i++) {
				if (!kip->slot_used[i]) {
					kip->slot_used[i] = 1;
					kip->nused++;
					return kip->insns + (i * MAX_INSN_SIZE);
				}
			}
			/* Surprise!  No unused slots.  Fix kip->nused. */
			kip->nused = INSNS_PER_PAGE;
		}
	}

	/* All out of space.  Need to allocate a new page.  Use slot 0. */
	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
	if (!kip) {
		return NULL;
	}

	/*
	 * Use module_alloc so this page is within +/- 2GB of where the
	 * kernel image and loaded module images reside. This is required
	 * so x86_64 can correctly handle the %rip-relative fixups.
	 */
	kip->insns = module_alloc(PAGE_SIZE);
	if (!kip->insns) {
		kfree(kip);
		return NULL;
	}
	INIT_HLIST_NODE(&kip->hlist);
	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
	memset(kip->slot_used, 0, INSNS_PER_PAGE);
	kip->slot_used[0] = 1;
	kip->nused = 1;
	return kip->insns;
}

void __kprobes free_insn_slot(kprobe_opcode_t *slot)
{
	struct kprobe_insn_page *kip;
	struct hlist_node *pos;

	hlist_for_each(pos, &kprobe_insn_pages) {
		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
		if (kip->insns <= slot &&
		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
			int i = (slot - kip->insns) / MAX_INSN_SIZE;
			kip->slot_used[i] = 0;
			kip->nused--;
			if (kip->nused == 0) {
				/*
				 * Page is no longer in use.  Free it unless
				 * it's the last one.  We keep the last one
				 * so as not to have to set it up again the
				 * next time somebody inserts a probe.
				 */
				hlist_del(&kip->hlist);
				if (hlist_empty(&kprobe_insn_pages)) {
					INIT_HLIST_NODE(&kip->hlist);
					hlist_add_head(&kip->hlist,
						       &kprobe_insn_pages);
				} else {
					module_free(NULL, kip->insns);
					kfree(kip);
				}
			}
			return;
		}
	}
}

/* Locks kprobe: irqs must be disabled */
void __kprobes lock_kprobes(void)
{
	unsigned long flags = 0;

	/*
	 * Keep local interrupts disabled between taking kprobe_lock and
	 * updating kprobe_cpu.  If an interrupt carrying a kprobe fired in
	 * that window, its handler would not see that this CPU already owns
	 * the lock and would deadlock trying to take it again.
	 */
	local_irq_save(flags);

	spin_lock(&kprobe_lock);
	kprobe_cpu = smp_processor_id();

	local_irq_restore(flags);
}

void __kprobes unlock_kprobes(void)
{
	unsigned long flags = 0;

	/*
	 * Keep local interrupts disabled between clearing kprobe_cpu and
	 * releasing kprobe_lock, for the same reason: an interrupt carrying
	 * a kprobe in that window would deadlock on the still-held lock.
	 */
	local_irq_save(flags);

	kprobe_cpu = NR_CPUS;
	spin_unlock(&kprobe_lock);

	local_irq_restore(flags);
}

/* We have preemption disabled, so it is safe to use the __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	__get_cpu_var(kprobe_instance) = kp;
}

static inline void reset_kprobe_instance(void)
{
	__get_cpu_var(kprobe_instance) = NULL;
}

/* You have to be holding the kprobe_lock */
struct kprobe __kprobes *get_kprobe(void *addr)
{
	struct hlist_head *head;
	struct hlist_node *node;

	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
	hlist_for_each(node, head) {
		struct kprobe *p = hlist_entry(node, struct kprobe, hlist);
		if (p->addr == addr)
			return p;
	}
	return NULL;
}

/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list
 */
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry(kp, &p->list, list) {
		if (kp->pre_handler) {
			set_kprobe_instance(kp);
			if (kp->pre_handler(kp, regs))
				return 1;
		}
		reset_kprobe_instance();
	}
	return 0;
}

static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
					unsigned long flags)
{
	struct kprobe *kp;

	list_for_each_entry(kp, &p->list, list) {
		if (kp->post_handler) {
			set_kprobe_instance(kp);
			kp->post_handler(kp, regs, flags);
			reset_kprobe_instance();
		}
	}
	return;
}

static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
					int trapnr)
{
	struct kprobe *cur = __get_cpu_var(kprobe_instance);

	/*
	 * if we faulted "during" the execution of a user specified
	 * probe handler, invoke just that probe's fault handler
	 */
	if (cur && cur->fault_handler) {
		if (cur->fault_handler(cur, regs, trapnr))
			return 1;
	}
	return 0;
}

static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *cur = __get_cpu_var(kprobe_instance);
	int ret = 0;

	if (cur && cur->break_handler) {
		if (cur->break_handler(cur, regs))
			ret = 1;
	}
	reset_kprobe_instance();
	return ret;
}

struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
{
	struct hlist_node *node;
	struct kretprobe_instance *ri;
	hlist_for_each_entry(ri, node, &rp->free_instances, uflist)
		return ri;
	return NULL;
}

static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
							      *rp)
{
	struct hlist_node *node;
	struct kretprobe_instance *ri;
	hlist_for_each_entry(ri, node, &rp->used_instances, uflist)
		return ri;
	return NULL;
}

void __kprobes add_rp_inst(struct kretprobe_instance *ri)
{
	/*
	 * Remove rp inst off the free list -
	 * Add it back when probed function returns
	 */
	hlist_del(&ri->uflist);

	/* Add rp inst onto table */
	INIT_HLIST_NODE(&ri->hlist);
	hlist_add_head(&ri->hlist,
		       &kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]);

	/* Also add this rp inst to the used list. */
	INIT_HLIST_NODE(&ri->uflist);
	hlist_add_head(&ri->uflist, &ri->rp->used_instances);
}

void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
{
	/* remove rp inst off the rprobe_inst_table */
	hlist_del(&ri->hlist);
	if (ri->rp) {
		/* remove rp inst off the used list */
		hlist_del(&ri->uflist);
		/* put rp inst back onto the free list */
		INIT_HLIST_NODE(&ri->uflist);
		hlist_add_head(&ri->uflist, &ri->rp->free_instances);
	} else
		/* Unregistering */
		kfree(ri);
}

struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
{
	return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
}

/*
 * This function is called from exit_thread or flush_thread when task tk's
 * stack is being recycled so that we can recycle any function-return probe
 * instances associated with this task. These left-over instances represent
 * probed functions that have been called but will never return.
 */
void __kprobes kprobe_flush_task(struct task_struct *tk)
{
	struct kretprobe_instance *ri;
	struct hlist_head *head;
	struct hlist_node *node, *tmp;
	unsigned long flags = 0;

	spin_lock_irqsave(&kprobe_lock, flags);
	head = kretprobe_inst_table_head(current);
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
		if (ri->task == tk)
			recycle_rp_inst(ri);
	}
	spin_unlock_irqrestore(&kprobe_lock, flags);
}

/*
 * This kprobe pre_handler is registered with every kretprobe. When a probe
 * hits, it sets up the return probe.
 */
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
					   struct pt_regs *regs)
{
	struct kretprobe *rp = container_of(p, struct kretprobe, kp);

	/* TODO: consider swapping the RA only after the last pre_handler has fired */
	arch_prepare_kretprobe(rp, regs);
	return 0;
}

static inline void free_rp_inst(struct kretprobe *rp)
{
	struct kretprobe_instance *ri;
	while ((ri = get_free_rp_inst(rp)) != NULL) {
		hlist_del(&ri->uflist);
		kfree(ri);
	}
}

/*
 * Keep all fields in the kprobe consistent
 */
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
{
	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
}

/*
 * Add the new probe to old_p->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
	struct kprobe *kp;

	if (p->break_handler) {
		list_for_each_entry(kp, &old_p->list, list) {
			if (kp->break_handler)
				return -EEXIST;
		}
		list_add_tail(&p->list, &old_p->list);
	} else
		list_add(&p->list, &old_p->list);
	return 0;
}

/*
 * Fill in the required fields of the "manager kprobe".  Replace the
 * earlier kprobe in the hlist with the manager kprobe.
 */
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
	copy_kprobe(p, ap);
	ap->addr = p->addr;
	ap->pre_handler = aggr_pre_handler;
	ap->post_handler = aggr_post_handler;
	ap->fault_handler = aggr_fault_handler;
	ap->break_handler = aggr_break_handler;

	INIT_LIST_HEAD(&ap->list);
	list_add(&p->list, &ap->list);

	INIT_HLIST_NODE(&ap->hlist);
	hlist_del(&p->hlist);
	hlist_add_head(&ap->hlist,
		       &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]);
}

/*
 * This is the second or subsequent kprobe at the address - handle
 * the intricacies
 * TODO: Move kcalloc outside the spinlock
 */
static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
					  struct kprobe *p)
{
	int ret = 0;
	struct kprobe *ap;

	if (old_p->pre_handler == aggr_pre_handler) {
		copy_kprobe(old_p, p);
		ret = add_new_kprobe(old_p, p);
	} else {
		ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC);
		if (!ap)
			return -ENOMEM;
		add_aggr_kprobe(ap, old_p);
		copy_kprobe(ap, p);
		ret = add_new_kprobe(ap, p);
	}
	return ret;
}

/* kprobe removal house-keeping routines */
static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
{
	arch_disarm_kprobe(p);
	hlist_del(&p->hlist);
	spin_unlock_irqrestore(&kprobe_lock, flags);
	arch_remove_kprobe(p);
}

static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
				       struct kprobe *p, unsigned long flags)
{
	list_del(&p->list);
	if (list_empty(&old_p->list)) {
		cleanup_kprobe(old_p, flags);
		kfree(old_p);
	} else
		spin_unlock_irqrestore(&kprobe_lock, flags);
}

static int __kprobes in_kprobes_functions(unsigned long addr)
{
	if (addr >= (unsigned long)__kprobes_text_start
	    && addr < (unsigned long)__kprobes_text_end)
		return -EINVAL;
	return 0;
}

int __kprobes register_kprobe(struct kprobe *p)
{
	int ret = 0;
	unsigned long flags = 0;
	struct kprobe *old_p;

	if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0)
		return ret;
	if ((ret = arch_prepare_kprobe(p)) != 0)
		goto rm_kprobe;

	spin_lock_irqsave(&kprobe_lock, flags);
	old_p = get_kprobe(p->addr);
	p->nmissed = 0;
	if (old_p) {
		ret = register_aggr_kprobe(old_p, p);
		goto out;
	}

	arch_copy_kprobe(p);
	INIT_HLIST_NODE(&p->hlist);
	hlist_add_head(&p->hlist,
		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	arch_arm_kprobe(p);

out:
	spin_unlock_irqrestore(&kprobe_lock, flags);
rm_kprobe:
	if (ret == -EEXIST)
		arch_remove_kprobe(p);
	return ret;
}

void __kprobes unregister_kprobe(struct kprobe *p)
{
	unsigned long flags;
	struct kprobe *old_p;

	spin_lock_irqsave(&kprobe_lock, flags);
	old_p = get_kprobe(p->addr);
	if (old_p) {
		if (old_p->pre_handler == aggr_pre_handler)
			cleanup_aggr_kprobe(old_p, p, flags);
		else
			cleanup_kprobe(p, flags);
	} else
		spin_unlock_irqrestore(&kprobe_lock, flags);
}

static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};

int __kprobes register_jprobe(struct jprobe *jp)
{
	/* Todo: Verify probepoint is a function entry point */
	jp->kp.pre_handler = setjmp_pre_handler;
	jp->kp.break_handler = longjmp_break_handler;

	return register_kprobe(&jp->kp);
}

void __kprobes unregister_jprobe(struct jprobe *jp)
{
	unregister_kprobe(&jp->kp);
}

#ifdef ARCH_SUPPORTS_KRETPROBES

int __kprobes register_kretprobe(struct kretprobe *rp)
{
	int ret = 0;
	struct kretprobe_instance *inst;
	int i;

	rp->kp.pre_handler = pre_handler_kretprobe;

	/* Pre-allocate memory for max kretprobe instances */
	if (rp->maxactive <= 0) {
#ifdef CONFIG_PREEMPT
		rp->maxactive = max(10, 2 * NR_CPUS);
#else
		rp->maxactive = NR_CPUS;
#endif
	}
	INIT_HLIST_HEAD(&rp->used_instances);
	INIT_HLIST_HEAD(&rp->free_instances);
	for (i = 0; i < rp->maxactive; i++) {
		inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
		if (inst == NULL) {
			free_rp_inst(rp);
			return -ENOMEM;
		}
		INIT_HLIST_NODE(&inst->uflist);
		hlist_add_head(&inst->uflist, &rp->free_instances);
	}

	rp->nmissed = 0;
	/* Establish function entry probe point */
	if ((ret = register_kprobe(&rp->kp)) != 0)
		free_rp_inst(rp);
	return ret;
}

#else /* ARCH_SUPPORTS_KRETPROBES */

int __kprobes register_kretprobe(struct kretprobe *rp)
{
	return -ENOSYS;
}

#endif /* ARCH_SUPPORTS_KRETPROBES */

void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
	unsigned long flags;
	struct kretprobe_instance *ri;

	unregister_kprobe(&rp->kp);
	/* No race here */
	spin_lock_irqsave(&kprobe_lock, flags);
	free_rp_inst(rp);
	while ((ri = get_used_rp_inst(rp)) != NULL) {
		ri->rp = NULL;
		hlist_del(&ri->uflist);
	}
	spin_unlock_irqrestore(&kprobe_lock, flags);
}

static int __init init_kprobes(void)
{
	int i, err = 0;

	/* FIXME allocate the probe table, currently defined statically */
	/* initialize all list heads */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		INIT_HLIST_HEAD(&kprobe_table[i]);
		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
	}

	err = arch_init_kprobes();
	if (!err)
		err = register_die_notifier(&kprobe_exceptions_nb);

	return err;
}

__initcall(init_kprobes);

EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(jprobe_return);
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
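
For reference, a minimal usage sketch (not part of kprobes.c) of how a loadable module might exercise the register_kprobe()/unregister_kprobe() interface exported above. The module and handler names, the do_fork probe point, and the availability of kallsyms_lookup_name() to modules are assumptions for illustration and depend on the kernel configuration.

/*
 * Illustrative example module -- not part of kprobes.c.
 * Assumes CONFIG_KALLSYMS and that the chosen symbol exists on this kernel.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>

static struct kprobe example_kp;

/* Runs just before the probed instruction is executed. */
static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit at %p, missed %lu times\n",
	       p->addr, p->nmissed);
	return 0;	/* 0: let the probed instruction run normally */
}

static int __init example_init(void)
{
	/* Probe point chosen for illustration only. */
	example_kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("do_fork");
	if (!example_kp.addr)
		return -EINVAL;
	example_kp.pre_handler = example_pre;
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");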