kprobes.c revision 129415607845d4daea11ddcba706005c69dcb942
1/*
2 *  Kernel Probes (KProbes)
3 *  kernel/kprobes.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Copyright (C) IBM Corporation, 2002, 2004
20 *
21 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
22 *		Probes initial implementation (includes suggestions from
23 *		Rusty Russell).
24 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
25 *		hlists and exceptions notifier as suggested by Andi Kleen.
26 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
27 *		interface to access function arguments.
28 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
29 *		exceptions notifier to be first on the priority list.
30 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
31 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
32 *		<prasanna@in.ibm.com> added function-return probes.
33 */
34#include <linux/kprobes.h>
35#include <linux/hash.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/stddef.h>
39#include <linux/module.h>
40#include <linux/moduleloader.h>
41#include <linux/kallsyms.h>
42#include <linux/freezer.h>
43#include <linux/seq_file.h>
44#include <linux/debugfs.h>
45#include <linux/kdebug.h>
46
47#include <asm-generic/sections.h>
48#include <asm/cacheflush.h>
49#include <asm/errno.h>
50#include <asm/uaccess.h>
51
/* Each probe hash table in this file has 2^KPROBE_HASH_BITS buckets. */
#define KPROBE_HASH_BITS	6
#define KPROBE_TABLE_SIZE	(1 << (KPROBE_HASH_BITS))
54
55
/*
 * Resolve symbol @name through kallsyms and store the result in @addr.
 * Architectures with function descriptors (e.g. 64-bit powerpc) need a
 * different lookup, so a definition provided earlier takes precedence.
 */
#ifndef kprobe_lookup_name
#define kprobe_lookup_name(name, addr) \
	(addr) = (kprobe_opcode_t *)kallsyms_lookup_name(name)
#endif
64
65static int kprobes_initialized;
66static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
67static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
68
69/* NOTE: change this value only with kprobe_mutex held */
70static bool kprobe_enabled;
71
72static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
74static struct {
75	spinlock_t lock ____cacheline_aligned_in_smp;
76} kretprobe_table_locks[KPROBE_TABLE_SIZE];
77
78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
79{
80	return &(kretprobe_table_locks[hash].lock);
81}
82
83/*
84 * Normally, functions that we'd want to prohibit kprobes in, are marked
85 * __kprobes. But, there are cases where such functions already belong to
86 * a different section (__sched for preempt_schedule)
87 *
88 * For such cases, we now have a blacklist
89 */
90static struct kprobe_blackpoint kprobe_blacklist[] = {
91	{"preempt_schedule",},
92	{NULL}    /* Terminator */
93};
94
95#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
96/*
97 * kprobe->ainsn.insn points to the copy of the instruction to be
98 * single-stepped. x86_64, POWER4 and above have no-exec support and
99 * stepping on the instruction on a vmalloced/kmalloced/data page
100 * is a recipe for disaster
101 */
102#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
103
104struct kprobe_insn_page {
105	struct hlist_node hlist;
106	kprobe_opcode_t *insns;		/* Page of instruction slots */
107	char slot_used[INSNS_PER_PAGE];
108	int nused;
109	int ngarbage;
110};
111
112enum kprobe_slot_state {
113	SLOT_CLEAN = 0,
114	SLOT_DIRTY = 1,
115	SLOT_USED = 2,
116};
117
118static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
119static struct hlist_head kprobe_insn_pages;
120static int kprobe_garbage_slots;
121static int collect_garbage_slots(void);
122
123static int __kprobes check_safety(void)
124{
125	int ret = 0;
126#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
127	ret = freeze_processes();
128	if (ret == 0) {
129		struct task_struct *p, *q;
130		do_each_thread(p, q) {
131			if (p != current && p->state == TASK_RUNNING &&
132			    p->pid != 0) {
133				printk("Check failed: %s is running\n",p->comm);
134				ret = -1;
135				goto loop_end;
136			}
137		} while_each_thread(p, q);
138	}
139loop_end:
140	thaw_processes();
141#else
142	synchronize_sched();
143#endif
144	return ret;
145}
146
147/**
148 * __get_insn_slot() - Find a slot on an executable page for an instruction.
149 * We allocate an executable page if there's no room on existing ones.
150 */
151static kprobe_opcode_t __kprobes *__get_insn_slot(void)
152{
153	struct kprobe_insn_page *kip;
154	struct hlist_node *pos;
155
156 retry:
157	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
158		if (kip->nused < INSNS_PER_PAGE) {
159			int i;
160			for (i = 0; i < INSNS_PER_PAGE; i++) {
161				if (kip->slot_used[i] == SLOT_CLEAN) {
162					kip->slot_used[i] = SLOT_USED;
163					kip->nused++;
164					return kip->insns + (i * MAX_INSN_SIZE);
165				}
166			}
167			/* Surprise!  No unused slots.  Fix kip->nused. */
168			kip->nused = INSNS_PER_PAGE;
169		}
170	}
171
172	/* If there are any garbage slots, collect it and try again. */
173	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
174		goto retry;
175	}
176	/* All out of space.  Need to allocate a new page. Use slot 0. */
177	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
178	if (!kip)
179		return NULL;
180
181	/*
182	 * Use module_alloc so this page is within +/- 2GB of where the
183	 * kernel image and loaded module images reside. This is required
184	 * so x86_64 can correctly handle the %rip-relative fixups.
185	 */
186	kip->insns = module_alloc(PAGE_SIZE);
187	if (!kip->insns) {
188		kfree(kip);
189		return NULL;
190	}
191	INIT_HLIST_NODE(&kip->hlist);
192	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
193	memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
194	kip->slot_used[0] = SLOT_USED;
195	kip->nused = 1;
196	kip->ngarbage = 0;
197	return kip->insns;
198}
199
200kprobe_opcode_t __kprobes *get_insn_slot(void)
201{
202	kprobe_opcode_t *ret;
203	mutex_lock(&kprobe_insn_mutex);
204	ret = __get_insn_slot();
205	mutex_unlock(&kprobe_insn_mutex);
206	return ret;
207}
208
209/* Return 1 if all garbages are collected, otherwise 0. */
210static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
211{
212	kip->slot_used[idx] = SLOT_CLEAN;
213	kip->nused--;
214	if (kip->nused == 0) {
215		/*
216		 * Page is no longer in use.  Free it unless
217		 * it's the last one.  We keep the last one
218		 * so as not to have to set it up again the
219		 * next time somebody inserts a probe.
220		 */
221		hlist_del(&kip->hlist);
222		if (hlist_empty(&kprobe_insn_pages)) {
223			INIT_HLIST_NODE(&kip->hlist);
224			hlist_add_head(&kip->hlist,
225				       &kprobe_insn_pages);
226		} else {
227			module_free(NULL, kip->insns);
228			kfree(kip);
229		}
230		return 1;
231	}
232	return 0;
233}
234
235static int __kprobes collect_garbage_slots(void)
236{
237	struct kprobe_insn_page *kip;
238	struct hlist_node *pos, *next;
239	int safety;
240
241	/* Ensure no-one is preepmted on the garbages */
242	mutex_unlock(&kprobe_insn_mutex);
243	safety = check_safety();
244	mutex_lock(&kprobe_insn_mutex);
245	if (safety != 0)
246		return -EAGAIN;
247
248	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
249		int i;
250		if (kip->ngarbage == 0)
251			continue;
252		kip->ngarbage = 0;	/* we will collect all garbages */
253		for (i = 0; i < INSNS_PER_PAGE; i++) {
254			if (kip->slot_used[i] == SLOT_DIRTY &&
255			    collect_one_slot(kip, i))
256				break;
257		}
258	}
259	kprobe_garbage_slots = 0;
260	return 0;
261}
262
263void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
264{
265	struct kprobe_insn_page *kip;
266	struct hlist_node *pos;
267
268	mutex_lock(&kprobe_insn_mutex);
269	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
270		if (kip->insns <= slot &&
271		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
272			int i = (slot - kip->insns) / MAX_INSN_SIZE;
273			if (dirty) {
274				kip->slot_used[i] = SLOT_DIRTY;
275				kip->ngarbage++;
276			} else {
277				collect_one_slot(kip, i);
278			}
279			break;
280		}
281	}
282
283	if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
284		collect_garbage_slots();
285
286	mutex_unlock(&kprobe_insn_mutex);
287}
288#endif
289
290/* We have preemption disabled.. so it is safe to use __ versions */
291static inline void set_kprobe_instance(struct kprobe *kp)
292{
293	__get_cpu_var(kprobe_instance) = kp;
294}
295
296static inline void reset_kprobe_instance(void)
297{
298	__get_cpu_var(kprobe_instance) = NULL;
299}
300
301/*
302 * This routine is called either:
303 * 	- under the kprobe_mutex - during kprobe_[un]register()
304 * 				OR
305 * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c
306 */
307struct kprobe __kprobes *get_kprobe(void *addr)
308{
309	struct hlist_head *head;
310	struct hlist_node *node;
311	struct kprobe *p;
312
313	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
314	hlist_for_each_entry_rcu(p, node, head, hlist) {
315		if (p->addr == addr)
316			return p;
317	}
318	return NULL;
319}
320
321/*
322 * Aggregate handlers for multiple kprobes support - these handlers
323 * take care of invoking the individual kprobe handlers on p->list
324 */
325static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
326{
327	struct kprobe *kp;
328
329	list_for_each_entry_rcu(kp, &p->list, list) {
330		if (kp->pre_handler) {
331			set_kprobe_instance(kp);
332			if (kp->pre_handler(kp, regs))
333				return 1;
334		}
335		reset_kprobe_instance();
336	}
337	return 0;
338}
339
340static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
341					unsigned long flags)
342{
343	struct kprobe *kp;
344
345	list_for_each_entry_rcu(kp, &p->list, list) {
346		if (kp->post_handler) {
347			set_kprobe_instance(kp);
348			kp->post_handler(kp, regs, flags);
349			reset_kprobe_instance();
350		}
351	}
352}
353
354static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
355					int trapnr)
356{
357	struct kprobe *cur = __get_cpu_var(kprobe_instance);
358
359	/*
360	 * if we faulted "during" the execution of a user specified
361	 * probe handler, invoke just that probe's fault handler
362	 */
363	if (cur && cur->fault_handler) {
364		if (cur->fault_handler(cur, regs, trapnr))
365			return 1;
366	}
367	return 0;
368}
369
370static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
371{
372	struct kprobe *cur = __get_cpu_var(kprobe_instance);
373	int ret = 0;
374
375	if (cur && cur->break_handler) {
376		if (cur->break_handler(cur, regs))
377			ret = 1;
378	}
379	reset_kprobe_instance();
380	return ret;
381}
382
383/* Walks the list and increments nmissed count for multiprobe case */
384void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
385{
386	struct kprobe *kp;
387	if (p->pre_handler != aggr_pre_handler) {
388		p->nmissed++;
389	} else {
390		list_for_each_entry_rcu(kp, &p->list, list)
391			kp->nmissed++;
392	}
393	return;
394}
395
396void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
397				struct hlist_head *head)
398{
399	struct kretprobe *rp = ri->rp;
400
401	/* remove rp inst off the rprobe_inst_table */
402	hlist_del(&ri->hlist);
403	INIT_HLIST_NODE(&ri->hlist);
404	if (likely(rp)) {
405		spin_lock(&rp->lock);
406		hlist_add_head(&ri->hlist, &rp->free_instances);
407		spin_unlock(&rp->lock);
408	} else
409		/* Unregistering */
410		hlist_add_head(&ri->hlist, head);
411}
412
413void kretprobe_hash_lock(struct task_struct *tsk,
414			 struct hlist_head **head, unsigned long *flags)
415{
416	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
417	spinlock_t *hlist_lock;
418
419	*head = &kretprobe_inst_table[hash];
420	hlist_lock = kretprobe_table_lock_ptr(hash);
421	spin_lock_irqsave(hlist_lock, *flags);
422}
423
424static void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
425{
426	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
427	spin_lock_irqsave(hlist_lock, *flags);
428}
429
430void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
431{
432	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
433	spinlock_t *hlist_lock;
434
435	hlist_lock = kretprobe_table_lock_ptr(hash);
436	spin_unlock_irqrestore(hlist_lock, *flags);
437}
438
439void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
440{
441	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
442	spin_unlock_irqrestore(hlist_lock, *flags);
443}
444
445/*
446 * This function is called from finish_task_switch when task tk becomes dead,
447 * so that we can recycle any function-return probe instances associated
448 * with this task. These left over instances represent probed functions
449 * that have been called but will never return.
450 */
451void __kprobes kprobe_flush_task(struct task_struct *tk)
452{
453	struct kretprobe_instance *ri;
454	struct hlist_head *head, empty_rp;
455	struct hlist_node *node, *tmp;
456	unsigned long hash, flags = 0;
457
458	if (unlikely(!kprobes_initialized))
459		/* Early boot.  kretprobe_table_locks not yet initialized. */
460		return;
461
462	hash = hash_ptr(tk, KPROBE_HASH_BITS);
463	head = &kretprobe_inst_table[hash];
464	kretprobe_table_lock(hash, &flags);
465	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
466		if (ri->task == tk)
467			recycle_rp_inst(ri, &empty_rp);
468	}
469	kretprobe_table_unlock(hash, &flags);
470	INIT_HLIST_HEAD(&empty_rp);
471	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
472		hlist_del(&ri->hlist);
473		kfree(ri);
474	}
475}
476
477static inline void free_rp_inst(struct kretprobe *rp)
478{
479	struct kretprobe_instance *ri;
480	struct hlist_node *pos, *next;
481
482	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
483		hlist_del(&ri->hlist);
484		kfree(ri);
485	}
486}
487
488static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
489{
490	unsigned long flags, hash;
491	struct kretprobe_instance *ri;
492	struct hlist_node *pos, *next;
493	struct hlist_head *head;
494
495	/* No race here */
496	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
497		kretprobe_table_lock(hash, &flags);
498		head = &kretprobe_inst_table[hash];
499		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
500			if (ri->rp == rp)
501				ri->rp = NULL;
502		}
503		kretprobe_table_unlock(hash, &flags);
504	}
505	free_rp_inst(rp);
506}
507
508/*
509 * Keep all fields in the kprobe consistent
510 */
511static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
512{
513	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
514	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
515}
516
517/*
518* Add the new probe to old_p->list. Fail if this is the
519* second jprobe at the address - two jprobes can't coexist
520*/
521static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
522{
523	if (p->break_handler) {
524		if (old_p->break_handler)
525			return -EEXIST;
526		list_add_tail_rcu(&p->list, &old_p->list);
527		old_p->break_handler = aggr_break_handler;
528	} else
529		list_add_rcu(&p->list, &old_p->list);
530	if (p->post_handler && !old_p->post_handler)
531		old_p->post_handler = aggr_post_handler;
532	return 0;
533}
534
535/*
536 * Fill in the required fields of the "manager kprobe". Replace the
537 * earlier kprobe in the hlist with the manager kprobe
538 */
539static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
540{
541	copy_kprobe(p, ap);
542	flush_insn_slot(ap);
543	ap->addr = p->addr;
544	ap->pre_handler = aggr_pre_handler;
545	ap->fault_handler = aggr_fault_handler;
546	if (p->post_handler)
547		ap->post_handler = aggr_post_handler;
548	if (p->break_handler)
549		ap->break_handler = aggr_break_handler;
550
551	INIT_LIST_HEAD(&ap->list);
552	list_add_rcu(&p->list, &ap->list);
553
554	hlist_replace_rcu(&p->hlist, &ap->hlist);
555}
556
557/*
558 * This is the second or subsequent kprobe at the address - handle
559 * the intricacies
560 */
561static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
562					  struct kprobe *p)
563{
564	int ret = 0;
565	struct kprobe *ap;
566
567	if (old_p->pre_handler == aggr_pre_handler) {
568		copy_kprobe(old_p, p);
569		ret = add_new_kprobe(old_p, p);
570	} else {
571		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
572		if (!ap)
573			return -ENOMEM;
574		add_aggr_kprobe(ap, old_p);
575		copy_kprobe(ap, p);
576		ret = add_new_kprobe(ap, p);
577	}
578	return ret;
579}
580
581static int __kprobes in_kprobes_functions(unsigned long addr)
582{
583	struct kprobe_blackpoint *kb;
584
585	if (addr >= (unsigned long)__kprobes_text_start &&
586	    addr < (unsigned long)__kprobes_text_end)
587		return -EINVAL;
588	/*
589	 * If there exists a kprobe_blacklist, verify and
590	 * fail any probe registration in the prohibited area
591	 */
592	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
593		if (kb->start_addr) {
594			if (addr >= kb->start_addr &&
595			    addr < (kb->start_addr + kb->range))
596				return -EINVAL;
597		}
598	}
599	return 0;
600}
601
602/*
603 * If we have a symbol_name argument, look it up and add the offset field
604 * to it. This way, we can specify a relative address to a symbol.
605 */
606static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
607{
608	kprobe_opcode_t *addr = p->addr;
609	if (p->symbol_name) {
610		if (addr)
611			return NULL;
612		kprobe_lookup_name(p->symbol_name, addr);
613	}
614
615	if (!addr)
616		return NULL;
617	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
618}
619
620static int __kprobes __register_kprobe(struct kprobe *p,
621	unsigned long called_from)
622{
623	int ret = 0;
624	struct kprobe *old_p;
625	struct module *probed_mod;
626	kprobe_opcode_t *addr;
627
628	addr = kprobe_addr(p);
629	if (!addr)
630		return -EINVAL;
631	p->addr = addr;
632
633	preempt_disable();
634	if (!__kernel_text_address((unsigned long) p->addr) ||
635	    in_kprobes_functions((unsigned long) p->addr)) {
636		preempt_enable();
637		return -EINVAL;
638	}
639
640	p->mod_refcounted = 0;
641
642	/*
643	 * Check if are we probing a module.
644	 */
645	probed_mod = __module_text_address((unsigned long) p->addr);
646	if (probed_mod) {
647		struct module *calling_mod;
648		calling_mod = __module_text_address(called_from);
649		/*
650		 * We must allow modules to probe themself and in this case
651		 * avoid incrementing the module refcount, so as to allow
652		 * unloading of self probing modules.
653		 */
654		if (calling_mod != probed_mod) {
655			if (unlikely(!try_module_get(probed_mod))) {
656				preempt_enable();
657				return -EINVAL;
658			}
659			p->mod_refcounted = 1;
660		} else
661			probed_mod = NULL;
662	}
663	preempt_enable();
664
665	p->nmissed = 0;
666	INIT_LIST_HEAD(&p->list);
667	mutex_lock(&kprobe_mutex);
668	old_p = get_kprobe(p->addr);
669	if (old_p) {
670		ret = register_aggr_kprobe(old_p, p);
671		goto out;
672	}
673
674	ret = arch_prepare_kprobe(p);
675	if (ret)
676		goto out;
677
678	INIT_HLIST_NODE(&p->hlist);
679	hlist_add_head_rcu(&p->hlist,
680		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
681
682	if (kprobe_enabled)
683		arch_arm_kprobe(p);
684
685out:
686	mutex_unlock(&kprobe_mutex);
687
688	if (ret && probed_mod)
689		module_put(probed_mod);
690	return ret;
691}
692
693/*
694 * Unregister a kprobe without a scheduler synchronization.
695 */
696static int __kprobes __unregister_kprobe_top(struct kprobe *p)
697{
698	struct kprobe *old_p, *list_p;
699
700	old_p = get_kprobe(p->addr);
701	if (unlikely(!old_p))
702		return -EINVAL;
703
704	if (p != old_p) {
705		list_for_each_entry_rcu(list_p, &old_p->list, list)
706			if (list_p == p)
707			/* kprobe p is a valid probe */
708				goto valid_p;
709		return -EINVAL;
710	}
711valid_p:
712	if (old_p == p ||
713	    (old_p->pre_handler == aggr_pre_handler &&
714	     list_is_singular(&old_p->list))) {
715		/*
716		 * Only probe on the hash list. Disarm only if kprobes are
717		 * enabled - otherwise, the breakpoint would already have
718		 * been removed. We save on flushing icache.
719		 */
720		if (kprobe_enabled)
721			arch_disarm_kprobe(p);
722		hlist_del_rcu(&old_p->hlist);
723	} else {
724		if (p->break_handler)
725			old_p->break_handler = NULL;
726		if (p->post_handler) {
727			list_for_each_entry_rcu(list_p, &old_p->list, list) {
728				if ((list_p != p) && (list_p->post_handler))
729					goto noclean;
730			}
731			old_p->post_handler = NULL;
732		}
733noclean:
734		list_del_rcu(&p->list);
735	}
736	return 0;
737}
738
739static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
740{
741	struct module *mod;
742	struct kprobe *old_p;
743
744	if (p->mod_refcounted) {
745		/*
746		 * Since we've already incremented refcount,
747		 * we don't need to disable preemption.
748		 */
749		mod = module_text_address((unsigned long)p->addr);
750		if (mod)
751			module_put(mod);
752	}
753
754	if (list_empty(&p->list) || list_is_singular(&p->list)) {
755		if (!list_empty(&p->list)) {
756			/* "p" is the last child of an aggr_kprobe */
757			old_p = list_entry(p->list.next, struct kprobe, list);
758			list_del(&p->list);
759			kfree(old_p);
760		}
761		arch_remove_kprobe(p);
762	}
763}
764
765static int __register_kprobes(struct kprobe **kps, int num,
766	unsigned long called_from)
767{
768	int i, ret = 0;
769
770	if (num <= 0)
771		return -EINVAL;
772	for (i = 0; i < num; i++) {
773		ret = __register_kprobe(kps[i], called_from);
774		if (ret < 0) {
775			if (i > 0)
776				unregister_kprobes(kps, i);
777			break;
778		}
779	}
780	return ret;
781}
782
783/*
784 * Registration and unregistration functions for kprobe.
785 */
786int __kprobes register_kprobe(struct kprobe *p)
787{
788	return __register_kprobes(&p, 1,
789				  (unsigned long)__builtin_return_address(0));
790}
791
792void __kprobes unregister_kprobe(struct kprobe *p)
793{
794	unregister_kprobes(&p, 1);
795}
796
797int __kprobes register_kprobes(struct kprobe **kps, int num)
798{
799	return __register_kprobes(kps, num,
800				  (unsigned long)__builtin_return_address(0));
801}
802
803void __kprobes unregister_kprobes(struct kprobe **kps, int num)
804{
805	int i;
806
807	if (num <= 0)
808		return;
809	mutex_lock(&kprobe_mutex);
810	for (i = 0; i < num; i++)
811		if (__unregister_kprobe_top(kps[i]) < 0)
812			kps[i]->addr = NULL;
813	mutex_unlock(&kprobe_mutex);
814
815	synchronize_sched();
816	for (i = 0; i < num; i++)
817		if (kps[i]->addr)
818			__unregister_kprobe_bottom(kps[i]);
819}
820
821static struct notifier_block kprobe_exceptions_nb = {
822	.notifier_call = kprobe_exceptions_notify,
823	.priority = 0x7fffffff /* we need to be notified first */
824};
825
826unsigned long __weak arch_deref_entry_point(void *entry)
827{
828	return (unsigned long)entry;
829}
830
831static int __register_jprobes(struct jprobe **jps, int num,
832	unsigned long called_from)
833{
834	struct jprobe *jp;
835	int ret = 0, i;
836
837	if (num <= 0)
838		return -EINVAL;
839	for (i = 0; i < num; i++) {
840		unsigned long addr;
841		jp = jps[i];
842		addr = arch_deref_entry_point(jp->entry);
843
844		if (!kernel_text_address(addr))
845			ret = -EINVAL;
846		else {
847			/* Todo: Verify probepoint is a function entry point */
848			jp->kp.pre_handler = setjmp_pre_handler;
849			jp->kp.break_handler = longjmp_break_handler;
850			ret = __register_kprobe(&jp->kp, called_from);
851		}
852		if (ret < 0) {
853			if (i > 0)
854				unregister_jprobes(jps, i);
855			break;
856		}
857	}
858	return ret;
859}
860
861int __kprobes register_jprobe(struct jprobe *jp)
862{
863	return __register_jprobes(&jp, 1,
864		(unsigned long)__builtin_return_address(0));
865}
866
867void __kprobes unregister_jprobe(struct jprobe *jp)
868{
869	unregister_jprobes(&jp, 1);
870}
871
872int __kprobes register_jprobes(struct jprobe **jps, int num)
873{
874	return __register_jprobes(jps, num,
875		(unsigned long)__builtin_return_address(0));
876}
877
878void __kprobes unregister_jprobes(struct jprobe **jps, int num)
879{
880	int i;
881
882	if (num <= 0)
883		return;
884	mutex_lock(&kprobe_mutex);
885	for (i = 0; i < num; i++)
886		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
887			jps[i]->kp.addr = NULL;
888	mutex_unlock(&kprobe_mutex);
889
890	synchronize_sched();
891	for (i = 0; i < num; i++) {
892		if (jps[i]->kp.addr)
893			__unregister_kprobe_bottom(&jps[i]->kp);
894	}
895}
896
897#ifdef CONFIG_KRETPROBES
898/*
899 * This kprobe pre_handler is registered with every kretprobe. When probe
900 * hits it will set up the return probe.
901 */
902static int __kprobes pre_handler_kretprobe(struct kprobe *p,
903					   struct pt_regs *regs)
904{
905	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
906	unsigned long hash, flags = 0;
907	struct kretprobe_instance *ri;
908
909	/*TODO: consider to only swap the RA after the last pre_handler fired */
910	hash = hash_ptr(current, KPROBE_HASH_BITS);
911	spin_lock_irqsave(&rp->lock, flags);
912	if (!hlist_empty(&rp->free_instances)) {
913		ri = hlist_entry(rp->free_instances.first,
914				struct kretprobe_instance, hlist);
915		hlist_del(&ri->hlist);
916		spin_unlock_irqrestore(&rp->lock, flags);
917
918		ri->rp = rp;
919		ri->task = current;
920
921		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
922			spin_unlock_irqrestore(&rp->lock, flags);
923			return 0;
924		}
925
926		arch_prepare_kretprobe(ri, regs);
927
928		/* XXX(hch): why is there no hlist_move_head? */
929		INIT_HLIST_NODE(&ri->hlist);
930		kretprobe_table_lock(hash, &flags);
931		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
932		kretprobe_table_unlock(hash, &flags);
933	} else {
934		rp->nmissed++;
935		spin_unlock_irqrestore(&rp->lock, flags);
936	}
937	return 0;
938}
939
940static int __kprobes __register_kretprobe(struct kretprobe *rp,
941					  unsigned long called_from)
942{
943	int ret = 0;
944	struct kretprobe_instance *inst;
945	int i;
946	void *addr;
947
948	if (kretprobe_blacklist_size) {
949		addr = kprobe_addr(&rp->kp);
950		if (!addr)
951			return -EINVAL;
952
953		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
954			if (kretprobe_blacklist[i].addr == addr)
955				return -EINVAL;
956		}
957	}
958
959	rp->kp.pre_handler = pre_handler_kretprobe;
960	rp->kp.post_handler = NULL;
961	rp->kp.fault_handler = NULL;
962	rp->kp.break_handler = NULL;
963
964	/* Pre-allocate memory for max kretprobe instances */
965	if (rp->maxactive <= 0) {
966#ifdef CONFIG_PREEMPT
967		rp->maxactive = max(10, 2 * NR_CPUS);
968#else
969		rp->maxactive = NR_CPUS;
970#endif
971	}
972	spin_lock_init(&rp->lock);
973	INIT_HLIST_HEAD(&rp->free_instances);
974	for (i = 0; i < rp->maxactive; i++) {
975		inst = kmalloc(sizeof(struct kretprobe_instance) +
976			       rp->data_size, GFP_KERNEL);
977		if (inst == NULL) {
978			free_rp_inst(rp);
979			return -ENOMEM;
980		}
981		INIT_HLIST_NODE(&inst->hlist);
982		hlist_add_head(&inst->hlist, &rp->free_instances);
983	}
984
985	rp->nmissed = 0;
986	/* Establish function entry probe point */
987	ret = __register_kprobe(&rp->kp, called_from);
988	if (ret != 0)
989		free_rp_inst(rp);
990	return ret;
991}
992
993static int __register_kretprobes(struct kretprobe **rps, int num,
994	unsigned long called_from)
995{
996	int ret = 0, i;
997
998	if (num <= 0)
999		return -EINVAL;
1000	for (i = 0; i < num; i++) {
1001		ret = __register_kretprobe(rps[i], called_from);
1002		if (ret < 0) {
1003			if (i > 0)
1004				unregister_kretprobes(rps, i);
1005			break;
1006		}
1007	}
1008	return ret;
1009}
1010
1011int __kprobes register_kretprobe(struct kretprobe *rp)
1012{
1013	return __register_kretprobes(&rp, 1,
1014			(unsigned long)__builtin_return_address(0));
1015}
1016
1017void __kprobes unregister_kretprobe(struct kretprobe *rp)
1018{
1019	unregister_kretprobes(&rp, 1);
1020}
1021
1022int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1023{
1024	return __register_kretprobes(rps, num,
1025			(unsigned long)__builtin_return_address(0));
1026}
1027
1028void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1029{
1030	int i;
1031
1032	if (num <= 0)
1033		return;
1034	mutex_lock(&kprobe_mutex);
1035	for (i = 0; i < num; i++)
1036		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
1037			rps[i]->kp.addr = NULL;
1038	mutex_unlock(&kprobe_mutex);
1039
1040	synchronize_sched();
1041	for (i = 0; i < num; i++) {
1042		if (rps[i]->kp.addr) {
1043			__unregister_kprobe_bottom(&rps[i]->kp);
1044			cleanup_rp_inst(rps[i]);
1045		}
1046	}
1047}
1048
1049#else /* CONFIG_KRETPROBES */
1050int __kprobes register_kretprobe(struct kretprobe *rp)
1051{
1052	return -ENOSYS;
1053}
1054
1055int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1056{
1057	return -ENOSYS;
1058}
1059void __kprobes unregister_kretprobe(struct kretprobe *rp)
1060{
1061}
1062
1063void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1064{
1065}
1066
1067static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1068					   struct pt_regs *regs)
1069{
1070	return 0;
1071}
1072
1073#endif /* CONFIG_KRETPROBES */
1074
1075static int __init init_kprobes(void)
1076{
1077	int i, err = 0;
1078	unsigned long offset = 0, size = 0;
1079	char *modname, namebuf[128];
1080	const char *symbol_name;
1081	void *addr;
1082	struct kprobe_blackpoint *kb;
1083
1084	/* FIXME allocate the probe table, currently defined statically */
1085	/* initialize all list heads */
1086	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1087		INIT_HLIST_HEAD(&kprobe_table[i]);
1088		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
1089		spin_lock_init(&(kretprobe_table_locks[i].lock));
1090	}
1091
1092	/*
1093	 * Lookup and populate the kprobe_blacklist.
1094	 *
1095	 * Unlike the kretprobe blacklist, we'll need to determine
1096	 * the range of addresses that belong to the said functions,
1097	 * since a kprobe need not necessarily be at the beginning
1098	 * of a function.
1099	 */
1100	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
1101		kprobe_lookup_name(kb->name, addr);
1102		if (!addr)
1103			continue;
1104
1105		kb->start_addr = (unsigned long)addr;
1106		symbol_name = kallsyms_lookup(kb->start_addr,
1107				&size, &offset, &modname, namebuf);
1108		if (!symbol_name)
1109			kb->range = 0;
1110		else
1111			kb->range = size;
1112	}
1113
1114	if (kretprobe_blacklist_size) {
1115		/* lookup the function address from its name */
1116		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1117			kprobe_lookup_name(kretprobe_blacklist[i].name,
1118					   kretprobe_blacklist[i].addr);
1119			if (!kretprobe_blacklist[i].addr)
1120				printk("kretprobe: lookup failed: %s\n",
1121				       kretprobe_blacklist[i].name);
1122		}
1123	}
1124
1125	/* By default, kprobes are enabled */
1126	kprobe_enabled = true;
1127
1128	err = arch_init_kprobes();
1129	if (!err)
1130		err = register_die_notifier(&kprobe_exceptions_nb);
1131	kprobes_initialized = (err == 0);
1132
1133	if (!err)
1134		init_test_probes();
1135	return err;
1136}
1137
1138#ifdef CONFIG_DEBUG_FS
1139static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
1140		const char *sym, int offset,char *modname)
1141{
1142	char *kprobe_type;
1143
1144	if (p->pre_handler == pre_handler_kretprobe)
1145		kprobe_type = "r";
1146	else if (p->pre_handler == setjmp_pre_handler)
1147		kprobe_type = "j";
1148	else
1149		kprobe_type = "k";
1150	if (sym)
1151		seq_printf(pi, "%p  %s  %s+0x%x  %s\n", p->addr, kprobe_type,
1152			sym, offset, (modname ? modname : " "));
1153	else
1154		seq_printf(pi, "%p  %s  %p\n", p->addr, kprobe_type, p->addr);
1155}
1156
1157static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
1158{
1159	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
1160}
1161
1162static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
1163{
1164	(*pos)++;
1165	if (*pos >= KPROBE_TABLE_SIZE)
1166		return NULL;
1167	return pos;
1168}
1169
1170static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
1171{
1172	/* Nothing to do */
1173}
1174
1175static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
1176{
1177	struct hlist_head *head;
1178	struct hlist_node *node;
1179	struct kprobe *p, *kp;
1180	const char *sym = NULL;
1181	unsigned int i = *(loff_t *) v;
1182	unsigned long offset = 0;
1183	char *modname, namebuf[128];
1184
1185	head = &kprobe_table[i];
1186	preempt_disable();
1187	hlist_for_each_entry_rcu(p, node, head, hlist) {
1188		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
1189					&offset, &modname, namebuf);
1190		if (p->pre_handler == aggr_pre_handler) {
1191			list_for_each_entry_rcu(kp, &p->list, list)
1192				report_probe(pi, kp, sym, offset, modname);
1193		} else
1194			report_probe(pi, p, sym, offset, modname);
1195	}
1196	preempt_enable();
1197	return 0;
1198}
1199
1200static struct seq_operations kprobes_seq_ops = {
1201	.start = kprobe_seq_start,
1202	.next  = kprobe_seq_next,
1203	.stop  = kprobe_seq_stop,
1204	.show  = show_kprobe_addr
1205};
1206
1207static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
1208{
1209	return seq_open(filp, &kprobes_seq_ops);
1210}
1211
1212static struct file_operations debugfs_kprobes_operations = {
1213	.open           = kprobes_open,
1214	.read           = seq_read,
1215	.llseek         = seq_lseek,
1216	.release        = seq_release,
1217};
1218
1219static void __kprobes enable_all_kprobes(void)
1220{
1221	struct hlist_head *head;
1222	struct hlist_node *node;
1223	struct kprobe *p;
1224	unsigned int i;
1225
1226	mutex_lock(&kprobe_mutex);
1227
1228	/* If kprobes are already enabled, just return */
1229	if (kprobe_enabled)
1230		goto already_enabled;
1231
1232	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1233		head = &kprobe_table[i];
1234		hlist_for_each_entry_rcu(p, node, head, hlist)
1235			arch_arm_kprobe(p);
1236	}
1237
1238	kprobe_enabled = true;
1239	printk(KERN_INFO "Kprobes globally enabled\n");
1240
1241already_enabled:
1242	mutex_unlock(&kprobe_mutex);
1243	return;
1244}
1245
1246static void __kprobes disable_all_kprobes(void)
1247{
1248	struct hlist_head *head;
1249	struct hlist_node *node;
1250	struct kprobe *p;
1251	unsigned int i;
1252
1253	mutex_lock(&kprobe_mutex);
1254
1255	/* If kprobes are already disabled, just return */
1256	if (!kprobe_enabled)
1257		goto already_disabled;
1258
1259	kprobe_enabled = false;
1260	printk(KERN_INFO "Kprobes globally disabled\n");
1261	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1262		head = &kprobe_table[i];
1263		hlist_for_each_entry_rcu(p, node, head, hlist) {
1264			if (!arch_trampoline_kprobe(p))
1265				arch_disarm_kprobe(p);
1266		}
1267	}
1268
1269	mutex_unlock(&kprobe_mutex);
1270	/* Allow all currently running kprobes to complete */
1271	synchronize_sched();
1272	return;
1273
1274already_disabled:
1275	mutex_unlock(&kprobe_mutex);
1276	return;
1277}
1278
1279/*
1280 * XXX: The debugfs bool file interface doesn't allow for callbacks
1281 * when the bool state is switched. We can reuse that facility when
1282 * available
1283 */
1284static ssize_t read_enabled_file_bool(struct file *file,
1285	       char __user *user_buf, size_t count, loff_t *ppos)
1286{
1287	char buf[3];
1288
1289	if (kprobe_enabled)
1290		buf[0] = '1';
1291	else
1292		buf[0] = '0';
1293	buf[1] = '\n';
1294	buf[2] = 0x00;
1295	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
1296}
1297
1298static ssize_t write_enabled_file_bool(struct file *file,
1299	       const char __user *user_buf, size_t count, loff_t *ppos)
1300{
1301	char buf[32];
1302	int buf_size;
1303
1304	buf_size = min(count, (sizeof(buf)-1));
1305	if (copy_from_user(buf, user_buf, buf_size))
1306		return -EFAULT;
1307
1308	switch (buf[0]) {
1309	case 'y':
1310	case 'Y':
1311	case '1':
1312		enable_all_kprobes();
1313		break;
1314	case 'n':
1315	case 'N':
1316	case '0':
1317		disable_all_kprobes();
1318		break;
1319	}
1320
1321	return count;
1322}
1323
1324static struct file_operations fops_kp = {
1325	.read =         read_enabled_file_bool,
1326	.write =        write_enabled_file_bool,
1327};
1328
1329static int __kprobes debugfs_kprobe_init(void)
1330{
1331	struct dentry *dir, *file;
1332	unsigned int value = 1;
1333
1334	dir = debugfs_create_dir("kprobes", NULL);
1335	if (!dir)
1336		return -ENOMEM;
1337
1338	file = debugfs_create_file("list", 0444, dir, NULL,
1339				&debugfs_kprobes_operations);
1340	if (!file) {
1341		debugfs_remove(dir);
1342		return -ENOMEM;
1343	}
1344
1345	file = debugfs_create_file("enabled", 0600, dir,
1346					&value, &fops_kp);
1347	if (!file) {
1348		debugfs_remove(dir);
1349		return -ENOMEM;
1350	}
1351
1352	return 0;
1353}
1354
1355late_initcall(debugfs_kprobe_init);
1356#endif /* CONFIG_DEBUG_FS */
1357
/* Hook init_kprobes() up as this file's initialisation routine. */
module_init(init_kprobes);

/* Plain kprobe registration API (GPL-only exports). */
EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_kprobes);
EXPORT_SYMBOL_GPL(unregister_kprobes);
/* Jumper probe (jprobe) API. */
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
EXPORT_SYMBOL_GPL(jprobe_return);
/* Function-return probe (kretprobe) API. */
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);
1373