kprobes.c revision 05662bdb64c746079de7ac4dc4fb4caa5e8e119f
1/*
2 *  Kernel Probes (KProbes)
3 *  kernel/kprobes.c
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * Copyright (C) IBM Corporation, 2002, 2004
20 *
21 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
22 *		Probes initial implementation (includes suggestions from
23 *		Rusty Russell).
24 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
25 *		hlists and exceptions notifier as suggested by Andi Kleen.
26 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
27 *		interface to access function arguments.
28 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
29 *		exceptions notifier to be first on the priority list.
30 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
31 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
32 *		<prasanna@in.ibm.com> added function-return probes.
33 */
34#include <linux/kprobes.h>
35#include <linux/hash.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/stddef.h>
39#include <linux/module.h>
40#include <linux/moduleloader.h>
41#include <linux/kallsyms.h>
42#include <linux/freezer.h>
43#include <linux/seq_file.h>
44#include <linux/debugfs.h>
45#include <linux/sysctl.h>
46#include <linux/kdebug.h>
47#include <linux/memory.h>
48#include <linux/ftrace.h>
49#include <linux/cpu.h>
50
51#include <asm-generic/sections.h>
52#include <asm/cacheflush.h>
53#include <asm/errno.h>
54#include <asm/uaccess.h>
55
56#define KPROBE_HASH_BITS 6
57#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
58
59
60/*
61 * Some oddball architectures like 64bit powerpc have function descriptors
62 * so this must be overridable.
63 */
64#ifndef kprobe_lookup_name
65#define kprobe_lookup_name(name, addr) \
66	addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
67#endif
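/*
 * For illustration, the default definition above is just a kallsyms
 * lookup; the symbol name in this sketch is arbitrary:
 *
 *	kprobe_opcode_t *addr;
 *	kprobe_lookup_name("do_fork", addr);
 *	// addr now holds the address of do_fork, or NULL if the symbol
 *	// could not be resolved.
 */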
68
69static int kprobes_initialized;
70static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
71static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
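/*
 * kprobe_table hashes registered kprobes by probed address (see
 * get_kprobe()), while kretprobe_inst_table hashes active kretprobe
 * instances by the task they fired in (see kretprobe_hash_lock() and
 * kprobe_flush_task()). Both use hash_ptr() over KPROBE_HASH_BITS bits.
 */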
72
73/* NOTE: change this value only with kprobe_mutex held */
74static bool kprobes_all_disarmed;
75
76static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
77static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
78static struct {
79	spinlock_t lock ____cacheline_aligned_in_smp;
80} kretprobe_table_locks[KPROBE_TABLE_SIZE];
81
82static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
83{
84	return &(kretprobe_table_locks[hash].lock);
85}
86
87/*
88 * Normally, functions that we want to prohibit kprobes from probing are
89 * marked __kprobes. But there are cases where such functions already
90 * belong to a different section (e.g. __sched for preempt_schedule).
91 *
92 * For such cases, we now have a blacklist.
93 */
94static struct kprobe_blackpoint kprobe_blacklist[] = {
95	{"preempt_schedule",},
96	{"native_get_debugreg",},
97	{"irq_entries_start",},
98	{"common_interrupt",},
99	{"mcount",},	/* mcount can be called from everywhere */
100	{NULL}    /* Terminator */
101};
102
103#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
104/*
105 * kprobe->ainsn.insn points to the copy of the instruction to be
106 * single-stepped. x86_64, POWER4 and above have no-exec support and
107 * stepping on the instruction on a vmalloced/kmalloced/data page
108 * is a recipe for disaster
109 */
110struct kprobe_insn_page {
111	struct list_head list;
112	kprobe_opcode_t *insns;		/* Page of instruction slots */
113	int nused;
114	int ngarbage;
115	char slot_used[];
116};
117
118#define KPROBE_INSN_PAGE_SIZE(slots)			\
119	(offsetof(struct kprobe_insn_page, slot_used) +	\
120	 (sizeof(char) * (slots)))
121
122struct kprobe_insn_cache {
123	struct list_head pages;	/* list of kprobe_insn_page */
124	size_t insn_size;	/* size of instruction slot */
125	int nr_garbage;
126};
127
128static int slots_per_page(struct kprobe_insn_cache *c)
129{
130	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
131}
132
133enum kprobe_slot_state {
134	SLOT_CLEAN = 0,
135	SLOT_DIRTY = 1,
136	SLOT_USED = 2,
137};
138
139static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_slots */
140static struct kprobe_insn_cache kprobe_insn_slots = {
141	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
142	.insn_size = MAX_INSN_SIZE,
143	.nr_garbage = 0,
144};
145static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
146
147/**
148 * __get_insn_slot() - Find a slot on an executable page for an instruction.
149 * We allocate an executable page if there's no room on existing ones.
150 */
151static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
152{
153	struct kprobe_insn_page *kip;
154
155 retry:
156	list_for_each_entry(kip, &c->pages, list) {
157		if (kip->nused < slots_per_page(c)) {
158			int i;
159			for (i = 0; i < slots_per_page(c); i++) {
160				if (kip->slot_used[i] == SLOT_CLEAN) {
161					kip->slot_used[i] = SLOT_USED;
162					kip->nused++;
163					return kip->insns + (i * c->insn_size);
164				}
165			}
166			/* kip->nused is broken. Fix it. */
167			kip->nused = slots_per_page(c);
168			WARN_ON(1);
169		}
170	}
171
172	/* If there are any garbage slots, collect them and try again. */
173	if (c->nr_garbage && collect_garbage_slots(c) == 0)
174		goto retry;
175
176	/* All out of space.  Need to allocate a new page. */
177	kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
178	if (!kip)
179		return NULL;
180
181	/*
182	 * Use module_alloc so this page is within +/- 2GB of where the
183	 * kernel image and loaded module images reside. This is required
184	 * so x86_64 can correctly handle the %rip-relative fixups.
185	 */
186	kip->insns = module_alloc(PAGE_SIZE);
187	if (!kip->insns) {
188		kfree(kip);
189		return NULL;
190	}
191	INIT_LIST_HEAD(&kip->list);
192	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
193	kip->slot_used[0] = SLOT_USED;
194	kip->nused = 1;
195	kip->ngarbage = 0;
196	list_add(&kip->list, &c->pages);
197	return kip->insns;
198}
199
200
201kprobe_opcode_t __kprobes *get_insn_slot(void)
202{
203	kprobe_opcode_t *ret = NULL;
204
205	mutex_lock(&kprobe_insn_mutex);
206	ret = __get_insn_slot(&kprobe_insn_slots);
207	mutex_unlock(&kprobe_insn_mutex);
208
209	return ret;
210}
211
212/* Return 1 if the slot's page became completely unused (and was possibly freed), otherwise 0. */
213static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
214{
215	kip->slot_used[idx] = SLOT_CLEAN;
216	kip->nused--;
217	if (kip->nused == 0) {
218		/*
219		 * Page is no longer in use.  Free it unless
220		 * it's the last one.  We keep the last one
221		 * so as not to have to set it up again the
222		 * next time somebody inserts a probe.
223		 */
224		if (!list_is_singular(&kip->list)) {
225			list_del(&kip->list);
226			module_free(NULL, kip->insns);
227			kfree(kip);
228		}
229		return 1;
230	}
231	return 0;
232}
233
234static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
235{
236	struct kprobe_insn_page *kip, *next;
237
238	/* Ensure no-one is still executing in any of the garbage slots */
239	synchronize_sched();
240
241	list_for_each_entry_safe(kip, next, &c->pages, list) {
242		int i;
243		if (kip->ngarbage == 0)
244			continue;
245		kip->ngarbage = 0;	/* we will collect all garbages */
246		for (i = 0; i < slots_per_page(c); i++) {
247			if (kip->slot_used[i] == SLOT_DIRTY &&
248			    collect_one_slot(kip, i))
249				break;
250		}
251	}
252	c->nr_garbage = 0;
253	return 0;
254}
255
256static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
257				       kprobe_opcode_t *slot, int dirty)
258{
259	struct kprobe_insn_page *kip;
260
261	list_for_each_entry(kip, &c->pages, list) {
262		long idx = ((long)slot - (long)kip->insns) /
263				(c->insn_size * sizeof(kprobe_opcode_t));
264		if (idx >= 0 && idx < slots_per_page(c)) {
265			WARN_ON(kip->slot_used[idx] != SLOT_USED);
266			if (dirty) {
267				kip->slot_used[idx] = SLOT_DIRTY;
268				kip->ngarbage++;
269				if (++c->nr_garbage > slots_per_page(c))
270					collect_garbage_slots(c);
271			} else
272				collect_one_slot(kip, idx);
273			return;
274		}
275	}
276	/* Could not free this slot. */
277	WARN_ON(1);
278}
279
280void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
281{
282	mutex_lock(&kprobe_insn_mutex);
283	__free_insn_slot(&kprobe_insn_slots, slot, dirty);
284	mutex_unlock(&kprobe_insn_mutex);
285}
286#ifdef CONFIG_OPTPROBES
287/* For optimized_kprobe buffer */
288static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
289static struct kprobe_insn_cache kprobe_optinsn_slots = {
290	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
291	/* .insn_size is initialized later */
292	.nr_garbage = 0,
293};
294/* Get a slot for optimized_kprobe buffer */
295kprobe_opcode_t __kprobes *get_optinsn_slot(void)
296{
297	kprobe_opcode_t *ret = NULL;
298
299	mutex_lock(&kprobe_optinsn_mutex);
300	ret = __get_insn_slot(&kprobe_optinsn_slots);
301	mutex_unlock(&kprobe_optinsn_mutex);
302
303	return ret;
304}
305
306void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
307{
308	mutex_lock(&kprobe_optinsn_mutex);
309	__free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
310	mutex_unlock(&kprobe_optinsn_mutex);
311}
312#endif
313#endif
314
315/* We have preemption disabled, so it is safe to use the __ versions */
316static inline void set_kprobe_instance(struct kprobe *kp)
317{
318	__get_cpu_var(kprobe_instance) = kp;
319}
320
321static inline void reset_kprobe_instance(void)
322{
323	__get_cpu_var(kprobe_instance) = NULL;
324}
325
326/*
327 * This routine is called either:
328 * 	- under the kprobe_mutex - during kprobe_[un]register()
329 * 				OR
330 * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c
331 */
332struct kprobe __kprobes *get_kprobe(void *addr)
333{
334	struct hlist_head *head;
335	struct hlist_node *node;
336	struct kprobe *p;
337
338	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
339	hlist_for_each_entry_rcu(p, node, head, hlist) {
340		if (p->addr == addr)
341			return p;
342	}
343
344	return NULL;
345}
346
347static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
348
349/* Return true if the kprobe is an aggregator */
350static inline int kprobe_aggrprobe(struct kprobe *p)
351{
352	return p->pre_handler == aggr_pre_handler;
353}
354
355/*
356 * Keep all fields in the kprobe consistent
357 */
358static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
359{
360	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
361	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
362}
363
364#ifdef CONFIG_OPTPROBES
365/* NOTE: change this value only with kprobe_mutex held */
366static bool kprobes_allow_optimization;
367
368/*
369 * Call every pre_handler on the list, but ignore their return values.
370 * This must be called from the arch-dependent optimized caller.
371 */
372void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
373{
374	struct kprobe *kp;
375
376	list_for_each_entry_rcu(kp, &p->list, list) {
377		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
378			set_kprobe_instance(kp);
379			kp->pre_handler(kp, regs);
380		}
381		reset_kprobe_instance();
382	}
383}
384
385/* Return true(!0) if the kprobe is ready for optimization. */
386static inline int kprobe_optready(struct kprobe *p)
387{
388	struct optimized_kprobe *op;
389
390	if (kprobe_aggrprobe(p)) {
391		op = container_of(p, struct optimized_kprobe, kp);
392		return arch_prepared_optinsn(&op->optinsn);
393	}
394
395	return 0;
396}
397
398/*
399 * Return an optimized kprobe whose optimized (jump) code covers the
400 * instruction at addr (the breakpoint address itself is excluded).
401 */
402struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
403{
404	int i;
405	struct kprobe *p = NULL;
406	struct optimized_kprobe *op;
407
408	/* Don't check i == 0, since that is a breakpoint case. */
409	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
410		p = get_kprobe((void *)(addr - i));
411
412	if (p && kprobe_optready(p)) {
413		op = container_of(p, struct optimized_kprobe, kp);
414		if (arch_within_optimized_kprobe(op, addr))
415			return p;
416	}
417
418	return NULL;
419}
420
421/* Optimization staging list, protected by kprobe_mutex */
422static LIST_HEAD(optimizing_list);
423
424static void kprobe_optimizer(struct work_struct *work);
425static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
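/* Delay, in jiffies, before queued kprobes are actually optimized. */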
426#define OPTIMIZE_DELAY 5
427
428/* Kprobe jump optimizer */
429static __kprobes void kprobe_optimizer(struct work_struct *work)
430{
431	struct optimized_kprobe *op, *tmp;
432
433	/* Lock modules while optimizing kprobes */
434	mutex_lock(&module_mutex);
435	mutex_lock(&kprobe_mutex);
436	if (kprobes_all_disarmed || !kprobes_allow_optimization)
437		goto end;
438
439	/*
440	 * Wait for a quiescence period to ensure all running interrupts
441	 * are done. Because an optprobe may modify multiple instructions,
442	 * there is a chance that the Nth instruction is interrupted. In
443	 * that case, a running interrupt can return into the 2nd-Nth byte
444	 * of the jump instruction. This wait avoids that.
445	 */
446	synchronize_sched();
447
448	/*
449	 * Optimization/unoptimization refers to online_cpus via
450	 * stop_machine(), while cpu-hotplug modifies online_cpus. At the
451	 * same time, text_mutex is held both by cpu-hotplug and here.
452	 * This combination can cause a deadlock (cpu-hotplug tries to lock
453	 * text_mutex but stop_machine() cannot proceed because online_cpus
454	 * has been changed).
455	 * To avoid this deadlock, we call get_online_cpus() to prevent
456	 * cpu-hotplug outside of the text_mutex critical section.
457	 */
458	get_online_cpus();
459	mutex_lock(&text_mutex);
460	list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
461		WARN_ON(kprobe_disabled(&op->kp));
462		if (arch_optimize_kprobe(op) < 0)
463			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
464		list_del_init(&op->list);
465	}
466	mutex_unlock(&text_mutex);
467	put_online_cpus();
468end:
469	mutex_unlock(&kprobe_mutex);
470	mutex_unlock(&module_mutex);
471}
472
473/* Optimize kprobe if p is ready to be optimized */
474static __kprobes void optimize_kprobe(struct kprobe *p)
475{
476	struct optimized_kprobe *op;
477
478	/* Check if the kprobe is disabled or not ready for optimization. */
479	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
480	    (kprobe_disabled(p) || kprobes_all_disarmed))
481		return;
482
483	/* Neither break_handler nor post_handler is supported with optimization. */
484	if (p->break_handler || p->post_handler)
485		return;
486
487	op = container_of(p, struct optimized_kprobe, kp);
488
489	/* Check that no other kprobes live within the optimized instructions */
490	if (arch_check_optimized_kprobe(op) < 0)
491		return;
492
493	/* Check if it is already optimized. */
494	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
495		return;
496
497	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
498	list_add(&op->list, &optimizing_list);
499	if (!delayed_work_pending(&optimizing_work))
500		schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
501}
502
503/* Unoptimize a kprobe if p is optimized */
504static __kprobes void unoptimize_kprobe(struct kprobe *p)
505{
506	struct optimized_kprobe *op;
507
508	if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
509		op = container_of(p, struct optimized_kprobe, kp);
510		if (!list_empty(&op->list))
511			/* Dequeue from the optimization queue */
512			list_del_init(&op->list);
513		else
514			/* Replace jump with break */
515			arch_unoptimize_kprobe(op);
516		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
517	}
518}
519
520/* Remove optimized instructions */
521static void __kprobes kill_optimized_kprobe(struct kprobe *p)
522{
523	struct optimized_kprobe *op;
524
525	op = container_of(p, struct optimized_kprobe, kp);
526	if (!list_empty(&op->list)) {
527		/* Dequeue from the optimization queue */
528		list_del_init(&op->list);
529		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
530	}
531	/* Don't unoptimize, because the target code will be freed. */
532	arch_remove_optimized_kprobe(op);
533}
534
535/* Try to prepare optimized instructions */
536static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
537{
538	struct optimized_kprobe *op;
539
540	op = container_of(p, struct optimized_kprobe, kp);
541	arch_prepare_optimized_kprobe(op);
542}
543
544/* Free optimized instructions and optimized_kprobe */
545static __kprobes void free_aggr_kprobe(struct kprobe *p)
546{
547	struct optimized_kprobe *op;
548
549	op = container_of(p, struct optimized_kprobe, kp);
550	arch_remove_optimized_kprobe(op);
551	kfree(op);
552}
553
554/* Allocate new optimized_kprobe and try to prepare optimized instructions */
555static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
556{
557	struct optimized_kprobe *op;
558
559	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
560	if (!op)
561		return NULL;
562
563	INIT_LIST_HEAD(&op->list);
564	op->kp.addr = p->addr;
565	arch_prepare_optimized_kprobe(op);
566
567	return &op->kp;
568}
569
570static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
571
572/*
573 * Prepare an optimized_kprobe and optimize it
574 * NOTE: p must be a normal registered kprobe
575 */
576static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
577{
578	struct kprobe *ap;
579	struct optimized_kprobe *op;
580
581	ap = alloc_aggr_kprobe(p);
582	if (!ap)
583		return;
584
585	op = container_of(ap, struct optimized_kprobe, kp);
586	if (!arch_prepared_optinsn(&op->optinsn)) {
587		/* If preparing the optimized insn failed, fall back to a normal kprobe */
588		free_aggr_kprobe(ap);
589		return;
590	}
591
592	init_aggr_kprobe(ap, p);
593	optimize_kprobe(ap);
594}
595
596#ifdef CONFIG_SYSCTL
597static void __kprobes optimize_all_kprobes(void)
598{
599	struct hlist_head *head;
600	struct hlist_node *node;
601	struct kprobe *p;
602	unsigned int i;
603
604	/* If optimization is already allowed, just return */
605	if (kprobes_allow_optimization)
606		return;
607
608	kprobes_allow_optimization = true;
609	mutex_lock(&text_mutex);
610	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
611		head = &kprobe_table[i];
612		hlist_for_each_entry_rcu(p, node, head, hlist)
613			if (!kprobe_disabled(p))
614				optimize_kprobe(p);
615	}
616	mutex_unlock(&text_mutex);
617	printk(KERN_INFO "Kprobes globally optimized\n");
618}
619
620static void __kprobes unoptimize_all_kprobes(void)
621{
622	struct hlist_head *head;
623	struct hlist_node *node;
624	struct kprobe *p;
625	unsigned int i;
626
627	/* If optimization is already prohibited, just return */
628	if (!kprobes_allow_optimization)
629		return;
630
631	kprobes_allow_optimization = false;
632	printk(KERN_INFO "Kprobes globally unoptimized\n");
633	get_online_cpus();	/* For avoiding text_mutex deadlock */
634	mutex_lock(&text_mutex);
635	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
636		head = &kprobe_table[i];
637		hlist_for_each_entry_rcu(p, node, head, hlist) {
638			if (!kprobe_disabled(p))
639				unoptimize_kprobe(p);
640		}
641	}
642
643	mutex_unlock(&text_mutex);
644	put_online_cpus();
645	/* Allow all currently running kprobes to complete */
646	synchronize_sched();
647}
648
649int sysctl_kprobes_optimization;
650int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
651				      void __user *buffer, size_t *length,
652				      loff_t *ppos)
653{
654	int ret;
655
656	mutex_lock(&kprobe_mutex);
657	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
658	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
659
660	if (sysctl_kprobes_optimization)
661		optimize_all_kprobes();
662	else
663		unoptimize_all_kprobes();
664	mutex_unlock(&kprobe_mutex);
665
666	return ret;
667}
668#endif /* CONFIG_SYSCTL */
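/*
 * The handler above is hooked up in kernel/sysctl.c as the
 * debug.kprobes-optimization sysctl (see Documentation/kprobes.txt), so
 * optimization can be toggled at run time, for example:
 *
 *	sysctl -w debug.kprobes-optimization=0	# unoptimize all kprobes
 *	sysctl -w debug.kprobes-optimization=1	# allow optimization again
 */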
669
670static void __kprobes __arm_kprobe(struct kprobe *p)
671{
672	struct kprobe *old_p;
673
674	/* Check collision with other optimized kprobes */
675	old_p = get_optimized_kprobe((unsigned long)p->addr);
676	if (unlikely(old_p))
677		unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
678
679	arch_arm_kprobe(p);
680	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
681}
682
683static void __kprobes __disarm_kprobe(struct kprobe *p)
684{
685	struct kprobe *old_p;
686
687	unoptimize_kprobe(p);	/* Try to unoptimize */
688	arch_disarm_kprobe(p);
689
690	/* If another kprobe was blocked, optimize it. */
691	old_p = get_optimized_kprobe((unsigned long)p->addr);
692	if (unlikely(old_p))
693		optimize_kprobe(old_p);
694}
695
696#else /* !CONFIG_OPTPROBES */
697
698#define optimize_kprobe(p)			do {} while (0)
699#define unoptimize_kprobe(p)			do {} while (0)
700#define kill_optimized_kprobe(p)		do {} while (0)
701#define prepare_optimized_kprobe(p)		do {} while (0)
702#define try_to_optimize_kprobe(p)		do {} while (0)
703#define __arm_kprobe(p)				arch_arm_kprobe(p)
704#define __disarm_kprobe(p)			arch_disarm_kprobe(p)
705
706static __kprobes void free_aggr_kprobe(struct kprobe *p)
707{
708	kfree(p);
709}
710
711static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
712{
713	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
714}
715#endif /* CONFIG_OPTPROBES */
716
717/* Arm a kprobe with text_mutex */
718static void __kprobes arm_kprobe(struct kprobe *kp)
719{
720	/*
721	 * Here, since __arm_kprobe() doesn't use stop_machine(),
722	 * this doesn't cause deadlock on text_mutex. So, we don't
723	 * need get_online_cpus().
724	 */
725	mutex_lock(&text_mutex);
726	__arm_kprobe(kp);
727	mutex_unlock(&text_mutex);
728}
729
730/* Disarm a kprobe with text_mutex */
731static void __kprobes disarm_kprobe(struct kprobe *kp)
732{
733	get_online_cpus();	/* For avoiding text_mutex deadlock */
734	mutex_lock(&text_mutex);
735	__disarm_kprobe(kp);
736	mutex_unlock(&text_mutex);
737	put_online_cpus();
738}
739
740/*
741 * Aggregate handlers for multiple kprobes support - these handlers
742 * take care of invoking the individual kprobe handlers on p->list
743 */
744static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
745{
746	struct kprobe *kp;
747
748	list_for_each_entry_rcu(kp, &p->list, list) {
749		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
750			set_kprobe_instance(kp);
751			if (kp->pre_handler(kp, regs))
752				return 1;
753		}
754		reset_kprobe_instance();
755	}
756	return 0;
757}
758
759static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
760					unsigned long flags)
761{
762	struct kprobe *kp;
763
764	list_for_each_entry_rcu(kp, &p->list, list) {
765		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
766			set_kprobe_instance(kp);
767			kp->post_handler(kp, regs, flags);
768			reset_kprobe_instance();
769		}
770	}
771}
772
773static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
774					int trapnr)
775{
776	struct kprobe *cur = __get_cpu_var(kprobe_instance);
777
778	/*
779	 * if we faulted "during" the execution of a user specified
780	 * probe handler, invoke just that probe's fault handler
781	 */
782	if (cur && cur->fault_handler) {
783		if (cur->fault_handler(cur, regs, trapnr))
784			return 1;
785	}
786	return 0;
787}
788
789static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
790{
791	struct kprobe *cur = __get_cpu_var(kprobe_instance);
792	int ret = 0;
793
794	if (cur && cur->break_handler) {
795		if (cur->break_handler(cur, regs))
796			ret = 1;
797	}
798	reset_kprobe_instance();
799	return ret;
800}
801
802/* Walks the list and increments nmissed count for multiprobe case */
803void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
804{
805	struct kprobe *kp;
806	if (!kprobe_aggrprobe(p)) {
807		p->nmissed++;
808	} else {
809		list_for_each_entry_rcu(kp, &p->list, list)
810			kp->nmissed++;
811	}
812	return;
813}
814
815void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
816				struct hlist_head *head)
817{
818	struct kretprobe *rp = ri->rp;
819
820	/* remove the rp inst from the kretprobe_inst_table */
821	hlist_del(&ri->hlist);
822	INIT_HLIST_NODE(&ri->hlist);
823	if (likely(rp)) {
824		spin_lock(&rp->lock);
825		hlist_add_head(&ri->hlist, &rp->free_instances);
826		spin_unlock(&rp->lock);
827	} else
828		/* Unregistering */
829		hlist_add_head(&ri->hlist, head);
830}
831
832void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
833			 struct hlist_head **head, unsigned long *flags)
834{
835	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
836	spinlock_t *hlist_lock;
837
838	*head = &kretprobe_inst_table[hash];
839	hlist_lock = kretprobe_table_lock_ptr(hash);
840	spin_lock_irqsave(hlist_lock, *flags);
841}
842
843static void __kprobes kretprobe_table_lock(unsigned long hash,
844	unsigned long *flags)
845{
846	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
847	spin_lock_irqsave(hlist_lock, *flags);
848}
849
850void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
851	unsigned long *flags)
852{
853	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
854	spinlock_t *hlist_lock;
855
856	hlist_lock = kretprobe_table_lock_ptr(hash);
857	spin_unlock_irqrestore(hlist_lock, *flags);
858}
859
860void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
861{
862	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
863	spin_unlock_irqrestore(hlist_lock, *flags);
864}
865
866/*
867 * This function is called from finish_task_switch when task tk becomes dead,
868 * so that we can recycle any function-return probe instances associated
869 * with this task. These left over instances represent probed functions
870 * that have been called but will never return.
871 */
872void __kprobes kprobe_flush_task(struct task_struct *tk)
873{
874	struct kretprobe_instance *ri;
875	struct hlist_head *head, empty_rp;
876	struct hlist_node *node, *tmp;
877	unsigned long hash, flags = 0;
878
879	if (unlikely(!kprobes_initialized))
880		/* Early boot.  kretprobe_table_locks not yet initialized. */
881		return;
882
883	INIT_HLIST_HEAD(&empty_rp);
884	hash = hash_ptr(tk, KPROBE_HASH_BITS);
885	head = &kretprobe_inst_table[hash];
886	kretprobe_table_lock(hash, &flags);
887	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
888		if (ri->task == tk)
889			recycle_rp_inst(ri, &empty_rp);
890	}
891	kretprobe_table_unlock(hash, &flags);
892	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
893		hlist_del(&ri->hlist);
894		kfree(ri);
895	}
896}
897
898static inline void free_rp_inst(struct kretprobe *rp)
899{
900	struct kretprobe_instance *ri;
901	struct hlist_node *pos, *next;
902
903	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
904		hlist_del(&ri->hlist);
905		kfree(ri);
906	}
907}
908
909static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
910{
911	unsigned long flags, hash;
912	struct kretprobe_instance *ri;
913	struct hlist_node *pos, *next;
914	struct hlist_head *head;
915
916	/* No race here */
917	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
918		kretprobe_table_lock(hash, &flags);
919		head = &kretprobe_inst_table[hash];
920		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
921			if (ri->rp == rp)
922				ri->rp = NULL;
923		}
924		kretprobe_table_unlock(hash, &flags);
925	}
926	free_rp_inst(rp);
927}
928
929/*
930 * Add the new probe to ap->list. Fail if this is the
931 * second jprobe at the address - two jprobes can't coexist.
932 */
933static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
934{
935	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
936
937	if (p->break_handler || p->post_handler)
938		unoptimize_kprobe(ap);	/* Fall back to normal kprobe */
939
940	if (p->break_handler) {
941		if (ap->break_handler)
942			return -EEXIST;
943		list_add_tail_rcu(&p->list, &ap->list);
944		ap->break_handler = aggr_break_handler;
945	} else
946		list_add_rcu(&p->list, &ap->list);
947	if (p->post_handler && !ap->post_handler)
948		ap->post_handler = aggr_post_handler;
949
950	if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
951		ap->flags &= ~KPROBE_FLAG_DISABLED;
952		if (!kprobes_all_disarmed)
953			/* Arm the breakpoint again. */
954			__arm_kprobe(ap);
955	}
956	return 0;
957}
958
959/*
960 * Fill in the required fields of the "manager kprobe". Replace the
961 * earlier kprobe in the hlist with the manager kprobe
962 */
963static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
964{
965	/* Copy p's insn slot to ap */
966	copy_kprobe(p, ap);
967	flush_insn_slot(ap);
968	ap->addr = p->addr;
969	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
970	ap->pre_handler = aggr_pre_handler;
971	ap->fault_handler = aggr_fault_handler;
972	/* We don't care about a kprobe which has already gone. */
973	if (p->post_handler && !kprobe_gone(p))
974		ap->post_handler = aggr_post_handler;
975	if (p->break_handler && !kprobe_gone(p))
976		ap->break_handler = aggr_break_handler;
977
978	INIT_LIST_HEAD(&ap->list);
979	INIT_HLIST_NODE(&ap->hlist);
980
981	list_add_rcu(&p->list, &ap->list);
982	hlist_replace_rcu(&p->hlist, &ap->hlist);
983}
984
985/*
986 * This is the second or subsequent kprobe at the address - handle
987 * the intricacies
988 */
989static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
990					  struct kprobe *p)
991{
992	int ret = 0;
993	struct kprobe *ap = old_p;
994
995	if (!kprobe_aggrprobe(old_p)) {
996		/* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
997		ap = alloc_aggr_kprobe(old_p);
998		if (!ap)
999			return -ENOMEM;
1000		init_aggr_kprobe(ap, old_p);
1001	}
1002
1003	if (kprobe_gone(ap)) {
1004		/*
1005		 * We are attempting to insert a new probe at the same
1006		 * location that had a probe in a module vaddr area which has
1007		 * already been freed. So the instruction slot has already
1008		 * been released; we need a new slot for the new probe.
1009		 */
1010		ret = arch_prepare_kprobe(ap);
1011		if (ret)
1012			/*
1013			 * Even if allocating a new slot fails, we don't need
1014			 * to free the aggr_kprobe. It will be used next time,
1015			 * or freed by unregister_kprobe().
1016			 */
1017			return ret;
1018
1019		/* Prepare optimized instructions if possible. */
1020		prepare_optimized_kprobe(ap);
1021
1022		/*
1023		 * Clear gone flag to prevent allocating new slot again, and
1024		 * set disabled flag because it is not armed yet.
1025		 */
1026		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
1027			    | KPROBE_FLAG_DISABLED;
1028	}
1029
1030	/* Copy ap's insn slot to p */
1031	copy_kprobe(ap, p);
1032	return add_new_kprobe(ap, p);
1033}
1034
1035/* Try to disable an aggr_kprobe, and return 1 if it succeeded. */
1036static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
1037{
1038	struct kprobe *kp;
1039
1040	list_for_each_entry_rcu(kp, &p->list, list) {
1041		if (!kprobe_disabled(kp))
1042			/*
1043			 * There is an active probe on the list.
1044			 * We can't disable aggr_kprobe.
1045			 */
1046			return 0;
1047	}
1048	p->flags |= KPROBE_FLAG_DISABLED;
1049	return 1;
1050}
1051
1052static int __kprobes in_kprobes_functions(unsigned long addr)
1053{
1054	struct kprobe_blackpoint *kb;
1055
1056	if (addr >= (unsigned long)__kprobes_text_start &&
1057	    addr < (unsigned long)__kprobes_text_end)
1058		return -EINVAL;
1059	/*
1060	 * If there exists a kprobe_blacklist, verify and
1061	 * fail any probe registration in the prohibited area
1062	 */
1063	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
1064		if (kb->start_addr) {
1065			if (addr >= kb->start_addr &&
1066			    addr < (kb->start_addr + kb->range))
1067				return -EINVAL;
1068		}
1069	}
1070	return 0;
1071}
1072
1073/*
1074 * If we have a symbol_name argument, look it up and add the offset field
1075 * to it. This way, we can specify a relative address to a symbol.
1076 */
1077static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
1078{
1079	kprobe_opcode_t *addr = p->addr;
1080	if (p->symbol_name) {
1081		if (addr)
1082			return NULL;
1083		kprobe_lookup_name(p->symbol_name, addr);
1084	}
1085
1086	if (!addr)
1087		return NULL;
1088	return (kprobe_opcode_t *)(((char *)addr) + p->offset);
1089}
1090
1091/* Check passed kprobe is valid and return kprobe in kprobe_table. */
1092static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
1093{
1094	struct kprobe *old_p, *list_p;
1095
1096	old_p = get_kprobe(p->addr);
1097	if (unlikely(!old_p))
1098		return NULL;
1099
1100	if (p != old_p) {
1101		list_for_each_entry_rcu(list_p, &old_p->list, list)
1102			if (list_p == p)
1103			/* kprobe p is a valid probe */
1104				goto valid;
1105		return NULL;
1106	}
1107valid:
1108	return old_p;
1109}
1110
1111/* Return error if the kprobe is being re-registered */
1112static inline int check_kprobe_rereg(struct kprobe *p)
1113{
1114	int ret = 0;
1115	struct kprobe *old_p;
1116
1117	mutex_lock(&kprobe_mutex);
1118	old_p = __get_valid_kprobe(p);
1119	if (old_p)
1120		ret = -EINVAL;
1121	mutex_unlock(&kprobe_mutex);
1122	return ret;
1123}
1124
1125int __kprobes register_kprobe(struct kprobe *p)
1126{
1127	int ret = 0;
1128	struct kprobe *old_p;
1129	struct module *probed_mod;
1130	kprobe_opcode_t *addr;
1131
1132	addr = kprobe_addr(p);
1133	if (!addr)
1134		return -EINVAL;
1135	p->addr = addr;
1136
1137	ret = check_kprobe_rereg(p);
1138	if (ret)
1139		return ret;
1140
1141	preempt_disable();
1142	if (!kernel_text_address((unsigned long) p->addr) ||
1143	    in_kprobes_functions((unsigned long) p->addr) ||
1144	    ftrace_text_reserved(p->addr, p->addr)) {
1145		preempt_enable();
1146		return -EINVAL;
1147	}
1148
1149	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1150	p->flags &= KPROBE_FLAG_DISABLED;
1151
1152	/*
1153	 * Check if we are probing a module.
1154	 */
1155	probed_mod = __module_text_address((unsigned long) p->addr);
1156	if (probed_mod) {
1157		/*
1158		 * We must hold a refcount of the probed module while updating
1159		 * its code to prohibit unexpected unloading.
1160		 */
1161		if (unlikely(!try_module_get(probed_mod))) {
1162			preempt_enable();
1163			return -EINVAL;
1164		}
1165		/*
1166		 * If the module has already freed .init.text, we can't
1167		 * insert kprobes there.
1168		 */
1169		if (within_module_init((unsigned long)p->addr, probed_mod) &&
1170		    probed_mod->state != MODULE_STATE_COMING) {
1171			module_put(probed_mod);
1172			preempt_enable();
1173			return -EINVAL;
1174		}
1175	}
1176	preempt_enable();
1177
1178	p->nmissed = 0;
1179	INIT_LIST_HEAD(&p->list);
1180	mutex_lock(&kprobe_mutex);
1181
1182	get_online_cpus();	/* For avoiding text_mutex deadlock. */
1183	mutex_lock(&text_mutex);
1184
1185	old_p = get_kprobe(p->addr);
1186	if (old_p) {
1187		/* This may unoptimize old_p, hence text_mutex is held. */
1188		ret = register_aggr_kprobe(old_p, p);
1189		goto out;
1190	}
1191
1192	ret = arch_prepare_kprobe(p);
1193	if (ret)
1194		goto out;
1195
1196	INIT_HLIST_NODE(&p->hlist);
1197	hlist_add_head_rcu(&p->hlist,
1198		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1199
1200	if (!kprobes_all_disarmed && !kprobe_disabled(p))
1201		__arm_kprobe(p);
1202
1203	/* Try to optimize kprobe */
1204	try_to_optimize_kprobe(p);
1205
1206out:
1207	mutex_unlock(&text_mutex);
1208	put_online_cpus();
1209	mutex_unlock(&kprobe_mutex);
1210
1211	if (probed_mod)
1212		module_put(probed_mod);
1213
1214	return ret;
1215}
1216EXPORT_SYMBOL_GPL(register_kprobe);
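/*
 * A minimal usage sketch from a module; the handler name and the probed
 * symbol below are only illustrative (see Documentation/kprobes.txt and
 * samples/kprobes/ for complete examples):
 *
 *	static int my_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		pr_info("kprobe hit at %p\n", p->addr);
 *		return 0;	// let the probed instruction execute
 *	}
 *
 *	static struct kprobe my_kp = {
 *		.symbol_name	= "do_fork",
 *		.pre_handler	= my_pre,
 *	};
 *
 *	// module init:	register_kprobe(&my_kp);
 *	// module exit:	unregister_kprobe(&my_kp);
 */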
1217
1218/*
1219 * Unregister a kprobe without a scheduler synchronization.
1220 */
1221static int __kprobes __unregister_kprobe_top(struct kprobe *p)
1222{
1223	struct kprobe *old_p, *list_p;
1224
1225	old_p = __get_valid_kprobe(p);
1226	if (old_p == NULL)
1227		return -EINVAL;
1228
1229	if (old_p == p ||
1230	    (kprobe_aggrprobe(old_p) &&
1231	     list_is_singular(&old_p->list))) {
1232		/*
1233		 * Only probe on the hash list. Disarm only if kprobes are
1234		 * enabled and not gone - otherwise, the breakpoint would
1235		 * already have been removed. We save on flushing icache.
1236		 */
1237		if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
1238			disarm_kprobe(old_p);
1239		hlist_del_rcu(&old_p->hlist);
1240	} else {
1241		if (p->break_handler && !kprobe_gone(p))
1242			old_p->break_handler = NULL;
1243		if (p->post_handler && !kprobe_gone(p)) {
1244			list_for_each_entry_rcu(list_p, &old_p->list, list) {
1245				if ((list_p != p) && (list_p->post_handler))
1246					goto noclean;
1247			}
1248			old_p->post_handler = NULL;
1249		}
1250noclean:
1251		list_del_rcu(&p->list);
1252		if (!kprobe_disabled(old_p)) {
1253			try_to_disable_aggr_kprobe(old_p);
1254			if (!kprobes_all_disarmed) {
1255				if (kprobe_disabled(old_p))
1256					disarm_kprobe(old_p);
1257				else
1258					/* Try to optimize this probe again */
1259					optimize_kprobe(old_p);
1260			}
1261		}
1262	}
1263	return 0;
1264}
1265
1266static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
1267{
1268	struct kprobe *old_p;
1269
1270	if (list_empty(&p->list))
1271		arch_remove_kprobe(p);
1272	else if (list_is_singular(&p->list)) {
1273		/* "p" is the last child of an aggr_kprobe */
1274		old_p = list_entry(p->list.next, struct kprobe, list);
1275		list_del(&p->list);
1276		arch_remove_kprobe(old_p);
1277		free_aggr_kprobe(old_p);
1278	}
1279}
1280
1281int __kprobes register_kprobes(struct kprobe **kps, int num)
1282{
1283	int i, ret = 0;
1284
1285	if (num <= 0)
1286		return -EINVAL;
1287	for (i = 0; i < num; i++) {
1288		ret = register_kprobe(kps[i]);
1289		if (ret < 0) {
1290			if (i > 0)
1291				unregister_kprobes(kps, i);
1292			break;
1293		}
1294	}
1295	return ret;
1296}
1297EXPORT_SYMBOL_GPL(register_kprobes);
1298
1299void __kprobes unregister_kprobe(struct kprobe *p)
1300{
1301	unregister_kprobes(&p, 1);
1302}
1303EXPORT_SYMBOL_GPL(unregister_kprobe);
1304
1305void __kprobes unregister_kprobes(struct kprobe **kps, int num)
1306{
1307	int i;
1308
1309	if (num <= 0)
1310		return;
1311	mutex_lock(&kprobe_mutex);
1312	for (i = 0; i < num; i++)
1313		if (__unregister_kprobe_top(kps[i]) < 0)
1314			kps[i]->addr = NULL;
1315	mutex_unlock(&kprobe_mutex);
1316
1317	synchronize_sched();
1318	for (i = 0; i < num; i++)
1319		if (kps[i]->addr)
1320			__unregister_kprobe_bottom(kps[i]);
1321}
1322EXPORT_SYMBOL_GPL(unregister_kprobes);
1323
1324static struct notifier_block kprobe_exceptions_nb = {
1325	.notifier_call = kprobe_exceptions_notify,
1326	.priority = 0x7fffffff /* we need to be notified first */
1327};
1328
1329unsigned long __weak arch_deref_entry_point(void *entry)
1330{
1331	return (unsigned long)entry;
1332}
1333
1334int __kprobes register_jprobes(struct jprobe **jps, int num)
1335{
1336	struct jprobe *jp;
1337	int ret = 0, i;
1338
1339	if (num <= 0)
1340		return -EINVAL;
1341	for (i = 0; i < num; i++) {
1342		unsigned long addr, offset;
1343		jp = jps[i];
1344		addr = arch_deref_entry_point(jp->entry);
1345
1346		/* Verify probepoint is a function entry point */
1347		if (kallsyms_lookup_size_offset(addr, NULL, &offset) &&
1348		    offset == 0) {
1349			jp->kp.pre_handler = setjmp_pre_handler;
1350			jp->kp.break_handler = longjmp_break_handler;
1351			ret = register_kprobe(&jp->kp);
1352		} else
1353			ret = -EINVAL;
1354
1355		if (ret < 0) {
1356			if (i > 0)
1357				unregister_jprobes(jps, i);
1358			break;
1359		}
1360	}
1361	return ret;
1362}
1363EXPORT_SYMBOL_GPL(register_jprobes);
1364
1365int __kprobes register_jprobe(struct jprobe *jp)
1366{
1367	return register_jprobes(&jp, 1);
1368}
1369EXPORT_SYMBOL_GPL(register_jprobe);
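/*
 * A jprobe usage sketch (illustrative only, along the lines of
 * samples/kprobes/jprobe_example.c): the handler must share the probed
 * function's prototype and must end with jprobe_return().
 *
 *	static long my_jdo_fork(unsigned long clone_flags,
 *				unsigned long stack_start,
 *				struct pt_regs *regs,
 *				unsigned long stack_size,
 *				int __user *parent_tidptr,
 *				int __user *child_tidptr)
 *	{
 *		pr_info("do_fork: clone_flags=0x%lx\n", clone_flags);
 *		jprobe_return();	// mandatory; never returns here
 *		return 0;
 *	}
 *
 *	static struct jprobe my_jp = {
 *		.entry	= my_jdo_fork,
 *		.kp	= { .symbol_name = "do_fork" },
 *	};
 *
 *	// register_jprobe(&my_jp); ... unregister_jprobe(&my_jp);
 */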
1370
1371void __kprobes unregister_jprobe(struct jprobe *jp)
1372{
1373	unregister_jprobes(&jp, 1);
1374}
1375EXPORT_SYMBOL_GPL(unregister_jprobe);
1376
1377void __kprobes unregister_jprobes(struct jprobe **jps, int num)
1378{
1379	int i;
1380
1381	if (num <= 0)
1382		return;
1383	mutex_lock(&kprobe_mutex);
1384	for (i = 0; i < num; i++)
1385		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
1386			jps[i]->kp.addr = NULL;
1387	mutex_unlock(&kprobe_mutex);
1388
1389	synchronize_sched();
1390	for (i = 0; i < num; i++) {
1391		if (jps[i]->kp.addr)
1392			__unregister_kprobe_bottom(&jps[i]->kp);
1393	}
1394}
1395EXPORT_SYMBOL_GPL(unregister_jprobes);
1396
1397#ifdef CONFIG_KRETPROBES
1398/*
1399 * This kprobe pre_handler is registered with every kretprobe. When the
1400 * probe hits, it sets up the return probe.
1401 */
1402static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1403					   struct pt_regs *regs)
1404{
1405	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
1406	unsigned long hash, flags = 0;
1407	struct kretprobe_instance *ri;
1408
1409	/* TODO: consider swapping the return address (RA) only after the last pre_handler has fired */
1410	hash = hash_ptr(current, KPROBE_HASH_BITS);
1411	spin_lock_irqsave(&rp->lock, flags);
1412	if (!hlist_empty(&rp->free_instances)) {
1413		ri = hlist_entry(rp->free_instances.first,
1414				struct kretprobe_instance, hlist);
1415		hlist_del(&ri->hlist);
1416		spin_unlock_irqrestore(&rp->lock, flags);
1417
1418		ri->rp = rp;
1419		ri->task = current;
1420
1421		if (rp->entry_handler && rp->entry_handler(ri, regs))
1422			return 0;
1423
1424		arch_prepare_kretprobe(ri, regs);
1425
1426		/* XXX(hch): why is there no hlist_move_head? */
1427		INIT_HLIST_NODE(&ri->hlist);
1428		kretprobe_table_lock(hash, &flags);
1429		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
1430		kretprobe_table_unlock(hash, &flags);
1431	} else {
1432		rp->nmissed++;
1433		spin_unlock_irqrestore(&rp->lock, flags);
1434	}
1435	return 0;
1436}
1437
1438int __kprobes register_kretprobe(struct kretprobe *rp)
1439{
1440	int ret = 0;
1441	struct kretprobe_instance *inst;
1442	int i;
1443	void *addr;
1444
1445	if (kretprobe_blacklist_size) {
1446		addr = kprobe_addr(&rp->kp);
1447		if (!addr)
1448			return -EINVAL;
1449
1450		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1451			if (kretprobe_blacklist[i].addr == addr)
1452				return -EINVAL;
1453		}
1454	}
1455
1456	rp->kp.pre_handler = pre_handler_kretprobe;
1457	rp->kp.post_handler = NULL;
1458	rp->kp.fault_handler = NULL;
1459	rp->kp.break_handler = NULL;
1460
1461	/* Pre-allocate memory for max kretprobe instances */
1462	if (rp->maxactive <= 0) {
1463#ifdef CONFIG_PREEMPT
1464		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
1465#else
1466		rp->maxactive = num_possible_cpus();
1467#endif
1468	}
1469	spin_lock_init(&rp->lock);
1470	INIT_HLIST_HEAD(&rp->free_instances);
1471	for (i = 0; i < rp->maxactive; i++) {
1472		inst = kmalloc(sizeof(struct kretprobe_instance) +
1473			       rp->data_size, GFP_KERNEL);
1474		if (inst == NULL) {
1475			free_rp_inst(rp);
1476			return -ENOMEM;
1477		}
1478		INIT_HLIST_NODE(&inst->hlist);
1479		hlist_add_head(&inst->hlist, &rp->free_instances);
1480	}
1481
1482	rp->nmissed = 0;
1483	/* Establish function entry probe point */
1484	ret = register_kprobe(&rp->kp);
1485	if (ret != 0)
1486		free_rp_inst(rp);
1487	return ret;
1488}
1489EXPORT_SYMBOL_GPL(register_kretprobe);
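/*
 * A kretprobe usage sketch (illustrative only): the handler runs when the
 * probed function returns, and the return value can be read from pt_regs
 * via regs_return_value().
 *
 *	static int my_ret_handler(struct kretprobe_instance *ri,
 *				  struct pt_regs *regs)
 *	{
 *		pr_info("%s returned %ld\n", ri->rp->kp.symbol_name,
 *			(long)regs_return_value(regs));
 *		return 0;
 *	}
 *
 *	static struct kretprobe my_rp = {
 *		.handler	= my_ret_handler,
 *		.kp		= { .symbol_name = "do_fork" },
 *		.maxactive	= 20,	// concurrent instances to pre-allocate
 *	};
 *
 *	// register_kretprobe(&my_rp); ... unregister_kretprobe(&my_rp);
 */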
1490
1491int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1492{
1493	int ret = 0, i;
1494
1495	if (num <= 0)
1496		return -EINVAL;
1497	for (i = 0; i < num; i++) {
1498		ret = register_kretprobe(rps[i]);
1499		if (ret < 0) {
1500			if (i > 0)
1501				unregister_kretprobes(rps, i);
1502			break;
1503		}
1504	}
1505	return ret;
1506}
1507EXPORT_SYMBOL_GPL(register_kretprobes);
1508
1509void __kprobes unregister_kretprobe(struct kretprobe *rp)
1510{
1511	unregister_kretprobes(&rp, 1);
1512}
1513EXPORT_SYMBOL_GPL(unregister_kretprobe);
1514
1515void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1516{
1517	int i;
1518
1519	if (num <= 0)
1520		return;
1521	mutex_lock(&kprobe_mutex);
1522	for (i = 0; i < num; i++)
1523		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
1524			rps[i]->kp.addr = NULL;
1525	mutex_unlock(&kprobe_mutex);
1526
1527	synchronize_sched();
1528	for (i = 0; i < num; i++) {
1529		if (rps[i]->kp.addr) {
1530			__unregister_kprobe_bottom(&rps[i]->kp);
1531			cleanup_rp_inst(rps[i]);
1532		}
1533	}
1534}
1535EXPORT_SYMBOL_GPL(unregister_kretprobes);
1536
1537#else /* CONFIG_KRETPROBES */
1538int __kprobes register_kretprobe(struct kretprobe *rp)
1539{
1540	return -ENOSYS;
1541}
1542EXPORT_SYMBOL_GPL(register_kretprobe);
1543
1544int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1545{
1546	return -ENOSYS;
1547}
1548EXPORT_SYMBOL_GPL(register_kretprobes);
1549
1550void __kprobes unregister_kretprobe(struct kretprobe *rp)
1551{
1552}
1553EXPORT_SYMBOL_GPL(unregister_kretprobe);
1554
1555void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1556{
1557}
1558EXPORT_SYMBOL_GPL(unregister_kretprobes);
1559
1560static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1561					   struct pt_regs *regs)
1562{
1563	return 0;
1564}
1565
1566#endif /* CONFIG_KRETPROBES */
1567
1568/* Mark the kprobe as gone and remove its instruction buffer. */
1569static void __kprobes kill_kprobe(struct kprobe *p)
1570{
1571	struct kprobe *kp;
1572
1573	p->flags |= KPROBE_FLAG_GONE;
1574	if (kprobe_aggrprobe(p)) {
1575		/*
1576		 * If this is an aggr_kprobe, we have to list all the
1577		 * chained probes and mark them GONE.
1578		 */
1579		list_for_each_entry_rcu(kp, &p->list, list)
1580			kp->flags |= KPROBE_FLAG_GONE;
1581		p->post_handler = NULL;
1582		p->break_handler = NULL;
1583		kill_optimized_kprobe(p);
1584	}
1585	/*
1586	 * Here, we can remove insn_slot safely, because no thread calls
1587	 * the original probed function (which will be freed soon) any more.
1588	 */
1589	arch_remove_kprobe(p);
1590}
1591
1592/* Disable one kprobe */
1593int __kprobes disable_kprobe(struct kprobe *kp)
1594{
1595	int ret = 0;
1596	struct kprobe *p;
1597
1598	mutex_lock(&kprobe_mutex);
1599
1600	/* Check whether specified probe is valid. */
1601	p = __get_valid_kprobe(kp);
1602	if (unlikely(p == NULL)) {
1603		ret = -EINVAL;
1604		goto out;
1605	}
1606
1607	/* If the probe is already disabled (or gone), just return */
1608	if (kprobe_disabled(kp))
1609		goto out;
1610
1611	kp->flags |= KPROBE_FLAG_DISABLED;
1612	if (p != kp)
1613		/* When kp != p, p is always enabled. */
1614		try_to_disable_aggr_kprobe(p);
1615
1616	if (!kprobes_all_disarmed && kprobe_disabled(p))
1617		disarm_kprobe(p);
1618out:
1619	mutex_unlock(&kprobe_mutex);
1620	return ret;
1621}
1622EXPORT_SYMBOL_GPL(disable_kprobe);
1623
1624/* Enable one kprobe */
1625int __kprobes enable_kprobe(struct kprobe *kp)
1626{
1627	int ret = 0;
1628	struct kprobe *p;
1629
1630	mutex_lock(&kprobe_mutex);
1631
1632	/* Check whether specified probe is valid. */
1633	p = __get_valid_kprobe(kp);
1634	if (unlikely(p == NULL)) {
1635		ret = -EINVAL;
1636		goto out;
1637	}
1638
1639	if (kprobe_gone(kp)) {
1640		/* This kprobe has gone; we can't enable it. */
1641		ret = -EINVAL;
1642		goto out;
1643	}
1644
1645	if (p != kp)
1646		kp->flags &= ~KPROBE_FLAG_DISABLED;
1647
1648	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
1649		p->flags &= ~KPROBE_FLAG_DISABLED;
1650		arm_kprobe(p);
1651	}
1652out:
1653	mutex_unlock(&kprobe_mutex);
1654	return ret;
1655}
1656EXPORT_SYMBOL_GPL(enable_kprobe);
1657
1658void __kprobes dump_kprobe(struct kprobe *kp)
1659{
1660	printk(KERN_WARNING "Dumping kprobe:\n");
1661	printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
1662	       kp->symbol_name, kp->addr, kp->offset);
1663}
1664
1665/* Module notifier call back, checking kprobes on the module */
1666static int __kprobes kprobes_module_callback(struct notifier_block *nb,
1667					     unsigned long val, void *data)
1668{
1669	struct module *mod = data;
1670	struct hlist_head *head;
1671	struct hlist_node *node;
1672	struct kprobe *p;
1673	unsigned int i;
1674	int checkcore = (val == MODULE_STATE_GOING);
1675
1676	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
1677		return NOTIFY_DONE;
1678
1679	/*
1680	 * When MODULE_STATE_GOING is notified, both the module's .text and
1681	 * .init.text sections will be freed. When MODULE_STATE_LIVE is
1682	 * notified, only the .init.text section will be freed. We need to
1683	 * disable the kprobes which have been inserted in those sections.
1684	 */
1685	mutex_lock(&kprobe_mutex);
1686	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1687		head = &kprobe_table[i];
1688		hlist_for_each_entry_rcu(p, node, head, hlist)
1689			if (within_module_init((unsigned long)p->addr, mod) ||
1690			    (checkcore &&
1691			     within_module_core((unsigned long)p->addr, mod))) {
1692				/*
1693				 * The vaddr this probe is installed at will soon
1694				 * be vfreed, but not synced to disk. Hence,
1695				 * disarming the breakpoint isn't needed.
1696				 */
1697				kill_kprobe(p);
1698			}
1699	}
1700	mutex_unlock(&kprobe_mutex);
1701	return NOTIFY_DONE;
1702}
1703
1704static struct notifier_block kprobe_module_nb = {
1705	.notifier_call = kprobes_module_callback,
1706	.priority = 0
1707};
1708
1709static int __init init_kprobes(void)
1710{
1711	int i, err = 0;
1712	unsigned long offset = 0, size = 0;
1713	char *modname, namebuf[128];
1714	const char *symbol_name;
1715	void *addr;
1716	struct kprobe_blackpoint *kb;
1717
1718	/* FIXME allocate the probe table, currently defined statically */
1719	/* initialize all list heads */
1720	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1721		INIT_HLIST_HEAD(&kprobe_table[i]);
1722		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
1723		spin_lock_init(&(kretprobe_table_locks[i].lock));
1724	}
1725
1726	/*
1727	 * Lookup and populate the kprobe_blacklist.
1728	 *
1729	 * Unlike the kretprobe blacklist, we'll need to determine
1730	 * the range of addresses that belong to the said functions,
1731	 * since a kprobe need not necessarily be at the beginning
1732	 * of a function.
1733	 */
1734	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
1735		kprobe_lookup_name(kb->name, addr);
1736		if (!addr)
1737			continue;
1738
1739		kb->start_addr = (unsigned long)addr;
1740		symbol_name = kallsyms_lookup(kb->start_addr,
1741				&size, &offset, &modname, namebuf);
1742		if (!symbol_name)
1743			kb->range = 0;
1744		else
1745			kb->range = size;
1746	}
1747
1748	if (kretprobe_blacklist_size) {
1749		/* lookup the function address from its name */
1750		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1751			kprobe_lookup_name(kretprobe_blacklist[i].name,
1752					   kretprobe_blacklist[i].addr);
1753			if (!kretprobe_blacklist[i].addr)
1754				printk("kretprobe: lookup failed: %s\n",
1755				       kretprobe_blacklist[i].name);
1756		}
1757	}
1758
1759#if defined(CONFIG_OPTPROBES)
1760#if defined(__ARCH_WANT_KPROBES_INSN_SLOT)
1761	/* Init kprobe_optinsn_slots */
1762	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
1763#endif
1764	/* By default, kprobes can be optimized */
1765	kprobes_allow_optimization = true;
1766#endif
1767
1768	/* By default, kprobes are armed */
1769	kprobes_all_disarmed = false;
1770
1771	err = arch_init_kprobes();
1772	if (!err)
1773		err = register_die_notifier(&kprobe_exceptions_nb);
1774	if (!err)
1775		err = register_module_notifier(&kprobe_module_nb);
1776
1777	kprobes_initialized = (err == 0);
1778
1779	if (!err)
1780		init_test_probes();
1781	return err;
1782}
1783
1784#ifdef CONFIG_DEBUG_FS
1785static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
1786		const char *sym, int offset, char *modname, struct kprobe *pp)
1787{
1788	char *kprobe_type;
1789
1790	if (p->pre_handler == pre_handler_kretprobe)
1791		kprobe_type = "r";
1792	else if (p->pre_handler == setjmp_pre_handler)
1793		kprobe_type = "j";
1794	else
1795		kprobe_type = "k";
1796
1797	if (sym)
1798		seq_printf(pi, "%p  %s  %s+0x%x  %s ",
1799			p->addr, kprobe_type, sym, offset,
1800			(modname ? modname : " "));
1801	else
1802		seq_printf(pi, "%p  %s  %p ",
1803			p->addr, kprobe_type, p->addr);
1804
1805	if (!pp)
1806		pp = p;
1807	seq_printf(pi, "%s%s%s\n",
1808		(kprobe_gone(p) ? "[GONE]" : ""),
1809		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
1810		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
1811}
1812
1813static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
1814{
1815	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
1816}
1817
1818static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
1819{
1820	(*pos)++;
1821	if (*pos >= KPROBE_TABLE_SIZE)
1822		return NULL;
1823	return pos;
1824}
1825
1826static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
1827{
1828	/* Nothing to do */
1829}
1830
1831static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
1832{
1833	struct hlist_head *head;
1834	struct hlist_node *node;
1835	struct kprobe *p, *kp;
1836	const char *sym = NULL;
1837	unsigned int i = *(loff_t *) v;
1838	unsigned long offset = 0;
1839	char *modname, namebuf[128];
1840
1841	head = &kprobe_table[i];
1842	preempt_disable();
1843	hlist_for_each_entry_rcu(p, node, head, hlist) {
1844		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
1845					&offset, &modname, namebuf);
1846		if (kprobe_aggrprobe(p)) {
1847			list_for_each_entry_rcu(kp, &p->list, list)
1848				report_probe(pi, kp, sym, offset, modname, p);
1849		} else
1850			report_probe(pi, p, sym, offset, modname, NULL);
1851	}
1852	preempt_enable();
1853	return 0;
1854}
1855
1856static const struct seq_operations kprobes_seq_ops = {
1857	.start = kprobe_seq_start,
1858	.next  = kprobe_seq_next,
1859	.stop  = kprobe_seq_stop,
1860	.show  = show_kprobe_addr
1861};
1862
1863static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
1864{
1865	return seq_open(filp, &kprobes_seq_ops);
1866}
1867
1868static const struct file_operations debugfs_kprobes_operations = {
1869	.open           = kprobes_open,
1870	.read           = seq_read,
1871	.llseek         = seq_lseek,
1872	.release        = seq_release,
1873};
1874
1875static void __kprobes arm_all_kprobes(void)
1876{
1877	struct hlist_head *head;
1878	struct hlist_node *node;
1879	struct kprobe *p;
1880	unsigned int i;
1881
1882	mutex_lock(&kprobe_mutex);
1883
1884	/* If kprobes are armed, just return */
1885	if (!kprobes_all_disarmed)
1886		goto already_enabled;
1887
1888	/* Arming kprobes doesn't optimize kprobe itself */
1889	mutex_lock(&text_mutex);
1890	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1891		head = &kprobe_table[i];
1892		hlist_for_each_entry_rcu(p, node, head, hlist)
1893			if (!kprobe_disabled(p))
1894				__arm_kprobe(p);
1895	}
1896	mutex_unlock(&text_mutex);
1897
1898	kprobes_all_disarmed = false;
1899	printk(KERN_INFO "Kprobes globally enabled\n");
1900
1901already_enabled:
1902	mutex_unlock(&kprobe_mutex);
1903	return;
1904}
1905
1906static void __kprobes disarm_all_kprobes(void)
1907{
1908	struct hlist_head *head;
1909	struct hlist_node *node;
1910	struct kprobe *p;
1911	unsigned int i;
1912
1913	mutex_lock(&kprobe_mutex);
1914
1915	/* If kprobes are already disarmed, just return */
1916	if (kprobes_all_disarmed)
1917		goto already_disabled;
1918
1919	kprobes_all_disarmed = true;
1920	printk(KERN_INFO "Kprobes globally disabled\n");
1921
1922	/*
1923	 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
1924	 * because disarming may also unoptimize kprobes.
1925	 */
1926	get_online_cpus();
1927	mutex_lock(&text_mutex);
1928	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1929		head = &kprobe_table[i];
1930		hlist_for_each_entry_rcu(p, node, head, hlist) {
1931			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1932				__disarm_kprobe(p);
1933		}
1934	}
1935
1936	mutex_unlock(&text_mutex);
1937	put_online_cpus();
1938	mutex_unlock(&kprobe_mutex);
1939	/* Allow all currently running kprobes to complete */
1940	synchronize_sched();
1941	return;
1942
1943already_disabled:
1944	mutex_unlock(&kprobe_mutex);
1945	return;
1946}
1947
1948/*
1949 * XXX: The debugfs bool file interface doesn't allow for callbacks
1950 * when the bool state is switched. We can reuse that facility when
1951 * available
1952 */
1953static ssize_t read_enabled_file_bool(struct file *file,
1954	       char __user *user_buf, size_t count, loff_t *ppos)
1955{
1956	char buf[3];
1957
1958	if (!kprobes_all_disarmed)
1959		buf[0] = '1';
1960	else
1961		buf[0] = '0';
1962	buf[1] = '\n';
1963	buf[2] = 0x00;
1964	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
1965}
1966
1967static ssize_t write_enabled_file_bool(struct file *file,
1968	       const char __user *user_buf, size_t count, loff_t *ppos)
1969{
1970	char buf[32];
1971	int buf_size;
1972
1973	buf_size = min(count, (sizeof(buf)-1));
1974	if (copy_from_user(buf, user_buf, buf_size))
1975		return -EFAULT;
1976
1977	switch (buf[0]) {
1978	case 'y':
1979	case 'Y':
1980	case '1':
1981		arm_all_kprobes();
1982		break;
1983	case 'n':
1984	case 'N':
1985	case '0':
1986		disarm_all_kprobes();
1987		break;
1988	}
1989
1990	return count;
1991}
1992
1993static const struct file_operations fops_kp = {
1994	.read =         read_enabled_file_bool,
1995	.write =        write_enabled_file_bool,
1996};
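/*
 * The files created below live under <debugfs>/kprobes/ (usually
 * /sys/kernel/debug/kprobes/):
 *
 *	list	- dump of every registered probe, one per line
 *	enabled	- "echo 0 > enabled" disarms all kprobes,
 *		  "echo 1 > enabled" arms them again
 */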
1997
1998static int __kprobes debugfs_kprobe_init(void)
1999{
2000	struct dentry *dir, *file;
2001	unsigned int value = 1;
2002
2003	dir = debugfs_create_dir("kprobes", NULL);
2004	if (!dir)
2005		return -ENOMEM;
2006
2007	file = debugfs_create_file("list", 0444, dir, NULL,
2008				&debugfs_kprobes_operations);
2009	if (!file) {
2010		debugfs_remove(dir);
2011		return -ENOMEM;
2012	}
2013
2014	file = debugfs_create_file("enabled", 0600, dir,
2015					&value, &fops_kp);
2016	if (!file) {
2017		debugfs_remove(dir);
2018		return -ENOMEM;
2019	}
2020
2021	return 0;
2022}
2023
2024late_initcall(debugfs_kprobe_init);
2025#endif /* CONFIG_DEBUG_FS */
2026
2027module_init(init_kprobes);
2028
2029/* defined in arch/.../kernel/kprobes.c */
2030EXPORT_SYMBOL_GPL(jprobe_return);
2031