op_syscalls.c revision 8cfa702f803c5ef6a2b062a489a1b2cf66b45b5e
1/**
2 * @file op_syscalls.c
3 * Tracing of system calls
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 */
11
12#include <linux/sched.h>
13#include <linux/unistd.h>
14#include <linux/mman.h>
15#include <linux/file.h>
16
17#include "oprofile.h"
18
19void oprof_put_note(struct op_note * samp);
20void __oprof_put_note(struct op_note * samp);
21
22extern spinlock_t note_lock;
23
24/* ------------ system calls --------------- */
25
/* Argument block of the old-style mmap system call: user space hands the
 * kernel a pointer to one of these (see my_old_mmap(), which copies it in
 * with copy_from_user()). The field order is part of that layout and must
 * not change.
 */
struct mmap_arg_struct {
	unsigned long addr;	/* not read by this file */
	unsigned long len;	/* length of the mapping */
	unsigned long prot;	/* tested against PROT_EXEC */
	unsigned long flags;	/* forwarded to out_mmap(), unused there */
	unsigned long fd;	/* descriptor looked up with fget() */
	unsigned long offset;	/* forwarded unchanged as the map offset */
};
34
35asmlinkage static int (*old_sys_fork)(struct pt_regs);
36asmlinkage static int (*old_sys_vfork)(struct pt_regs);
37asmlinkage static int (*old_sys_clone)(struct pt_regs);
38asmlinkage static int (*old_sys_execve)(struct pt_regs);
39asmlinkage static int (*old_old_mmap)(struct mmap_arg_struct *);
40#ifdef HAVE_MMAP2
41asmlinkage static long (*old_sys_mmap2)(ulong, ulong, ulong, ulong, ulong, ulong);
42#endif
43asmlinkage static long (*old_sys_init_module)(char const *, struct module *);
44asmlinkage static long (*old_sys_exit)(int);
45
46/* called with note_lock held */
47static void oprof_output_map(ulong addr, ulong len, ulong offset,
48			     struct file * file, int is_execve)
49{
50	struct op_note note;
51
52	/* don't bother with /dev/zero mappings etc. */
53	if (!len)
54		return;
55
56	note.pid = current->pid;
57	note.tgid = op_get_tgid();
58	note.addr = addr;
59	note.len = len;
60	note.offset = offset;
61	note.type = is_execve ? OP_EXEC : OP_MAP;
62	note.hash = hash_path(file);
63	if (note.hash == -1)
64		return;
65	/* holding note lock */
66	__oprof_put_note(&note);
67}
68
69static int oprof_output_maps(struct task_struct * task)
70{
71	int size=0;
72	struct mm_struct * mm;
73	struct vm_area_struct * map;
74
75	/* we don't need to worry about mm_users here, since there is at
76	   least one user (current), and if there's other code using this
77	   mm, then mm_users must be at least 2; we should never have to
78	   mmput() here. */
79
80	if (!(mm = task->mm))
81		goto out;
82
83	lock_mmap(mm);
84	spin_lock(&note_lock);
85
86	/* We need two pass, daemon assume than the first mmap notification
87	 * is for the executable but some process doesn't follow this model.
88	 */
89	for (map = mm->mmap; map; map = map->vm_next) {
90		if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
91			continue;
92		if (!(map->vm_flags & VM_EXECUTABLE))
93			continue;
94
95		oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
96			GET_VM_OFFSET(map), map->vm_file, 1);
97	}
98	for (map = mm->mmap; map; map = map->vm_next) {
99		if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
100			continue;
101		if (map->vm_flags & VM_EXECUTABLE)
102			continue;
103
104		oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
105			GET_VM_OFFSET(map), map->vm_file, 0);
106	}
107
108	spin_unlock(&note_lock);
109	unlock_mmap(mm);
110
111out:
112	return size;
113}
114
115asmlinkage static int my_sys_execve(struct pt_regs regs)
116{
117	char * filename;
118	int ret;
119
120	MOD_INC_USE_COUNT;
121
122	lock_execve();
123
124	filename = getname((char *)regs.ebx);
125	if (IS_ERR(filename)) {
126		ret = PTR_ERR(filename);
127		goto out;
128	}
129	ret = do_execve(filename, (char **)regs.ecx, (char **)regs.edx, &regs);
130
131	if (!ret) {
132		PTRACE_OFF(current);
133		oprof_output_maps(current);
134	}
135
136	putname(filename);
137
138out:
139	unlock_execve();
140	MOD_DEC_USE_COUNT;
141        return ret;
142}
143
144static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, ulong fd,
145		     ulong offset)
146{
147	struct file * file;
148
149	lock_out_mmap();
150
151	file = fget(fd);
152	if (!file)
153		goto out;
154
155	spin_lock(&note_lock);
156	oprof_output_map(addr, len, offset, file, 0);
157	spin_unlock(&note_lock);
158
159	fput(file);
160
161out:
162	unlock_out_mmap();
163}
164
#ifdef HAVE_MMAP2
/**
 * Replacement mmap2 system call entry: chain to the original handler and
 * report the mapping when it is executable.
 *
 * @param addr, len, prot, flags, fd, pgoff  passed unchanged to the
 *        original handler; pgoff is shifted by PAGE_SHIFT to obtain the
 *        offset recorded in the note
 * @return whatever the original handler returned
 */
asmlinkage static int my_sys_mmap2(ulong addr, ulong len,
	ulong prot, ulong flags, ulong fd, ulong pgoff)
{
	long ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_mmap2(addr, len, prot, flags, fd, pgoff);

	/* On success the return value is an address, and a valid address
	 * can have its top bit set. A signed ">= 0" test would treat such
	 * mappings as failures and never report them, so only the errno
	 * range recognised by IS_ERR() counts as an error.
	 */
	if ((prot & PROT_EXEC) && !IS_ERR((void *)ret))
		out_mmap((ulong)ret, len, prot, flags, fd, pgoff << PAGE_SHIFT);

	MOD_DEC_USE_COUNT;
	return ret;
}
#endif
182
183asmlinkage static int my_old_mmap(struct mmap_arg_struct * arg)
184{
185	int ret;
186
187	MOD_INC_USE_COUNT;
188
189	ret = old_old_mmap(arg);
190
191	if (ret >= 0) {
192		struct mmap_arg_struct a;
193
194		if (copy_from_user(&a, arg, sizeof(a))) {
195			ret = -EFAULT;
196			goto out;
197		}
198
199		if (a.prot&PROT_EXEC)
200			out_mmap(ret, a.len, a.prot, a.flags, a.fd, a.offset);
201	}
202
203out:
204	MOD_DEC_USE_COUNT;
205	return ret;
206}
207
208inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid)
209{
210	struct op_note note;
211
212	note.type = OP_FORK;
213	note.pid = old_pid;
214	note.tgid = old_tgid;
215	note.addr = new_pid;
216	note.len = new_tgid;
217	oprof_put_note(&note);
218}
219
220asmlinkage static int my_sys_fork(struct pt_regs regs)
221{
222	u32 pid = current->pid;
223	u32 tgid = op_get_tgid();
224	int ret;
225
226	MOD_INC_USE_COUNT;
227
228	ret = old_sys_fork(regs);
229	if (ret)
230		oprof_report_fork(pid, ret, tgid, ret);
231	MOD_DEC_USE_COUNT;
232	return ret;
233}
234
235asmlinkage static int my_sys_vfork(struct pt_regs regs)
236{
237	u32 pid = current->pid;
238	u32 tgid = op_get_tgid();
239	int ret;
240
241	MOD_INC_USE_COUNT;
242	ret = old_sys_vfork(regs);
243	if (ret)
244		oprof_report_fork(pid, ret, tgid, ret);
245	MOD_DEC_USE_COUNT;
246	return ret;
247}
248
249asmlinkage static int my_sys_clone(struct pt_regs regs)
250{
251	u32 pid = current->pid;
252	u32 tgid = op_get_tgid();
253#if V_AT_LEAST(2, 4, 0)
254	u32 clone_flags = regs.ebx;
255#endif
256	int ret;
257
258	MOD_INC_USE_COUNT;
259	ret = old_sys_clone(regs);
260	if (ret) {
261#if V_AT_LEAST(2, 4, 0)
262		if (clone_flags & CLONE_THREAD)
263			oprof_report_fork(pid, ret, tgid, tgid);
264		else
265#endif
266			oprof_report_fork(pid, ret, tgid, ret);
267	}
268	MOD_DEC_USE_COUNT;
269	return ret;
270}
271
272asmlinkage static long my_sys_init_module(char const * name_user, struct module * mod_user)
273{
274	long ret;
275
276	MOD_INC_USE_COUNT;
277
278	ret = old_sys_init_module(name_user, mod_user);
279
280	if (ret >= 0) {
281		struct op_note note;
282
283		note.type = OP_DROP_MODULES;
284		oprof_put_note(&note);
285	}
286	MOD_DEC_USE_COUNT;
287	return ret;
288}
289
290/* used from do_nmi */
291asmlinkage long my_sys_exit(int error_code)
292{
293	struct op_note note;
294
295	MOD_INC_USE_COUNT;
296
297	note.type = OP_EXIT;
298	note.pid = current->pid;
299	note.tgid = op_get_tgid();
300	oprof_put_note(&note);
301
302	/* this looks UP-dangerous, as the exit sleeps and we don't
303	 * have a use count, but in fact its ok as sys_exit is noreturn,
304	 * so we can never come back to this non-existent exec page
305	 */
306	MOD_DEC_USE_COUNT;
307	return old_sys_exit(error_code);
308}
309
310extern void * sys_call_table[];
311
312void op_save_syscalls(void)
313{
314	old_sys_fork = sys_call_table[__NR_fork];
315	old_sys_vfork = sys_call_table[__NR_vfork];
316	old_sys_clone = sys_call_table[__NR_clone];
317	old_sys_execve = sys_call_table[__NR_execve];
318	old_old_mmap = sys_call_table[__NR_mmap];
319#ifdef HAVE_MMAP2
320	old_sys_mmap2 = sys_call_table[__NR_mmap2];
321#endif
322	old_sys_init_module = sys_call_table[__NR_init_module];
323	old_sys_exit = sys_call_table[__NR_exit];
324}
325
326void op_intercept_syscalls(void)
327{
328	sys_call_table[__NR_fork] = my_sys_fork;
329	sys_call_table[__NR_vfork] = my_sys_vfork;
330	sys_call_table[__NR_clone] = my_sys_clone;
331	sys_call_table[__NR_execve] = my_sys_execve;
332	sys_call_table[__NR_mmap] = my_old_mmap;
333#ifdef HAVE_MMAP2
334	sys_call_table[__NR_mmap2] = my_sys_mmap2;
335#endif
336	sys_call_table[__NR_init_module] = my_sys_init_module;
337	sys_call_table[__NR_exit] = my_sys_exit;
338}
339
340void op_restore_syscalls(void)
341{
342	sys_call_table[__NR_fork] = old_sys_fork;
343	sys_call_table[__NR_vfork] = old_sys_vfork;
344	sys_call_table[__NR_clone] = old_sys_clone;
345	sys_call_table[__NR_execve] = old_sys_execve;
346	sys_call_table[__NR_mmap] = old_old_mmap;
347#ifdef HAVE_MMAP2
348	sys_call_table[__NR_mmap2] = old_sys_mmap2;
349#endif
350	sys_call_table[__NR_init_module] = old_sys_init_module;
351	sys_call_table[__NR_exit] = old_sys_exit;
352}
353