1/**
2 * @file op_syscalls.c
3 * Tracing of system calls
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Bob Montgomery
9 * @author Will Cohen
10 * @author John Levon
11 * @author Philippe Elie
12 */
13
14#include <linux/sched.h>
15#include <linux/unistd.h>
16#include <linux/mman.h>
17#include <linux/file.h>
18
19#include "oprofile.h"
20#include "op_dcache.h"
21#include "op_util.h"
22
23uint dname_top;
24struct qstr **dname_stack;
25char * pool_pos;
26char * pool_start;
27char * pool_end;
28
29void oprof_put_note(struct op_note * samp);
30
31/* ------------ system calls --------------- */
32
/*
 * Record of the six unsigned long arguments of an mmap request.
 * No code visible in this file refers to it.
 */
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};
41
42/* --------- IA64 versions of system calls ------ */
43asmlinkage static int (*old_sys_clone)(long, long);
44asmlinkage static int (*old_sys_clone2)(long, long, long);
45asmlinkage static int (*old_sys_execve)(char *, char **, char **);
46asmlinkage static unsigned long (*old_sys_mmap)(unsigned long,
47				unsigned long, int, int, int, long);
48asmlinkage static unsigned long (*old_sys_mmap2)(unsigned long,
49				unsigned long, int, int, int, long);
50asmlinkage static long (*old_sys_init_module)(char const *, struct module *);
51asmlinkage static long (*old_sys_exit)(int);
52
53/* --------- declarations of interception stubs for IA64  ------ */
54asmlinkage long post_stub_clone(long, long);
55asmlinkage long post_stub_clone2(long, long, long);
56asmlinkage long my_ia64_execve(char *, char **, char **);
57asmlinkage unsigned long post_stub_mmap(unsigned long,
58					unsigned long, int, int, int, long);
59asmlinkage unsigned long post_stub_mmap2(unsigned long,
60					unsigned long, int, int, int, long);
61asmlinkage long post_stub_init_module(char const *, struct module *);
62asmlinkage long pre_stub_exit(int);
63
64/* IA64 system call table doesn't use function pointers, it uses
65 * pointers to code (not the same thing).  Basically it can violate the
66 * procedure calling rules because these "procedure calls" are made by
67 * the assembly language BREAK handler in ivt.S.
68 */
69
/*
 * Two-word function descriptor.  Only the ip word is read or written by
 * the code in this file.
 */
struct fdesc {
	void * ip;	/* code address, as stored in a sys_call_table slot */
	void * gp;	/* presumably the global pointer word -- unused here */
};

/* One descriptor per intercepted system call; op_save_syscalls() stores
 * the original table entry in its ip field. */
struct fdesc fdesc_clone,
	fdesc_clone2,
	fdesc_execve,
	fdesc_mmap,
	fdesc_mmap2,
	fdesc_init_module,
	fdesc_exit;
/* ----------- End of IA64 weirdness for now -------------- */
83
84spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
85
86/* called with map_lock held */
87static void oprof_output_map(ulong addr, ulong len,
88	ulong offset, struct file * file, int is_execve)
89{
90	struct op_note note;
91
92	/* don't bother with /dev/zero mappings etc. */
93	if (!len)
94		return;
95
96	note.pid = current->pid;
97	note.tgid = op_get_tgid();
98	note.addr = addr;
99	note.len = len;
100	note.offset = offset;
101	note.type = is_execve ? OP_EXEC : OP_MAP;
102	note.hash = hash_path(file);
103	if (note.hash == -1)
104		return;
105	oprof_put_note(&note);
106}
107
108static int oprof_output_maps(struct task_struct * task)
109{
110	int size=0;
111	struct mm_struct * mm;
112	struct vm_area_struct * map;
113
114	/* we don't need to worry about mm_users here, since there is at
115	   least one user (current), and if there's other code using this
116	   mm, then mm_users must be at least 2; we should never have to
117	   mmput() here. */
118
119	if (!(mm = task->mm))
120		goto out;
121
122	lock_mmap(mm);
123	spin_lock(&map_lock);
124
125	/* We need two pass, daemon assume than the first mmap notification
126	 * is for the executable but some process doesn't follow this model.
127	 */
128	for (map = mm->mmap; map; map = map->vm_next) {
129		if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
130			continue;
131		if (!(map->vm_flags & VM_EXECUTABLE))
132			continue;
133
134		oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
135			GET_VM_OFFSET(map), map->vm_file, 1);
136	}
137	for (map = mm->mmap; map; map = map->vm_next) {
138		if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
139			continue;
140		if (map->vm_flags & VM_EXECUTABLE)
141			continue;
142
143		oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
144			GET_VM_OFFSET(map), map->vm_file, 0);
145	}
146	spin_unlock(&map_lock);
147	unlock_mmap(mm);
148
149out:
150	return size;
151}
152
153
154/* execve is a special case on IA64.  The others get the result and
155 * arguments after the system call has been made from the ASM stub. */
156
157asmlinkage long
158my_sys_execve (char * filename, char **argv, char **envp, struct pt_regs * regs)
159{
160	int error;
161
162	MOD_INC_USE_COUNT;
163
164	filename = getname(filename);
165	error = PTR_ERR(filename);
166	if (IS_ERR(filename))
167		goto out;
168	error = do_execve(filename, argv, envp, regs);
169
170	if (!error) {
171		PTRACE_OFF(current);
172		oprof_output_maps(current);
173	}
174	putname(filename);
175out:
176	unlock_execve();
177	MOD_DEC_USE_COUNT;
178	return error;
179}
180
181
182static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags,
183	ulong fd, ulong offset)
184{
185	struct file * file;
186
187	lock_out_mmap();
188
189	file = fget(fd);
190	if (!file)
191		goto out;
192
193	spin_lock(&map_lock);
194	oprof_output_map(addr, len, offset, file, 0);
195	spin_unlock(&map_lock);
196
197	fput(file);
198
199out:
200	unlock_out_mmap();
201}
202
203
204/*
205 * IA64 mmap routines:
206 * The post_sys_* routines are called after the syscall has been made.
207 * The first argument is the return value from the system call.
208 */
209asmlinkage void post_sys_mmap2(ulong ret, ulong addr, ulong len,
210	ulong prot, ulong flags, ulong fd, ulong pgoff)
211{
212	/* FIXME: This should be done in the ASM stub. */
213	MOD_INC_USE_COUNT;
214
215	if ((prot & PROT_EXEC) && ret >= 0)
216		out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT);
217	goto out;
218out:
219	MOD_DEC_USE_COUNT;
220}
221
222asmlinkage void post_sys_mmap(ulong ret, ulong addr, ulong len,
223	ulong prot, ulong flags, ulong fd, ulong off)
224{
225	/* FIXME: This should be done in the ASM stub. */
226	MOD_INC_USE_COUNT;
227
228	if ((prot & PROT_EXEC) && ret >= 0)
229		out_mmap(ret, len, prot, flags, fd, off);
230	goto out;
231out:
232	MOD_DEC_USE_COUNT;
233}
234
235
236inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid)
237{
238	struct op_note note;
239
240	note.type = OP_FORK;
241	note.pid = old_pid;
242	note.tgid = old_tgid;
243	note.addr = new_pid;
244	note.len = new_tgid;
245	oprof_put_note(&note);
246}
247
248
249asmlinkage void post_sys_clone(long ret, long arg0, long arg1)
250{
251	u32 pid = current->pid;
252	u32 tgid = op_get_tgid();
253
254	/* FIXME: This should be done in the ASM stub. */
255	MOD_INC_USE_COUNT;
256
257	if (ret)
258		/* FIXME: my libc show clone() is not implemented in ia64
259		 * but used only by fork() with a SIGCHILD first parameter
260		 * so we assume it's a fork */
261		oprof_report_fork(pid, ret, pid, tgid);
262	MOD_DEC_USE_COUNT;
263}
264
265asmlinkage void post_sys_clone2(long ret, long arg0, long arg1, long arg2)
266{
267	u32 pid = current->pid;
268	u32 tgid = op_get_tgid();
269	long clone_flags = arg0;
270
271	/* FIXME: This should be done in the ASM stub. */
272	MOD_INC_USE_COUNT;
273
274	if (ret) {
275		if (clone_flags & CLONE_THREAD)
276			oprof_report_fork(pid, ret, tgid, tgid);
277		else
278			oprof_report_fork(pid, ret, tgid, ret);
279	}
280	MOD_DEC_USE_COUNT;
281}
282
283asmlinkage void
284post_sys_init_module(long ret, char const * name_user,
285                     struct module * mod_user)
286{
287	/* FIXME: This should be done in the ASM stub. */
288	MOD_INC_USE_COUNT;
289
290	if (ret >= 0) {
291		struct op_note note;
292
293		note.type = OP_DROP_MODULES;
294		oprof_put_note(&note);
295	}
296	MOD_DEC_USE_COUNT;
297}
298
299/* Exit must use a pre-call intercept stub.  There is no post exit. */
300asmlinkage void pre_sys_exit(int error_code)
301{
302	struct op_note note;
303
304	MOD_INC_USE_COUNT;
305
306	note.addr = current->times.tms_utime;
307	note.len = current->times.tms_stime;
308	note.offset = current->start_time;
309	note.type = OP_EXIT;
310	note.pid = current->pid;
311	note.tgid = op_get_tgid();
312	oprof_put_note(&note);
313
314	/* this looks UP-dangerous, as the exit sleeps and we don't
315	 * have a use count, but in fact its ok as sys_exit is noreturn,
316	 * so we can never come back to this non-existent exec page
317	 */
318	MOD_DEC_USE_COUNT;
319}
320
321extern void * sys_call_table[];
322
323/* FIXME:  Now that I'm never trying to do a C-level call through these
324 * pointers, I should just save, intercept, and restore with void *
325 * instead of the void * part of the function descriptor, I think.
326 */
327
328void op_save_syscalls(void)
329{
330	fdesc_clone.ip = sys_call_table[__NR_clone - __NR_ni_syscall];
331	old_sys_clone = (void *)&fdesc_clone;
332	fdesc_clone2.ip = sys_call_table[__NR_clone2 - __NR_ni_syscall];
333	old_sys_clone2 = (void *)&fdesc_clone2;
334	fdesc_execve.ip = sys_call_table[__NR_execve - __NR_ni_syscall];
335	old_sys_execve = (void *)&fdesc_execve;
336	fdesc_mmap.ip = sys_call_table[__NR_mmap - __NR_ni_syscall];
337	old_sys_mmap = (void *)&fdesc_mmap;
338	fdesc_mmap2.ip = sys_call_table[__NR_mmap2 - __NR_ni_syscall];
339	old_sys_mmap2 = (void *)&fdesc_mmap2;
340	fdesc_init_module.ip = sys_call_table[__NR_init_module - __NR_ni_syscall];
341	old_sys_init_module = (void *)&fdesc_init_module;
342	fdesc_exit.ip = sys_call_table[__NR_exit - __NR_ni_syscall];
343	old_sys_exit = (void *)&fdesc_exit;
344}
345
346void op_intercept_syscalls(void)
347{
348	/* Must extract the function address from the stub function
349	 * descriptors.
350	 */
351	sys_call_table[__NR_clone - __NR_ni_syscall] =
352		((struct fdesc *)post_stub_clone)->ip;
353	sys_call_table[__NR_clone2 - __NR_ni_syscall] =
354		((struct fdesc *)post_stub_clone2)->ip;
355	sys_call_table[__NR_execve - __NR_ni_syscall] =
356		((struct fdesc *)my_ia64_execve)->ip;
357	sys_call_table[__NR_mmap - __NR_ni_syscall] =
358		((struct fdesc *)post_stub_mmap)->ip;
359	sys_call_table[__NR_mmap2 - __NR_ni_syscall] =
360		((struct fdesc *)post_stub_mmap2)->ip;
361	sys_call_table[__NR_init_module - __NR_ni_syscall] =
362		((struct fdesc *)post_stub_init_module)->ip;
363	sys_call_table[__NR_exit - __NR_ni_syscall] =
364		((struct fdesc *)pre_stub_exit)->ip;
365}
366
367void op_restore_syscalls(void)
368{
369	sys_call_table[__NR_clone - __NR_ni_syscall] =
370		((struct fdesc *)old_sys_clone)->ip;
371	sys_call_table[__NR_clone2 - __NR_ni_syscall] =
372		((struct fdesc *)old_sys_clone2)->ip;
373	sys_call_table[__NR_execve - __NR_ni_syscall] =
374		((struct fdesc *)old_sys_execve)->ip;
375	sys_call_table[__NR_mmap - __NR_ni_syscall] =
376		((struct fdesc *)old_sys_mmap)->ip;
377	sys_call_table[__NR_mmap2 - __NR_ni_syscall] =
378		((struct fdesc *)old_sys_mmap2)->ip;
379	sys_call_table[__NR_init_module - __NR_ni_syscall] =
380		((struct fdesc *)old_sys_init_module)->ip;
381	sys_call_table[__NR_exit - __NR_ni_syscall] =
382		((struct fdesc *)old_sys_exit)->ip;
383}
384