process_64.c revision 5b0e508415a83989fe704b4718a1a214bc333ca7
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

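/*
 * Default clone flags for kernel threads: share the kernel address
 * space (CLONE_VM) and keep them out of reach of ptrace (CLONE_UNTRACED).
 */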
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

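/*
 * Register a callback to be notified when a CPU enters (IDLE_START)
 * and leaves (IDLE_END) the idle loop.
 */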
void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	local_irq_disable();
	if (!need_resched()) {
		ktime_t t0, t1;
		u64 t0n, t1n;

		t0 = ktime_get();
		t0n = ktime_to_ns(t0);
		safe_halt();	/* enables interrupts racelessly */
		local_irq_disable();
		t1 = ktime_get();
		t1n = ktime_to_ns(t1);
		sched_clock_idle_wakeup_event(t1n - t0n);
	}
	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
		while (!need_resched()) {
			void (*idle)(void);

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without an interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

static void do_nothing(void *unused)
{
}

/*
 * cpu_idle_wait - Used to ensure that all the CPUs discard the old value of
 * pm_idle and pick up the new pm_idle value. Required while changing the
 * pm_idle handler on SMP systems.
 *
 * The caller must have changed pm_idle to the new value before the call. The
 * old pm_idle value will not be used by any CPU after this function returns.
 */
void cpu_idle_wait(void)
{
	smp_mb();
	/* kick all the CPUs so that they exit out of pm_idle */
	smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
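
/*
 * Typical caller pattern (illustrative sketch only; new_idle_fn is a
 * hypothetical handler, not defined in this file):
 *
 *	pm_idle = new_idle_fn;
 *	cpu_idle_wait();
 *
 * After cpu_idle_wait() returns, no CPU is still running the old handler.
 */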

/*
 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI used to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter an optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
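/* ax and cx are handed to MWAIT unchanged as its hint (eax) and extension (ecx) operands */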
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(ax, cx);
	}
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}


static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
	if (force_mwait)
		return 1;
	/* Any C1 states supported? */
	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int selected;

	if (selected)
		return;
#ifdef CONFIG_X86_SMP
	if (pm_idle == poll_idle && smp_num_siblings > 1) {
		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
			" performance may degrade.\n");
	}
#endif
	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			printk(KERN_INFO "using mwait in idle threads.\n");
			pm_idle = mwait_idle;
		}
	}
	selected = 1;
}

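/*
 * Boot-time override: "idle=poll" selects the polling idle loop,
 * "idle=mwait" forces use of MWAIT even when it would not be chosen
 * automatically.
 */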
static int __init idle_setup(char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

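	/*
	 * An exec of the other ABI is pending: toggle TIF_IA32 and, when
	 * switching to the 32bit ABI, also mark the thread as TS_COMPAT.
	 */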
	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
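	/*
	 * ~0UL means no user stack was supplied (the kernel thread case):
	 * point sp at the child's saved regs on its kernel stack.
	 */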
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
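	/* 0x200 is X86_EFLAGS_IF: the new program starts with interrupts enabled */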
	regs->flags		= 0x200;
	set_fs(USER_DS);
}
EXPORT_SYMBOL_GPL(start_thread);

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/*
		 * A segment register != 0 always requires a reload.
		 * Also reload when it has changed.
		 * When the previous process used a 64bit base, always
		 * reload to avoid an information leak.
		 */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/*
			 * Check if the user used a selector != 0; if yes,
			 * clear the 64bit base, since the overloaded base
			 * is always mapped to the NULL selector.
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when the next process has a 64bit base, use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * If the task has used the FPU in the last 5 timeslices, just do a
	 * full restore of the math state immediately to avoid the trap; the
	 * chances of needing the FPU soon are obviously high now.
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
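	/*
	 * Walk the saved frame pointer chain: the return address lives at
	 * fp + 8 and the caller's frame pointer at *fp.  Return the first
	 * return address that is not inside the scheduler itself.
	 */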
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

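/*
 * Illustrative userspace usage (sketch only, not part of this file): a
 * 64bit process typically sets its FS base for thread-local storage with
 *
 *	syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);
 *
 * where tls_block is a hypothetical buffer; ARCH_GET_FS/ARCH_GET_GS treat
 * addr as a user pointer that the kernel stores the current base through.
 */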
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

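/*
 * Randomize the initial user stack pointer by up to 8k when address space
 * randomization is enabled, then align it to 16 bytes.
 */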
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
