process_64.c revision 9af45651f1f7c89942e016a1a00a7ebddfa727f8
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
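
/*
 * Illustrative sketch, not part of this file: other kernel code can hook the
 * idle notifier chain exported above roughly as below. The callback and
 * notifier_block names are made up for the example.
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *data)
 *	{
 *		if (action == IDLE_START)
 *			;	// this CPU is entering idle
 *		else if (action == IDLE_END)
 *			;	// this CPU left idle (e.g. an interrupt arrived)
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call	= my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 *	...
 *	idle_notifier_unregister(&my_idle_nb);
 */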

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}

	ds_exit_thread(current);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
	regs->flags		= 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

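/*
 * Illustrative sketch, not part of this file: get_tsc_mode()/set_tsc_mode()
 * above sit behind the PR_GET_TSC/PR_SET_TSC prctl() interface, so from
 * userspace (a separate program, not kernel code) the TSC mode of the
 * current task can be queried and changed roughly like this:
 *
 *	#include <sys/prctl.h>
 *
 *	int tsc_mode;
 *	prctl(PR_GET_TSC, &tsc_mode, 0, 0, 0);		// read current mode
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);	// RDTSC now faults
 */
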
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer is not supported either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	percpu_write(current_task, next_p);

	percpu_write(kernel_stack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
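
/*
 * Illustrative sketch, not part of this file: sys_arch_prctl() above is what
 * userspace reaches through the arch_prctl(2) system call. glibc provides no
 * wrapper for it, so a program typically goes through syscall(2), e.g.
 * (userspace code; "some_base" is just a placeholder address below the
 * TASK_SIZE limit checked in do_arch_prctl() above):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);		// read FS base
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, some_base);	// set GS base
 */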

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}

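/*
 * Illustrative sketch, not part of this file: the effect of
 * arch_randomize_brk() can be observed from userspace by printing the
 * initial program break, which moves between runs when heap randomization
 * is enabled (userspace code, built as an ordinary program):
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		printf("brk = %p\n", sbrk(0));	// varies from run to run
 *		return 0;
 *	}
 */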