process_64.c revision 00dba56465228825ea806e3a7fc0aa6bba7bdc6c
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

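/*
 * Idle notifiers: interested code can register to be told when a CPU
 * enters (IDLE_START) and leaves (IDLE_END) the idle loop.  enter_idle()
 * and __exit_idle() bracket each idle period and keep the per-CPU
 * "isidle" PDA flag in sync.
 */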
void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			pm_idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

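/*
 * Called on exec: reset the per-thread state (pending ABI switch, debug
 * registers, TLS slots and FPU state) so the new image starts clean.
 */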
void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

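/*
 * Last sanity check when a task is released: by this point it must not
 * own an LDT anymore.
 */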
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

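/*
 * Install a 32-bit descriptor with the given base address in one of the
 * task's TLS GDT slots; used below for small (<4GB) FS/GS bases.
 */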
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

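/*
 * Set up a newly forked task: build its kernel-stack pt_regs, copy the
 * segment and FS/GS state from the parent, duplicate the I/O bitmap if
 * one is in use, and install a fresh TLS when CLONE_SETTLS is set.
 */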
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

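/*
 * Set up the registers of a freshly exec'ed 64-bit process: flat user
 * segments, the new instruction and stack pointers, a clean flags word
 * and fresh FPU/extended state.
 */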
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
	regs->flags		= 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

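/*
 * PR_SET_TSC/PR_GET_TSC support: toggling CR4.TSD makes user-space RDTSC
 * fault (or not); TIF_NOTSC records the per-thread setting so it can be
 * re-applied on context switch.
 */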
static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

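/*
 * Slow path of the context switch: only runs when the outgoing or the
 * incoming task needs extra work (DS area/debugctl MSRs, debug registers,
 * the TSC disable bit, the I/O bitmap or BTS timestamps).
 */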
static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

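/*
 * fork(), clone() and vfork() entry points: thin wrappers that pass the
 * appropriate flags and stack pointer to do_fork().
 */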
asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

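/*
 * Walk the sleeping task's frame-pointer chain and return the first
 * return address outside the scheduler; used for /proc/<pid>/wchan.
 */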
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

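/*
 * arch_prctl() backend: get or set the FS/GS base of @task.  Small
 * (<4GB) bases are installed through a GDT TLS slot because that is
 * cheaper to switch; larger bases go through the FS/GS base MSRs.
 */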
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

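/*
 * Randomize the initial user stack pointer a little (unless address space
 * randomization is disabled), keeping 16-byte alignment.
 */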
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}