process_64.c revision 9af45651f1f7c89942e016a1a00a7ebddfa727f8
1/* 2 * Copyright (C) 1995 Linus Torvalds 3 * 4 * Pentium III FXSR, SSE support 5 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * 7 * X86-64 port 8 * Andi Kleen. 9 * 10 * CPU hotplug support - ashok.raj@intel.com 11 */ 12 13/* 14 * This file handles the architecture-dependent parts of process handling.. 15 */ 16 17#include <stdarg.h> 18 19#include <linux/cpu.h> 20#include <linux/errno.h> 21#include <linux/sched.h> 22#include <linux/fs.h> 23#include <linux/kernel.h> 24#include <linux/mm.h> 25#include <linux/elfcore.h> 26#include <linux/smp.h> 27#include <linux/slab.h> 28#include <linux/user.h> 29#include <linux/interrupt.h> 30#include <linux/utsname.h> 31#include <linux/delay.h> 32#include <linux/module.h> 33#include <linux/ptrace.h> 34#include <linux/random.h> 35#include <linux/notifier.h> 36#include <linux/kprobes.h> 37#include <linux/kdebug.h> 38#include <linux/tick.h> 39#include <linux/prctl.h> 40#include <linux/uaccess.h> 41#include <linux/io.h> 42#include <linux/ftrace.h> 43 44#include <asm/pgtable.h> 45#include <asm/system.h> 46#include <asm/processor.h> 47#include <asm/i387.h> 48#include <asm/mmu_context.h> 49#include <asm/pda.h> 50#include <asm/prctl.h> 51#include <asm/desc.h> 52#include <asm/proto.h> 53#include <asm/ia32.h> 54#include <asm/idle.h> 55#include <asm/syscalls.h> 56#include <asm/ds.h> 57 58asmlinkage extern void ret_from_fork(void); 59 60DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 61EXPORT_PER_CPU_SYMBOL(current_task); 62 63unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 64 65static ATOMIC_NOTIFIER_HEAD(idle_notifier); 66 67void idle_notifier_register(struct notifier_block *n) 68{ 69 atomic_notifier_chain_register(&idle_notifier, n); 70} 71EXPORT_SYMBOL_GPL(idle_notifier_register); 72 73void idle_notifier_unregister(struct notifier_block *n) 74{ 75 atomic_notifier_chain_unregister(&idle_notifier, n); 76} 77EXPORT_SYMBOL_GPL(idle_notifier_unregister); 78 79void enter_idle(void) 80{ 81 write_pda(isidle, 1); 82 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 83} 84 85static void __exit_idle(void) 86{ 87 if (test_and_clear_bit_pda(0, isidle) == 0) 88 return; 89 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 90} 91 92/* Called from interrupts to signify idle end */ 93void exit_idle(void) 94{ 95 /* idle loop has pid 0 */ 96 if (current->pid) 97 return; 98 __exit_idle(); 99} 100 101#ifndef CONFIG_SMP 102static inline void play_dead(void) 103{ 104 BUG(); 105} 106#endif 107 108/* 109 * The idle thread. There's no useful work to be 110 * done, so just try to conserve power and have a 111 * low exit latency (ie sit in a loop waiting for 112 * somebody to say that they'd like to reschedule) 113 */ 114void cpu_idle(void) 115{ 116 current_thread_info()->status |= TS_POLLING; 117 /* endless idle loop with no priority at all */ 118 while (1) { 119 tick_nohz_stop_sched_tick(1); 120 while (!need_resched()) { 121 122 rmb(); 123 124 if (cpu_is_offline(smp_processor_id())) 125 play_dead(); 126 /* 127 * Idle routines should keep interrupts disabled 128 * from here on, until they go to idle. 129 * Otherwise, idle callbacks can misfire. 130 */ 131 local_irq_disable(); 132 enter_idle(); 133 /* Don't trace irqs off for idle */ 134 stop_critical_timings(); 135 pm_idle(); 136 start_critical_timings(); 137 /* In many cases the interrupt that ended idle 138 has already called exit_idle. But some idle 139 loops can be woken up without interrupt. */ 140 __exit_idle(); 141 } 142 143 tick_nohz_restart_sched_tick(); 144 preempt_enable_no_resched(); 145 schedule(); 146 preempt_disable(); 147 } 148} 149 150/* Prints also some state that isn't saved in the pt_regs */ 151void __show_regs(struct pt_regs *regs, int all) 152{ 153 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; 154 unsigned long d0, d1, d2, d3, d6, d7; 155 unsigned int fsindex, gsindex; 156 unsigned int ds, cs, es; 157 158 printk("\n"); 159 print_modules(); 160 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", 161 current->pid, current->comm, print_tainted(), 162 init_utsname()->release, 163 (int)strcspn(init_utsname()->version, " "), 164 init_utsname()->version); 165 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); 166 printk_address(regs->ip, 1); 167 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, 168 regs->sp, regs->flags); 169 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", 170 regs->ax, regs->bx, regs->cx); 171 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", 172 regs->dx, regs->si, regs->di); 173 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", 174 regs->bp, regs->r8, regs->r9); 175 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", 176 regs->r10, regs->r11, regs->r12); 177 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", 178 regs->r13, regs->r14, regs->r15); 179 180 asm("movl %%ds,%0" : "=r" (ds)); 181 asm("movl %%cs,%0" : "=r" (cs)); 182 asm("movl %%es,%0" : "=r" (es)); 183 asm("movl %%fs,%0" : "=r" (fsindex)); 184 asm("movl %%gs,%0" : "=r" (gsindex)); 185 186 rdmsrl(MSR_FS_BASE, fs); 187 rdmsrl(MSR_GS_BASE, gs); 188 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 189 190 if (!all) 191 return; 192 193 cr0 = read_cr0(); 194 cr2 = read_cr2(); 195 cr3 = read_cr3(); 196 cr4 = read_cr4(); 197 198 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 199 fs, fsindex, gs, gsindex, shadowgs); 200 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, 201 es, cr0); 202 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, 203 cr4); 204 205 get_debugreg(d0, 0); 206 get_debugreg(d1, 1); 207 get_debugreg(d2, 2); 208 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); 209 get_debugreg(d3, 3); 210 get_debugreg(d6, 6); 211 get_debugreg(d7, 7); 212 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 213} 214 215void show_regs(struct pt_regs *regs) 216{ 217 printk(KERN_INFO "CPU %d:", smp_processor_id()); 218 __show_regs(regs, 1); 219 show_trace(NULL, regs, (void *)(regs + 1), regs->bp); 220} 221 222/* 223 * Free current thread data structures etc.. 224 */ 225void exit_thread(void) 226{ 227 struct task_struct *me = current; 228 struct thread_struct *t = &me->thread; 229 230 if (me->thread.io_bitmap_ptr) { 231 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 232 233 kfree(t->io_bitmap_ptr); 234 t->io_bitmap_ptr = NULL; 235 clear_thread_flag(TIF_IO_BITMAP); 236 /* 237 * Careful, clear this in the TSS too: 238 */ 239 memset(tss->io_bitmap, 0xff, t->io_bitmap_max); 240 t->io_bitmap_max = 0; 241 put_cpu(); 242 } 243 244 ds_exit_thread(current); 245} 246 247void flush_thread(void) 248{ 249 struct task_struct *tsk = current; 250 251 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { 252 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); 253 if (test_tsk_thread_flag(tsk, TIF_IA32)) { 254 clear_tsk_thread_flag(tsk, TIF_IA32); 255 } else { 256 set_tsk_thread_flag(tsk, TIF_IA32); 257 current_thread_info()->status |= TS_COMPAT; 258 } 259 } 260 clear_tsk_thread_flag(tsk, TIF_DEBUG); 261 262 tsk->thread.debugreg0 = 0; 263 tsk->thread.debugreg1 = 0; 264 tsk->thread.debugreg2 = 0; 265 tsk->thread.debugreg3 = 0; 266 tsk->thread.debugreg6 = 0; 267 tsk->thread.debugreg7 = 0; 268 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 269 /* 270 * Forget coprocessor state.. 271 */ 272 tsk->fpu_counter = 0; 273 clear_fpu(tsk); 274 clear_used_math(); 275} 276 277void release_thread(struct task_struct *dead_task) 278{ 279 if (dead_task->mm) { 280 if (dead_task->mm->context.size) { 281 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", 282 dead_task->comm, 283 dead_task->mm->context.ldt, 284 dead_task->mm->context.size); 285 BUG(); 286 } 287 } 288} 289 290static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 291{ 292 struct user_desc ud = { 293 .base_addr = addr, 294 .limit = 0xfffff, 295 .seg_32bit = 1, 296 .limit_in_pages = 1, 297 .useable = 1, 298 }; 299 struct desc_struct *desc = t->thread.tls_array; 300 desc += tls; 301 fill_ldt(desc, &ud); 302} 303 304static inline u32 read_32bit_tls(struct task_struct *t, int tls) 305{ 306 return get_desc_base(&t->thread.tls_array[tls]); 307} 308 309/* 310 * This gets called before we allocate a new thread and copy 311 * the current task into it. 312 */ 313void prepare_to_copy(struct task_struct *tsk) 314{ 315 unlazy_fpu(tsk); 316} 317 318int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, 319 unsigned long unused, 320 struct task_struct *p, struct pt_regs *regs) 321{ 322 int err; 323 struct pt_regs *childregs; 324 struct task_struct *me = current; 325 326 childregs = ((struct pt_regs *) 327 (THREAD_SIZE + task_stack_page(p))) - 1; 328 *childregs = *regs; 329 330 childregs->ax = 0; 331 childregs->sp = sp; 332 if (sp == ~0UL) 333 childregs->sp = (unsigned long)childregs; 334 335 p->thread.sp = (unsigned long) childregs; 336 p->thread.sp0 = (unsigned long) (childregs+1); 337 p->thread.usersp = me->thread.usersp; 338 339 set_tsk_thread_flag(p, TIF_FORK); 340 341 p->thread.fs = me->thread.fs; 342 p->thread.gs = me->thread.gs; 343 344 savesegment(gs, p->thread.gsindex); 345 savesegment(fs, p->thread.fsindex); 346 savesegment(es, p->thread.es); 347 savesegment(ds, p->thread.ds); 348 349 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 350 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 351 if (!p->thread.io_bitmap_ptr) { 352 p->thread.io_bitmap_max = 0; 353 return -ENOMEM; 354 } 355 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, 356 IO_BITMAP_BYTES); 357 set_tsk_thread_flag(p, TIF_IO_BITMAP); 358 } 359 360 /* 361 * Set a new TLS for the child thread? 362 */ 363 if (clone_flags & CLONE_SETTLS) { 364#ifdef CONFIG_IA32_EMULATION 365 if (test_thread_flag(TIF_IA32)) 366 err = do_set_thread_area(p, -1, 367 (struct user_desc __user *)childregs->si, 0); 368 else 369#endif 370 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 371 if (err) 372 goto out; 373 } 374 375 ds_copy_thread(p, me); 376 377 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); 378 p->thread.debugctlmsr = 0; 379 380 err = 0; 381out: 382 if (err && p->thread.io_bitmap_ptr) { 383 kfree(p->thread.io_bitmap_ptr); 384 p->thread.io_bitmap_max = 0; 385 } 386 return err; 387} 388 389void 390start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 391{ 392 loadsegment(fs, 0); 393 loadsegment(es, 0); 394 loadsegment(ds, 0); 395 load_gs_index(0); 396 regs->ip = new_ip; 397 regs->sp = new_sp; 398 write_pda(oldrsp, new_sp); 399 regs->cs = __USER_CS; 400 regs->ss = __USER_DS; 401 regs->flags = 0x200; 402 set_fs(USER_DS); 403 /* 404 * Free the old FP and other extended state 405 */ 406 free_thread_xstate(current); 407} 408EXPORT_SYMBOL_GPL(start_thread); 409 410static void hard_disable_TSC(void) 411{ 412 write_cr4(read_cr4() | X86_CR4_TSD); 413} 414 415void disable_TSC(void) 416{ 417 preempt_disable(); 418 if (!test_and_set_thread_flag(TIF_NOTSC)) 419 /* 420 * Must flip the CPU state synchronously with 421 * TIF_NOTSC in the current running context. 422 */ 423 hard_disable_TSC(); 424 preempt_enable(); 425} 426 427static void hard_enable_TSC(void) 428{ 429 write_cr4(read_cr4() & ~X86_CR4_TSD); 430} 431 432static void enable_TSC(void) 433{ 434 preempt_disable(); 435 if (test_and_clear_thread_flag(TIF_NOTSC)) 436 /* 437 * Must flip the CPU state synchronously with 438 * TIF_NOTSC in the current running context. 439 */ 440 hard_enable_TSC(); 441 preempt_enable(); 442} 443 444int get_tsc_mode(unsigned long adr) 445{ 446 unsigned int val; 447 448 if (test_thread_flag(TIF_NOTSC)) 449 val = PR_TSC_SIGSEGV; 450 else 451 val = PR_TSC_ENABLE; 452 453 return put_user(val, (unsigned int __user *)adr); 454} 455 456int set_tsc_mode(unsigned int val) 457{ 458 if (val == PR_TSC_SIGSEGV) 459 disable_TSC(); 460 else if (val == PR_TSC_ENABLE) 461 enable_TSC(); 462 else 463 return -EINVAL; 464 465 return 0; 466} 467 468/* 469 * This special macro can be used to load a debugging register 470 */ 471#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) 472 473static inline void __switch_to_xtra(struct task_struct *prev_p, 474 struct task_struct *next_p, 475 struct tss_struct *tss) 476{ 477 struct thread_struct *prev, *next; 478 479 prev = &prev_p->thread, 480 next = &next_p->thread; 481 482 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || 483 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) 484 ds_switch_to(prev_p, next_p); 485 else if (next->debugctlmsr != prev->debugctlmsr) 486 update_debugctlmsr(next->debugctlmsr); 487 488 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 489 loaddebug(next, 0); 490 loaddebug(next, 1); 491 loaddebug(next, 2); 492 loaddebug(next, 3); 493 /* no 4 and 5 */ 494 loaddebug(next, 6); 495 loaddebug(next, 7); 496 } 497 498 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 499 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 500 /* prev and next are different */ 501 if (test_tsk_thread_flag(next_p, TIF_NOTSC)) 502 hard_disable_TSC(); 503 else 504 hard_enable_TSC(); 505 } 506 507 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 508 /* 509 * Copy the relevant range of the IO bitmap. 510 * Normally this is 128 bytes or less: 511 */ 512 memcpy(tss->io_bitmap, next->io_bitmap_ptr, 513 max(prev->io_bitmap_max, next->io_bitmap_max)); 514 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { 515 /* 516 * Clear any possible leftover bits: 517 */ 518 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 519 } 520} 521 522/* 523 * switch_to(x,y) should switch tasks from x to y. 524 * 525 * This could still be optimized: 526 * - fold all the options into a flag word and test it with a single test. 527 * - could test fs/gs bitsliced 528 * 529 * Kprobes not supported here. Set the probe on schedule instead. 530 * Function graph tracer not supported too. 531 */ 532__notrace_funcgraph struct task_struct * 533__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 534{ 535 struct thread_struct *prev = &prev_p->thread; 536 struct thread_struct *next = &next_p->thread; 537 int cpu = smp_processor_id(); 538 struct tss_struct *tss = &per_cpu(init_tss, cpu); 539 unsigned fsindex, gsindex; 540 541 /* we're going to use this soon, after a few expensive things */ 542 if (next_p->fpu_counter > 5) 543 prefetch(next->xstate); 544 545 /* 546 * Reload esp0, LDT and the page table pointer: 547 */ 548 load_sp0(tss, next); 549 550 /* 551 * Switch DS and ES. 552 * This won't pick up thread selector changes, but I guess that is ok. 553 */ 554 savesegment(es, prev->es); 555 if (unlikely(next->es | prev->es)) 556 loadsegment(es, next->es); 557 558 savesegment(ds, prev->ds); 559 if (unlikely(next->ds | prev->ds)) 560 loadsegment(ds, next->ds); 561 562 563 /* We must save %fs and %gs before load_TLS() because 564 * %fs and %gs may be cleared by load_TLS(). 565 * 566 * (e.g. xen_load_tls()) 567 */ 568 savesegment(fs, fsindex); 569 savesegment(gs, gsindex); 570 571 load_TLS(next, cpu); 572 573 /* 574 * Leave lazy mode, flushing any hypercalls made here. 575 * This must be done before restoring TLS segments so 576 * the GDT and LDT are properly updated, and must be 577 * done before math_state_restore, so the TS bit is up 578 * to date. 579 */ 580 arch_leave_lazy_cpu_mode(); 581 582 /* 583 * Switch FS and GS. 584 * 585 * Segment register != 0 always requires a reload. Also 586 * reload when it has changed. When prev process used 64bit 587 * base always reload to avoid an information leak. 588 */ 589 if (unlikely(fsindex | next->fsindex | prev->fs)) { 590 loadsegment(fs, next->fsindex); 591 /* 592 * Check if the user used a selector != 0; if yes 593 * clear 64bit base, since overloaded base is always 594 * mapped to the Null selector 595 */ 596 if (fsindex) 597 prev->fs = 0; 598 } 599 /* when next process has a 64bit base use it */ 600 if (next->fs) 601 wrmsrl(MSR_FS_BASE, next->fs); 602 prev->fsindex = fsindex; 603 604 if (unlikely(gsindex | next->gsindex | prev->gs)) { 605 load_gs_index(next->gsindex); 606 if (gsindex) 607 prev->gs = 0; 608 } 609 if (next->gs) 610 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 611 prev->gsindex = gsindex; 612 613 /* Must be after DS reload */ 614 unlazy_fpu(prev_p); 615 616 /* 617 * Switch the PDA and FPU contexts. 618 */ 619 prev->usersp = read_pda(oldrsp); 620 write_pda(oldrsp, next->usersp); 621 percpu_write(current_task, next_p); 622 623 percpu_write(kernel_stack, 624 (unsigned long)task_stack_page(next_p) + 625 THREAD_SIZE - KERNEL_STACK_OFFSET); 626#ifdef CONFIG_CC_STACKPROTECTOR 627 write_pda(stack_canary, next_p->stack_canary); 628 /* 629 * Build time only check to make sure the stack_canary is at 630 * offset 40 in the pda; this is a gcc ABI requirement 631 */ 632 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); 633#endif 634 635 /* 636 * Now maybe reload the debug registers and handle I/O bitmaps 637 */ 638 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || 639 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) 640 __switch_to_xtra(prev_p, next_p, tss); 641 642 /* If the task has used fpu the last 5 timeslices, just do a full 643 * restore of the math state immediately to avoid the trap; the 644 * chances of needing FPU soon are obviously high now 645 * 646 * tsk_used_math() checks prevent calling math_state_restore(), 647 * which can sleep in the case of !tsk_used_math() 648 */ 649 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 650 math_state_restore(); 651 return prev_p; 652} 653 654/* 655 * sys_execve() executes a new program. 656 */ 657asmlinkage 658long sys_execve(char __user *name, char __user * __user *argv, 659 char __user * __user *envp, struct pt_regs *regs) 660{ 661 long error; 662 char *filename; 663 664 filename = getname(name); 665 error = PTR_ERR(filename); 666 if (IS_ERR(filename)) 667 return error; 668 error = do_execve(filename, argv, envp, regs); 669 putname(filename); 670 return error; 671} 672 673void set_personality_64bit(void) 674{ 675 /* inherit personality from parent */ 676 677 /* Make sure to be in 64bit mode */ 678 clear_thread_flag(TIF_IA32); 679 680 /* TBD: overwrites user setup. Should have two bits. 681 But 64bit processes have always behaved this way, 682 so it's not too bad. The main problem is just that 683 32bit childs are affected again. */ 684 current->personality &= ~READ_IMPLIES_EXEC; 685} 686 687asmlinkage long sys_fork(struct pt_regs *regs) 688{ 689 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); 690} 691 692asmlinkage long 693sys_clone(unsigned long clone_flags, unsigned long newsp, 694 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 695{ 696 if (!newsp) 697 newsp = regs->sp; 698 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); 699} 700 701/* 702 * This is trivial, and on the face of it looks like it 703 * could equally well be done in user mode. 704 * 705 * Not so, for quite unobvious reasons - register pressure. 706 * In user mode vfork() cannot have a stack frame, and if 707 * done by calling the "clone()" system call directly, you 708 * do not have enough call-clobbered registers to hold all 709 * the information you need. 710 */ 711asmlinkage long sys_vfork(struct pt_regs *regs) 712{ 713 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, 714 NULL, NULL); 715} 716 717unsigned long get_wchan(struct task_struct *p) 718{ 719 unsigned long stack; 720 u64 fp, ip; 721 int count = 0; 722 723 if (!p || p == current || p->state == TASK_RUNNING) 724 return 0; 725 stack = (unsigned long)task_stack_page(p); 726 if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) 727 return 0; 728 fp = *(u64 *)(p->thread.sp); 729 do { 730 if (fp < (unsigned long)stack || 731 fp >= (unsigned long)stack+THREAD_SIZE) 732 return 0; 733 ip = *(u64 *)(fp+8); 734 if (!in_sched_functions(ip)) 735 return ip; 736 fp = *(u64 *)fp; 737 } while (count++ < 16); 738 return 0; 739} 740 741long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) 742{ 743 int ret = 0; 744 int doit = task == current; 745 int cpu; 746 747 switch (code) { 748 case ARCH_SET_GS: 749 if (addr >= TASK_SIZE_OF(task)) 750 return -EPERM; 751 cpu = get_cpu(); 752 /* handle small bases via the GDT because that's faster to 753 switch. */ 754 if (addr <= 0xffffffff) { 755 set_32bit_tls(task, GS_TLS, addr); 756 if (doit) { 757 load_TLS(&task->thread, cpu); 758 load_gs_index(GS_TLS_SEL); 759 } 760 task->thread.gsindex = GS_TLS_SEL; 761 task->thread.gs = 0; 762 } else { 763 task->thread.gsindex = 0; 764 task->thread.gs = addr; 765 if (doit) { 766 load_gs_index(0); 767 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 768 } 769 } 770 put_cpu(); 771 break; 772 case ARCH_SET_FS: 773 /* Not strictly needed for fs, but do it for symmetry 774 with gs */ 775 if (addr >= TASK_SIZE_OF(task)) 776 return -EPERM; 777 cpu = get_cpu(); 778 /* handle small bases via the GDT because that's faster to 779 switch. */ 780 if (addr <= 0xffffffff) { 781 set_32bit_tls(task, FS_TLS, addr); 782 if (doit) { 783 load_TLS(&task->thread, cpu); 784 loadsegment(fs, FS_TLS_SEL); 785 } 786 task->thread.fsindex = FS_TLS_SEL; 787 task->thread.fs = 0; 788 } else { 789 task->thread.fsindex = 0; 790 task->thread.fs = addr; 791 if (doit) { 792 /* set the selector to 0 to not confuse 793 __switch_to */ 794 loadsegment(fs, 0); 795 ret = checking_wrmsrl(MSR_FS_BASE, addr); 796 } 797 } 798 put_cpu(); 799 break; 800 case ARCH_GET_FS: { 801 unsigned long base; 802 if (task->thread.fsindex == FS_TLS_SEL) 803 base = read_32bit_tls(task, FS_TLS); 804 else if (doit) 805 rdmsrl(MSR_FS_BASE, base); 806 else 807 base = task->thread.fs; 808 ret = put_user(base, (unsigned long __user *)addr); 809 break; 810 } 811 case ARCH_GET_GS: { 812 unsigned long base; 813 unsigned gsindex; 814 if (task->thread.gsindex == GS_TLS_SEL) 815 base = read_32bit_tls(task, GS_TLS); 816 else if (doit) { 817 savesegment(gs, gsindex); 818 if (gsindex) 819 rdmsrl(MSR_KERNEL_GS_BASE, base); 820 else 821 base = task->thread.gs; 822 } else 823 base = task->thread.gs; 824 ret = put_user(base, (unsigned long __user *)addr); 825 break; 826 } 827 828 default: 829 ret = -EINVAL; 830 break; 831 } 832 833 return ret; 834} 835 836long sys_arch_prctl(int code, unsigned long addr) 837{ 838 return do_arch_prctl(current, code, addr); 839} 840 841unsigned long arch_align_stack(unsigned long sp) 842{ 843 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 844 sp -= get_random_int() % 8192; 845 return sp & ~0xf; 846} 847 848unsigned long arch_randomize_brk(struct mm_struct *mm) 849{ 850 unsigned long range_end = mm->brk + 0x02000000; 851 return randomize_range(mm->brk, range_end, 0) ? : mm->brk; 852} 853