/* process_64.c revision b10db7f0d2b589a7f88dc3026e150756cb437a28 */
1/* 2 * Copyright (C) 1995 Linus Torvalds 3 * 4 * Pentium III FXSR, SSE support 5 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * 7 * X86-64 port 8 * Andi Kleen. 9 * 10 * CPU hotplug support - ashok.raj@intel.com 11 */ 12 13/* 14 * This file handles the architecture-dependent parts of process handling.. 15 */ 16 17#include <stdarg.h> 18 19#include <linux/cpu.h> 20#include <linux/errno.h> 21#include <linux/sched.h> 22#include <linux/kernel.h> 23#include <linux/mm.h> 24#include <linux/fs.h> 25#include <linux/elfcore.h> 26#include <linux/smp.h> 27#include <linux/slab.h> 28#include <linux/user.h> 29#include <linux/module.h> 30#include <linux/a.out.h> 31#include <linux/interrupt.h> 32#include <linux/delay.h> 33#include <linux/ptrace.h> 34#include <linux/utsname.h> 35#include <linux/random.h> 36#include <linux/notifier.h> 37#include <linux/kprobes.h> 38#include <linux/kdebug.h> 39#include <linux/tick.h> 40 41#include <asm/uaccess.h> 42#include <asm/pgtable.h> 43#include <asm/system.h> 44#include <asm/io.h> 45#include <asm/processor.h> 46#include <asm/i387.h> 47#include <asm/mmu_context.h> 48#include <asm/pda.h> 49#include <asm/prctl.h> 50#include <asm/desc.h> 51#include <asm/proto.h> 52#include <asm/ia32.h> 53#include <asm/idle.h> 54 55asmlinkage extern void ret_from_fork(void); 56 57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 58 59unsigned long boot_option_idle_override = 0; 60EXPORT_SYMBOL(boot_option_idle_override); 61 62/* 63 * Powermanagement idle function, if any.. 
64 */ 65void (*pm_idle)(void); 66EXPORT_SYMBOL(pm_idle); 67static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 68 69static ATOMIC_NOTIFIER_HEAD(idle_notifier); 70 71void idle_notifier_register(struct notifier_block *n) 72{ 73 atomic_notifier_chain_register(&idle_notifier, n); 74} 75EXPORT_SYMBOL_GPL(idle_notifier_register); 76 77void idle_notifier_unregister(struct notifier_block *n) 78{ 79 atomic_notifier_chain_unregister(&idle_notifier, n); 80} 81EXPORT_SYMBOL(idle_notifier_unregister); 82 83void enter_idle(void) 84{ 85 write_pda(isidle, 1); 86 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 87} 88 89static void __exit_idle(void) 90{ 91 if (test_and_clear_bit_pda(0, isidle) == 0) 92 return; 93 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 94} 95 96/* Called from interrupts to signify idle end */ 97void exit_idle(void) 98{ 99 /* idle loop has pid 0 */ 100 if (current->pid) 101 return; 102 __exit_idle(); 103} 104 105/* 106 * We use this if we don't have any better 107 * idle routine.. 108 */ 109static void default_idle(void) 110{ 111 current_thread_info()->status &= ~TS_POLLING; 112 /* 113 * TS_POLLING-cleared state must be visible before we 114 * test NEED_RESCHED: 115 */ 116 smp_mb(); 117 local_irq_disable(); 118 if (!need_resched()) { 119 /* Enables interrupts one instruction before HLT. 120 x86 special cases this so there is no race. */ 121 safe_halt(); 122 } else 123 local_irq_enable(); 124 current_thread_info()->status |= TS_POLLING; 125} 126 127/* 128 * On SMP it's slightly faster (but much more power-consuming!) 129 * to poll the ->need_resched flag instead of waiting for the 130 * cross-CPU IPI to arrive. Use this option with caution. 
131 */ 132static void poll_idle (void) 133{ 134 local_irq_enable(); 135 cpu_relax(); 136} 137 138static void do_nothing(void *unused) 139{ 140} 141 142void cpu_idle_wait(void) 143{ 144 unsigned int cpu, this_cpu = get_cpu(); 145 cpumask_t map, tmp = current->cpus_allowed; 146 147 set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); 148 put_cpu(); 149 150 cpus_clear(map); 151 for_each_online_cpu(cpu) { 152 per_cpu(cpu_idle_state, cpu) = 1; 153 cpu_set(cpu, map); 154 } 155 156 __get_cpu_var(cpu_idle_state) = 0; 157 158 wmb(); 159 do { 160 ssleep(1); 161 for_each_online_cpu(cpu) { 162 if (cpu_isset(cpu, map) && 163 !per_cpu(cpu_idle_state, cpu)) 164 cpu_clear(cpu, map); 165 } 166 cpus_and(map, map, cpu_online_map); 167 /* 168 * We waited 1 sec, if a CPU still did not call idle 169 * it may be because it is in idle and not waking up 170 * because it has nothing to do. 171 * Give all the remaining CPUS a kick. 172 */ 173 smp_call_function_mask(map, do_nothing, 0, 0); 174 } while (!cpus_empty(map)); 175 176 set_cpus_allowed(current, tmp); 177} 178EXPORT_SYMBOL_GPL(cpu_idle_wait); 179 180#ifdef CONFIG_HOTPLUG_CPU 181DECLARE_PER_CPU(int, cpu_state); 182 183#include <asm/nmi.h> 184/* We halt the CPU with physical CPU hotplug */ 185static inline void play_dead(void) 186{ 187 idle_task_exit(); 188 wbinvd(); 189 mb(); 190 /* Ack it */ 191 __get_cpu_var(cpu_state) = CPU_DEAD; 192 193 local_irq_disable(); 194 while (1) 195 halt(); 196} 197#else 198static inline void play_dead(void) 199{ 200 BUG(); 201} 202#endif /* CONFIG_HOTPLUG_CPU */ 203 204/* 205 * The idle thread. 
There's no useful work to be 206 * done, so just try to conserve power and have a 207 * low exit latency (ie sit in a loop waiting for 208 * somebody to say that they'd like to reschedule) 209 */ 210void cpu_idle(void) 211{ 212 current_thread_info()->status |= TS_POLLING; 213 /* endless idle loop with no priority at all */ 214 while (1) { 215 while (!need_resched()) { 216 void (*idle)(void); 217 218 if (__get_cpu_var(cpu_idle_state)) 219 __get_cpu_var(cpu_idle_state) = 0; 220 221 tick_nohz_stop_sched_tick(); 222 223 rmb(); 224 idle = pm_idle; 225 if (!idle) 226 idle = default_idle; 227 if (cpu_is_offline(smp_processor_id())) 228 play_dead(); 229 /* 230 * Idle routines should keep interrupts disabled 231 * from here on, until they go to idle. 232 * Otherwise, idle callbacks can misfire. 233 */ 234 local_irq_disable(); 235 enter_idle(); 236 idle(); 237 /* In many cases the interrupt that ended idle 238 has already called exit_idle. But some idle 239 loops can be woken up without interrupt. */ 240 __exit_idle(); 241 } 242 243 tick_nohz_restart_sched_tick(); 244 preempt_enable_no_resched(); 245 schedule(); 246 preempt_disable(); 247 } 248} 249 250/* 251 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, 252 * which can obviate IPI to trigger checking of need_resched. 253 * We execute MONITOR against need_resched and enter optimized wait state 254 * through MWAIT. Whenever someone changes need_resched, we would be woken 255 * up from MWAIT (without an IPI). 256 * 257 * New with Core Duo processors, MWAIT can take some hints based on CPU 258 * capability. 
259 */ 260void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) 261{ 262 if (!need_resched()) { 263 __monitor((void *)¤t_thread_info()->flags, 0, 0); 264 smp_mb(); 265 if (!need_resched()) 266 __mwait(eax, ecx); 267 } 268} 269 270/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 271static void mwait_idle(void) 272{ 273 if (!need_resched()) { 274 __monitor((void *)¤t_thread_info()->flags, 0, 0); 275 smp_mb(); 276 if (!need_resched()) 277 __sti_mwait(0, 0); 278 else 279 local_irq_enable(); 280 } else { 281 local_irq_enable(); 282 } 283} 284 285void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 286{ 287 static int printed; 288 if (cpu_has(c, X86_FEATURE_MWAIT)) { 289 /* 290 * Skip, if setup has overridden idle. 291 * One CPU supports mwait => All CPUs supports mwait 292 */ 293 if (!pm_idle) { 294 if (!printed) { 295 printk(KERN_INFO "using mwait in idle threads.\n"); 296 printed = 1; 297 } 298 pm_idle = mwait_idle; 299 } 300 } 301} 302 303static int __init idle_setup (char *str) 304{ 305 if (!strcmp(str, "poll")) { 306 printk("using polling idle threads.\n"); 307 pm_idle = poll_idle; 308 } else if (!strcmp(str, "mwait")) 309 force_mwait = 1; 310 else 311 return -1; 312 313 boot_option_idle_override = 1; 314 return 0; 315} 316early_param("idle", idle_setup); 317 318/* Prints also some state that isn't saved in the pt_regs */ 319void __show_regs(struct pt_regs * regs) 320{ 321 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; 322 unsigned long d0, d1, d2, d3, d6, d7; 323 unsigned int fsindex,gsindex; 324 unsigned int ds,cs,es; 325 326 printk("\n"); 327 print_modules(); 328 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 329 current->pid, current->comm, print_tainted(), 330 init_utsname()->release, 331 (int)strcspn(init_utsname()->version, " "), 332 init_utsname()->version); 333 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 334 printk_address(regs->rip); 335 printk("RSP: %04lx:%016lx EFLAGS: 
%08lx\n", regs->ss, regs->rsp, 336 regs->eflags); 337 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 338 regs->rax, regs->rbx, regs->rcx); 339 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", 340 regs->rdx, regs->rsi, regs->rdi); 341 printk("RBP: %016lx R08: %016lx R09: %016lx\n", 342 regs->rbp, regs->r8, regs->r9); 343 printk("R10: %016lx R11: %016lx R12: %016lx\n", 344 regs->r10, regs->r11, regs->r12); 345 printk("R13: %016lx R14: %016lx R15: %016lx\n", 346 regs->r13, regs->r14, regs->r15); 347 348 asm("movl %%ds,%0" : "=r" (ds)); 349 asm("movl %%cs,%0" : "=r" (cs)); 350 asm("movl %%es,%0" : "=r" (es)); 351 asm("movl %%fs,%0" : "=r" (fsindex)); 352 asm("movl %%gs,%0" : "=r" (gsindex)); 353 354 rdmsrl(MSR_FS_BASE, fs); 355 rdmsrl(MSR_GS_BASE, gs); 356 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 357 358 cr0 = read_cr0(); 359 cr2 = read_cr2(); 360 cr3 = read_cr3(); 361 cr4 = read_cr4(); 362 363 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 364 fs,fsindex,gs,gsindex,shadowgs); 365 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 366 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); 367 368 get_debugreg(d0, 0); 369 get_debugreg(d1, 1); 370 get_debugreg(d2, 2); 371 printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); 372 get_debugreg(d3, 3); 373 get_debugreg(d6, 6); 374 get_debugreg(d7, 7); 375 printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 376} 377 378void show_regs(struct pt_regs *regs) 379{ 380 printk("CPU %d:", smp_processor_id()); 381 __show_regs(regs); 382 show_trace(NULL, regs, (void *)(regs + 1)); 383} 384 385/* 386 * Free current thread data structures etc.. 
387 */ 388void exit_thread(void) 389{ 390 struct task_struct *me = current; 391 struct thread_struct *t = &me->thread; 392 393 if (me->thread.io_bitmap_ptr) { 394 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 395 396 kfree(t->io_bitmap_ptr); 397 t->io_bitmap_ptr = NULL; 398 clear_thread_flag(TIF_IO_BITMAP); 399 /* 400 * Careful, clear this in the TSS too: 401 */ 402 memset(tss->io_bitmap, 0xff, t->io_bitmap_max); 403 t->io_bitmap_max = 0; 404 put_cpu(); 405 } 406} 407 408void flush_thread(void) 409{ 410 struct task_struct *tsk = current; 411 412 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { 413 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); 414 if (test_tsk_thread_flag(tsk, TIF_IA32)) { 415 clear_tsk_thread_flag(tsk, TIF_IA32); 416 } else { 417 set_tsk_thread_flag(tsk, TIF_IA32); 418 current_thread_info()->status |= TS_COMPAT; 419 } 420 } 421 clear_tsk_thread_flag(tsk, TIF_DEBUG); 422 423 tsk->thread.debugreg0 = 0; 424 tsk->thread.debugreg1 = 0; 425 tsk->thread.debugreg2 = 0; 426 tsk->thread.debugreg3 = 0; 427 tsk->thread.debugreg6 = 0; 428 tsk->thread.debugreg7 = 0; 429 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 430 /* 431 * Forget coprocessor state.. 432 */ 433 clear_fpu(tsk); 434 clear_used_math(); 435} 436 437void release_thread(struct task_struct *dead_task) 438{ 439 if (dead_task->mm) { 440 if (dead_task->mm->context.size) { 441 printk("WARNING: dead process %8s still has LDT? 
<%p/%d>\n", 442 dead_task->comm, 443 dead_task->mm->context.ldt, 444 dead_task->mm->context.size); 445 BUG(); 446 } 447 } 448} 449 450static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 451{ 452 struct user_desc ud = { 453 .base_addr = addr, 454 .limit = 0xfffff, 455 .seg_32bit = 1, 456 .limit_in_pages = 1, 457 .useable = 1, 458 }; 459 struct n_desc_struct *desc = (void *)t->thread.tls_array; 460 desc += tls; 461 desc->a = LDT_entry_a(&ud); 462 desc->b = LDT_entry_b(&ud); 463} 464 465static inline u32 read_32bit_tls(struct task_struct *t, int tls) 466{ 467 struct desc_struct *desc = (void *)t->thread.tls_array; 468 desc += tls; 469 return desc->base0 | 470 (((u32)desc->base1) << 16) | 471 (((u32)desc->base2) << 24); 472} 473 474/* 475 * This gets called before we allocate a new thread and copy 476 * the current task into it. 477 */ 478void prepare_to_copy(struct task_struct *tsk) 479{ 480 unlazy_fpu(tsk); 481} 482 483int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 484 unsigned long unused, 485 struct task_struct * p, struct pt_regs * regs) 486{ 487 int err; 488 struct pt_regs * childregs; 489 struct task_struct *me = current; 490 491 childregs = ((struct pt_regs *) 492 (THREAD_SIZE + task_stack_page(p))) - 1; 493 *childregs = *regs; 494 495 childregs->rax = 0; 496 childregs->rsp = rsp; 497 if (rsp == ~0UL) 498 childregs->rsp = (unsigned long)childregs; 499 500 p->thread.rsp = (unsigned long) childregs; 501 p->thread.rsp0 = (unsigned long) (childregs+1); 502 p->thread.userrsp = me->thread.userrsp; 503 504 set_tsk_thread_flag(p, TIF_FORK); 505 506 p->thread.fs = me->thread.fs; 507 p->thread.gs = me->thread.gs; 508 509 asm("mov %%gs,%0" : "=m" (p->thread.gsindex)); 510 asm("mov %%fs,%0" : "=m" (p->thread.fsindex)); 511 asm("mov %%es,%0" : "=m" (p->thread.es)); 512 asm("mov %%ds,%0" : "=m" (p->thread.ds)); 513 514 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 515 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, 
GFP_KERNEL); 516 if (!p->thread.io_bitmap_ptr) { 517 p->thread.io_bitmap_max = 0; 518 return -ENOMEM; 519 } 520 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, 521 IO_BITMAP_BYTES); 522 set_tsk_thread_flag(p, TIF_IO_BITMAP); 523 } 524 525 /* 526 * Set a new TLS for the child thread? 527 */ 528 if (clone_flags & CLONE_SETTLS) { 529#ifdef CONFIG_IA32_EMULATION 530 if (test_thread_flag(TIF_IA32)) 531 err = ia32_child_tls(p, childregs); 532 else 533#endif 534 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 535 if (err) 536 goto out; 537 } 538 err = 0; 539out: 540 if (err && p->thread.io_bitmap_ptr) { 541 kfree(p->thread.io_bitmap_ptr); 542 p->thread.io_bitmap_max = 0; 543 } 544 return err; 545} 546 547/* 548 * This special macro can be used to load a debugging register 549 */ 550#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) 551 552static inline void __switch_to_xtra(struct task_struct *prev_p, 553 struct task_struct *next_p, 554 struct tss_struct *tss) 555{ 556 struct thread_struct *prev, *next; 557 558 prev = &prev_p->thread, 559 next = &next_p->thread; 560 561 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 562 loaddebug(next, 0); 563 loaddebug(next, 1); 564 loaddebug(next, 2); 565 loaddebug(next, 3); 566 /* no 4 and 5 */ 567 loaddebug(next, 6); 568 loaddebug(next, 7); 569 } 570 571 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 572 /* 573 * Copy the relevant range of the IO bitmap. 574 * Normally this is 128 bytes or less: 575 */ 576 memcpy(tss->io_bitmap, next->io_bitmap_ptr, 577 max(prev->io_bitmap_max, next->io_bitmap_max)); 578 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { 579 /* 580 * Clear any possible leftover bits: 581 */ 582 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 583 } 584} 585 586/* 587 * switch_to(x,y) should switch tasks from x to y. 588 * 589 * This could still be optimized: 590 * - fold all the options into a flag word and test it with a single test. 
591 * - could test fs/gs bitsliced 592 * 593 * Kprobes not supported here. Set the probe on schedule instead. 594 */ 595struct task_struct * 596__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 597{ 598 struct thread_struct *prev = &prev_p->thread, 599 *next = &next_p->thread; 600 int cpu = smp_processor_id(); 601 struct tss_struct *tss = &per_cpu(init_tss, cpu); 602 603 /* we're going to use this soon, after a few expensive things */ 604 if (next_p->fpu_counter>5) 605 prefetch(&next->i387.fxsave); 606 607 /* 608 * Reload esp0, LDT and the page table pointer: 609 */ 610 tss->rsp0 = next->rsp0; 611 612 /* 613 * Switch DS and ES. 614 * This won't pick up thread selector changes, but I guess that is ok. 615 */ 616 asm volatile("mov %%es,%0" : "=m" (prev->es)); 617 if (unlikely(next->es | prev->es)) 618 loadsegment(es, next->es); 619 620 asm volatile ("mov %%ds,%0" : "=m" (prev->ds)); 621 if (unlikely(next->ds | prev->ds)) 622 loadsegment(ds, next->ds); 623 624 load_TLS(next, cpu); 625 626 /* 627 * Switch FS and GS. 628 */ 629 { 630 unsigned fsindex; 631 asm volatile("movl %%fs,%0" : "=r" (fsindex)); 632 /* segment register != 0 always requires a reload. 633 also reload when it has changed. 634 when prev process used 64bit base always reload 635 to avoid an information leak. 
*/ 636 if (unlikely(fsindex | next->fsindex | prev->fs)) { 637 loadsegment(fs, next->fsindex); 638 /* check if the user used a selector != 0 639 * if yes clear 64bit base, since overloaded base 640 * is always mapped to the Null selector 641 */ 642 if (fsindex) 643 prev->fs = 0; 644 } 645 /* when next process has a 64bit base use it */ 646 if (next->fs) 647 wrmsrl(MSR_FS_BASE, next->fs); 648 prev->fsindex = fsindex; 649 } 650 { 651 unsigned gsindex; 652 asm volatile("movl %%gs,%0" : "=r" (gsindex)); 653 if (unlikely(gsindex | next->gsindex | prev->gs)) { 654 load_gs_index(next->gsindex); 655 if (gsindex) 656 prev->gs = 0; 657 } 658 if (next->gs) 659 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 660 prev->gsindex = gsindex; 661 } 662 663 /* Must be after DS reload */ 664 unlazy_fpu(prev_p); 665 666 /* 667 * Switch the PDA and FPU contexts. 668 */ 669 prev->userrsp = read_pda(oldrsp); 670 write_pda(oldrsp, next->userrsp); 671 write_pda(pcurrent, next_p); 672 673 write_pda(kernelstack, 674 (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); 675#ifdef CONFIG_CC_STACKPROTECTOR 676 write_pda(stack_canary, next_p->stack_canary); 677 /* 678 * Build time only check to make sure the stack_canary is at 679 * offset 40 in the pda; this is a gcc ABI requirement 680 */ 681 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); 682#endif 683 684 /* 685 * Now maybe reload the debug registers and handle I/O bitmaps 686 */ 687 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) 688 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) 689 __switch_to_xtra(prev_p, next_p, tss); 690 691 /* If the task has used fpu the last 5 timeslices, just do a full 692 * restore of the math state immediately to avoid the trap; the 693 * chances of needing FPU soon are obviously high now 694 */ 695 if (next_p->fpu_counter>5) 696 math_state_restore(); 697 return prev_p; 698} 699 700/* 701 * sys_execve() executes a new program. 
702 */ 703asmlinkage 704long sys_execve(char __user *name, char __user * __user *argv, 705 char __user * __user *envp, struct pt_regs regs) 706{ 707 long error; 708 char * filename; 709 710 filename = getname(name); 711 error = PTR_ERR(filename); 712 if (IS_ERR(filename)) 713 return error; 714 error = do_execve(filename, argv, envp, ®s); 715 if (error == 0) { 716 task_lock(current); 717 current->ptrace &= ~PT_DTRACE; 718 task_unlock(current); 719 } 720 putname(filename); 721 return error; 722} 723 724void set_personality_64bit(void) 725{ 726 /* inherit personality from parent */ 727 728 /* Make sure to be in 64bit mode */ 729 clear_thread_flag(TIF_IA32); 730 731 /* TBD: overwrites user setup. Should have two bits. 732 But 64bit processes have always behaved this way, 733 so it's not too bad. The main problem is just that 734 32bit childs are affected again. */ 735 current->personality &= ~READ_IMPLIES_EXEC; 736} 737 738asmlinkage long sys_fork(struct pt_regs *regs) 739{ 740 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); 741} 742 743asmlinkage long 744sys_clone(unsigned long clone_flags, unsigned long newsp, 745 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 746{ 747 if (!newsp) 748 newsp = regs->rsp; 749 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); 750} 751 752/* 753 * This is trivial, and on the face of it looks like it 754 * could equally well be done in user mode. 755 * 756 * Not so, for quite unobvious reasons - register pressure. 757 * In user mode vfork() cannot have a stack frame, and if 758 * done by calling the "clone()" system call directly, you 759 * do not have enough call-clobbered registers to hold all 760 * the information you need. 
761 */ 762asmlinkage long sys_vfork(struct pt_regs *regs) 763{ 764 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0, 765 NULL, NULL); 766} 767 768unsigned long get_wchan(struct task_struct *p) 769{ 770 unsigned long stack; 771 u64 fp,rip; 772 int count = 0; 773 774 if (!p || p == current || p->state==TASK_RUNNING) 775 return 0; 776 stack = (unsigned long)task_stack_page(p); 777 if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) 778 return 0; 779 fp = *(u64 *)(p->thread.rsp); 780 do { 781 if (fp < (unsigned long)stack || 782 fp > (unsigned long)stack+THREAD_SIZE) 783 return 0; 784 rip = *(u64 *)(fp+8); 785 if (!in_sched_functions(rip)) 786 return rip; 787 fp = *(u64 *)fp; 788 } while (count++ < 16); 789 return 0; 790} 791 792long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) 793{ 794 int ret = 0; 795 int doit = task == current; 796 int cpu; 797 798 switch (code) { 799 case ARCH_SET_GS: 800 if (addr >= TASK_SIZE_OF(task)) 801 return -EPERM; 802 cpu = get_cpu(); 803 /* handle small bases via the GDT because that's faster to 804 switch. */ 805 if (addr <= 0xffffffff) { 806 set_32bit_tls(task, GS_TLS, addr); 807 if (doit) { 808 load_TLS(&task->thread, cpu); 809 load_gs_index(GS_TLS_SEL); 810 } 811 task->thread.gsindex = GS_TLS_SEL; 812 task->thread.gs = 0; 813 } else { 814 task->thread.gsindex = 0; 815 task->thread.gs = addr; 816 if (doit) { 817 load_gs_index(0); 818 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 819 } 820 } 821 put_cpu(); 822 break; 823 case ARCH_SET_FS: 824 /* Not strictly needed for fs, but do it for symmetry 825 with gs */ 826 if (addr >= TASK_SIZE_OF(task)) 827 return -EPERM; 828 cpu = get_cpu(); 829 /* handle small bases via the GDT because that's faster to 830 switch. 
*/ 831 if (addr <= 0xffffffff) { 832 set_32bit_tls(task, FS_TLS, addr); 833 if (doit) { 834 load_TLS(&task->thread, cpu); 835 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); 836 } 837 task->thread.fsindex = FS_TLS_SEL; 838 task->thread.fs = 0; 839 } else { 840 task->thread.fsindex = 0; 841 task->thread.fs = addr; 842 if (doit) { 843 /* set the selector to 0 to not confuse 844 __switch_to */ 845 asm volatile("movl %0,%%fs" :: "r" (0)); 846 ret = checking_wrmsrl(MSR_FS_BASE, addr); 847 } 848 } 849 put_cpu(); 850 break; 851 case ARCH_GET_FS: { 852 unsigned long base; 853 if (task->thread.fsindex == FS_TLS_SEL) 854 base = read_32bit_tls(task, FS_TLS); 855 else if (doit) 856 rdmsrl(MSR_FS_BASE, base); 857 else 858 base = task->thread.fs; 859 ret = put_user(base, (unsigned long __user *)addr); 860 break; 861 } 862 case ARCH_GET_GS: { 863 unsigned long base; 864 unsigned gsindex; 865 if (task->thread.gsindex == GS_TLS_SEL) 866 base = read_32bit_tls(task, GS_TLS); 867 else if (doit) { 868 asm("movl %%gs,%0" : "=r" (gsindex)); 869 if (gsindex) 870 rdmsrl(MSR_KERNEL_GS_BASE, base); 871 else 872 base = task->thread.gs; 873 } 874 else 875 base = task->thread.gs; 876 ret = put_user(base, (unsigned long __user *)addr); 877 break; 878 } 879 880 default: 881 ret = -EINVAL; 882 break; 883 } 884 885 return ret; 886} 887 888long sys_arch_prctl(int code, unsigned long addr) 889{ 890 return do_arch_prctl(current, code, addr); 891} 892 893/* 894 * Capture the user space registers if the task is not running (in user space) 895 */ 896int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) 897{ 898 struct pt_regs *pp, ptregs; 899 900 pp = task_pt_regs(tsk); 901 902 ptregs = *pp; 903 ptregs.cs &= 0xffff; 904 ptregs.ss &= 0xffff; 905 906 elf_core_copy_regs(regs, &ptregs); 907 908 return 1; 909} 910 911unsigned long arch_align_stack(unsigned long sp) 912{ 913 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 914 sp -= get_random_int() % 8192; 915 return sp & 
~0xf; 916} 917