process_64.c revision 5b0e508415a83989fe704b4718a1a214bc333ca7
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

/* Assembly entry that newly forked tasks return through (defined elsewhere) */
asmlinkage extern void ret_from_fork(void);

/* Default clone() flags used when creating kernel threads */
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

/* Set non-zero when the "idle=" boot parameter overrides the default idle
 * routine — see idle_setup() below. */
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

/* Callbacks run when a CPU enters or leaves the idle loop */
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

/* Register a callback for the IDLE_START/IDLE_END events */
void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

/* Mark this CPU idle in the PDA and fire the IDLE_START notifiers */
void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

/*
 * Clear the per-CPU idle flag and fire the IDLE_END notifiers.
 * test_and_clear makes this idempotent: only the first caller after
 * enter_idle() actually runs the chain.
 */
static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	local_irq_disable();
	if (!need_resched()) {
		ktime_t t0, t1;
		u64 t0n, t1n;

		/* time the halt so the scheduler can account the idle sleep */
		t0 = ktime_get();
		t0n = ktime_to_ns(t0);
		safe_halt();	/* enables interrupts racelessly */
		local_irq_disable();
		t1 = ktime_get();
		t1n = ktime_to_ns(t1);
		sched_clock_idle_wakeup_event(t1n - t0n);
	}
	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();	/* flush caches before this CPU goes away */
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	/* spin in halt forever; the CPU is logically dead from here on */
	while (1)
		halt();
}
#else
/* Without CPU hotplug, an offline CPU reaching the idle loop is a bug */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
		while (!need_resched()) {
			void (*idle)(void);

			/* re-read pm_idle each pass; see cpu_idle_wait() */
			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Empty IPI target used by cpu_idle_wait() just to kick CPUs out of idle */
static void do_nothing(void *unused)
{
}

/*
 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
 * pm_idle and update to new pm_idle value. Required while changing pm_idle
 * handler on SMP systems.
 *
 * Caller must have changed pm_idle to the new value before the call.
 * Old pm_idle value will not be used by any CPU after the return of this
 * function.
 */
void cpu_idle_wait(void)
{
	smp_mb();
	/* kick all the CPUs so that they exit out of pm_idle */
	smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(ax, cx);
	}
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}


/* Is MWAIT usable for the C1 idle state on this CPU? */
static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
	if (force_mwait)
		return 1;
	/* Any C1 states supported? */
	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

/* Choose the pm_idle implementation once, based on CPU features */
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int selected;

	if (selected)
		return;
#ifdef CONFIG_X86_SMP
	if (pm_idle == poll_idle && smp_num_siblings > 1) {
		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
			" performance may degrade.\n");
	}
#endif
	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => All CPUs supports mwait
		 */
		if (!pm_idle) {
			printk(KERN_INFO "using mwait in idle threads.\n");
			pm_idle = mwait_idle;
		}
	}
	selected = 1;
}

/* "idle=" boot parameter: "poll" or "mwait"; anything else is rejected */
static int __init idle_setup(char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
		regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
		regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
		regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
		regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
		regs->r13, regs->r14, regs->r15);

	/* segment selectors cannot be read from pt_regs; read them live */
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs,fsindex,gs,gsindex,shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

/* Reset per-thread state on exec; also resolves a pending 32/64 ABI flip */
void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

/* Sanity check on exit: a dying process must not still own an LDT */
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

/* Install a 32-bit TLS descriptor (4GB limit, usable) in slot 'tls' */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

/* Read back the base address stored in TLS slot 'tls' */
static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

/*
 * Set up the kernel stack, registers and thread state of a newly forked
 * child 'p'. Child returns 0 (ax cleared); kernel threads (sp == ~0UL)
 * run on the new kernel stack itself.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	/* pt_regs live at the top of the child's kernel stack */
	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	/* snapshot the parent's live segment selectors for the child */
	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	/* undo the io bitmap copy on failure */
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/* Set up user-mode register state for a freshly exec'd 64-bit process */
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;	/* IF set: interrupts enabled in user mode */
	set_fs(USER_DS);
}
EXPORT_SYMBOL_GPL(start_thread);

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

/*
 * Slow path of the context switch: debugctl MSR / DS area, hardware
 * debug registers and the I/O permission bitmap in the TSS. Only called
 * when one of the relevant TIF flags is set on prev or next.
 */
static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	/* NOTE(review): comma operator here — works, but ';' was surely intended */
	prev = &prev_p->thread,
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter>5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter>5)
		math_state_restore();
	return prev_p;
}
719 720/* 721 * sys_execve() executes a new program. 722 */ 723asmlinkage 724long sys_execve(char __user *name, char __user * __user *argv, 725 char __user * __user *envp, struct pt_regs *regs) 726{ 727 long error; 728 char * filename; 729 730 filename = getname(name); 731 error = PTR_ERR(filename); 732 if (IS_ERR(filename)) 733 return error; 734 error = do_execve(filename, argv, envp, regs); 735 putname(filename); 736 return error; 737} 738 739void set_personality_64bit(void) 740{ 741 /* inherit personality from parent */ 742 743 /* Make sure to be in 64bit mode */ 744 clear_thread_flag(TIF_IA32); 745 746 /* TBD: overwrites user setup. Should have two bits. 747 But 64bit processes have always behaved this way, 748 so it's not too bad. The main problem is just that 749 32bit childs are affected again. */ 750 current->personality &= ~READ_IMPLIES_EXEC; 751} 752 753asmlinkage long sys_fork(struct pt_regs *regs) 754{ 755 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); 756} 757 758asmlinkage long 759sys_clone(unsigned long clone_flags, unsigned long newsp, 760 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 761{ 762 if (!newsp) 763 newsp = regs->sp; 764 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); 765} 766 767/* 768 * This is trivial, and on the face of it looks like it 769 * could equally well be done in user mode. 770 * 771 * Not so, for quite unobvious reasons - register pressure. 772 * In user mode vfork() cannot have a stack frame, and if 773 * done by calling the "clone()" system call directly, you 774 * do not have enough call-clobbered registers to hold all 775 * the information you need. 
776 */ 777asmlinkage long sys_vfork(struct pt_regs *regs) 778{ 779 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, 780 NULL, NULL); 781} 782 783unsigned long get_wchan(struct task_struct *p) 784{ 785 unsigned long stack; 786 u64 fp,ip; 787 int count = 0; 788 789 if (!p || p == current || p->state==TASK_RUNNING) 790 return 0; 791 stack = (unsigned long)task_stack_page(p); 792 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) 793 return 0; 794 fp = *(u64 *)(p->thread.sp); 795 do { 796 if (fp < (unsigned long)stack || 797 fp > (unsigned long)stack+THREAD_SIZE) 798 return 0; 799 ip = *(u64 *)(fp+8); 800 if (!in_sched_functions(ip)) 801 return ip; 802 fp = *(u64 *)fp; 803 } while (count++ < 16); 804 return 0; 805} 806 807long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) 808{ 809 int ret = 0; 810 int doit = task == current; 811 int cpu; 812 813 switch (code) { 814 case ARCH_SET_GS: 815 if (addr >= TASK_SIZE_OF(task)) 816 return -EPERM; 817 cpu = get_cpu(); 818 /* handle small bases via the GDT because that's faster to 819 switch. */ 820 if (addr <= 0xffffffff) { 821 set_32bit_tls(task, GS_TLS, addr); 822 if (doit) { 823 load_TLS(&task->thread, cpu); 824 load_gs_index(GS_TLS_SEL); 825 } 826 task->thread.gsindex = GS_TLS_SEL; 827 task->thread.gs = 0; 828 } else { 829 task->thread.gsindex = 0; 830 task->thread.gs = addr; 831 if (doit) { 832 load_gs_index(0); 833 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 834 } 835 } 836 put_cpu(); 837 break; 838 case ARCH_SET_FS: 839 /* Not strictly needed for fs, but do it for symmetry 840 with gs */ 841 if (addr >= TASK_SIZE_OF(task)) 842 return -EPERM; 843 cpu = get_cpu(); 844 /* handle small bases via the GDT because that's faster to 845 switch. 
*/ 846 if (addr <= 0xffffffff) { 847 set_32bit_tls(task, FS_TLS, addr); 848 if (doit) { 849 load_TLS(&task->thread, cpu); 850 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); 851 } 852 task->thread.fsindex = FS_TLS_SEL; 853 task->thread.fs = 0; 854 } else { 855 task->thread.fsindex = 0; 856 task->thread.fs = addr; 857 if (doit) { 858 /* set the selector to 0 to not confuse 859 __switch_to */ 860 asm volatile("movl %0,%%fs" :: "r" (0)); 861 ret = checking_wrmsrl(MSR_FS_BASE, addr); 862 } 863 } 864 put_cpu(); 865 break; 866 case ARCH_GET_FS: { 867 unsigned long base; 868 if (task->thread.fsindex == FS_TLS_SEL) 869 base = read_32bit_tls(task, FS_TLS); 870 else if (doit) 871 rdmsrl(MSR_FS_BASE, base); 872 else 873 base = task->thread.fs; 874 ret = put_user(base, (unsigned long __user *)addr); 875 break; 876 } 877 case ARCH_GET_GS: { 878 unsigned long base; 879 unsigned gsindex; 880 if (task->thread.gsindex == GS_TLS_SEL) 881 base = read_32bit_tls(task, GS_TLS); 882 else if (doit) { 883 asm("movl %%gs,%0" : "=r" (gsindex)); 884 if (gsindex) 885 rdmsrl(MSR_KERNEL_GS_BASE, base); 886 else 887 base = task->thread.gs; 888 } 889 else 890 base = task->thread.gs; 891 ret = put_user(base, (unsigned long __user *)addr); 892 break; 893 } 894 895 default: 896 ret = -EINVAL; 897 break; 898 } 899 900 return ret; 901} 902 903long sys_arch_prctl(int code, unsigned long addr) 904{ 905 return do_arch_prctl(current, code, addr); 906} 907 908unsigned long arch_align_stack(unsigned long sp) 909{ 910 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 911 sp -= get_random_int() % 8192; 912 return sp & ~0xf; 913} 914 915unsigned long arch_randomize_brk(struct mm_struct *mm) 916{ 917 unsigned long range_end = mm->brk + 0x02000000; 918 return randomize_range(mm->brk, range_end, 0) ? : mm->brk; 919} 920