process.c revision 42a359e31a0e438b5b978a8f0fecdbd3c86bb033
1/* 2 * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) 3 * Licensed under the GPL 4 */ 5 6#include <stdlib.h> 7#include <string.h> 8#include <unistd.h> 9#include <errno.h> 10#include <signal.h> 11#include <sched.h> 12#include "ptrace_user.h" 13#include <sys/wait.h> 14#include <sys/mman.h> 15#include <sys/user.h> 16#include <sys/time.h> 17#include <sys/syscall.h> 18#include <asm/types.h> 19#include "user.h" 20#include "sysdep/ptrace.h" 21#include "kern_util.h" 22#include "skas.h" 23#include "stub-data.h" 24#include "mm_id.h" 25#include "sysdep/sigcontext.h" 26#include "sysdep/stub.h" 27#include "os.h" 28#include "proc_mm.h" 29#include "skas_ptrace.h" 30#include "chan_user.h" 31#include "registers.h" 32#include "mem.h" 33#include "uml-config.h" 34#include "process.h" 35#include "longjmp.h" 36#include "kern_constants.h" 37#include "as-layout.h" 38 39int is_skas_winch(int pid, int fd, void *data) 40{ 41 if(pid != os_getpgrp()) 42 return(0); 43 44 register_winch_irq(-1, fd, -1, data, 0); 45 return(1); 46} 47 48static int ptrace_dump_regs(int pid) 49{ 50 unsigned long regs[MAX_REG_NR]; 51 int i; 52 53 if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) 54 return -errno; 55 else { 56 printk("Stub registers -\n"); 57 for(i = 0; i < ARRAY_SIZE(regs); i++) 58 printk("\t%d - %lx\n", i, regs[i]); 59 } 60 61 return 0; 62} 63 64/* 65 * Signals that are OK to receive in the stub - we'll just continue it. 66 * SIGWINCH will happen when UML is inside a detached screen. 67 */ 68#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) 69 70/* Signals that the stub will finish with - anything else is an error */ 71#define STUB_DONE_MASK ((1 << SIGUSR1) | (1 << SIGTRAP)) 72 73void wait_stub_done(int pid) 74{ 75 int n, status, err; 76 77 while(1){ 78 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); 79 if((n < 0) || !WIFSTOPPED(status)) 80 goto bad_wait; 81 82 if(((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) 83 break; 84 85 err = ptrace(PTRACE_CONT, pid, 0, 0); 86 if(err) 87 panic("wait_stub_done : continue failed, errno = %d\n", 88 errno); 89 } 90 91 if(((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) 92 return; 93 94bad_wait: 95 err = ptrace_dump_regs(pid); 96 if(err) 97 printk("Failed to get registers from stub, errno = %d\n", -err); 98 panic("wait_stub_done : failed to wait for SIGUSR1/SIGTRAP, pid = %d, " 99 "n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); 100} 101 102extern unsigned long current_stub_stack(void); 103 104void get_skas_faultinfo(int pid, struct faultinfo * fi) 105{ 106 int err; 107 108 if(ptrace_faultinfo){ 109 err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); 110 if(err) 111 panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " 112 "errno = %d\n", errno); 113 114 /* Special handling for i386, which has different structs */ 115 if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) 116 memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, 117 sizeof(struct faultinfo) - 118 sizeof(struct ptrace_faultinfo)); 119 } 120 else { 121 err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); 122 if(err) 123 panic("Failed to continue stub, pid = %d, errno = %d\n", 124 pid, errno); 125 wait_stub_done(pid); 126 127 /* faultinfo is prepared by the stub-segv-handler at start of 128 * the stub stack page. We just have to copy it. 129 */ 130 memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); 131 } 132} 133 134static void handle_segv(int pid, union uml_pt_regs * regs) 135{ 136 get_skas_faultinfo(pid, ®s->skas.faultinfo); 137 segv(regs->skas.faultinfo, 0, 1, NULL); 138} 139 140/*To use the same value of using_sysemu as the caller, ask it that value (in local_using_sysemu)*/ 141static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu) 142{ 143 int err, status; 144 145 /* Mark this as a syscall */ 146 UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->skas.regs); 147 148 if (!local_using_sysemu) 149 { 150 err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, 151 __NR_getpid); 152 if(err < 0) 153 panic("handle_trap - nullifying syscall failed errno = %d\n", 154 errno); 155 156 err = ptrace(PTRACE_SYSCALL, pid, 0, 0); 157 if(err < 0) 158 panic("handle_trap - continuing to end of syscall failed, " 159 "errno = %d\n", errno); 160 161 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); 162 if((err < 0) || !WIFSTOPPED(status) || 163 (WSTOPSIG(status) != SIGTRAP + 0x80)){ 164 err = ptrace_dump_regs(pid); 165 if(err) 166 printk("Failed to get registers from process, " 167 "errno = %d\n", -err); 168 panic("handle_trap - failed to wait at end of syscall, " 169 "errno = %d, status = %d\n", errno, status); 170 } 171 } 172 173 handle_syscall(regs); 174} 175 176extern int __syscall_stub_start; 177 178static int userspace_tramp(void *stack) 179{ 180 void *addr; 181 int err; 182 183 ptrace(PTRACE_TRACEME, 0, 0, 0); 184 185 init_new_thread_signals(); 186 err = set_interval(1); 187 if(err) 188 panic("userspace_tramp - setting timer failed, errno = %d\n", 189 err); 190 191 if(!proc_mm){ 192 /* This has a pte, but it can't be mapped in with the usual 193 * tlb_flush mechanism because this is part of that mechanism 194 */ 195 int fd; 196 __u64 offset; 197 fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); 198 addr = mmap64((void *) UML_CONFIG_STUB_CODE, UM_KERN_PAGE_SIZE, 199 PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); 200 if(addr == MAP_FAILED){ 201 printk("mapping mmap stub failed, errno = %d\n", 202 errno); 203 exit(1); 204 } 205 206 if(stack != NULL){ 207 fd = phys_mapping(to_phys(stack), &offset); 208 addr = mmap((void *) UML_CONFIG_STUB_DATA, 209 UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, 210 MAP_FIXED | MAP_SHARED, fd, offset); 211 if(addr == MAP_FAILED){ 212 printk("mapping segfault stack failed, " 213 "errno = %d\n", errno); 214 exit(1); 215 } 216 } 217 } 218 if(!ptrace_faultinfo && (stack != NULL)){ 219 struct sigaction sa; 220 221 unsigned long v = UML_CONFIG_STUB_CODE + 222 (unsigned long) stub_segv_handler - 223 (unsigned long) &__syscall_stub_start; 224 225 set_sigstack((void *) UML_CONFIG_STUB_DATA, UM_KERN_PAGE_SIZE); 226 sigemptyset(&sa.sa_mask); 227 sigaddset(&sa.sa_mask, SIGIO); 228 sigaddset(&sa.sa_mask, SIGWINCH); 229 sigaddset(&sa.sa_mask, SIGALRM); 230 sigaddset(&sa.sa_mask, SIGVTALRM); 231 sigaddset(&sa.sa_mask, SIGUSR1); 232 sa.sa_flags = SA_ONSTACK; 233 sa.sa_handler = (void *) v; 234 sa.sa_restorer = NULL; 235 if(sigaction(SIGSEGV, &sa, NULL) < 0) 236 panic("userspace_tramp - setting SIGSEGV handler " 237 "failed - errno = %d\n", errno); 238 } 239 240 os_stop_process(os_getpid()); 241 return(0); 242} 243 244/* Each element set once, and only accessed by a single processor anyway */ 245#undef NR_CPUS 246#define NR_CPUS 1 247int userspace_pid[NR_CPUS]; 248 249int start_userspace(unsigned long stub_stack) 250{ 251 void *stack; 252 unsigned long sp; 253 int pid, status, n, flags; 254 255 stack = mmap(NULL, UM_KERN_PAGE_SIZE, 256 PROT_READ | PROT_WRITE | PROT_EXEC, 257 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 258 if(stack == MAP_FAILED) 259 panic("start_userspace : mmap failed, errno = %d", errno); 260 sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); 261 262 flags = CLONE_FILES | SIGCHLD; 263 if(proc_mm) flags |= CLONE_VM; 264 pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); 265 if(pid < 0) 266 panic("start_userspace : clone failed, errno = %d", errno); 267 268 do { 269 CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); 270 if(n < 0) 271 panic("start_userspace : wait failed, errno = %d", 272 errno); 273 } while(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); 274 275 if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) 276 panic("start_userspace : expected SIGSTOP, got status = %d", 277 status); 278 279 if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, (void *)PTRACE_O_TRACESYSGOOD) < 0) 280 panic("start_userspace : PTRACE_OLDSETOPTIONS failed, errno=%d\n", 281 errno); 282 283 if(munmap(stack, UM_KERN_PAGE_SIZE) < 0) 284 panic("start_userspace : munmap failed, errno = %d\n", errno); 285 286 return(pid); 287} 288 289void userspace(union uml_pt_regs *regs) 290{ 291 int err, status, op, pid = userspace_pid[0]; 292 /* To prevent races if using_sysemu changes under us.*/ 293 int local_using_sysemu; 294 295 while(1){ 296 restore_registers(pid, regs); 297 298 /* Now we set local_using_sysemu to be used for one loop */ 299 local_using_sysemu = get_using_sysemu(); 300 301 op = SELECT_PTRACE_OPERATION(local_using_sysemu, 302 singlestepping(NULL)); 303 304 err = ptrace(op, pid, 0, 0); 305 if(err) 306 panic("userspace - could not resume userspace process, " 307 "pid=%d, ptrace operation = %d, errno = %d\n", 308 pid, op, errno); 309 310 CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED)); 311 if(err < 0) 312 panic("userspace - waitpid failed, errno = %d\n", 313 errno); 314 315 regs->skas.is_user = 1; 316 save_registers(pid, regs); 317 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ 318 319 if(WIFSTOPPED(status)){ 320 int sig = WSTOPSIG(status); 321 switch(sig){ 322 case SIGSEGV: 323 if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo){ 324 get_skas_faultinfo(pid, ®s->skas.faultinfo); 325 (*sig_info[SIGSEGV])(SIGSEGV, regs); 326 } 327 else handle_segv(pid, regs); 328 break; 329 case SIGTRAP + 0x80: 330 handle_trap(pid, regs, local_using_sysemu); 331 break; 332 case SIGTRAP: 333 relay_signal(SIGTRAP, regs); 334 break; 335 case SIGIO: 336 case SIGVTALRM: 337 case SIGILL: 338 case SIGBUS: 339 case SIGFPE: 340 case SIGWINCH: 341 block_signals(); 342 (*sig_info[sig])(sig, regs); 343 unblock_signals(); 344 break; 345 default: 346 printk("userspace - child stopped with signal " 347 "%d\n", sig); 348 } 349 pid = userspace_pid[0]; 350 interrupt_end(); 351 352 /* Avoid -ERESTARTSYS handling in host */ 353 if(PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) 354 PT_SYSCALL_NR(regs->skas.regs) = -1; 355 } 356 } 357} 358 359static unsigned long thread_regs[MAX_REG_NR]; 360static unsigned long thread_fp_regs[HOST_FP_SIZE]; 361 362static int __init init_thread_regs(void) 363{ 364 get_safe_registers(thread_regs, thread_fp_regs); 365 /* Set parent's instruction pointer to start of clone-stub */ 366 thread_regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + 367 (unsigned long) stub_clone_handler - 368 (unsigned long) &__syscall_stub_start; 369 thread_regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + UM_KERN_PAGE_SIZE - 370 sizeof(void *); 371#ifdef __SIGNAL_FRAMESIZE 372 thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; 373#endif 374 return 0; 375} 376 377__initcall(init_thread_regs); 378 379int copy_context_skas0(unsigned long new_stack, int pid) 380{ 381 int err; 382 unsigned long current_stack = current_stub_stack(); 383 struct stub_data *data = (struct stub_data *) current_stack; 384 struct stub_data *child_data = (struct stub_data *) new_stack; 385 __u64 new_offset; 386 int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); 387 388 /* prepare offset and fd of child's stack as argument for parent's 389 * and child's mmap2 calls 390 */ 391 *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), 392 .fd = new_fd, 393 .timer = ((struct itimerval) 394 { { 0, 1000000 / hz() }, 395 { 0, 1000000 / hz() }})}); 396 err = ptrace_setregs(pid, thread_regs); 397 if(err < 0) 398 panic("copy_context_skas0 : PTRACE_SETREGS failed, " 399 "pid = %d, errno = %d\n", pid, -err); 400 401 err = ptrace_setfpregs(pid, thread_fp_regs); 402 if(err < 0) 403 panic("copy_context_skas0 : PTRACE_SETFPREGS failed, " 404 "pid = %d, errno = %d\n", pid, -err); 405 406 /* set a well known return code for detection of child write failure */ 407 child_data->err = 12345678; 408 409 /* Wait, until parent has finished its work: read child's pid from 410 * parent's stack, and check, if bad result. 411 */ 412 err = ptrace(PTRACE_CONT, pid, 0, 0); 413 if(err) 414 panic("Failed to continue new process, pid = %d, " 415 "errno = %d\n", pid, errno); 416 wait_stub_done(pid); 417 418 pid = data->err; 419 if(pid < 0) 420 panic("copy_context_skas0 - stub-parent reports error %d\n", 421 -pid); 422 423 /* Wait, until child has finished too: read child's result from 424 * child's stack and check it. 425 */ 426 wait_stub_done(pid); 427 if (child_data->err != UML_CONFIG_STUB_DATA) 428 panic("copy_context_skas0 - stub-child reports error %ld\n", 429 child_data->err); 430 431 if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, 432 (void *)PTRACE_O_TRACESYSGOOD) < 0) 433 panic("copy_context_skas0 : PTRACE_OLDSETOPTIONS failed, " 434 "errno = %d\n", errno); 435 436 return pid; 437} 438 439/* 440 * This is used only, if stub pages are needed, while proc_mm is 441 * available. Opening /proc/mm creates a new mm_context, which lacks 442 * the stub-pages. Thus, we map them using /proc/mm-fd 443 */ 444void map_stub_pages(int fd, unsigned long code, 445 unsigned long data, unsigned long stack) 446{ 447 struct proc_mm_op mmop; 448 int n; 449 __u64 code_offset; 450 int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start), 451 &code_offset); 452 453 mmop = ((struct proc_mm_op) { .op = MM_MMAP, 454 .u = 455 { .mmap = 456 { .addr = code, 457 .len = UM_KERN_PAGE_SIZE, 458 .prot = PROT_EXEC, 459 .flags = MAP_FIXED | MAP_PRIVATE, 460 .fd = code_fd, 461 .offset = code_offset 462 } } }); 463 CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); 464 if(n != sizeof(mmop)){ 465 n = errno; 466 printk("mmap args - addr = 0x%lx, fd = %d, offset = %llx\n", 467 code, code_fd, (unsigned long long) code_offset); 468 panic("map_stub_pages : /proc/mm map for code failed, " 469 "err = %d\n", n); 470 } 471 472 if ( stack ) { 473 __u64 map_offset; 474 int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); 475 mmop = ((struct proc_mm_op) 476 { .op = MM_MMAP, 477 .u = 478 { .mmap = 479 { .addr = data, 480 .len = UM_KERN_PAGE_SIZE, 481 .prot = PROT_READ | PROT_WRITE, 482 .flags = MAP_FIXED | MAP_SHARED, 483 .fd = map_fd, 484 .offset = map_offset 485 } } }); 486 CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); 487 if(n != sizeof(mmop)) 488 panic("map_stub_pages : /proc/mm map for data failed, " 489 "err = %d\n", errno); 490 } 491} 492 493void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) 494{ 495 (*buf)[0].JB_IP = (unsigned long) handler; 496 (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - 497 sizeof(void *); 498} 499 500#define INIT_JMP_NEW_THREAD 0 501#define INIT_JMP_CALLBACK 1 502#define INIT_JMP_HALT 2 503#define INIT_JMP_REBOOT 3 504 505void switch_threads(jmp_buf *me, jmp_buf *you) 506{ 507 if(UML_SETJMP(me) == 0) 508 UML_LONGJMP(you, 1); 509} 510 511static jmp_buf initial_jmpbuf; 512 513/* XXX Make these percpu */ 514static void (*cb_proc)(void *arg); 515static void *cb_arg; 516static jmp_buf *cb_back; 517 518int start_idle_thread(void *stack, jmp_buf *switch_buf) 519{ 520 int n; 521 522 set_handler(SIGWINCH, (__sighandler_t) sig_handler, 523 SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGALRM, 524 SIGVTALRM, -1); 525 526 /* 527 * Can't use UML_SETJMP or UML_LONGJMP here because they save 528 * and restore signals, with the possible side-effect of 529 * trying to handle any signals which came when they were 530 * blocked, which can't be done on this stack. 531 * Signals must be blocked when jumping back here and restored 532 * after returning to the jumper. 533 */ 534 n = setjmp(initial_jmpbuf); 535 switch(n){ 536 case INIT_JMP_NEW_THREAD: 537 (*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler; 538 (*switch_buf)[0].JB_SP = (unsigned long) stack + 539 UM_THREAD_SIZE - sizeof(void *); 540 break; 541 case INIT_JMP_CALLBACK: 542 (*cb_proc)(cb_arg); 543 longjmp(*cb_back, 1); 544 break; 545 case INIT_JMP_HALT: 546 kmalloc_ok = 0; 547 return(0); 548 case INIT_JMP_REBOOT: 549 kmalloc_ok = 0; 550 return(1); 551 default: 552 panic("Bad sigsetjmp return in start_idle_thread - %d\n", n); 553 } 554 longjmp(*switch_buf, 1); 555} 556 557void initial_thread_cb_skas(void (*proc)(void *), void *arg) 558{ 559 jmp_buf here; 560 561 cb_proc = proc; 562 cb_arg = arg; 563 cb_back = &here; 564 565 block_signals(); 566 if(UML_SETJMP(&here) == 0) 567 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); 568 unblock_signals(); 569 570 cb_proc = NULL; 571 cb_arg = NULL; 572 cb_back = NULL; 573} 574 575void halt_skas(void) 576{ 577 block_signals(); 578 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); 579} 580 581void reboot_skas(void) 582{ 583 block_signals(); 584 UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); 585} 586 587void switch_mm_skas(struct mm_id *mm_idp) 588{ 589 int err; 590 591 /* FIXME: need cpu pid in switch_mm_skas */ 592 if(proc_mm){ 593 err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, 594 mm_idp->u.mm_fd); 595 if(err) 596 panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " 597 "errno = %d\n", errno); 598 } 599 else userspace_pid[0] = mm_idp->u.pid; 600} 601