1/*
2 * Copyright (c) 1995, 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#undef  _LARGEFILE64_SOURCE
27#define _LARGEFILE64_SOURCE 1
28
29#include "jni.h"
30#include "jvm.h"
31#include "jvm_md.h"
32#include "jni_util.h"
33#include "io_util.h"
34#include "JNIHelp.h"
35
/*
 * Expands to a single brace-enclosed initializer made of: the stringized
 * functionName, the given JNI signature string, and a pointer to the C
 * function named className_functionName (token-pasted).  Used to fill
 * the gMethods registration table at the end of this file.
 */
36#define NATIVE_METHOD(className, functionName, signature) \
37{ #functionName, signature, (void*)(className ## _ ## functionName) }
38
39/*
40 * Platform-specific support for java.lang.Process
41 */
42#include <assert.h>
43#include <stddef.h>
44#include <stdlib.h>
45#include <sys/types.h>
46#include <ctype.h>
47#ifdef _ALLBSD_SOURCE
48#include <wait.h>
49#else
50#include <sys/wait.h>
51#endif
52#include <signal.h>
53#include <string.h>
54#include <errno.h>
55#include <dirent.h>
56#include <unistd.h>
57#include <fcntl.h>
58#include <limits.h>
59
60#ifdef __APPLE__
61#include <crt_externs.h>
62#define environ (*_NSGetEnviron())
63#endif
64
65/*
66 * There are 3 possible strategies we might use to "fork":
67 *
68 * - fork(2).  Very portable and reliable but subject to
69 *   failure due to overcommit (see the documentation on
70 *   /proc/sys/vm/overcommit_memory in Linux proc(5)).
71 *   This is the ancient problem of spurious failure whenever a large
72 *   process starts a small subprocess.
73 *
74 * - vfork().  Using this is scary because all relevant man pages
75 *   contain dire warnings, e.g. Linux vfork(2).  But at least it's
76 *   documented in the glibc docs and is standardized by XPG4.
77 *   http://www.opengroup.org/onlinepubs/000095399/functions/vfork.html
78 *   On Linux, one might think that vfork() would be implemented using
79 *   the clone system call with flag CLONE_VFORK, but in fact vfork is
80 *   a separate system call (which is a good sign, suggesting that
81 *   vfork will continue to be supported at least on Linux).
82 *   Another good sign is that glibc implements posix_spawn using
83 *   vfork whenever possible.  Note that we cannot use posix_spawn
84 *   ourselves because there's no reliable way to close all inherited
85 *   file descriptors.
86 *
87 * - clone() with flags CLONE_VM but not CLONE_THREAD.  clone() is
88 *   Linux-specific, but this ought to work - at least the glibc
89 *   sources contain code to handle different combinations of CLONE_VM
90 *   and CLONE_THREAD.  However, when this was implemented, it
91 *   appeared to fail on 32-bit i386 (but not 64-bit x86_64) Linux with
92 *   the simple program
93 *     Runtime.getRuntime().exec("/bin/true").waitFor();
94 *   with:
95 *     #  Internal Error (os_linux_x86.cpp:683), pid=19940, tid=2934639536
96 *     #  Error: pthread_getattr_np failed with errno = 3 (ESRCH)
97 *   We believe this is a glibc bug, reported here:
98 *     http://sources.redhat.com/bugzilla/show_bug.cgi?id=10311
99 *   but the glibc maintainers closed it as WONTFIX.
100 *
101 * Based on the above analysis, we are currently using vfork() on
102 * Linux and fork() on other Unix systems, but the code to use clone()
103 * remains.
104 */
105
/*
 * Selection of the system call used to start the child.
 *
 * NOTE: START_CHILD_USE_CLONE is defined unconditionally on the next line,
 * so the "#ifndef START_CHILD_USE_CLONE" block right below it can never
 * take effect in this file (presumably it is kept so the Linux default can
 * be restored by deleting that line -- confirm before relying on it).
 */
106#define START_CHILD_USE_CLONE 0  /* clone() currently disabled; see above. */
107
108#ifndef START_CHILD_USE_CLONE
109  #ifdef __linux__
110    #define START_CHILD_USE_CLONE 1
111  #else
112    #define START_CHILD_USE_CLONE 0
113  #endif
114#endif
115
116/* By default, use vfork() on Linux. */
/* The value may be supplied from outside (e.g. by the build) because the
 * default is only applied when the macro is not already defined. */
117#ifndef START_CHILD_USE_VFORK
118  #ifdef __linux__
119    #define START_CHILD_USE_VFORK 1
120  #else
121    #define START_CHILD_USE_VFORK 0
122  #endif
123#endif
124
/* START_CHILD_SYSTEM_CALL is the name of the chosen call; it is only used
 * to build the " failed" message thrown by forkAndExec.  clone() takes
 * precedence over vfork(), which takes precedence over fork(). */
125#if START_CHILD_USE_CLONE
126#include <sched.h>
127#define START_CHILD_SYSTEM_CALL "clone"
128#elif START_CHILD_USE_VFORK
129#define START_CHILD_SYSTEM_CALL "vfork"
130#else
131#define START_CHILD_SYSTEM_CALL "fork"
132#endif
133
/* Fallbacks for headers that do not define the standard descriptor
 * numbers. */
134#ifndef STDIN_FILENO
135#define STDIN_FILENO 0
136#endif
137
138#ifndef STDOUT_FILENO
139#define STDOUT_FILENO 1
140#endif
141
142#ifndef STDERR_FILENO
143#define STDERR_FILENO 2
144#endif
145
/* A sigaction flag missing from the headers is defined as 0, so OR-ing it
 * into sa_flags (see setSIGCHLDHandler) has no effect. */
146#ifndef SA_NOCLDSTOP
147#define SA_NOCLDSTOP 0
148#endif
149
150#ifndef SA_RESTART
151#define SA_RESTART 0
152#endif
153
/* Descriptor number, in the child, of the write end of the "fail" pipe:
 * childProcess() moves fail[1] here and writes errno to it when it cannot
 * exec.  closeDescriptors() closes everything above this number. */
154#define FAIL_FILENO (STDERR_FILENO + 1)
155
156/*
 * RESTARTABLE(_cmd, _result): evaluates _cmd and stores its value in
 * _result, repeating for as long as the value is -1 and errno is EINTR.
 * Note that _cmd is re-evaluated on every retry, so it must be safe to
 * execute more than once.  The outer do { } while(0) makes the macro
 * usable as a single statement.
 *
 * TODO: Refactor.
 */
157#define RESTARTABLE(_cmd, _result) do { \
158  do { \
159    _result = _cmd; \
160  } while((_result == -1) && (errno == EINTR)); \
161} while(0)
162
163/* This is one of the rare times it's more portable to declare an
164 * external symbol explicitly, rather than via a system header.
165 * The declaration is standardized as part of UNIX98, but there is
166 * no standard (not even de-facto) header file where the
167 * declaration is to be found.  See:
168 * http://www.opengroup.org/onlinepubs/009695399/functions/environ.html
169 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_02.html
170 *
171 * "All identifiers in this volume of IEEE Std 1003.1-2001, except
172 * environ, are defined in at least one of the headers" (!)
173 */
174extern char **environ;
175
176
177static void
178setSIGCHLDHandler(JNIEnv *env)
179{
180    /* There is a subtle difference between having the signal handler
181     * for SIGCHLD be SIG_DFL and SIG_IGN.  We cannot obtain process
182     * termination information for child processes if the signal
183     * handler is SIG_IGN.  It must be SIG_DFL.
184     *
185     * We used to set the SIGCHLD handler only on Linux, but it's
186     * safest to set it unconditionally.
187     *
188     * Consider what happens if java's parent process sets the SIGCHLD
189     * handler to SIG_IGN.  Normally signal handlers are inherited by
190     * children, but SIGCHLD is a controversial case.  Solaris appears
191     * to always reset it to SIG_DFL, but this behavior may be
192     * non-standard-compliant, and we shouldn't rely on it.
193     *
194     * References:
195     * http://www.opengroup.org/onlinepubs/7908799/xsh/exec.html
196     * http://www.pasc.org/interps/unofficial/db/p1003.1/pasc-1003.1-132.html
197     */
198    struct sigaction sa;
199    sa.sa_handler = SIG_DFL;
200    sigemptyset(&sa.sa_mask);
201    sa.sa_flags = SA_NOCLDSTOP | SA_RESTART;
202    if (sigaction(SIGCHLD, &sa, NULL) < 0)
203        JNU_ThrowInternalError(env, "Can't set SIGCHLD handler");
204}
205
206static void*
207xmalloc(JNIEnv *env, size_t size)
208{
209    void *p = malloc(size);
210    if (p == NULL)
211        JNU_ThrowOutOfMemoryError(env, NULL);
212    return p;
213}
214
/*
 * NEW(type, n): allocates room for n objects of the given type through
 * xmalloc() and casts the result to type*.  It relies on a variable named
 * "env" being in scope at the point of use.  The result is NULL (with an
 * OutOfMemoryError raised by xmalloc) when the allocation fails.  The
 * multiplication (n) * sizeof(type) is not checked for overflow.
 */
215#define NEW(type, n) ((type *) xmalloc(env, (n) * sizeof(type)))
216
/**
 * Returns the search path to assume when PATH is not set.
 * The OS supplies a default in that situation, but there is no portable
 * way to query it, so the known values are hard-coded here.  The value is
 * only needed when the child has a PATH while we do not.
 */
static const char*
defaultPath(void)
{
#ifdef __solaris__
    /* These really are the Solaris defaults! */
    if (geteuid() == 0 || getuid() == 0) {
        return "/usr/xpg4/bin:/usr/ccs/bin:/usr/bin:/opt/SUNWspro/bin:/usr/sbin";
    }
    return "/usr/xpg4/bin:/usr/ccs/bin:/usr/bin:/opt/SUNWspro/bin:";
#else
    /* glibc */
    return ":/bin:/usr/bin";
#endif
}
234
/**
 * Returns the value of the PATH environment variable, or defaultPath()
 * when PATH is not set.
 */
static const char*
effectivePath(void)
{
    const char *fromEnv = getenv("PATH");

    if (fromEnv == NULL) {
        return defaultPath();
    }
    return fromEnv;
}
241
/**
 * Returns how many characters of the NUL-terminated string s are equal
 * to c.  The terminating NUL itself is never examined as a candidate.
 */
static int
countOccurrences(const char *s, char c)
{
    int hits = 0;
    const char *cursor;

    for (cursor = s; *cursor != '\0'; cursor++) {
        if (*cursor == c) {
            hits++;
        }
    }
    return hits;
}
250
251static const char * const *
252splitPath(JNIEnv *env, const char *path)
253{
254    const char *p, *q;
255    char **pathv;
256    int i;
257    int count = countOccurrences(path, ':') + 1;
258
259    pathv = NEW(char*, count+1);
260    pathv[count] = NULL;
261    for (p = path, i = 0; i < count; i++, p = q + 1) {
262        for (q = p; (*q != ':') && (*q != '\0'); q++)
263            ;
264        if (q == p)             /* empty PATH component => "." */
265            pathv[i] = "./";
266        else {
267            int addSlash = ((*(q - 1)) != '/');
268            pathv[i] = NEW(char, q - p + addSlash + 1);
269            memcpy(pathv[i], p, q - p);
270            if (addSlash)
271                pathv[i][q - p] = '/';
272            pathv[i][q - p + addSlash] = '\0';
273        }
274    }
275    return (const char * const *) pathv;
276}
277
278/**
279 * Cached value of JVM's effective PATH.
280 * (We don't support putenv("PATH=...") in native code)
281 */
282static const char *parentPath;
283
284/**
285 * Split, canonicalized version of parentPath
286 */
287static const char * const *parentPathv;
288
289static jfieldID field_exitcode;
290
291JNIEXPORT void JNICALL
292UNIXProcess_initIDs(JNIEnv *env, jclass clazz)
293{
294    field_exitcode = (*env)->GetFieldID(env, clazz, "exitcode", "I");
295
296    parentPath  = effectivePath();
297    parentPathv = splitPath(env, parentPath);
298
299    setSIGCHLDHandler(env);
300}
301
302
/*
 * Fallback decoders for the status word filled in by waitpid(), used only
 * when the system headers do not already provide the macro.  As written
 * they assume this layout: bits 0-7 describe abnormal termination (bits
 * 0-6 being the signal number) and bits 8-15 hold the exit code.
 */
/* Normal exit: the low byte is zero. */
303#ifndef WIFEXITED
304#define WIFEXITED(status) (((status)&0xFF) == 0)
305#endif
306
/* Exit code: bits 8-15. */
307#ifndef WEXITSTATUS
308#define WEXITSTATUS(status) (((status)>>8)&0xFF)
309#endif
310
/* Killed by a signal: low byte non-zero while bits 8-15 are zero. */
311#ifndef WIFSIGNALED
312#define WIFSIGNALED(status) (((status)&0xFF) > 0 && ((status)&0xFF00) == 0)
313#endif
314
/* Terminating signal number: bits 0-6. */
315#ifndef WTERMSIG
316#define WTERMSIG(status) ((status)&0x7F)
317#endif
318
319/* Block until a child process exits and return its exit code.
320   Note, can only be called once for any given pid. */
321JNIEXPORT jint JNICALL
322UNIXProcess_waitForProcessExit(JNIEnv* env,
323                                              jobject junk,
324                                              jint pid)
325{
326    /* We used to use waitid() on Solaris, waitpid() on Linux, but
327     * waitpid() is more standard, so use it on all POSIX platforms. */
328    int status;
329    /* Wait for the child process to exit.  This returns immediately if
330       the child has already exited. */
331    while (waitpid(pid, &status, 0) < 0) {
332        switch (errno) {
333        case ECHILD: return 0;
334        case EINTR: break;
335        default: return -1;
336        }
337    }
338
339    if (WIFEXITED(status)) {
340        /*
341         * The child exited normally; get its exit code.
342         */
343        return WEXITSTATUS(status);
344    } else if (WIFSIGNALED(status)) {
345        /* The child exited because of a signal.
346         * The best value to return is 0x80 + signal number,
347         * because that is what all Unix shells do, and because
348         * it allows callers to distinguish between process exit and
349         * process death by signal.
350         * Unfortunately, the historical behavior on Solaris is to return
351         * the signal number, and we preserve this for compatibility. */
352#ifdef __solaris__
353        return WTERMSIG(status);
354#else
355        return 0x80 + WTERMSIG(status);
356#endif
357    } else {
358        /*
359         * Unknown exit code; pass it through.
360         */
361        return status;
362    }
363}
364
/**
 * write(2) that is re-issued for as long as it fails with EINTR.
 * Returns whatever the last write() call returned.
 */
static ssize_t
restartableWrite(int fd, const void *buf, size_t count)
{
    ssize_t written;

    do {
        written = write(fd, buf, count);
    } while ((written == -1) && (errno == EINTR));

    return written;
}
372
/**
 * dup2(2) that is re-issued for as long as it fails with EINTR.
 * Returns whatever the last dup2() call returned.
 */
static int
restartableDup2(int fd_from, int fd_to)
{
    int rc;

    do {
        rc = dup2(fd_from, fd_to);
    } while ((rc == -1) && (errno == EINTR));

    return rc;
}
380
/**
 * Closes fd, coping with interruption by a signal.
 *
 * On Linux the descriptor is released even when close() fails with EINTR.
 * Calling close() again there is wrong: it either fails with EBADF or, in
 * a multi-threaded process, closes a descriptor that another thread has
 * just been handed under the same number.  So on Linux an interrupted
 * close is reported as success and never retried.  Other platforms keep
 * the historical retry-on-EINTR behaviour.
 *
 * @param fd  descriptor to close
 * @return 0 on success, -1 with errno set on failure
 */
static int
restartableClose(int fd)
{
    int err = close(fd);
#ifdef __linux__
    if ((err == -1) && (errno == EINTR))
        err = 0;                /* fd is already gone; do not retry */
#else
    while ((err == -1) && (errno == EINTR))
        err = close(fd);
#endif
    return err;
}
388
/**
 * Closes fd unless it is -1 (the marker used throughout this file for
 * "no descriptor").  Returns 0 for -1, otherwise the result of
 * restartableClose().
 */
static int
closeSafely(int fd)
{
    if (fd == -1) {
        return 0;
    }
    return restartableClose(fd);
}
394
/**
 * Returns 1 if c is one of the characters '0'..'9', else 0.
 * Unlike isdigit() the answer does not depend on the locale.
 */
static int
isAsciiDigit(char c)
{
    if (c < '0') {
        return 0;
    }
    if (c > '9') {
        return 0;
    }
    return 1;
}
400
/*
 * FD_DIR is the directory that closeDescriptors() scans to discover the
 * process's open descriptors.  When _ALLBSD_SOURCE is defined, /dev/fd is
 * used and the dirent64/readdir64 names used by closeDescriptors() are
 * mapped onto plain dirent/readdir; otherwise /proc/self/fd is used.
 */
401#ifdef _ALLBSD_SOURCE
402#define FD_DIR "/dev/fd"
403#define dirent64 dirent
404#define readdir64 readdir
405#else
406#define FD_DIR "/proc/self/fd"
407#endif
408
409static int
410closeDescriptors(void)
411{
412    DIR *dp;
413    struct dirent64 *dirp;
414    int from_fd = FAIL_FILENO + 1;
415
416    /* We're trying to close all file descriptors, but opendir() might
417     * itself be implemented using a file descriptor, and we certainly
418     * don't want to close that while it's in use.  We assume that if
419     * opendir() is implemented using a file descriptor, then it uses
420     * the lowest numbered file descriptor, just like open().  So we
421     * close a couple explicitly.  */
422
423    restartableClose(from_fd);          /* for possible use by opendir() */
424    restartableClose(from_fd + 1);      /* another one for good luck */
425
426    if ((dp = opendir(FD_DIR)) == NULL)
427        return 0;
428
429    /* We use readdir64 instead of readdir to work around Solaris bug
430     * 6395699: /proc/self/fd fails to report file descriptors >= 1024 on Solaris 9
431     */
432    while ((dirp = readdir64(dp)) != NULL) {
433        int fd;
434        if (isAsciiDigit(dirp->d_name[0]) &&
435            (fd = strtol(dirp->d_name, NULL, 10)) >= from_fd + 2)
436            restartableClose(fd);
437    }
438
439    closedir(dp);
440
441    return 1;
442}
443
/**
 * Makes fd_to refer to what fd_from refers to and then closes fd_from.
 * Does nothing when both numbers are already equal.
 *
 * @return 0 on success, -1 if either the dup2 or the close failed
 */
static int
moveDescriptor(int fd_from, int fd_to)
{
    if (fd_from == fd_to) {
        return 0;
    }
    if (restartableDup2(fd_from, fd_to) == -1) {
        return -1;
    }
    if (restartableClose(fd_from) == -1) {
        return -1;
    }
    return 0;
}
454
455static const char *
456getBytes(JNIEnv *env, jbyteArray arr)
457{
458    return arr == NULL ? NULL :
459        (const char*) (*env)->GetByteArrayElements(env, arr, NULL);
460}
461
462static void
463releaseBytes(JNIEnv *env, jbyteArray arr, const char* parr)
464{
465    if (parr != NULL)
466        (*env)->ReleaseByteArrayElements(env, arr, (jbyte*) parr, JNI_ABORT);
467}
468
/**
 * Fills vector with pointers to the count NUL-terminated strings stored
 * back to back in block, then terminates the vector with NULL.
 *
 * @param vector  receives count pointers plus the NULL terminator, so it
 *                needs room for count + 1 entries
 * @param block   the strings, each immediately following the NUL of the
 *                previous one
 * @param count   number of strings in block
 */
static void
initVectorFromBlock(const char**vector, const char* block, int count)
{
    const char *cursor = block;
    int slot = 0;

    while (slot < count) {
        /* cursor is at the first character of the next string */
        vector[slot++] = cursor;
        cursor += strlen(cursor) + 1;   /* step over the string and its NUL */
    }
    vector[count] = NULL;
}
481
482static void
483throwIOException(JNIEnv *env, int errnum, const char *defaultDetail)
484{
485    static const char * const format = "error=%d, %s";
486    const char *detail = defaultDetail;
487    char *errmsg;
488    jstring s;
489
490    if (errnum != 0) {
491        const char *s = strerror(errnum);
492        if (strcmp(s, "Unknown error") != 0)
493            detail = s;
494    }
495    /* ASCII Decimal representation uses 2.4 times as many bits as binary. */
496    size_t newsize = strlen(format) + strlen(detail) + 3 * sizeof(errnum);
497    errmsg = NEW(char, newsize);
498    snprintf(errmsg, newsize, format, errnum, detail);
499    s = JNU_NewStringPlatform(env, errmsg);
500    if (s != NULL) {
501        jobject x = JNU_NewObjectByName(env, "java/io/IOException",
502                                        "(Ljava/lang/String;)V", s);
503        if (x != NULL)
504            (*env)->Throw(env, x);
505    }
506    free(errmsg);
507}
508
#ifdef DEBUG_PROCESS
/**
 * printf-style debugging aid, compiled only when DEBUG_PROCESS is defined.
 * Debugging process code is difficult; where to write debug output?
 * The answer chosen here is the controlling terminal, /dev/tty, which is
 * opened and closed on every call.  When /dev/tty cannot be opened the
 * message is silently dropped.
 *
 * @param format  printf format string, followed by its arguments
 */
static void
debugPrint(char *format, ...)
{
    FILE *tty = fopen("/dev/tty", "w");
    va_list ap;

    if (tty == NULL)
        return;                 /* no controlling terminal available */
    va_start(ap, format);
    vfprintf(tty, format, ap);
    va_end(ap);
    fclose(tty);
}
#endif /* DEBUG_PROCESS */
522
/**
 * Executes FILE as a traditional Bourne shell script (one without #!) by
 * running "/bin/sh FILE args...".
 * If we could do it over again we would probably not support such an
 * ancient misfeature, but compatibility wins over sanity; the original
 * support was imported accidentally from execvp().
 *
 * argv is rewritten in place, using the one spare slot the caller must
 * have reserved beyond the terminating NULL.  If the exec fails, argv is
 * put back exactly as it was before returning.
 */
static void
execve_as_traditional_shell_script(const char *file,
                                   const char *argv[],
                                   const char *const envp[])
{
    const char *savedArgv0 = argv[0];
    size_t argCount = 0;

    while (argv[argCount] != NULL) {
        argCount++;
    }

    /* Slide argv[1..argCount] (arguments plus the NULL) up by one slot. */
    memmove(&argv[2], &argv[1], argCount * sizeof(argv[0]));
    argv[0] = "/bin/sh";
    argv[1] = file;
    execve(argv[0], (char **) argv, (char **) envp);

    /* Can't even exec /bin/sh?  Big trouble, but let's soldier on:
     * slide everything back and restore the original argv[0]. */
    memmove(&argv[1], &argv[2], argCount * sizeof(argv[0]));
    argv[0] = savedArgv0;
}
547
548/**
549 * Like execve(2), except that in case of ENOEXEC, FILE is assumed to
550 * be a shell script and the system default shell is invoked to run it.
551 */
552static void
553execve_with_shell_fallback(const char *file,
554                           const char *argv[],
555                           const char *const envp[])
556{
557#if START_CHILD_USE_CLONE || START_CHILD_USE_VFORK
558    /* shared address space; be very careful. */
559    execve(file, (char **) argv, (char **) envp);
560    if (errno == ENOEXEC)
561        execve_as_traditional_shell_script(file, argv, envp);
562#else
563    /* unshared address space; we can mutate environ. */
564    environ = (char **) envp;
565    execvp(file, (char **) argv);
566#endif
567}
568
569/**
570 * 'execvpe' should have been included in the Unix standards,
571 * and is a GNU extension in glibc 2.10.
572 *
573 * JDK_execvpe is identical to execvp, except that the child environment is
574 * specified via the 3rd argument instead of being inherited from environ.
575 */
576static void
577JDK_execvpe(const char *file,
578            const char *argv[],
579            const char *const envp[])
580{
581    if (envp == NULL || (char **) envp == environ) {
582        execvp(file, (char **) argv);
583        return;
584    }
585
586    if (*file == '\0') {
587        errno = ENOENT;
588        return;
589    }
590
591    if (strchr(file, '/') != NULL) {
592        execve_with_shell_fallback(file, argv, envp);
593    } else {
594        /* We must search PATH (parent's, not child's) */
595        char expanded_file[PATH_MAX];
596        int filelen = strlen(file);
597        int sticky_errno = 0;
598        const char * const * dirs;
599        for (dirs = parentPathv; *dirs; dirs++) {
600            const char * dir = *dirs;
601            int dirlen = strlen(dir);
602            if (filelen + dirlen + 1 >= PATH_MAX) {
603                errno = ENAMETOOLONG;
604                continue;
605            }
606            memcpy(expanded_file, dir, dirlen);
607            memcpy(expanded_file + dirlen, file, filelen);
608            expanded_file[dirlen + filelen] = '\0';
609            execve_with_shell_fallback(expanded_file, argv, envp);
610            /* There are 3 responses to various classes of errno:
611             * return immediately, continue (especially for ENOENT),
612             * or continue with "sticky" errno.
613             *
614             * From exec(3):
615             *
616             * If permission is denied for a file (the attempted
617             * execve returned EACCES), these functions will continue
618             * searching the rest of the search path.  If no other
619             * file is found, however, they will return with the
620             * global variable errno set to EACCES.
621             */
622            switch (errno) {
623            case EACCES:
624                sticky_errno = errno;
625                /* FALLTHRU */
626            case ENOENT:
627            case ENOTDIR:
628#ifdef ELOOP
629            case ELOOP:
630#endif
631#ifdef ESTALE
632            case ESTALE:
633#endif
634#ifdef ENODEV
635            case ENODEV:
636#endif
637#ifdef ETIMEDOUT
638            case ETIMEDOUT:
639#endif
640                break; /* Try other directories in PATH */
641            default:
642                return;
643            }
644        }
645        if (sticky_errno != 0)
646            errno = sticky_errno;
647    }
648}
649
/*
 * Reads nbyte bytes from file descriptor fd into buf, retrying after
 * EINTR and after short reads until everything has arrived.
 *
 * Returns the number of bytes read: normally nbyte, fewer if end of file
 * was reached first.  On a read error returns -1 with errno set.
 */
static ssize_t
readFully(int fd, void *buf, size_t nbyte)
{
    char *cursor = (char *) buf;
    ssize_t left = nbyte;

    while (1) {
        ssize_t got = read(fd, cursor, left);

        if (got < 0) {
            if (errno != EINTR) {
                return -1;
            }
            /* Strange signals like SIGJVM1 are possible at any time.
             * See http://www.dreamsongs.com/WorseIsBetter.html */
            continue;
        }
        if (got == 0) {
            /* end of file */
            return nbyte - left;
        }
        left -= got;
        if (left <= 0) {
            return nbyte;
        }
        /* Short read (unlikely, but possible): carry on after the bytes
         * already received. */
        cursor += got;
    }
}
680
/*
 * Everything the child needs in order to set itself up and exec.  It is
 * filled in by UNIXProcess_forkAndExec in the parent and read (through a
 * const pointer) by childProcess in the child.
 */
681typedef struct _ChildStuff
682{
    /* Pipe descriptor pairs as returned by pipe(): [0] is the read end,
     * [1] the write end.  Both entries are -1 when no pipe was created
     * for that stream. */
683    int in[2];
684    int out[2];
685    int err[2];
    /* Pipe over which the child reports its errno if it fails to exec. */
686    int fail[2];
    /* Descriptors supplied by the Java caller for stdin/stdout/stderr;
     * the child uses fds[i] when the corresponding pipe end is -1. */
687    int fds[3];
    /* NULL-terminated argument vector; argv[0] is the program to run. */
688    const char **argv;
    /* NULL-terminated environment vector, or NULL if none was supplied. */
689    const char **envv;
    /* Directory the child changes to before exec, or NULL to stay put. */
690    const char *pdir;
    /* When true, the child's stderr becomes a duplicate of its stdout. */
691    jboolean redirectErrorStream;
692#if START_CHILD_USE_CLONE
    /* Stack memory for clone(): allocated in startChild, freed by
     * forkAndExec. */
693    void *clone_stack;
694#endif
695} ChildStuff;
696
/**
 * Copies both descriptors of the pipe pair from into the pair to.
 */
static void
copyPipe(int from[2], int to[2])
{
    int end;

    for (end = 0; end < 2; end++) {
        to[end] = from[end];
    }
}
703
704/**
 * Child process after a successful fork() or clone().
 * This function must not return, and must be prepared for either all
 * of its address space to be shared with its parent, or to be a copy.
 * It must not modify global variables such as "environ".
 *
 * Steps, in order: close the parent's pipe ends, install the child's
 * stdin/stdout/stderr and the failure pipe at their fixed numbers, close
 * every other descriptor, change directory if requested, mark the failure
 * pipe close-on-exec, and finally exec via JDK_execvpe().
 *
 * @param arg  pointer to the ChildStuff prepared by the parent
 * @return does not return to its caller in practice: either the exec
 *         succeeds, or errno is written to FAIL_FILENO and the child
 *         calls _exit(-1).  The trailing "return 0" only suppresses a
 *         compiler warning.
 */
710static int
711childProcess(void *arg)
712{
713    const ChildStuff* p = (const ChildStuff*) arg;
714
715    /* Close the parent sides of the pipes.
716       Closing pipe fds here is redundant, since closeDescriptors()
717       would do it anyways, but a little paranoia is a good thing. */
718    if ((closeSafely(p->in[1])   == -1) ||
719        (closeSafely(p->out[0])  == -1) ||
720        (closeSafely(p->err[0])  == -1) ||
721        (closeSafely(p->fail[0]) == -1))
722        goto WhyCantJohnnyExec;
723
724    /* Give the child sides of the pipes the right fileno's. */
725    /* Note: it is possible for in[0] == 0 */
    /* When no pipe was created for a stream (its end is -1), the
     * descriptor supplied by the caller in fds[] is used instead. */
726    if ((moveDescriptor(p->in[0] != -1 ?  p->in[0] : p->fds[0],
727                        STDIN_FILENO) == -1) ||
728        (moveDescriptor(p->out[1]!= -1 ? p->out[1] : p->fds[1],
729                        STDOUT_FILENO) == -1))
730        goto WhyCantJohnnyExec;
731
732    if (p->redirectErrorStream) {
        /* stderr becomes a duplicate of stdout; the stderr pipe end, if
         * there is one, is not needed. */
733        if ((closeSafely(p->err[1]) == -1) ||
734            (restartableDup2(STDOUT_FILENO, STDERR_FILENO) == -1))
735            goto WhyCantJohnnyExec;
736    } else {
737        if (moveDescriptor(p->err[1] != -1 ? p->err[1] : p->fds[2],
738                           STDERR_FILENO) == -1)
739            goto WhyCantJohnnyExec;
740    }
741
742    if (moveDescriptor(p->fail[1], FAIL_FILENO) == -1)
743        goto WhyCantJohnnyExec;
744
745    /* close everything */
746    if (closeDescriptors() == 0) { /* failed,  close the old way */
747        int max_fd = (int)sysconf(_SC_OPEN_MAX);
748        int fd;
        /* EBADF only means that fd was not open, which is fine here. */
749        for (fd = FAIL_FILENO + 1; fd < max_fd; fd++)
750            if (restartableClose(fd) == -1 && errno != EBADF)
751                goto WhyCantJohnnyExec;
752    }
753
754    /* change to the new working directory */
755    if (p->pdir != NULL && chdir(p->pdir) < 0)
756        goto WhyCantJohnnyExec;
757
    /* Close-on-exec: a successful exec closes the failure pipe, which the
     * parent observes as EOF on its read end. */
758    if (fcntl(FAIL_FILENO, F_SETFD, FD_CLOEXEC) == -1)
759        goto WhyCantJohnnyExec;
760
    /* Returns only if the exec failed, in which case we fall through to
     * the failure report below with errno set. */
761    JDK_execvpe(p->argv[0], p->argv, p->envv);
762
763 WhyCantJohnnyExec:
764    /* We used to go to an awful lot of trouble to predict whether the
765     * child would fail, but there is no reliable way to predict the
766     * success of an operation without *trying* it, and there's no way
767     * to try a chdir or exec in the parent.  Instead, all we need is a
768     * way to communicate any failure back to the parent.  Easy; we just
769     * send the errno back to the parent over a pipe in case of failure.
770     * The tricky thing is, how do we communicate the *success* of exec?
771     * We use FD_CLOEXEC together with the fact that a read() on a pipe
772     * yields EOF when the write ends (we have two of them!) are closed.
773     */
774    {
        /* Copy errno before any further call can overwrite it. */
775        int errnum = errno;
776        restartableWrite(FAIL_FILENO, &errnum, sizeof(errnum));
777    }
778    restartableClose(FAIL_FILENO);
779    _exit(-1);
780    return 0;  /* Suppress warning "no return value from function" */
781}
782
783/**
 * Start a child process running function childProcess.
 * This function only returns in the parent.
 * We are unusually paranoid; use of clone/vfork is
 * especially likely to tickle gcc/glibc bugs.
 *
 * Which system call is used is fixed at compile time by
 * START_CHILD_USE_CLONE and START_CHILD_USE_VFORK.
 *
 * @param c  child description handed to childProcess; with clone() its
 *           clone_stack member is set here and left for the caller to free
 * @return the value returned by clone()/vfork()/fork() in the parent, or
 *         -1 if the clone stack could not be allocated
 */
789#ifdef __attribute_noinline__  /* See: sys/cdefs.h */
790__attribute_noinline__
791#endif
792static pid_t
793startChild(ChildStuff *c) {
794#if START_CHILD_USE_CLONE
795#define START_CHILD_CLONE_STACK_SIZE (64 * 1024)
796    /*
797     * See clone(2).
798     * Instead of worrying about which direction the stack grows, just
799     * allocate twice as much and start the stack in the middle.
800     */
801    if ((c->clone_stack = malloc(2 * START_CHILD_CLONE_STACK_SIZE)) == NULL)
802        /* errno will be set to ENOMEM */
803        return -1;
    /* NOTE(review): the addition below is arithmetic on a void pointer,
     * which is a compiler extension rather than standard C. */
804    return clone(childProcess,
805                 c->clone_stack + START_CHILD_CLONE_STACK_SIZE,
806                 CLONE_VFORK | CLONE_VM | SIGCHLD, c);
807#else
808  #if START_CHILD_USE_VFORK
809    /*
810     * We separate the call to vfork into a separate function to make
811     * very sure to keep stack of child from corrupting stack of parent,
812     * as suggested by the scary gcc warning:
813     *  warning: variable 'foo' might be clobbered by 'longjmp' or 'vfork'
814     */
815    volatile pid_t resultPid = vfork();
816  #else
817    /*
818     * From Solaris fork(2): In Solaris 10, a call to fork() is
819     * identical to a call to fork1(); only the calling thread is
820     * replicated in the child process. This is the POSIX-specified
821     * behavior for fork().
822     */
823    pid_t resultPid = fork();
824  #endif
    /* A result of 0 means we are the child. */
825    if (resultPid == 0)
826        childProcess(c);
827    assert(resultPid != 0);  /* childProcess never returns */
828    return resultPid;
829#endif /* ! START_CHILD_USE_CLONE */
830}
831
/**
 * Native side of UNIXProcess.forkAndExec (see gMethods): starts a child
 * process running prog and wires up its standard streams.
 *
 * @param env       JNI environment
 * @param process   the Java object; not used here
 * @param prog      bytes of the program name, used as argv[0]
 * @param argBlock  argc strings stored back to back, each NUL-terminated
 * @param argc      number of strings in argBlock
 * @param envBlock  envc strings in the same layout, or NULL, in which
 *                  case the child is given no explicit environment vector
 * @param envc      number of strings in envBlock
 * @param dir       bytes of the working directory for the child, or NULL
 * @param std_fds   three ints, read and written.  On entry, -1 in slot i
 *                  asks for a pipe to be created for stream i; any other
 *                  value is passed to the child as the descriptor to use.
 *                  On success each slot is overwritten with the parent's
 *                  end of the pipe, or -1 if no pipe was created for it.
 * @param redirectErrorStream  if true, the child's stderr duplicates its
 *                  stdout
 * @return the child's pid on success.  On failure a Java exception is
 *         pending and the value returned is whatever resultPid held at
 *         that point: -1 if the child was never started, otherwise the
 *         pid of the child.
 */
832JNIEXPORT jint JNICALL
833UNIXProcess_forkAndExec(JNIEnv *env,
834                                       jobject process,
835                                       jbyteArray prog,
836                                       jbyteArray argBlock, jint argc,
837                                       jbyteArray envBlock, jint envc,
838                                       jbyteArray dir,
839                                       jintArray std_fds,
840                                       jboolean redirectErrorStream)
841{
842    int errnum;
843    int resultPid = -1;
844    int in[2], out[2], err[2], fail[2];
845    jint *fds = NULL;
846    const char *pprog = NULL;
847    const char *pargBlock = NULL;
848    const char *penvBlock = NULL;
849    ChildStuff *c;
850
    /* -1 marks "not open", so the cleanup code can call closeSafely() on
     * every entry regardless of how far we got. */
851    in[0] = in[1] = out[0] = out[1] = err[0] = err[1] = fail[0] = fail[1] = -1;
852
853    if ((c = NEW(ChildStuff, 1)) == NULL) return -1;
854    c->argv = NULL;
855    c->envv = NULL;
856    c->pdir = NULL;
857#if START_CHILD_USE_CLONE
858    c->clone_stack = NULL;
859#endif
860
861    /* Convert prog + argBlock into a char ** argv.
862     * Add one word room for expansion of argv for use by
863     * execve_as_traditional_shell_script.
864     */
865    assert(prog != NULL && argBlock != NULL);
866    if ((pprog     = getBytes(env, prog))       == NULL) goto Catch;
867    if ((pargBlock = getBytes(env, argBlock))   == NULL) goto Catch;
    /* argc + 3 slots: argv[0], argc arguments, the terminating NULL and
     * the spare word mentioned above. */
868    if ((c->argv = NEW(const char *, argc + 3)) == NULL) goto Catch;
869    c->argv[0] = pprog;
870    initVectorFromBlock(c->argv+1, pargBlock, argc);
871
872    if (envBlock != NULL) {
873        /* Convert envBlock into a char ** envv */
874        if ((penvBlock = getBytes(env, envBlock))   == NULL) goto Catch;
875        if ((c->envv = NEW(const char *, envc + 1)) == NULL) goto Catch;
876        initVectorFromBlock(c->envv, penvBlock, envc);
877    }
878
879    if (dir != NULL) {
880        if ((c->pdir = getBytes(env, dir)) == NULL) goto Catch;
881    }
882
883    assert(std_fds != NULL);
884    fds = (*env)->GetIntArrayElements(env, std_fds, NULL);
885    if (fds == NULL) goto Catch;
886
    /* Create a pipe for each standard stream the caller left as -1, plus
     * the pipe over which the child reports an exec failure. */
887    if ((fds[0] == -1 && pipe(in)  < 0) ||
888        (fds[1] == -1 && pipe(out) < 0) ||
889        (fds[2] == -1 && pipe(err) < 0) ||
890        (pipe(fail) < 0)) {
891        throwIOException(env, errno, "Bad file descriptor");
892        goto Catch;
893    }
894    c->fds[0] = fds[0];
895    c->fds[1] = fds[1];
896    c->fds[2] = fds[2];
897
898    copyPipe(in,   c->in);
899    copyPipe(out,  c->out);
900    copyPipe(err,  c->err);
901    copyPipe(fail, c->fail);
902
903    c->redirectErrorStream = redirectErrorStream;
904
905    resultPid = startChild(c);
906    assert(resultPid != 0);
907
908    if (resultPid < 0) {
909        throwIOException(env, errno, START_CHILD_SYSTEM_CALL " failed");
910        goto Catch;
911    }
912
    /* Close our copy of the write end so that the read below sees EOF as
     * soon as the child's copy is closed by a successful exec. */
913    restartableClose(fail[1]); fail[1] = -1; /* See: WhyCantJohnnyExec */
914
    /* EOF (0 bytes) means the exec succeeded; a complete int is the errno
     * sent back by the child; anything else is treated as a read error. */
915    switch (readFully(fail[0], &errnum, sizeof(errnum))) {
916    case 0: break; /* Exec succeeded */
917    case sizeof(errnum):
        /* Collect the exit status of the failed child. */
918        waitpid(resultPid, NULL, 0);
919        throwIOException(env, errnum, "Exec failed");
920        goto Catch;
921    default:
922        throwIOException(env, errno, "Read failed");
923        goto Catch;
924    }
925
    /* Hand the parent's pipe ends back to the caller through std_fds. */
926    fds[0] = (in [1] != -1) ? in [1] : -1;
927    fds[1] = (out[0] != -1) ? out[0] : -1;
928    fds[2] = (err[0] != -1) ? err[0] : -1;
929
    /* Cleanup shared by the success and failure paths. */
930 Finally:
931#if START_CHILD_USE_CLONE
932    free(c->clone_stack);
933#endif
934
935    /* Always clean up the child's side of the pipes */
936    closeSafely(in [0]);
937    closeSafely(out[1]);
938    closeSafely(err[1]);
939
940    /* Always clean up fail descriptors */
941    closeSafely(fail[0]);
942    closeSafely(fail[1]);
943
944    releaseBytes(env, prog,     pprog);
945    releaseBytes(env, argBlock, pargBlock);
946    releaseBytes(env, envBlock, penvBlock);
947    releaseBytes(env, dir,      c->pdir);
948
949    free(c->argv);
950    free(c->envv);
951    free(c);
952
    /* Mode 0 copies the (possibly updated) elements back to std_fds. */
953    if (fds != NULL)
954        (*env)->ReleaseIntArrayElements(env, std_fds, fds, 0);
955
956    return resultPid;
957
958 Catch:
959    /* Clean up the parent's side of the pipes in case of failure only */
960    closeSafely(in [1]);
961    closeSafely(out[0]);
962    closeSafely(err[0]);
963    goto Finally;
964}
965
966JNIEXPORT void JNICALL
967UNIXProcess_destroyProcess(JNIEnv *env, jobject junk, jint pid)
968{
969    kill(pid, SIGTERM);
970}
971
/*
 * Registration table handed to jniRegisterNativeMethods(): one entry per
 * native method, each built by NATIVE_METHOD from the method name, its
 * JNI type signature and the UNIXProcess_<name> function defined above.
 */
972static JNINativeMethod gMethods[] = {
  /* (int) -> void */
973  NATIVE_METHOD(UNIXProcess, destroyProcess, "(I)V"),
  /* (byte[], byte[], int, byte[], int, byte[], int[], boolean) -> int */
974  NATIVE_METHOD(UNIXProcess, forkAndExec, "([B[BI[BI[B[IZ)I"),
  /* (int) -> int */
975  NATIVE_METHOD(UNIXProcess, waitForProcessExit, "(I)I"),
  /* () -> void */
976  NATIVE_METHOD(UNIXProcess, initIDs, "()V"),
977};
978
979void register_java_lang_UNIXProcess(JNIEnv* env) {
980  jniRegisterNativeMethods(env, "java/lang/UNIXProcess", gMethods, NELEM(gMethods));
981}
982