com_android_internal_os_Zygote.cpp revision 8f4eab2fccbc333604f1d248dac9a736db96bc6b
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <list>
24#include <sstream>
25#include <string>
26
27#include <fcntl.h>
28#include <grp.h>
29#include <inttypes.h>
30#include <malloc.h>
31#include <mntent.h>
32#include <paths.h>
33#include <signal.h>
34#include <stdlib.h>
35#include <sys/capability.h>
36#include <sys/cdefs.h>
37#include <sys/personality.h>
38#include <sys/prctl.h>
39#include <sys/resource.h>
40#include <sys/stat.h>
41#include <sys/time.h>
42#include <sys/types.h>
43#include <sys/utsname.h>
44#include <sys/wait.h>
45#include <unistd.h>
46
47#include "android-base/logging.h"
48#include <android-base/file.h>
49#include <android-base/stringprintf.h>
50#include <cutils/fs.h>
51#include <cutils/multiuser.h>
52#include <cutils/sched_policy.h>
53#include <private/android_filesystem_config.h>
54#include <utils/String8.h>
55#include <selinux/android.h>
56#include <processgroup/processgroup.h>
57
58#include "core_jni_helpers.h"
59#include <nativehelper/JNIHelp.h>
60#include <nativehelper/ScopedLocalRef.h>
61#include <nativehelper/ScopedPrimitiveArray.h>
62#include <nativehelper/ScopedUtfChars.h>
63#include "fd_utils.h"
64
65#include "nativebridge/native_bridge.h"
66
67namespace {
68
69using android::String8;
70using android::base::StringPrintf;
71using android::base::WriteStringToFile;
72
// PID of the forked system server. Written by nativeForkSystemServer() in the
// zygote and compared against reaped children in SigChldHandler(). Stays 0
// until a system server has been forked.
static pid_t gSystemServerPid = 0;

// JNI (slash-separated) name of the Java class backed by this file.
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// Global reference to that class; set in register_com_android_internal_os_Zygote().
static jclass gZygoteClass;
// Method ID of the static Zygote.callPostForkChildHooks(int, boolean, String),
// called in the child at the end of ForkAndSpecializeCommon().
static jmethodID gCallPostForkChildHooks;
78
// Selects which view of external storage MountEmulatedStorage() bind-mounts
// onto /storage for a forked process.
// Must match values in com.android.internal.os.Zygote.
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,     // Nothing is mounted onto /storage.
  MOUNT_EXTERNAL_DEFAULT = 1,  // Source is /mnt/runtime/default.
  MOUNT_EXTERNAL_READ = 2,     // Source is /mnt/runtime/read.
  MOUNT_EXTERNAL_WRITE = 3,    // Source is /mnt/runtime/write.
};
86
87static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
88  std::ostringstream oss;
89  oss << __FILE__ << ":" << line << ": " << msg;
90  env->FatalError(oss.str().c_str());
91}
92
93// This signal handler is for zygote mode, since the zygote must reap its children
94static void SigChldHandler(int /*signal_number*/) {
95  pid_t pid;
96  int status;
97
98  // It's necessary to save and restore the errno during this function.
99  // Since errno is stored per thread, changing it here modifies the errno
100  // on the thread on which this signal handler executes. If a signal occurs
101  // between a call and an errno check, it's possible to get the errno set
102  // here.
103  // See b/23572286 for extra information.
104  int saved_errno = errno;
105
106  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
107     // Log process-death status that we care about.  In general it is
108     // not safe to call LOG(...) from a signal handler because of
109     // possible reentrancy.  However, we know a priori that the
110     // current implementation of LOG() is safe to call from a SIGCHLD
111     // handler in the zygote process.  If the LOG() implementation
112     // changes its locking strategy or its use of syscalls within the
113     // lazy-init critical section, its use here may become unsafe.
114    if (WIFEXITED(status)) {
115      ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
116    } else if (WIFSIGNALED(status)) {
117      ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
118      if (WCOREDUMP(status)) {
119        ALOGI("Process %d dumped core.", pid);
120      }
121    }
122
123    // If the just-crashed process is the system_server, bring down zygote
124    // so that it is restarted by init and system server will be restarted
125    // from there.
126    if (pid == gSystemServerPid) {
127      ALOGE("Exit zygote because system server (%d) has terminated", pid);
128      kill(getpid(), SIGKILL);
129    }
130  }
131
132  // Note that we shouldn't consider ECHILD an error because
133  // the secondary zygote might have no children left to wait for.
134  if (pid < 0 && errno != ECHILD) {
135    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
136  }
137
138  errno = saved_errno;
139}
140
141// Configures the SIGCHLD handler for the zygote process. This is configured
142// very late, because earlier in the runtime we may fork() and exec()
143// other processes, and we want to waitpid() for those rather than
144// have them be harvested immediately.
145//
146// This ends up being called repeatedly before each fork(), but there's
147// no real harm in that.
148static void SetSigChldHandler() {
149  struct sigaction sa;
150  memset(&sa, 0, sizeof(sa));
151  sa.sa_handler = SigChldHandler;
152
153  int err = sigaction(SIGCHLD, &sa, NULL);
154  if (err < 0) {
155    ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
156  }
157}
158
159// Sets the SIGCHLD handler back to default behavior in zygote children.
160static void UnsetSigChldHandler() {
161  struct sigaction sa;
162  memset(&sa, 0, sizeof(sa));
163  sa.sa_handler = SIG_DFL;
164
165  int err = sigaction(SIGCHLD, &sa, NULL);
166  if (err < 0) {
167    ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
168  }
169}
170
171// Calls POSIX setgroups() using the int[] object as an argument.
172// A NULL argument is tolerated.
173static void SetGids(JNIEnv* env, jintArray javaGids) {
174  if (javaGids == NULL) {
175    return;
176  }
177
178  ScopedIntArrayRO gids(env, javaGids);
179  if (gids.get() == NULL) {
180    RuntimeAbort(env, __LINE__, "Getting gids int array failed");
181  }
182  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
183  if (rc == -1) {
184    std::ostringstream oss;
185    oss << "setgroups failed: " << strerror(errno) << ", gids.size=" << gids.size();
186    RuntimeAbort(env, __LINE__, oss.str().c_str());
187  }
188}
189
190// Sets the resource limits via setrlimit(2) for the values in the
191// two-dimensional array of integers that's passed in. The second dimension
192// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
193// treated as an empty array.
194static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) {
195  if (javaRlimits == NULL) {
196    return;
197  }
198
199  rlimit rlim;
200  memset(&rlim, 0, sizeof(rlim));
201
202  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
203    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
204    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
205    if (javaRlimit.size() != 3) {
206      RuntimeAbort(env, __LINE__, "rlimits array must have a second dimension of size 3");
207    }
208
209    rlim.rlim_cur = javaRlimit[1];
210    rlim.rlim_max = javaRlimit[2];
211
212    int rc = setrlimit(javaRlimit[0], &rlim);
213    if (rc == -1) {
214      ALOGE("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur,
215            rlim.rlim_max);
216      RuntimeAbort(env, __LINE__, "setrlimit failed");
217    }
218  }
219}
220
221// The debug malloc library needs to know whether it's the zygote or a child.
222extern "C" int gMallocLeakZygoteChild;
223
224static void PreApplicationInit() {
225  // The child process sets this to indicate it's not the zygote.
226  gMallocLeakZygoteChild = 1;
227
228  // Set the jemalloc decay time to 1.
229  mallopt(M_DECAY_TIME, 1);
230}
231
232static void EnableKeepCapabilities(JNIEnv* env) {
233  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
234  if (rc == -1) {
235    RuntimeAbort(env, __LINE__, "prctl(PR_SET_KEEPCAPS) failed");
236  }
237}
238
239static void DropCapabilitiesBoundingSet(JNIEnv* env) {
240  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
241    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
242    if (rc == -1) {
243      if (errno == EINVAL) {
244        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
245              "your kernel is compiled with file capabilities support");
246      } else {
247        ALOGE("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno));
248        RuntimeAbort(env, __LINE__, "prctl(PR_CAPBSET_DROP) failed");
249      }
250    }
251  }
252}
253
254static void SetInheritable(JNIEnv* env, uint64_t inheritable) {
255  __user_cap_header_struct capheader;
256  memset(&capheader, 0, sizeof(capheader));
257  capheader.version = _LINUX_CAPABILITY_VERSION_3;
258  capheader.pid = 0;
259
260  __user_cap_data_struct capdata[2];
261  if (capget(&capheader, &capdata[0]) == -1) {
262    ALOGE("capget failed: %s", strerror(errno));
263    RuntimeAbort(env, __LINE__, "capget failed");
264  }
265
266  capdata[0].inheritable = inheritable;
267  capdata[1].inheritable = inheritable >> 32;
268
269  if (capset(&capheader, &capdata[0]) == -1) {
270    ALOGE("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno));
271    RuntimeAbort(env, __LINE__, "capset failed");
272  }
273}
274
275static void SetCapabilities(JNIEnv* env, uint64_t permitted, uint64_t effective,
276                            uint64_t inheritable) {
277  __user_cap_header_struct capheader;
278  memset(&capheader, 0, sizeof(capheader));
279  capheader.version = _LINUX_CAPABILITY_VERSION_3;
280  capheader.pid = 0;
281
282  __user_cap_data_struct capdata[2];
283  memset(&capdata, 0, sizeof(capdata));
284  capdata[0].effective = effective;
285  capdata[1].effective = effective >> 32;
286  capdata[0].permitted = permitted;
287  capdata[1].permitted = permitted >> 32;
288  capdata[0].inheritable = inheritable;
289  capdata[1].inheritable = inheritable >> 32;
290
291  if (capset(&capheader, &capdata[0]) == -1) {
292    ALOGE("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") failed: %s", permitted,
293          effective, inheritable, strerror(errno));
294    RuntimeAbort(env, __LINE__, "capset failed");
295  }
296}
297
298static void SetSchedulerPolicy(JNIEnv* env) {
299  errno = -set_sched_policy(0, SP_DEFAULT);
300  if (errno != 0) {
301    ALOGE("set_sched_policy(0, SP_DEFAULT) failed");
302    RuntimeAbort(env, __LINE__, "set_sched_policy(0, SP_DEFAULT) failed");
303  }
304}
305
306static int UnmountTree(const char* path) {
307    size_t path_len = strlen(path);
308
309    FILE* fp = setmntent("/proc/mounts", "r");
310    if (fp == NULL) {
311        ALOGE("Error opening /proc/mounts: %s", strerror(errno));
312        return -errno;
313    }
314
315    // Some volumes can be stacked on each other, so force unmount in
316    // reverse order to give us the best chance of success.
317    std::list<std::string> toUnmount;
318    mntent* mentry;
319    while ((mentry = getmntent(fp)) != NULL) {
320        if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
321            toUnmount.push_front(std::string(mentry->mnt_dir));
322        }
323    }
324    endmntent(fp);
325
326    for (auto path : toUnmount) {
327        if (umount2(path.c_str(), MNT_DETACH)) {
328            ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
329        }
330    }
331    return 0;
332}
333
334// Create a private mount namespace and bind mount appropriate emulated
335// storage for the given user.
336static bool MountEmulatedStorage(uid_t uid, jint mount_mode,
337        bool force_mount_namespace) {
338    // See storage config details at http://source.android.com/tech/storage/
339
340    String8 storageSource;
341    if (mount_mode == MOUNT_EXTERNAL_DEFAULT) {
342        storageSource = "/mnt/runtime/default";
343    } else if (mount_mode == MOUNT_EXTERNAL_READ) {
344        storageSource = "/mnt/runtime/read";
345    } else if (mount_mode == MOUNT_EXTERNAL_WRITE) {
346        storageSource = "/mnt/runtime/write";
347    } else if (!force_mount_namespace) {
348        // Sane default of no storage visible
349        return true;
350    }
351
352    // Create a second private mount namespace for our process
353    if (unshare(CLONE_NEWNS) == -1) {
354        ALOGW("Failed to unshare(): %s", strerror(errno));
355        return false;
356    }
357
358    // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
359    if (mount_mode == MOUNT_EXTERNAL_NONE) {
360        return true;
361    }
362
363    if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage",
364            NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
365        ALOGW("Failed to mount %s to /storage: %s", storageSource.string(), strerror(errno));
366        return false;
367    }
368
369    // Mount user-specific symlink helper into place
370    userid_t user_id = multiuser_get_user_id(uid);
371    const String8 userSource(String8::format("/mnt/user/%d", user_id));
372    if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) {
373        return false;
374    }
375    if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self",
376            NULL, MS_BIND, NULL)) == -1) {
377        ALOGW("Failed to mount %s to /storage/self: %s", userSource.string(), strerror(errno));
378        return false;
379    }
380
381    return true;
382}
383
// Reports whether the ADDR_NO_RANDOMIZE personality workaround is needed.
// Only 32-bit ARM builds (__arm__) can return true, and only when the running
// kernel's release string parses as a version older than 3.4. If the release
// cannot be read or parsed the answer is false.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major;
    int minor;
    if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
403
404// Utility to close down the Zygote socket file descriptors while
405// the child is still running as root with Zygote's privileges.  Each
406// descriptor (if any) is closed via dup2(), replacing it with a valid
407// (open) descriptor to /dev/null.
408
409static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) {
410  if (!fdsToClose) {
411    return;
412  }
413  jsize count = env->GetArrayLength(fdsToClose);
414  ScopedIntArrayRO ar(env, fdsToClose);
415  if (ar.get() == NULL) {
416      RuntimeAbort(env, __LINE__, "Bad fd array");
417  }
418  jsize i;
419  int devnull;
420  for (i = 0; i < count; i++) {
421    devnull = open("/dev/null", O_RDWR);
422    if (devnull < 0) {
423      ALOGE("Failed to open /dev/null: %s", strerror(errno));
424      RuntimeAbort(env, __LINE__, "Failed to open /dev/null");
425      continue;
426    }
427    ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno));
428    if (dup2(devnull, ar[i]) < 0) {
429      ALOGE("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno));
430      RuntimeAbort(env, __LINE__, "Failed dup2()");
431    }
432    close(devnull);
433  }
434}
435
436void SetThreadName(const char* thread_name) {
437  bool hasAt = false;
438  bool hasDot = false;
439  const char* s = thread_name;
440  while (*s) {
441    if (*s == '.') {
442      hasDot = true;
443    } else if (*s == '@') {
444      hasAt = true;
445    }
446    s++;
447  }
448  const int len = s - thread_name;
449  if (len < 15 || hasAt || !hasDot) {
450    s = thread_name;
451  } else {
452    s = thread_name + len - 15;
453  }
454  // pthread_setname_np fails rather than truncating long strings.
455  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
456  strlcpy(buf, s, sizeof(buf)-1);
457  errno = pthread_setname_np(pthread_self(), buf);
458  if (errno != 0) {
459    ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
460  }
461}
462
// The list of open zygote file descriptors. Created by
// ForkAndSpecializeCommon() on the first fork and re-checked with Restat() on
// every later one; NULL until then.
static FileDescriptorTable* gOpenFdTable = NULL;
465
466static void FillFileDescriptorVector(JNIEnv* env,
467                                     jintArray java_fds,
468                                     std::vector<int>* fds) {
469  CHECK(fds != nullptr);
470  if (java_fds != nullptr) {
471    ScopedIntArrayRO ar(env, java_fds);
472    if (ar.get() == nullptr) {
473      RuntimeAbort(env, __LINE__, "Bad fd array");
474    }
475    fds->reserve(ar.size());
476    for (size_t i = 0; i < ar.size(); ++i) {
477      fds->push_back(ar[i]);
478    }
479  }
480}
481
482// Utility routine to fork zygote and specialize the child process.
483static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
484                                     jint runtime_flags, jobjectArray javaRlimits,
485                                     jlong permittedCapabilities, jlong effectiveCapabilities,
486                                     jint mount_external,
487                                     jstring java_se_info, jstring java_se_name,
488                                     bool is_system_server, jintArray fdsToClose,
489                                     jintArray fdsToIgnore,
490                                     jstring instructionSet, jstring dataDir) {
491  SetSigChldHandler();
492
493  sigset_t sigchld;
494  sigemptyset(&sigchld);
495  sigaddset(&sigchld, SIGCHLD);
496
497  // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
498  // log, which would result in the logging FDs we close being reopened.
499  // This would cause failures because the FDs are not whitelisted.
500  //
501  // Note that the zygote process is single threaded at this point.
502  if (sigprocmask(SIG_BLOCK, &sigchld, nullptr) == -1) {
503    ALOGE("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno));
504    RuntimeAbort(env, __LINE__, "Call to sigprocmask(SIG_BLOCK, { SIGCHLD }) failed.");
505  }
506
507  // Close any logging related FDs before we start evaluating the list of
508  // file descriptors.
509  __android_log_close();
510
511  // If this is the first fork for this zygote, create the open FD table.
512  // If it isn't, we just need to check whether the list of open files has
513  // changed (and it shouldn't in the normal case).
514  std::vector<int> fds_to_ignore;
515  FillFileDescriptorVector(env, fdsToIgnore, &fds_to_ignore);
516  if (gOpenFdTable == NULL) {
517    gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore);
518    if (gOpenFdTable == NULL) {
519      RuntimeAbort(env, __LINE__, "Unable to construct file descriptor table.");
520    }
521  } else if (!gOpenFdTable->Restat(fds_to_ignore)) {
522    RuntimeAbort(env, __LINE__, "Unable to restat file descriptor table.");
523  }
524
525  pid_t pid = fork();
526
527  if (pid == 0) {
528    PreApplicationInit();
529
530    // Clean up any descriptors which must be closed immediately
531    DetachDescriptors(env, fdsToClose);
532
533    // Re-open all remaining open file descriptors so that they aren't shared
534    // with the zygote across a fork.
535    if (!gOpenFdTable->ReopenOrDetach()) {
536      RuntimeAbort(env, __LINE__, "Unable to reopen whitelisted descriptors.");
537    }
538
539    if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
540      ALOGE("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno));
541      RuntimeAbort(env, __LINE__, "Call to sigprocmask(SIG_UNBLOCK, { SIGCHLD }) failed.");
542    }
543
544    // Keep capabilities across UID change, unless we're staying root.
545    if (uid != 0) {
546      EnableKeepCapabilities(env);
547    }
548
549    SetInheritable(env, permittedCapabilities);
550    DropCapabilitiesBoundingSet(env);
551
552    bool use_native_bridge = !is_system_server && (instructionSet != NULL)
553        && android::NativeBridgeAvailable();
554    if (use_native_bridge) {
555      ScopedUtfChars isa_string(env, instructionSet);
556      use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
557    }
558    if (use_native_bridge && dataDir == NULL) {
559      // dataDir should never be null if we need to use a native bridge.
560      // In general, dataDir will never be null for normal applications. It can only happen in
561      // special cases (for isolated processes which are not associated with any app). These are
562      // launched by the framework and should not be emulated anyway.
563      use_native_bridge = false;
564      ALOGW("Native bridge will not be used because dataDir == NULL.");
565    }
566
567    if (!MountEmulatedStorage(uid, mount_external, use_native_bridge)) {
568      ALOGW("Failed to mount emulated storage: %s", strerror(errno));
569      if (errno == ENOTCONN || errno == EROFS) {
570        // When device is actively encrypting, we get ENOTCONN here
571        // since FUSE was mounted before the framework restarted.
572        // When encrypted device is booting, we get EROFS since
573        // FUSE hasn't been created yet by init.
574        // In either case, continue without external storage.
575      } else {
576        RuntimeAbort(env, __LINE__, "Cannot continue without emulated storage");
577      }
578    }
579
580    if (!is_system_server) {
581        int rc = createProcessGroup(uid, getpid());
582        if (rc != 0) {
583            if (rc == -EROFS) {
584                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
585            } else {
586                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
587            }
588        }
589    }
590
591    SetGids(env, javaGids);
592
593    SetRLimits(env, javaRlimits);
594
595    if (use_native_bridge) {
596      ScopedUtfChars isa_string(env, instructionSet);
597      ScopedUtfChars data_dir(env, dataDir);
598      android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
599    }
600
601    int rc = setresgid(gid, gid, gid);
602    if (rc == -1) {
603      ALOGE("setresgid(%d) failed: %s", gid, strerror(errno));
604      RuntimeAbort(env, __LINE__, "setresgid failed");
605    }
606
607    rc = setresuid(uid, uid, uid);
608    if (rc == -1) {
609      ALOGE("setresuid(%d) failed: %s", uid, strerror(errno));
610      RuntimeAbort(env, __LINE__, "setresuid failed");
611    }
612
613    if (NeedsNoRandomizeWorkaround()) {
614        // Work around ARM kernel ASLR lossage (http://b/5817320).
615        int old_personality = personality(0xffffffff);
616        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
617        if (new_personality == -1) {
618            ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
619        }
620    }
621
622    SetCapabilities(env, permittedCapabilities, effectiveCapabilities, permittedCapabilities);
623
624    SetSchedulerPolicy(env);
625
626    const char* se_info_c_str = NULL;
627    ScopedUtfChars* se_info = NULL;
628    if (java_se_info != NULL) {
629        se_info = new ScopedUtfChars(env, java_se_info);
630        se_info_c_str = se_info->c_str();
631        if (se_info_c_str == NULL) {
632          RuntimeAbort(env, __LINE__, "se_info_c_str == NULL");
633        }
634    }
635    const char* se_name_c_str = NULL;
636    ScopedUtfChars* se_name = NULL;
637    if (java_se_name != NULL) {
638        se_name = new ScopedUtfChars(env, java_se_name);
639        se_name_c_str = se_name->c_str();
640        if (se_name_c_str == NULL) {
641          RuntimeAbort(env, __LINE__, "se_name_c_str == NULL");
642        }
643    }
644    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
645    if (rc == -1) {
646      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
647            is_system_server, se_info_c_str, se_name_c_str);
648      RuntimeAbort(env, __LINE__, "selinux_android_setcontext failed");
649    }
650
651    // Make it easier to debug audit logs by setting the main thread's name to the
652    // nice name rather than "app_process".
653    if (se_info_c_str == NULL && is_system_server) {
654      se_name_c_str = "system_server";
655    }
656    if (se_info_c_str != NULL) {
657      SetThreadName(se_name_c_str);
658    }
659
660    delete se_info;
661    delete se_name;
662
663    UnsetSigChldHandler();
664
665    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
666                              is_system_server, instructionSet);
667    if (env->ExceptionCheck()) {
668      RuntimeAbort(env, __LINE__, "Error calling post fork hooks.");
669    }
670  } else if (pid > 0) {
671    // the parent process
672
673    // We blocked SIGCHLD prior to a fork, we unblock it here.
674    if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
675      ALOGE("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno));
676      RuntimeAbort(env, __LINE__, "Call to sigprocmask(SIG_UNBLOCK, { SIGCHLD }) failed.");
677    }
678  }
679  return pid;
680}
681
682static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
683    __user_cap_header_struct capheader;
684    memset(&capheader, 0, sizeof(capheader));
685    capheader.version = _LINUX_CAPABILITY_VERSION_3;
686    capheader.pid = 0;
687
688    __user_cap_data_struct capdata[2];
689    if (capget(&capheader, &capdata[0]) == -1) {
690        ALOGE("capget failed: %s", strerror(errno));
691        RuntimeAbort(env, __LINE__, "capget failed");
692    }
693
694    return capdata[0].effective |
695           (static_cast<uint64_t>(capdata[1].effective) << 32);
696}
697}  // anonymous namespace
698
699namespace android {
700
701static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
702  PreApplicationInit();
703}
704
705static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
706        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
707        jint runtime_flags, jobjectArray rlimits,
708        jint mount_external, jstring se_info, jstring se_name,
709        jintArray fdsToClose,
710        jintArray fdsToIgnore,
711        jstring instructionSet, jstring appDataDir) {
712    jlong capabilities = 0;
713
714    // Grant CAP_WAKE_ALARM to the Bluetooth process.
715    // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client.
716    // Grant CAP_SYS_NICE to allow Bluetooth to set RT priority for
717    // audio-related threads.
718    // TODO: consider making such functionality an RPC to netd.
719    if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
720      capabilities |= (1LL << CAP_WAKE_ALARM);
721      capabilities |= (1LL << CAP_NET_RAW);
722      capabilities |= (1LL << CAP_NET_BIND_SERVICE);
723      capabilities |= (1LL << CAP_SYS_NICE);
724    }
725
726    // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
727    bool gid_wakelock_found = false;
728    if (gid == AID_WAKELOCK) {
729      gid_wakelock_found = true;
730    } else if (gids != NULL) {
731      jsize gids_num = env->GetArrayLength(gids);
732      ScopedIntArrayRO ar(env, gids);
733      if (ar.get() == NULL) {
734        RuntimeAbort(env, __LINE__, "Bad gids array");
735      }
736      for (int i = 0; i < gids_num; i++) {
737        if (ar[i] == AID_WAKELOCK) {
738          gid_wakelock_found = true;
739          break;
740        }
741      }
742    }
743    if (gid_wakelock_found) {
744      capabilities |= (1LL << CAP_BLOCK_SUSPEND);
745    }
746
747    // Containers run without some capabilities, so drop any caps that are not
748    // available.
749    capabilities &= GetEffectiveCapabilityMask(env);
750
751    return ForkAndSpecializeCommon(env, uid, gid, gids, runtime_flags,
752            rlimits, capabilities, capabilities, mount_external, se_info,
753            se_name, false, fdsToClose, fdsToIgnore, instructionSet, appDataDir);
754}
755
756static jint com_android_internal_os_Zygote_nativeForkSystemServer(
757        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
758        jint runtime_flags, jobjectArray rlimits, jlong permittedCapabilities,
759        jlong effectiveCapabilities) {
760  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
761                                      runtime_flags, rlimits,
762                                      permittedCapabilities, effectiveCapabilities,
763                                      MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL,
764                                      NULL, NULL, NULL);
765  if (pid > 0) {
766      // The zygote process checks whether the child process has died or not.
767      ALOGI("System server process %d has been created", pid);
768      gSystemServerPid = pid;
769      // There is a slight window that the system server process has crashed
770      // but it went unnoticed because we haven't published its pid yet. So
771      // we recheck here just to make sure that all is well.
772      int status;
773      if (waitpid(pid, &status, WNOHANG) == pid) {
774          ALOGE("System server process %d has died. Restarting Zygote!", pid);
775          RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
776      }
777
778      // Assign system_server to the correct memory cgroup.
779      if (!WriteStringToFile(StringPrintf("%d", pid), "/dev/memcg/system/tasks")) {
780        ALOGE("couldn't write %d to /dev/memcg/system/tasks", pid);
781      }
782  }
783  return pid;
784}
785
786static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
787        JNIEnv* env, jclass, jstring path) {
788    ScopedUtfChars path_native(env, path);
789    const char* path_cstr = path_native.c_str();
790    if (!path_cstr) {
791        RuntimeAbort(env, __LINE__, "path_cstr == NULL");
792    }
793    FileDescriptorWhitelist::Get()->Allow(path_cstr);
794}
795
796static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) {
797    // Zygote process unmount root storage space initially before every child processes are forked.
798    // Every forked child processes (include SystemServer) only mount their own root storage space
799    // and no need unmount storage operation in MountEmulatedStorage method.
800    // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
801
802    // See storage config details at http://source.android.com/tech/storage/
803    // Create private mount namespace shared by all children
804    if (unshare(CLONE_NEWNS) == -1) {
805        RuntimeAbort(env, __LINE__, "Failed to unshare()");
806        return;
807    }
808
809    // Mark rootfs as being a slave so that changes from default
810    // namespace only flow into our children.
811    if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
812        RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
813        return;
814    }
815
816    // Create a staging tmpfs that is shared by our children; they will
817    // bind mount storage into their respective private namespaces, which
818    // are isolated from each other.
819    const char* target_base = getenv("EMULATED_STORAGE_TARGET");
820    if (target_base != nullptr) {
821#define STRINGIFY_UID(x) __STRING(x)
822        if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
823                  "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
824            ALOGE("Failed to mount tmpfs to %s", target_base);
825            RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
826            return;
827        }
828#undef STRINGIFY_UID
829    }
830
831    UnmountTree("/storage");
832}
833
834static const JNINativeMethod gMethods[] = {
835    { "nativeForkAndSpecialize",
836      "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[ILjava/lang/String;Ljava/lang/String;)I",
837      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
838    { "nativeForkSystemServer", "(II[II[[IJJ)I",
839      (void *) com_android_internal_os_Zygote_nativeForkSystemServer },
840    { "nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
841      (void *) com_android_internal_os_Zygote_nativeAllowFileAcrossFork },
842    { "nativeUnmountStorageOnInit", "()V",
843      (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit },
844    { "nativePreApplicationInit", "()V",
845      (void *) com_android_internal_os_Zygote_nativePreApplicationInit }
846};
847
848int register_com_android_internal_os_Zygote(JNIEnv* env) {
849  gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
850  gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
851                                                   "(IZLjava/lang/String;)V");
852
853  return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
854}
855}  // namespace android
856