com_android_internal_os_Zygote.cpp revision 041483acea898088c4ff30e32d21f4e9d57b40a0
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <list>
24#include <sstream>
25#include <string>
26
27#include <fcntl.h>
28#include <grp.h>
29#include <inttypes.h>
30#include <malloc.h>
31#include <mntent.h>
32#include <paths.h>
33#include <signal.h>
34#include <stdlib.h>
35#include <sys/capability.h>
36#include <sys/cdefs.h>
37#include <sys/personality.h>
38#include <sys/prctl.h>
39#include <sys/resource.h>
40#include <sys/stat.h>
41#include <sys/time.h>
42#include <sys/types.h>
43#include <sys/utsname.h>
44#include <sys/wait.h>
45#include <unistd.h>
46
47#include "android-base/logging.h"
48#include <android-base/file.h>
49#include <android-base/stringprintf.h>
50#include <cutils/fs.h>
51#include <cutils/multiuser.h>
52#include <cutils/sched_policy.h>
53#include <private/android_filesystem_config.h>
54#include <utils/String8.h>
55#include <selinux/android.h>
56#include <seccomp_policy.h>
57#include <processgroup/processgroup.h>
58
59#include "core_jni_helpers.h"
60#include <nativehelper/JNIHelp.h>
61#include <nativehelper/ScopedLocalRef.h>
62#include <nativehelper/ScopedPrimitiveArray.h>
63#include <nativehelper/ScopedUtfChars.h>
64#include "fd_utils.h"
65
66#include "nativebridge/native_bridge.h"
67
68namespace {
69
70using android::String8;
71using android::base::StringPrintf;
72using android::base::WriteStringToFile;
73
74static pid_t gSystemServerPid = 0;
75
76static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
77static jclass gZygoteClass;
78static jmethodID gCallPostForkChildHooks;
79
80static bool g_is_security_enforced = true;
81
// Selects which external-storage view MountEmulatedStorage() binds onto
// /storage for a forked process.
// Must match values in com.android.internal.os.Zygote.
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,     // no storage source is selected
  MOUNT_EXTERNAL_DEFAULT = 1,  // source is /mnt/runtime/default
  MOUNT_EXTERNAL_READ = 2,     // source is /mnt/runtime/read
  MOUNT_EXTERNAL_WRITE = 3,    // source is /mnt/runtime/write
};
89
90static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
91  std::ostringstream oss;
92  oss << __FILE__ << ":" << line << ": " << msg;
93  env->FatalError(oss.str().c_str());
94}
95
96// This signal handler is for zygote mode, since the zygote must reap its children
97static void SigChldHandler(int /*signal_number*/) {
98  pid_t pid;
99  int status;
100
101  // It's necessary to save and restore the errno during this function.
102  // Since errno is stored per thread, changing it here modifies the errno
103  // on the thread on which this signal handler executes. If a signal occurs
104  // between a call and an errno check, it's possible to get the errno set
105  // here.
106  // See b/23572286 for extra information.
107  int saved_errno = errno;
108
109  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
110     // Log process-death status that we care about.  In general it is
111     // not safe to call LOG(...) from a signal handler because of
112     // possible reentrancy.  However, we know a priori that the
113     // current implementation of LOG() is safe to call from a SIGCHLD
114     // handler in the zygote process.  If the LOG() implementation
115     // changes its locking strategy or its use of syscalls within the
116     // lazy-init critical section, its use here may become unsafe.
117    if (WIFEXITED(status)) {
118      ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
119    } else if (WIFSIGNALED(status)) {
120      ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
121      if (WCOREDUMP(status)) {
122        ALOGI("Process %d dumped core.", pid);
123      }
124    }
125
126    // If the just-crashed process is the system_server, bring down zygote
127    // so that it is restarted by init and system server will be restarted
128    // from there.
129    if (pid == gSystemServerPid) {
130      ALOGE("Exit zygote because system server (%d) has terminated", pid);
131      kill(getpid(), SIGKILL);
132    }
133  }
134
135  // Note that we shouldn't consider ECHILD an error because
136  // the secondary zygote might have no children left to wait for.
137  if (pid < 0 && errno != ECHILD) {
138    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
139  }
140
141  errno = saved_errno;
142}
143
144// Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
145// configured very late, because earlier in the runtime we may fork() and
146// exec() other processes, and we want to waitpid() for those rather than
147// have them be harvested immediately.
148//
149// Ignore SIGHUP because all processes forked by the zygote are in the same
150// process group as the zygote and we don't want to be notified if we become
151// an orphaned group and have one or more stopped processes. This is not a
152// theoretical concern :
153// - we can become an orphaned group if one of our direct descendants forks
154//   and is subsequently killed before its children.
155// - crash_dump routinely STOPs the process it's tracing.
156//
157// See issues b/71965619 and b/25567761 for further details.
158//
159// This ends up being called repeatedly before each fork(), but there's
160// no real harm in that.
161static void SetSignalHandlers() {
162  struct sigaction sig_chld = {};
163  sig_chld.sa_handler = SigChldHandler;
164
165  if (sigaction(SIGCHLD, &sig_chld, NULL) < 0) {
166    ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
167  }
168
169  struct sigaction sig_hup = {};
170  sig_hup.sa_handler = SIG_IGN;
171  if (sigaction(SIGHUP, &sig_hup, NULL) < 0) {
172    ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
173  }
174}
175
176// Sets the SIGCHLD handler back to default behavior in zygote children.
177static void UnsetChldSignalHandler() {
178  struct sigaction sa;
179  memset(&sa, 0, sizeof(sa));
180  sa.sa_handler = SIG_DFL;
181
182  if (sigaction(SIGCHLD, &sa, NULL) < 0) {
183    ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
184  }
185}
186
187// Calls POSIX setgroups() using the int[] object as an argument.
188// A NULL argument is tolerated.
189static void SetGids(JNIEnv* env, jintArray javaGids) {
190  if (javaGids == NULL) {
191    return;
192  }
193
194  ScopedIntArrayRO gids(env, javaGids);
195  if (gids.get() == NULL) {
196    RuntimeAbort(env, __LINE__, "Getting gids int array failed");
197  }
198  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
199  if (rc == -1) {
200    std::ostringstream oss;
201    oss << "setgroups failed: " << strerror(errno) << ", gids.size=" << gids.size();
202    RuntimeAbort(env, __LINE__, oss.str().c_str());
203  }
204}
205
206// Sets the resource limits via setrlimit(2) for the values in the
207// two-dimensional array of integers that's passed in. The second dimension
208// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
209// treated as an empty array.
210static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) {
211  if (javaRlimits == NULL) {
212    return;
213  }
214
215  rlimit rlim;
216  memset(&rlim, 0, sizeof(rlim));
217
218  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
219    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
220    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
221    if (javaRlimit.size() != 3) {
222      RuntimeAbort(env, __LINE__, "rlimits array must have a second dimension of size 3");
223    }
224
225    rlim.rlim_cur = javaRlimit[1];
226    rlim.rlim_max = javaRlimit[2];
227
228    int rc = setrlimit(javaRlimit[0], &rlim);
229    if (rc == -1) {
230      ALOGE("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur,
231            rlim.rlim_max);
232      RuntimeAbort(env, __LINE__, "setrlimit failed");
233    }
234  }
235}
236
237// The debug malloc library needs to know whether it's the zygote or a child.
238extern "C" int gMallocLeakZygoteChild;
239
240static void PreApplicationInit() {
241  // The child process sets this to indicate it's not the zygote.
242  gMallocLeakZygoteChild = 1;
243
244  // Set the jemalloc decay time to 1.
245  mallopt(M_DECAY_TIME, 1);
246}
247
248static void SetUpSeccompFilter(uid_t uid) {
249  if (!g_is_security_enforced) {
250    ALOGI("seccomp disabled by setenforce 0");
251    return;
252  }
253
254  // Apply system or app filter based on uid.
255  if (getuid() >= AID_APP_START) {
256    set_app_seccomp_filter();
257  } else {
258    set_system_seccomp_filter();
259  }
260}
261
262static void EnableKeepCapabilities(JNIEnv* env) {
263  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
264  if (rc == -1) {
265    RuntimeAbort(env, __LINE__, "prctl(PR_SET_KEEPCAPS) failed");
266  }
267}
268
269static void DropCapabilitiesBoundingSet(JNIEnv* env) {
270  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
271    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
272    if (rc == -1) {
273      if (errno == EINVAL) {
274        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
275              "your kernel is compiled with file capabilities support");
276      } else {
277        ALOGE("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno));
278        RuntimeAbort(env, __LINE__, "prctl(PR_CAPBSET_DROP) failed");
279      }
280    }
281  }
282}
283
284static void SetInheritable(JNIEnv* env, uint64_t inheritable) {
285  __user_cap_header_struct capheader;
286  memset(&capheader, 0, sizeof(capheader));
287  capheader.version = _LINUX_CAPABILITY_VERSION_3;
288  capheader.pid = 0;
289
290  __user_cap_data_struct capdata[2];
291  if (capget(&capheader, &capdata[0]) == -1) {
292    ALOGE("capget failed: %s", strerror(errno));
293    RuntimeAbort(env, __LINE__, "capget failed");
294  }
295
296  capdata[0].inheritable = inheritable;
297  capdata[1].inheritable = inheritable >> 32;
298
299  if (capset(&capheader, &capdata[0]) == -1) {
300    ALOGE("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno));
301    RuntimeAbort(env, __LINE__, "capset failed");
302  }
303}
304
305static void SetCapabilities(JNIEnv* env, uint64_t permitted, uint64_t effective,
306                            uint64_t inheritable) {
307  __user_cap_header_struct capheader;
308  memset(&capheader, 0, sizeof(capheader));
309  capheader.version = _LINUX_CAPABILITY_VERSION_3;
310  capheader.pid = 0;
311
312  __user_cap_data_struct capdata[2];
313  memset(&capdata, 0, sizeof(capdata));
314  capdata[0].effective = effective;
315  capdata[1].effective = effective >> 32;
316  capdata[0].permitted = permitted;
317  capdata[1].permitted = permitted >> 32;
318  capdata[0].inheritable = inheritable;
319  capdata[1].inheritable = inheritable >> 32;
320
321  if (capset(&capheader, &capdata[0]) == -1) {
322    ALOGE("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") failed: %s", permitted,
323          effective, inheritable, strerror(errno));
324    RuntimeAbort(env, __LINE__, "capset failed");
325  }
326}
327
328static void SetSchedulerPolicy(JNIEnv* env) {
329  errno = -set_sched_policy(0, SP_DEFAULT);
330  if (errno != 0) {
331    ALOGE("set_sched_policy(0, SP_DEFAULT) failed");
332    RuntimeAbort(env, __LINE__, "set_sched_policy(0, SP_DEFAULT) failed");
333  }
334}
335
336static int UnmountTree(const char* path) {
337    size_t path_len = strlen(path);
338
339    FILE* fp = setmntent("/proc/mounts", "r");
340    if (fp == NULL) {
341        ALOGE("Error opening /proc/mounts: %s", strerror(errno));
342        return -errno;
343    }
344
345    // Some volumes can be stacked on each other, so force unmount in
346    // reverse order to give us the best chance of success.
347    std::list<std::string> toUnmount;
348    mntent* mentry;
349    while ((mentry = getmntent(fp)) != NULL) {
350        if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
351            toUnmount.push_front(std::string(mentry->mnt_dir));
352        }
353    }
354    endmntent(fp);
355
356    for (auto path : toUnmount) {
357        if (umount2(path.c_str(), MNT_DETACH)) {
358            ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
359        }
360    }
361    return 0;
362}
363
364// Create a private mount namespace and bind mount appropriate emulated
365// storage for the given user.
366static bool MountEmulatedStorage(uid_t uid, jint mount_mode,
367        bool force_mount_namespace) {
368    // See storage config details at http://source.android.com/tech/storage/
369
370    String8 storageSource;
371    if (mount_mode == MOUNT_EXTERNAL_DEFAULT) {
372        storageSource = "/mnt/runtime/default";
373    } else if (mount_mode == MOUNT_EXTERNAL_READ) {
374        storageSource = "/mnt/runtime/read";
375    } else if (mount_mode == MOUNT_EXTERNAL_WRITE) {
376        storageSource = "/mnt/runtime/write";
377    } else if (!force_mount_namespace) {
378        // Sane default of no storage visible
379        return true;
380    }
381
382    // Create a second private mount namespace for our process
383    if (unshare(CLONE_NEWNS) == -1) {
384        ALOGW("Failed to unshare(): %s", strerror(errno));
385        return false;
386    }
387
388    // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
389    if (mount_mode == MOUNT_EXTERNAL_NONE) {
390        return true;
391    }
392
393    if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage",
394            NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
395        ALOGW("Failed to mount %s to /storage: %s", storageSource.string(), strerror(errno));
396        return false;
397    }
398
399    // Mount user-specific symlink helper into place
400    userid_t user_id = multiuser_get_user_id(uid);
401    const String8 userSource(String8::format("/mnt/user/%d", user_id));
402    if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) {
403        return false;
404    }
405    if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self",
406            NULL, MS_BIND, NULL)) == -1) {
407        ALOGW("Failed to mount %s to /storage/self: %s", userSource.string(), strerror(errno));
408        return false;
409    }
410
411    return true;
412}
413
// Reports whether the running kernel needs the ADDR_NO_RANDOMIZE workaround.
// Only 32-bit ARM builds can return true, and only when the kernel release
// reported by uname() parses as a version older than 3.4. Any failure to read
// or parse the release yields false.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major = 0;
    int minor = 0;
    if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
433
434// Utility to close down the Zygote socket file descriptors while
435// the child is still running as root with Zygote's privileges.  Each
436// descriptor (if any) is closed via dup2(), replacing it with a valid
437// (open) descriptor to /dev/null.
438
439static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) {
440  if (!fdsToClose) {
441    return;
442  }
443  jsize count = env->GetArrayLength(fdsToClose);
444  ScopedIntArrayRO ar(env, fdsToClose);
445  if (ar.get() == NULL) {
446      RuntimeAbort(env, __LINE__, "Bad fd array");
447  }
448  jsize i;
449  int devnull;
450  for (i = 0; i < count; i++) {
451    devnull = open("/dev/null", O_RDWR);
452    if (devnull < 0) {
453      ALOGE("Failed to open /dev/null: %s", strerror(errno));
454      RuntimeAbort(env, __LINE__, "Failed to open /dev/null");
455      continue;
456    }
457    ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno));
458    if (dup2(devnull, ar[i]) < 0) {
459      ALOGE("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno));
460      RuntimeAbort(env, __LINE__, "Failed dup2()");
461    }
462    close(devnull);
463  }
464}
465
466void SetThreadName(const char* thread_name) {
467  bool hasAt = false;
468  bool hasDot = false;
469  const char* s = thread_name;
470  while (*s) {
471    if (*s == '.') {
472      hasDot = true;
473    } else if (*s == '@') {
474      hasAt = true;
475    }
476    s++;
477  }
478  const int len = s - thread_name;
479  if (len < 15 || hasAt || !hasDot) {
480    s = thread_name;
481  } else {
482    s = thread_name + len - 15;
483  }
484  // pthread_setname_np fails rather than truncating long strings.
485  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
486  strlcpy(buf, s, sizeof(buf)-1);
487  errno = pthread_setname_np(pthread_self(), buf);
488  if (errno != 0) {
489    ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
490  }
491  // Update base::logging default tag.
492  android::base::SetDefaultTag(buf);
493}
494
495// The list of open zygote file descriptors.
496static FileDescriptorTable* gOpenFdTable = NULL;
497
498static void FillFileDescriptorVector(JNIEnv* env,
499                                     jintArray java_fds,
500                                     std::vector<int>* fds) {
501  CHECK(fds != nullptr);
502  if (java_fds != nullptr) {
503    ScopedIntArrayRO ar(env, java_fds);
504    if (ar.get() == nullptr) {
505      RuntimeAbort(env, __LINE__, "Bad fd array");
506    }
507    fds->reserve(ar.size());
508    for (size_t i = 0; i < ar.size(); ++i) {
509      fds->push_back(ar[i]);
510    }
511  }
512}
513
514// Utility routine to fork zygote and specialize the child process.
515static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
516                                     jint runtime_flags, jobjectArray javaRlimits,
517                                     jlong permittedCapabilities, jlong effectiveCapabilities,
518                                     jint mount_external,
519                                     jstring java_se_info, jstring java_se_name,
520                                     bool is_system_server, jintArray fdsToClose,
521                                     jintArray fdsToIgnore, bool is_child_zygote,
522                                     jstring instructionSet, jstring dataDir) {
523  SetSignalHandlers();
524
525  sigset_t sigchld;
526  sigemptyset(&sigchld);
527  sigaddset(&sigchld, SIGCHLD);
528
529  // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
530  // log, which would result in the logging FDs we close being reopened.
531  // This would cause failures because the FDs are not whitelisted.
532  //
533  // Note that the zygote process is single threaded at this point.
534  if (sigprocmask(SIG_BLOCK, &sigchld, nullptr) == -1) {
535    ALOGE("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno));
536    RuntimeAbort(env, __LINE__, "Call to sigprocmask(SIG_BLOCK, { SIGCHLD }) failed.");
537  }
538
539  // Close any logging related FDs before we start evaluating the list of
540  // file descriptors.
541  __android_log_close();
542
543  // If this is the first fork for this zygote, create the open FD table.
544  // If it isn't, we just need to check whether the list of open files has
545  // changed (and it shouldn't in the normal case).
546  std::vector<int> fds_to_ignore;
547  FillFileDescriptorVector(env, fdsToIgnore, &fds_to_ignore);
548  if (gOpenFdTable == NULL) {
549    gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore);
550    if (gOpenFdTable == NULL) {
551      RuntimeAbort(env, __LINE__, "Unable to construct file descriptor table.");
552    }
553  } else if (!gOpenFdTable->Restat(fds_to_ignore)) {
554    RuntimeAbort(env, __LINE__, "Unable to restat file descriptor table.");
555  }
556
557  pid_t pid = fork();
558
559  if (pid == 0) {
560    PreApplicationInit();
561
562    // Clean up any descriptors which must be closed immediately
563    DetachDescriptors(env, fdsToClose);
564
565    // Re-open all remaining open file descriptors so that they aren't shared
566    // with the zygote across a fork.
567    if (!gOpenFdTable->ReopenOrDetach()) {
568      RuntimeAbort(env, __LINE__, "Unable to reopen whitelisted descriptors.");
569    }
570
571    if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
572      ALOGE("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno));
573      RuntimeAbort(env, __LINE__, "Call to sigprocmask(SIG_UNBLOCK, { SIGCHLD }) failed.");
574    }
575
576    // Must be called when the new process still has CAP_SYS_ADMIN.  The other alternative is to
577    // call prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that breaks SELinux domain transition (see
578    // b/71859146).
579    SetUpSeccompFilter(uid);
580
581    // Keep capabilities across UID change, unless we're staying root.
582    if (uid != 0) {
583      EnableKeepCapabilities(env);
584    }
585
586    SetInheritable(env, permittedCapabilities);
587    DropCapabilitiesBoundingSet(env);
588
589    bool use_native_bridge = !is_system_server && (instructionSet != NULL)
590        && android::NativeBridgeAvailable();
591    if (use_native_bridge) {
592      ScopedUtfChars isa_string(env, instructionSet);
593      use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
594    }
595    if (use_native_bridge && dataDir == NULL) {
596      // dataDir should never be null if we need to use a native bridge.
597      // In general, dataDir will never be null for normal applications. It can only happen in
598      // special cases (for isolated processes which are not associated with any app). These are
599      // launched by the framework and should not be emulated anyway.
600      use_native_bridge = false;
601      ALOGW("Native bridge will not be used because dataDir == NULL.");
602    }
603
604    if (!MountEmulatedStorage(uid, mount_external, use_native_bridge)) {
605      ALOGW("Failed to mount emulated storage: %s", strerror(errno));
606      if (errno == ENOTCONN || errno == EROFS) {
607        // When device is actively encrypting, we get ENOTCONN here
608        // since FUSE was mounted before the framework restarted.
609        // When encrypted device is booting, we get EROFS since
610        // FUSE hasn't been created yet by init.
611        // In either case, continue without external storage.
612      } else {
613        RuntimeAbort(env, __LINE__, "Cannot continue without emulated storage");
614      }
615    }
616
617    if (!is_system_server) {
618        int rc = createProcessGroup(uid, getpid());
619        if (rc != 0) {
620            if (rc == -EROFS) {
621                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
622            } else {
623                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
624            }
625        }
626    }
627
628    SetGids(env, javaGids);
629
630    SetRLimits(env, javaRlimits);
631
632    if (use_native_bridge) {
633      ScopedUtfChars isa_string(env, instructionSet);
634      ScopedUtfChars data_dir(env, dataDir);
635      android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
636    }
637
638    int rc = setresgid(gid, gid, gid);
639    if (rc == -1) {
640      ALOGE("setresgid(%d) failed: %s", gid, strerror(errno));
641      RuntimeAbort(env, __LINE__, "setresgid failed");
642    }
643
644    rc = setresuid(uid, uid, uid);
645    if (rc == -1) {
646      ALOGE("setresuid(%d) failed: %s", uid, strerror(errno));
647      RuntimeAbort(env, __LINE__, "setresuid failed");
648    }
649
650    if (NeedsNoRandomizeWorkaround()) {
651        // Work around ARM kernel ASLR lossage (http://b/5817320).
652        int old_personality = personality(0xffffffff);
653        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
654        if (new_personality == -1) {
655            ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
656        }
657    }
658
659    SetCapabilities(env, permittedCapabilities, effectiveCapabilities, permittedCapabilities);
660
661    SetSchedulerPolicy(env);
662
663    const char* se_info_c_str = NULL;
664    ScopedUtfChars* se_info = NULL;
665    if (java_se_info != NULL) {
666        se_info = new ScopedUtfChars(env, java_se_info);
667        se_info_c_str = se_info->c_str();
668        if (se_info_c_str == NULL) {
669          RuntimeAbort(env, __LINE__, "se_info_c_str == NULL");
670        }
671    }
672    const char* se_name_c_str = NULL;
673    ScopedUtfChars* se_name = NULL;
674    if (java_se_name != NULL) {
675        se_name = new ScopedUtfChars(env, java_se_name);
676        se_name_c_str = se_name->c_str();
677        if (se_name_c_str == NULL) {
678          RuntimeAbort(env, __LINE__, "se_name_c_str == NULL");
679        }
680    }
681    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
682    if (rc == -1) {
683      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
684            is_system_server, se_info_c_str, se_name_c_str);
685      RuntimeAbort(env, __LINE__, "selinux_android_setcontext failed");
686    }
687
688    // Make it easier to debug audit logs by setting the main thread's name to the
689    // nice name rather than "app_process".
690    if (se_name_c_str == NULL && is_system_server) {
691      se_name_c_str = "system_server";
692    }
693    if (se_name_c_str != NULL) {
694      SetThreadName(se_name_c_str);
695    }
696
697    delete se_info;
698    delete se_name;
699
700    // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
701    UnsetChldSignalHandler();
702
703    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
704                              is_system_server, is_child_zygote, instructionSet);
705    if (env->ExceptionCheck()) {
706      RuntimeAbort(env, __LINE__, "Error calling post fork hooks.");
707    }
708  } else if (pid > 0) {
709    // the parent process
710
711    // We blocked SIGCHLD prior to a fork, we unblock it here.
712    if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
713      ALOGE("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno));
714      RuntimeAbort(env, __LINE__, "Call to sigprocmask(SIG_UNBLOCK, { SIGCHLD }) failed.");
715    }
716  }
717  return pid;
718}
719
720static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
721    __user_cap_header_struct capheader;
722    memset(&capheader, 0, sizeof(capheader));
723    capheader.version = _LINUX_CAPABILITY_VERSION_3;
724    capheader.pid = 0;
725
726    __user_cap_data_struct capdata[2];
727    if (capget(&capheader, &capdata[0]) == -1) {
728        ALOGE("capget failed: %s", strerror(errno));
729        RuntimeAbort(env, __LINE__, "capget failed");
730    }
731
732    return capdata[0].effective |
733           (static_cast<uint64_t>(capdata[1].effective) << 32);
734}
735}  // anonymous namespace
736
737namespace android {
738
739static void com_android_internal_os_Zygote_nativeSecurityInit(JNIEnv*, jclass) {
740  // security_getenforce is not allowed on app process. Initialize and cache the value before
741  // zygote forks.
742  g_is_security_enforced = security_getenforce();
743}
744
745static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
746  PreApplicationInit();
747}
748
749static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
750        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
751        jint runtime_flags, jobjectArray rlimits,
752        jint mount_external, jstring se_info, jstring se_name,
753        jintArray fdsToClose, jintArray fdsToIgnore, jboolean is_child_zygote,
754        jstring instructionSet, jstring appDataDir) {
755    jlong capabilities = 0;
756
757    // Grant CAP_WAKE_ALARM to the Bluetooth process.
758    // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client.
759    // Grant CAP_SYS_NICE to allow Bluetooth to set RT priority for
760    // audio-related threads.
761    // TODO: consider making such functionality an RPC to netd.
762    if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
763      capabilities |= (1LL << CAP_WAKE_ALARM);
764      capabilities |= (1LL << CAP_NET_RAW);
765      capabilities |= (1LL << CAP_NET_BIND_SERVICE);
766      capabilities |= (1LL << CAP_SYS_NICE);
767    }
768
769    // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
770    bool gid_wakelock_found = false;
771    if (gid == AID_WAKELOCK) {
772      gid_wakelock_found = true;
773    } else if (gids != NULL) {
774      jsize gids_num = env->GetArrayLength(gids);
775      ScopedIntArrayRO ar(env, gids);
776      if (ar.get() == NULL) {
777        RuntimeAbort(env, __LINE__, "Bad gids array");
778      }
779      for (int i = 0; i < gids_num; i++) {
780        if (ar[i] == AID_WAKELOCK) {
781          gid_wakelock_found = true;
782          break;
783        }
784      }
785    }
786    if (gid_wakelock_found) {
787      capabilities |= (1LL << CAP_BLOCK_SUSPEND);
788    }
789
790    // If forking a child zygote process, that zygote will need to be able to change
791    // the UID and GID of processes it forks, as well as drop those capabilities.
792    if (is_child_zygote) {
793      capabilities |= (1LL << CAP_SETUID);
794      capabilities |= (1LL << CAP_SETGID);
795      capabilities |= (1LL << CAP_SETPCAP);
796    }
797
798    // Containers run without some capabilities, so drop any caps that are not
799    // available.
800    capabilities &= GetEffectiveCapabilityMask(env);
801
802    return ForkAndSpecializeCommon(env, uid, gid, gids, runtime_flags,
803            rlimits, capabilities, capabilities, mount_external, se_info,
804            se_name, false, fdsToClose, fdsToIgnore, is_child_zygote == JNI_TRUE,
805            instructionSet, appDataDir);
806}
807
808static jint com_android_internal_os_Zygote_nativeForkSystemServer(
809        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
810        jint runtime_flags, jobjectArray rlimits, jlong permittedCapabilities,
811        jlong effectiveCapabilities) {
812  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
813                                      runtime_flags, rlimits,
814                                      permittedCapabilities, effectiveCapabilities,
815                                      MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL,
816                                      NULL, false, NULL, NULL);
817  if (pid > 0) {
818      // The zygote process checks whether the child process has died or not.
819      ALOGI("System server process %d has been created", pid);
820      gSystemServerPid = pid;
821      // There is a slight window that the system server process has crashed
822      // but it went unnoticed because we haven't published its pid yet. So
823      // we recheck here just to make sure that all is well.
824      int status;
825      if (waitpid(pid, &status, WNOHANG) == pid) {
826          ALOGE("System server process %d has died. Restarting Zygote!", pid);
827          RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
828      }
829
830      // Assign system_server to the correct memory cgroup.
831      // Not all devices mount /dev/memcg so check for the file first
832      // to avoid unnecessarily printing errors and denials in the logs.
833      if (!access("/dev/memcg/system/tasks", F_OK) &&
834                !WriteStringToFile(StringPrintf("%d", pid), "/dev/memcg/system/tasks")) {
835        ALOGE("couldn't write %d to /dev/memcg/system/tasks", pid);
836      }
837  }
838  return pid;
839}
840
841static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
842        JNIEnv* env, jclass, jstring path) {
843    ScopedUtfChars path_native(env, path);
844    const char* path_cstr = path_native.c_str();
845    if (!path_cstr) {
846        RuntimeAbort(env, __LINE__, "path_cstr == NULL");
847    }
848    FileDescriptorWhitelist::Get()->Allow(path_cstr);
849}
850
851static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) {
852    // Zygote process unmount root storage space initially before every child processes are forked.
853    // Every forked child processes (include SystemServer) only mount their own root storage space
854    // and no need unmount storage operation in MountEmulatedStorage method.
855    // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
856
857    // See storage config details at http://source.android.com/tech/storage/
858    // Create private mount namespace shared by all children
859    if (unshare(CLONE_NEWNS) == -1) {
860        RuntimeAbort(env, __LINE__, "Failed to unshare()");
861        return;
862    }
863
864    // Mark rootfs as being a slave so that changes from default
865    // namespace only flow into our children.
866    if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
867        RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
868        return;
869    }
870
871    // Create a staging tmpfs that is shared by our children; they will
872    // bind mount storage into their respective private namespaces, which
873    // are isolated from each other.
874    const char* target_base = getenv("EMULATED_STORAGE_TARGET");
875    if (target_base != nullptr) {
876#define STRINGIFY_UID(x) __STRING(x)
877        if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
878                  "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
879            ALOGE("Failed to mount tmpfs to %s", target_base);
880            RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
881            return;
882        }
883#undef STRINGIFY_UID
884    }
885
886    UnmountTree("/storage");
887}
888
889static const JNINativeMethod gMethods[] = {
890    { "nativeSecurityInit", "()V",
891      (void *) com_android_internal_os_Zygote_nativeSecurityInit },
892    { "nativeForkAndSpecialize",
893      "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/String;)I",
894      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
895    { "nativeForkSystemServer", "(II[II[[IJJ)I",
896      (void *) com_android_internal_os_Zygote_nativeForkSystemServer },
897    { "nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
898      (void *) com_android_internal_os_Zygote_nativeAllowFileAcrossFork },
899    { "nativeUnmountStorageOnInit", "()V",
900      (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit },
901    { "nativePreApplicationInit", "()V",
902      (void *) com_android_internal_os_Zygote_nativePreApplicationInit }
903};
904
905int register_com_android_internal_os_Zygote(JNIEnv* env) {
906  gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
907  gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
908                                                   "(IZZLjava/lang/String;)V");
909
910  return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
911}
912}  // namespace android
913