1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <list>
24#include <sstream>
25#include <string>
26
27#include <fcntl.h>
28#include <grp.h>
29#include <inttypes.h>
30#include <malloc.h>
31#include <mntent.h>
32#include <paths.h>
33#include <signal.h>
34#include <stdlib.h>
35#include <sys/capability.h>
36#include <sys/cdefs.h>
37#include <sys/personality.h>
38#include <sys/prctl.h>
39#include <sys/resource.h>
40#include <sys/stat.h>
41#include <sys/time.h>
42#include <sys/types.h>
43#include <sys/utsname.h>
44#include <sys/wait.h>
45#include <unistd.h>
46
47#include "android-base/logging.h"
48#include <android-base/file.h>
49#include <android-base/stringprintf.h>
50#include <cutils/fs.h>
51#include <cutils/multiuser.h>
52#include <cutils/sched_policy.h>
53#include <private/android_filesystem_config.h>
54#include <utils/String8.h>
55#include <selinux/android.h>
56#include <seccomp_policy.h>
57#include <processgroup/processgroup.h>
58
59#include "core_jni_helpers.h"
60#include <nativehelper/JNIHelp.h>
61#include <nativehelper/ScopedLocalRef.h>
62#include <nativehelper/ScopedPrimitiveArray.h>
63#include <nativehelper/ScopedUtfChars.h>
64#include "fd_utils.h"
65
66#include "nativebridge/native_bridge.h"
67
68namespace {
69
70using android::String8;
71using android::base::StringPrintf;
72using android::base::WriteStringToFile;
73
74#define CREATE_ERROR(...) StringPrintf("%s:%d: ", __FILE__, __LINE__). \
75                              append(StringPrintf(__VA_ARGS__))
76
77static pid_t gSystemServerPid = 0;
78
79static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
80static jclass gZygoteClass;
81static jmethodID gCallPostForkChildHooks;
82
83static bool g_is_security_enforced = true;
84
// External-storage mount modes requested for a forked process.
// The numeric values must match those in com.android.internal.os.Zygote.
enum MountExternalKind {
  // No storage view is bind-mounted onto /storage.
  MOUNT_EXTERNAL_NONE    = 0,
  // Bind-mount the /mnt/runtime/default view.
  MOUNT_EXTERNAL_DEFAULT = 1,
  // Bind-mount the /mnt/runtime/read view.
  MOUNT_EXTERNAL_READ    = 2,
  // Bind-mount the /mnt/runtime/write view.
  MOUNT_EXTERNAL_WRITE   = 3,
};
92
93static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
94  std::ostringstream oss;
95  oss << __FILE__ << ":" << line << ": " << msg;
96  env->FatalError(oss.str().c_str());
97}
98
99// This signal handler is for zygote mode, since the zygote must reap its children
100static void SigChldHandler(int /*signal_number*/) {
101  pid_t pid;
102  int status;
103
104  // It's necessary to save and restore the errno during this function.
105  // Since errno is stored per thread, changing it here modifies the errno
106  // on the thread on which this signal handler executes. If a signal occurs
107  // between a call and an errno check, it's possible to get the errno set
108  // here.
109  // See b/23572286 for extra information.
110  int saved_errno = errno;
111
112  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
113     // Log process-death status that we care about.  In general it is
114     // not safe to call LOG(...) from a signal handler because of
115     // possible reentrancy.  However, we know a priori that the
116     // current implementation of LOG() is safe to call from a SIGCHLD
117     // handler in the zygote process.  If the LOG() implementation
118     // changes its locking strategy or its use of syscalls within the
119     // lazy-init critical section, its use here may become unsafe.
120    if (WIFEXITED(status)) {
121      ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
122    } else if (WIFSIGNALED(status)) {
123      ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
124      if (WCOREDUMP(status)) {
125        ALOGI("Process %d dumped core.", pid);
126      }
127    }
128
129    // If the just-crashed process is the system_server, bring down zygote
130    // so that it is restarted by init and system server will be restarted
131    // from there.
132    if (pid == gSystemServerPid) {
133      ALOGE("Exit zygote because system server (%d) has terminated", pid);
134      kill(getpid(), SIGKILL);
135    }
136  }
137
138  // Note that we shouldn't consider ECHILD an error because
139  // the secondary zygote might have no children left to wait for.
140  if (pid < 0 && errno != ECHILD) {
141    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
142  }
143
144  errno = saved_errno;
145}
146
147// Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is
148// configured very late, because earlier in the runtime we may fork() and
149// exec() other processes, and we want to waitpid() for those rather than
150// have them be harvested immediately.
151//
152// Ignore SIGHUP because all processes forked by the zygote are in the same
153// process group as the zygote and we don't want to be notified if we become
154// an orphaned group and have one or more stopped processes. This is not a
155// theoretical concern :
156// - we can become an orphaned group if one of our direct descendants forks
157//   and is subsequently killed before its children.
158// - crash_dump routinely STOPs the process it's tracing.
159//
160// See issues b/71965619 and b/25567761 for further details.
161//
162// This ends up being called repeatedly before each fork(), but there's
163// no real harm in that.
164static void SetSignalHandlers() {
165  struct sigaction sig_chld = {};
166  sig_chld.sa_handler = SigChldHandler;
167
168  if (sigaction(SIGCHLD, &sig_chld, NULL) < 0) {
169    ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
170  }
171
172  struct sigaction sig_hup = {};
173  sig_hup.sa_handler = SIG_IGN;
174  if (sigaction(SIGHUP, &sig_hup, NULL) < 0) {
175    ALOGW("Error setting SIGHUP handler: %s", strerror(errno));
176  }
177}
178
179// Sets the SIGCHLD handler back to default behavior in zygote children.
180static void UnsetChldSignalHandler() {
181  struct sigaction sa;
182  memset(&sa, 0, sizeof(sa));
183  sa.sa_handler = SIG_DFL;
184
185  if (sigaction(SIGCHLD, &sa, NULL) < 0) {
186    ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
187  }
188}
189
190// Calls POSIX setgroups() using the int[] object as an argument.
191// A NULL argument is tolerated.
192static bool SetGids(JNIEnv* env, jintArray javaGids, std::string* error_msg) {
193  if (javaGids == NULL) {
194    return true;
195  }
196
197  ScopedIntArrayRO gids(env, javaGids);
198  if (gids.get() == NULL) {
199    *error_msg = CREATE_ERROR("Getting gids int array failed");
200    return false;
201  }
202  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
203  if (rc == -1) {
204    *error_msg = CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size());
205    return false;
206  }
207
208  return true;
209}
210
211// Sets the resource limits via setrlimit(2) for the values in the
212// two-dimensional array of integers that's passed in. The second dimension
213// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
214// treated as an empty array.
215static bool SetRLimits(JNIEnv* env, jobjectArray javaRlimits, std::string* error_msg) {
216  if (javaRlimits == NULL) {
217    return true;
218  }
219
220  rlimit rlim;
221  memset(&rlim, 0, sizeof(rlim));
222
223  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
224    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
225    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
226    if (javaRlimit.size() != 3) {
227      *error_msg = CREATE_ERROR("rlimits array must have a second dimension of size 3");
228      return false;
229    }
230
231    rlim.rlim_cur = javaRlimit[1];
232    rlim.rlim_max = javaRlimit[2];
233
234    int rc = setrlimit(javaRlimit[0], &rlim);
235    if (rc == -1) {
236      *error_msg = CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur,
237            rlim.rlim_max);
238      return false;
239    }
240  }
241
242  return true;
243}
244
245// The debug malloc library needs to know whether it's the zygote or a child.
246extern "C" int gMallocLeakZygoteChild;
247
248static void PreApplicationInit() {
249  // The child process sets this to indicate it's not the zygote.
250  gMallocLeakZygoteChild = 1;
251
252  // Set the jemalloc decay time to 1.
253  mallopt(M_DECAY_TIME, 1);
254}
255
256static void SetUpSeccompFilter(uid_t uid) {
257  if (!g_is_security_enforced) {
258    ALOGI("seccomp disabled by setenforce 0");
259    return;
260  }
261
262  // Apply system or app filter based on uid.
263  if (uid >= AID_APP_START) {
264    set_app_seccomp_filter();
265  } else {
266    set_system_seccomp_filter();
267  }
268}
269
270static bool EnableKeepCapabilities(std::string* error_msg) {
271  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
272  if (rc == -1) {
273    *error_msg = CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno));
274    return false;
275  }
276  return true;
277}
278
279static bool DropCapabilitiesBoundingSet(std::string* error_msg) {
280  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
281    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
282    if (rc == -1) {
283      if (errno == EINVAL) {
284        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
285              "your kernel is compiled with file capabilities support");
286      } else {
287        *error_msg = CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno));
288        return false;
289      }
290    }
291  }
292  return true;
293}
294
295static bool SetInheritable(uint64_t inheritable, std::string* error_msg) {
296  __user_cap_header_struct capheader;
297  memset(&capheader, 0, sizeof(capheader));
298  capheader.version = _LINUX_CAPABILITY_VERSION_3;
299  capheader.pid = 0;
300
301  __user_cap_data_struct capdata[2];
302  if (capget(&capheader, &capdata[0]) == -1) {
303    *error_msg = CREATE_ERROR("capget failed: %s", strerror(errno));
304    return false;
305  }
306
307  capdata[0].inheritable = inheritable;
308  capdata[1].inheritable = inheritable >> 32;
309
310  if (capset(&capheader, &capdata[0]) == -1) {
311    *error_msg = CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno));
312    return false;
313  }
314
315  return true;
316}
317
318static bool SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable,
319                            std::string* error_msg) {
320  __user_cap_header_struct capheader;
321  memset(&capheader, 0, sizeof(capheader));
322  capheader.version = _LINUX_CAPABILITY_VERSION_3;
323  capheader.pid = 0;
324
325  __user_cap_data_struct capdata[2];
326  memset(&capdata, 0, sizeof(capdata));
327  capdata[0].effective = effective;
328  capdata[1].effective = effective >> 32;
329  capdata[0].permitted = permitted;
330  capdata[1].permitted = permitted >> 32;
331  capdata[0].inheritable = inheritable;
332  capdata[1].inheritable = inheritable >> 32;
333
334  if (capset(&capheader, &capdata[0]) == -1) {
335    *error_msg = CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") "
336                              "failed: %s", permitted, effective, inheritable, strerror(errno));
337    return false;
338  }
339  return true;
340}
341
342static bool SetSchedulerPolicy(std::string* error_msg) {
343  errno = -set_sched_policy(0, SP_DEFAULT);
344  if (errno != 0) {
345    *error_msg = CREATE_ERROR("set_sched_policy(0, SP_DEFAULT) failed: %s", strerror(errno));
346    return false;
347  }
348  return true;
349}
350
351static int UnmountTree(const char* path) {
352    size_t path_len = strlen(path);
353
354    FILE* fp = setmntent("/proc/mounts", "r");
355    if (fp == NULL) {
356        ALOGE("Error opening /proc/mounts: %s", strerror(errno));
357        return -errno;
358    }
359
360    // Some volumes can be stacked on each other, so force unmount in
361    // reverse order to give us the best chance of success.
362    std::list<std::string> toUnmount;
363    mntent* mentry;
364    while ((mentry = getmntent(fp)) != NULL) {
365        if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
366            toUnmount.push_front(std::string(mentry->mnt_dir));
367        }
368    }
369    endmntent(fp);
370
371    for (auto path : toUnmount) {
372        if (umount2(path.c_str(), MNT_DETACH)) {
373            ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
374        }
375    }
376    return 0;
377}
378
379// Create a private mount namespace and bind mount appropriate emulated
380// storage for the given user.
381static bool MountEmulatedStorage(uid_t uid, jint mount_mode,
382        bool force_mount_namespace, std::string* error_msg) {
383    // See storage config details at http://source.android.com/tech/storage/
384
385    String8 storageSource;
386    if (mount_mode == MOUNT_EXTERNAL_DEFAULT) {
387        storageSource = "/mnt/runtime/default";
388    } else if (mount_mode == MOUNT_EXTERNAL_READ) {
389        storageSource = "/mnt/runtime/read";
390    } else if (mount_mode == MOUNT_EXTERNAL_WRITE) {
391        storageSource = "/mnt/runtime/write";
392    } else if (!force_mount_namespace) {
393        // Sane default of no storage visible
394        return true;
395    }
396
397    // Create a second private mount namespace for our process
398    if (unshare(CLONE_NEWNS) == -1) {
399        *error_msg = CREATE_ERROR("Failed to unshare(): %s", strerror(errno));
400        return false;
401    }
402
403    // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE.
404    if (mount_mode == MOUNT_EXTERNAL_NONE) {
405        return true;
406    }
407
408    if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage",
409            NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
410        *error_msg = CREATE_ERROR("Failed to mount %s to /storage: %s",
411                                  storageSource.string(),
412                                  strerror(errno));
413        return false;
414    }
415
416    // Mount user-specific symlink helper into place
417    userid_t user_id = multiuser_get_user_id(uid);
418    const String8 userSource(String8::format("/mnt/user/%d", user_id));
419    if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) {
420        *error_msg = CREATE_ERROR("fs_prepare_dir failed on %s", userSource.string());
421        return false;
422    }
423    if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self",
424            NULL, MS_BIND, NULL)) == -1) {
425        *error_msg = CREATE_ERROR("Failed to mount %s to /storage/self: %s",
426                                  userSource.string(),
427                                  strerror(errno));
428        return false;
429    }
430
431    return true;
432}
433
// Reports whether the ADDR_NO_RANDOMIZE personality workaround is required.
// Only 32-bit ARM builds can answer true, and only when the running kernel's
// release string parses as a version older than 3.4. If the release cannot be
// obtained or parsed, the answer is false.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major = 0;
    int minor = 0;
    if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
453
454// Utility to close down the Zygote socket file descriptors while
455// the child is still running as root with Zygote's privileges.  Each
456// descriptor (if any) is closed via dup2(), replacing it with a valid
457// (open) descriptor to /dev/null.
458
459static bool DetachDescriptors(JNIEnv* env, jintArray fdsToClose, std::string* error_msg) {
460  if (!fdsToClose) {
461    return true;
462  }
463  jsize count = env->GetArrayLength(fdsToClose);
464  ScopedIntArrayRO ar(env, fdsToClose);
465  if (ar.get() == NULL) {
466    *error_msg = "Bad fd array";
467    return false;
468  }
469  jsize i;
470  int devnull;
471  for (i = 0; i < count; i++) {
472    devnull = open("/dev/null", O_RDWR);
473    if (devnull < 0) {
474      *error_msg = std::string("Failed to open /dev/null: ").append(strerror(errno));
475      return false;
476    }
477    ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno));
478    if (dup2(devnull, ar[i]) < 0) {
479      *error_msg = StringPrintf("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno));
480      return false;
481    }
482    close(devnull);
483  }
484  return true;
485}
486
487void SetThreadName(const char* thread_name) {
488  bool hasAt = false;
489  bool hasDot = false;
490  const char* s = thread_name;
491  while (*s) {
492    if (*s == '.') {
493      hasDot = true;
494    } else if (*s == '@') {
495      hasAt = true;
496    }
497    s++;
498  }
499  const int len = s - thread_name;
500  if (len < 15 || hasAt || !hasDot) {
501    s = thread_name;
502  } else {
503    s = thread_name + len - 15;
504  }
505  // pthread_setname_np fails rather than truncating long strings.
506  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
507  strlcpy(buf, s, sizeof(buf)-1);
508  errno = pthread_setname_np(pthread_self(), buf);
509  if (errno != 0) {
510    ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
511  }
512  // Update base::logging default tag.
513  android::base::SetDefaultTag(buf);
514}
515
516// The list of open zygote file descriptors.
517static FileDescriptorTable* gOpenFdTable = NULL;
518
519static bool FillFileDescriptorVector(JNIEnv* env,
520                                     jintArray java_fds,
521                                     std::vector<int>* fds,
522                                     std::string* error_msg) {
523  CHECK(fds != nullptr);
524  if (java_fds != nullptr) {
525    ScopedIntArrayRO ar(env, java_fds);
526    if (ar.get() == nullptr) {
527      *error_msg = "Bad fd array";
528      return false;
529    }
530    fds->reserve(ar.size());
531    for (size_t i = 0; i < ar.size(); ++i) {
532      fds->push_back(ar[i]);
533    }
534  }
535  return true;
536}
537
538// Utility routine to fork zygote and specialize the child process.
539static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
540                                     jint runtime_flags, jobjectArray javaRlimits,
541                                     jlong permittedCapabilities, jlong effectiveCapabilities,
542                                     jint mount_external,
543                                     jstring java_se_info, jstring java_se_name,
544                                     bool is_system_server, jintArray fdsToClose,
545                                     jintArray fdsToIgnore, bool is_child_zygote,
546                                     jstring instructionSet, jstring dataDir) {
547  SetSignalHandlers();
548
549  sigset_t sigchld;
550  sigemptyset(&sigchld);
551  sigaddset(&sigchld, SIGCHLD);
552
553  auto fail_fn = [env, java_se_name, is_system_server](const std::string& msg)
554      __attribute__ ((noreturn)) {
555    const char* se_name_c_str = nullptr;
556    std::unique_ptr<ScopedUtfChars> se_name;
557    if (java_se_name != nullptr) {
558      se_name.reset(new ScopedUtfChars(env, java_se_name));
559      se_name_c_str = se_name->c_str();
560    }
561    if (se_name_c_str == nullptr && is_system_server) {
562      se_name_c_str = "system_server";
563    }
564    const std::string& error_msg = (se_name_c_str == nullptr)
565        ? msg
566        : StringPrintf("(%s) %s", se_name_c_str, msg.c_str());
567    env->FatalError(error_msg.c_str());
568    __builtin_unreachable();
569  };
570
571  // Temporarily block SIGCHLD during forks. The SIGCHLD handler might
572  // log, which would result in the logging FDs we close being reopened.
573  // This would cause failures because the FDs are not whitelisted.
574  //
575  // Note that the zygote process is single threaded at this point.
576  if (sigprocmask(SIG_BLOCK, &sigchld, nullptr) == -1) {
577    fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
578  }
579
580  // Close any logging related FDs before we start evaluating the list of
581  // file descriptors.
582  __android_log_close();
583
584  std::string error_msg;
585
586  // If this is the first fork for this zygote, create the open FD table.
587  // If it isn't, we just need to check whether the list of open files has
588  // changed (and it shouldn't in the normal case).
589  std::vector<int> fds_to_ignore;
590  if (!FillFileDescriptorVector(env, fdsToIgnore, &fds_to_ignore, &error_msg)) {
591    fail_fn(error_msg);
592  }
593  if (gOpenFdTable == NULL) {
594    gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, &error_msg);
595    if (gOpenFdTable == NULL) {
596      fail_fn(error_msg);
597    }
598  } else if (!gOpenFdTable->Restat(fds_to_ignore, &error_msg)) {
599    fail_fn(error_msg);
600  }
601
602  pid_t pid = fork();
603
604  if (pid == 0) {
605    PreApplicationInit();
606
607    // Clean up any descriptors which must be closed immediately
608    if (!DetachDescriptors(env, fdsToClose, &error_msg)) {
609      fail_fn(error_msg);
610    }
611
612    // Re-open all remaining open file descriptors so that they aren't shared
613    // with the zygote across a fork.
614    if (!gOpenFdTable->ReopenOrDetach(&error_msg)) {
615      fail_fn(error_msg);
616    }
617
618    if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
619      fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
620    }
621
622    // Keep capabilities across UID change, unless we're staying root.
623    if (uid != 0) {
624      if (!EnableKeepCapabilities(&error_msg)) {
625        fail_fn(error_msg);
626      }
627    }
628
629    if (!SetInheritable(permittedCapabilities, &error_msg)) {
630      fail_fn(error_msg);
631    }
632    if (!DropCapabilitiesBoundingSet(&error_msg)) {
633      fail_fn(error_msg);
634    }
635
636    bool use_native_bridge = !is_system_server && (instructionSet != NULL)
637        && android::NativeBridgeAvailable();
638    if (use_native_bridge) {
639      ScopedUtfChars isa_string(env, instructionSet);
640      use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
641    }
642    if (use_native_bridge && dataDir == NULL) {
643      // dataDir should never be null if we need to use a native bridge.
644      // In general, dataDir will never be null for normal applications. It can only happen in
645      // special cases (for isolated processes which are not associated with any app). These are
646      // launched by the framework and should not be emulated anyway.
647      use_native_bridge = false;
648      ALOGW("Native bridge will not be used because dataDir == NULL.");
649    }
650
651    if (!MountEmulatedStorage(uid, mount_external, use_native_bridge, &error_msg)) {
652      ALOGW("Failed to mount emulated storage: %s (%s)", error_msg.c_str(), strerror(errno));
653      if (errno == ENOTCONN || errno == EROFS) {
654        // When device is actively encrypting, we get ENOTCONN here
655        // since FUSE was mounted before the framework restarted.
656        // When encrypted device is booting, we get EROFS since
657        // FUSE hasn't been created yet by init.
658        // In either case, continue without external storage.
659      } else {
660        fail_fn(error_msg);
661      }
662    }
663
664    // If this zygote isn't root, it won't be able to create a process group,
665    // since the directory is owned by root.
666    if (!is_system_server && getuid() == 0) {
667        int rc = createProcessGroup(uid, getpid());
668        if (rc != 0) {
669            if (rc == -EROFS) {
670                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
671            } else {
672                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
673            }
674        }
675    }
676
677    std::string error_msg;
678    if (!SetGids(env, javaGids, &error_msg)) {
679      fail_fn(error_msg);
680    }
681
682    if (!SetRLimits(env, javaRlimits, &error_msg)) {
683      fail_fn(error_msg);
684    }
685
686    if (use_native_bridge) {
687      ScopedUtfChars isa_string(env, instructionSet);
688      ScopedUtfChars data_dir(env, dataDir);
689      android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
690    }
691
692    int rc = setresgid(gid, gid, gid);
693    if (rc == -1) {
694      fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno)));
695    }
696
697    // Must be called when the new process still has CAP_SYS_ADMIN, in this case, before changing
698    // uid from 0, which clears capabilities.  The other alternative is to call
699    // prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that breaks SELinux domain transition (see
700    // b/71859146).  As the result, privileged syscalls used below still need to be accessible in
701    // app process.
702    SetUpSeccompFilter(uid);
703
704    rc = setresuid(uid, uid, uid);
705    if (rc == -1) {
706      fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno)));
707    }
708
709    if (NeedsNoRandomizeWorkaround()) {
710        // Work around ARM kernel ASLR lossage (http://b/5817320).
711        int old_personality = personality(0xffffffff);
712        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
713        if (new_personality == -1) {
714            ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
715        }
716    }
717
718    if (!SetCapabilities(permittedCapabilities, effectiveCapabilities, permittedCapabilities,
719                         &error_msg)) {
720      fail_fn(error_msg);
721    }
722
723    if (!SetSchedulerPolicy(&error_msg)) {
724      fail_fn(error_msg);
725    }
726
727    const char* se_info_c_str = NULL;
728    ScopedUtfChars* se_info = NULL;
729    if (java_se_info != NULL) {
730        se_info = new ScopedUtfChars(env, java_se_info);
731        se_info_c_str = se_info->c_str();
732        if (se_info_c_str == NULL) {
733          fail_fn("se_info_c_str == NULL");
734        }
735    }
736    const char* se_name_c_str = NULL;
737    ScopedUtfChars* se_name = NULL;
738    if (java_se_name != NULL) {
739        se_name = new ScopedUtfChars(env, java_se_name);
740        se_name_c_str = se_name->c_str();
741        if (se_name_c_str == NULL) {
742          fail_fn("se_name_c_str == NULL");
743        }
744    }
745    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
746    if (rc == -1) {
747      fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
748            is_system_server, se_info_c_str, se_name_c_str));
749    }
750
751    // Make it easier to debug audit logs by setting the main thread's name to the
752    // nice name rather than "app_process".
753    if (se_name_c_str == NULL && is_system_server) {
754      se_name_c_str = "system_server";
755    }
756    if (se_name_c_str != NULL) {
757      SetThreadName(se_name_c_str);
758    }
759
760    delete se_info;
761    delete se_name;
762
763    // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers).
764    UnsetChldSignalHandler();
765
766    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags,
767                              is_system_server, is_child_zygote, instructionSet);
768    if (env->ExceptionCheck()) {
769      fail_fn("Error calling post fork hooks.");
770    }
771  } else if (pid > 0) {
772    // the parent process
773
774    // We blocked SIGCHLD prior to a fork, we unblock it here.
775    if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) {
776      fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno)));
777    }
778  }
779  return pid;
780}
781
782static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) {
783    __user_cap_header_struct capheader;
784    memset(&capheader, 0, sizeof(capheader));
785    capheader.version = _LINUX_CAPABILITY_VERSION_3;
786    capheader.pid = 0;
787
788    __user_cap_data_struct capdata[2];
789    if (capget(&capheader, &capdata[0]) == -1) {
790        ALOGE("capget failed: %s", strerror(errno));
791        RuntimeAbort(env, __LINE__, "capget failed");
792    }
793
794    return capdata[0].effective |
795           (static_cast<uint64_t>(capdata[1].effective) << 32);
796}
797}  // anonymous namespace
798
799namespace android {
800
801static void com_android_internal_os_Zygote_nativeSecurityInit(JNIEnv*, jclass) {
802  // security_getenforce is not allowed on app process. Initialize and cache the value before
803  // zygote forks.
804  g_is_security_enforced = security_getenforce();
805}
806
807static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) {
808  PreApplicationInit();
809}
810
811static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
812        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
813        jint runtime_flags, jobjectArray rlimits,
814        jint mount_external, jstring se_info, jstring se_name,
815        jintArray fdsToClose, jintArray fdsToIgnore, jboolean is_child_zygote,
816        jstring instructionSet, jstring appDataDir) {
817    jlong capabilities = 0;
818
819    // Grant CAP_WAKE_ALARM to the Bluetooth process.
820    // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client.
821    // Grant CAP_SYS_NICE to allow Bluetooth to set RT priority for
822    // audio-related threads.
823    // TODO: consider making such functionality an RPC to netd.
824    if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
825      capabilities |= (1LL << CAP_WAKE_ALARM);
826      capabilities |= (1LL << CAP_NET_RAW);
827      capabilities |= (1LL << CAP_NET_BIND_SERVICE);
828      capabilities |= (1LL << CAP_SYS_NICE);
829    }
830
831    // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
832    bool gid_wakelock_found = false;
833    if (gid == AID_WAKELOCK) {
834      gid_wakelock_found = true;
835    } else if (gids != NULL) {
836      jsize gids_num = env->GetArrayLength(gids);
837      ScopedIntArrayRO ar(env, gids);
838      if (ar.get() == NULL) {
839        RuntimeAbort(env, __LINE__, "Bad gids array");
840      }
841      for (int i = 0; i < gids_num; i++) {
842        if (ar[i] == AID_WAKELOCK) {
843          gid_wakelock_found = true;
844          break;
845        }
846      }
847    }
848    if (gid_wakelock_found) {
849      capabilities |= (1LL << CAP_BLOCK_SUSPEND);
850    }
851
852    // If forking a child zygote process, that zygote will need to be able to change
853    // the UID and GID of processes it forks, as well as drop those capabilities.
854    if (is_child_zygote) {
855      capabilities |= (1LL << CAP_SETUID);
856      capabilities |= (1LL << CAP_SETGID);
857      capabilities |= (1LL << CAP_SETPCAP);
858    }
859
860    // Containers run without some capabilities, so drop any caps that are not
861    // available.
862    capabilities &= GetEffectiveCapabilityMask(env);
863
864    return ForkAndSpecializeCommon(env, uid, gid, gids, runtime_flags,
865            rlimits, capabilities, capabilities, mount_external, se_info,
866            se_name, false, fdsToClose, fdsToIgnore, is_child_zygote == JNI_TRUE,
867            instructionSet, appDataDir);
868}
869
870static jint com_android_internal_os_Zygote_nativeForkSystemServer(
871        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
872        jint runtime_flags, jobjectArray rlimits, jlong permittedCapabilities,
873        jlong effectiveCapabilities) {
874  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
875                                      runtime_flags, rlimits,
876                                      permittedCapabilities, effectiveCapabilities,
877                                      MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL,
878                                      NULL, false, NULL, NULL);
879  if (pid > 0) {
880      // The zygote process checks whether the child process has died or not.
881      ALOGI("System server process %d has been created", pid);
882      gSystemServerPid = pid;
883      // There is a slight window that the system server process has crashed
884      // but it went unnoticed because we haven't published its pid yet. So
885      // we recheck here just to make sure that all is well.
886      int status;
887      if (waitpid(pid, &status, WNOHANG) == pid) {
888          ALOGE("System server process %d has died. Restarting Zygote!", pid);
889          RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
890      }
891
892      // Assign system_server to the correct memory cgroup.
893      // Not all devices mount /dev/memcg so check for the file first
894      // to avoid unnecessarily printing errors and denials in the logs.
895      if (!access("/dev/memcg/system/tasks", F_OK) &&
896                !WriteStringToFile(StringPrintf("%d", pid), "/dev/memcg/system/tasks")) {
897        ALOGE("couldn't write %d to /dev/memcg/system/tasks", pid);
898      }
899  }
900  return pid;
901}
902
903static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork(
904        JNIEnv* env, jclass, jstring path) {
905    ScopedUtfChars path_native(env, path);
906    const char* path_cstr = path_native.c_str();
907    if (!path_cstr) {
908        RuntimeAbort(env, __LINE__, "path_cstr == NULL");
909    }
910    FileDescriptorWhitelist::Get()->Allow(path_cstr);
911}
912
913static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) {
914    // Zygote process unmount root storage space initially before every child processes are forked.
915    // Every forked child processes (include SystemServer) only mount their own root storage space
916    // and no need unmount storage operation in MountEmulatedStorage method.
917    // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
918
919    // See storage config details at http://source.android.com/tech/storage/
920    // Create private mount namespace shared by all children
921    if (unshare(CLONE_NEWNS) == -1) {
922        RuntimeAbort(env, __LINE__, "Failed to unshare()");
923        return;
924    }
925
926    // Mark rootfs as being a slave so that changes from default
927    // namespace only flow into our children.
928    if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
929        RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
930        return;
931    }
932
933    // Create a staging tmpfs that is shared by our children; they will
934    // bind mount storage into their respective private namespaces, which
935    // are isolated from each other.
936    const char* target_base = getenv("EMULATED_STORAGE_TARGET");
937    if (target_base != nullptr) {
938#define STRINGIFY_UID(x) __STRING(x)
939        if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
940                  "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
941            ALOGE("Failed to mount tmpfs to %s", target_base);
942            RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
943            return;
944        }
945#undef STRINGIFY_UID
946    }
947
948    UnmountTree("/storage");
949}
950
951static const JNINativeMethod gMethods[] = {
952    { "nativeSecurityInit", "()V",
953      (void *) com_android_internal_os_Zygote_nativeSecurityInit },
954    { "nativeForkAndSpecialize",
955      "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/String;)I",
956      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
957    { "nativeForkSystemServer", "(II[II[[IJJ)I",
958      (void *) com_android_internal_os_Zygote_nativeForkSystemServer },
959    { "nativeAllowFileAcrossFork", "(Ljava/lang/String;)V",
960      (void *) com_android_internal_os_Zygote_nativeAllowFileAcrossFork },
961    { "nativeUnmountStorageOnInit", "()V",
962      (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit },
963    { "nativePreApplicationInit", "()V",
964      (void *) com_android_internal_os_Zygote_nativePreApplicationInit }
965};
966
967int register_com_android_internal_os_Zygote(JNIEnv* env) {
968  gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
969  gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
970                                                   "(IZZLjava/lang/String;)V");
971
972  return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
973}
974}  // namespace android
975