com_android_internal_os_Zygote.cpp revision a0d2e6f4b7c9f3375d89b11532cb8e6e09b5c13b
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <list>
24#include <sstream>
25#include <string>
26
27#include <fcntl.h>
28#include <grp.h>
29#include <inttypes.h>
30#include <mntent.h>
31#include <paths.h>
32#include <signal.h>
33#include <stdlib.h>
34#include <sys/capability.h>
35#include <sys/cdefs.h>
36#include <sys/personality.h>
37#include <sys/prctl.h>
38#include <sys/resource.h>
39#include <sys/stat.h>
40#include <sys/types.h>
41#include <sys/utsname.h>
42#include <sys/wait.h>
43#include <unistd.h>
44
45#include <cutils/fs.h>
46#include <cutils/multiuser.h>
47#include <cutils/sched_policy.h>
48#include <private/android_filesystem_config.h>
49#include <utils/String8.h>
50#include <selinux/android.h>
51#include <processgroup/processgroup.h>
52
53#include "core_jni_helpers.h"
54#include "JNIHelp.h"
55#include "ScopedLocalRef.h"
56#include "ScopedPrimitiveArray.h"
57#include "ScopedUtfChars.h"
58#include "fd_utils-inl.h"
59
60#include "nativebridge/native_bridge.h"
61
62namespace {
63
64using android::String8;
65
// PID of the forked system server. Written by nativeForkSystemServer in the
// parent and compared against reaped children in SigChldHandler; stays 0
// until a system server has been forked.
static pid_t gSystemServerPid = 0;

// JNI name of the Zygote class looked up at registration time.
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// Global reference to the Zygote class, set in
// register_com_android_internal_os_Zygote.
static jclass gZygoteClass;
// Method ID of the static callPostForkChildHooks(int, boolean, String) hook,
// called in the child once it has been specialized.
static jmethodID gCallPostForkChildHooks;
71
// Storage view requested for a forked process; consumed by
// MountEmulatedStorage. Must match values in com.android.internal.os.Zygote.
enum MountExternalKind {
  MOUNT_EXTERNAL_NONE = 0,     // no storage source is selected
  MOUNT_EXTERNAL_DEFAULT = 1,  // source is /mnt/runtime/default
  MOUNT_EXTERNAL_READ = 2,     // source is /mnt/runtime/read
  MOUNT_EXTERNAL_WRITE = 3,    // source is /mnt/runtime/write
};
79
80static void RuntimeAbort(JNIEnv* env, int line, const char* msg) {
81  std::ostringstream oss;
82  oss << __FILE__ << ":" << line << ": " << msg;
83  env->FatalError(oss.str().c_str());
84}
85
86// This signal handler is for zygote mode, since the zygote must reap its children
87static void SigChldHandler(int /*signal_number*/) {
88  pid_t pid;
89  int status;
90
91  // It's necessary to save and restore the errno during this function.
92  // Since errno is stored per thread, changing it here modifies the errno
93  // on the thread on which this signal handler executes. If a signal occurs
94  // between a call and an errno check, it's possible to get the errno set
95  // here.
96  // See b/23572286 for extra information.
97  int saved_errno = errno;
98
99  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
100     // Log process-death status that we care about.  In general it is
101     // not safe to call LOG(...) from a signal handler because of
102     // possible reentrancy.  However, we know a priori that the
103     // current implementation of LOG() is safe to call from a SIGCHLD
104     // handler in the zygote process.  If the LOG() implementation
105     // changes its locking strategy or its use of syscalls within the
106     // lazy-init critical section, its use here may become unsafe.
107    if (WIFEXITED(status)) {
108      if (WEXITSTATUS(status)) {
109        ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
110      }
111    } else if (WIFSIGNALED(status)) {
112      if (WTERMSIG(status) != SIGKILL) {
113        ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
114      }
115      if (WCOREDUMP(status)) {
116        ALOGI("Process %d dumped core.", pid);
117      }
118    }
119
120    // If the just-crashed process is the system_server, bring down zygote
121    // so that it is restarted by init and system server will be restarted
122    // from there.
123    if (pid == gSystemServerPid) {
124      ALOGE("Exit zygote because system server (%d) has terminated", pid);
125      kill(getpid(), SIGKILL);
126    }
127  }
128
129  // Note that we shouldn't consider ECHILD an error because
130  // the secondary zygote might have no children left to wait for.
131  if (pid < 0 && errno != ECHILD) {
132    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
133  }
134
135  errno = saved_errno;
136}
137
138// Configures the SIGCHLD handler for the zygote process. This is configured
139// very late, because earlier in the runtime we may fork() and exec()
140// other processes, and we want to waitpid() for those rather than
141// have them be harvested immediately.
142//
143// This ends up being called repeatedly before each fork(), but there's
144// no real harm in that.
145static void SetSigChldHandler() {
146  struct sigaction sa;
147  memset(&sa, 0, sizeof(sa));
148  sa.sa_handler = SigChldHandler;
149
150  int err = sigaction(SIGCHLD, &sa, NULL);
151  if (err < 0) {
152    ALOGW("Error setting SIGCHLD handler: %s", strerror(errno));
153  }
154}
155
156// Sets the SIGCHLD handler back to default behavior in zygote children.
157static void UnsetSigChldHandler() {
158  struct sigaction sa;
159  memset(&sa, 0, sizeof(sa));
160  sa.sa_handler = SIG_DFL;
161
162  int err = sigaction(SIGCHLD, &sa, NULL);
163  if (err < 0) {
164    ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno));
165  }
166}
167
168// Calls POSIX setgroups() using the int[] object as an argument.
169// A NULL argument is tolerated.
170static void SetGids(JNIEnv* env, jintArray javaGids) {
171  if (javaGids == NULL) {
172    return;
173  }
174
175  ScopedIntArrayRO gids(env, javaGids);
176  if (gids.get() == NULL) {
177    RuntimeAbort(env, __LINE__, "Getting gids int array failed");
178  }
179  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
180  if (rc == -1) {
181    std::ostringstream oss;
182    oss << "setgroups failed: " << strerror(errno) << ", gids.size=" << gids.size();
183    RuntimeAbort(env, __LINE__, oss.str().c_str());
184  }
185}
186
187// Sets the resource limits via setrlimit(2) for the values in the
188// two-dimensional array of integers that's passed in. The second dimension
189// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
190// treated as an empty array.
191static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) {
192  if (javaRlimits == NULL) {
193    return;
194  }
195
196  rlimit rlim;
197  memset(&rlim, 0, sizeof(rlim));
198
199  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
200    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
201    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
202    if (javaRlimit.size() != 3) {
203      RuntimeAbort(env, __LINE__, "rlimits array must have a second dimension of size 3");
204    }
205
206    rlim.rlim_cur = javaRlimit[1];
207    rlim.rlim_max = javaRlimit[2];
208
209    int rc = setrlimit(javaRlimit[0], &rlim);
210    if (rc == -1) {
211      ALOGE("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur,
212            rlim.rlim_max);
213      RuntimeAbort(env, __LINE__, "setrlimit failed");
214    }
215  }
216}
217
218// The debug malloc library needs to know whether it's the zygote or a child.
219extern "C" int gMallocLeakZygoteChild;
220
221static void EnableKeepCapabilities(JNIEnv* env) {
222  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
223  if (rc == -1) {
224    RuntimeAbort(env, __LINE__, "prctl(PR_SET_KEEPCAPS) failed");
225  }
226}
227
228static void DropCapabilitiesBoundingSet(JNIEnv* env) {
229  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
230    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
231    if (rc == -1) {
232      if (errno == EINVAL) {
233        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
234              "your kernel is compiled with file capabilities support");
235      } else {
236        RuntimeAbort(env, __LINE__, "prctl(PR_CAPBSET_DROP) failed");
237      }
238    }
239  }
240}
241
242static void SetCapabilities(JNIEnv* env, int64_t permitted, int64_t effective) {
243  __user_cap_header_struct capheader;
244  memset(&capheader, 0, sizeof(capheader));
245  capheader.version = _LINUX_CAPABILITY_VERSION_3;
246  capheader.pid = 0;
247
248  __user_cap_data_struct capdata[2];
249  memset(&capdata, 0, sizeof(capdata));
250  capdata[0].effective = effective;
251  capdata[1].effective = effective >> 32;
252  capdata[0].permitted = permitted;
253  capdata[1].permitted = permitted >> 32;
254
255  if (capset(&capheader, &capdata[0]) == -1) {
256    ALOGE("capset(%" PRId64 ", %" PRId64 ") failed", permitted, effective);
257    RuntimeAbort(env, __LINE__, "capset failed");
258  }
259}
260
261static void SetSchedulerPolicy(JNIEnv* env) {
262  errno = -set_sched_policy(0, SP_DEFAULT);
263  if (errno != 0) {
264    ALOGE("set_sched_policy(0, SP_DEFAULT) failed");
265    RuntimeAbort(env, __LINE__, "set_sched_policy(0, SP_DEFAULT) failed");
266  }
267}
268
269static int UnmountTree(const char* path) {
270    size_t path_len = strlen(path);
271
272    FILE* fp = setmntent("/proc/mounts", "r");
273    if (fp == NULL) {
274        ALOGE("Error opening /proc/mounts: %s", strerror(errno));
275        return -errno;
276    }
277
278    // Some volumes can be stacked on each other, so force unmount in
279    // reverse order to give us the best chance of success.
280    std::list<std::string> toUnmount;
281    mntent* mentry;
282    while ((mentry = getmntent(fp)) != NULL) {
283        if (strncmp(mentry->mnt_dir, path, path_len) == 0) {
284            toUnmount.push_front(std::string(mentry->mnt_dir));
285        }
286    }
287    endmntent(fp);
288
289    for (auto path : toUnmount) {
290        if (umount2(path.c_str(), MNT_DETACH)) {
291            ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno));
292        }
293    }
294    return 0;
295}
296
297// Create a private mount namespace and bind mount appropriate emulated
298// storage for the given user.
299static bool MountEmulatedStorage(uid_t uid, jint mount_mode,
300        bool force_mount_namespace) {
301    // See storage config details at http://source.android.com/tech/storage/
302
303    String8 storageSource;
304    if (mount_mode == MOUNT_EXTERNAL_DEFAULT) {
305        storageSource = "/mnt/runtime/default";
306    } else if (mount_mode == MOUNT_EXTERNAL_READ) {
307        storageSource = "/mnt/runtime/read";
308    } else if (mount_mode == MOUNT_EXTERNAL_WRITE) {
309        storageSource = "/mnt/runtime/write";
310    } else if (!force_mount_namespace) {
311        // Sane default of no storage visible
312        return true;
313    }
314
315    // Create a second private mount namespace for our process
316    if (unshare(CLONE_NEWNS) == -1) {
317        ALOGW("Failed to unshare(): %s", strerror(errno));
318        return false;
319    }
320
321    if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage",
322            NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
323        ALOGW("Failed to mount %s to /storage: %s", storageSource.string(), strerror(errno));
324        return false;
325    }
326
327    // Mount user-specific symlink helper into place
328    userid_t user_id = multiuser_get_user_id(uid);
329    const String8 userSource(String8::format("/mnt/user/%d", user_id));
330    if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) {
331        return false;
332    }
333    if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self",
334            NULL, MS_BIND, NULL)) == -1) {
335        ALOGW("Failed to mount %s to /storage/self: %s", userSource.string(), strerror(errno));
336        return false;
337    }
338
339    return true;
340}
341
// Reports whether the running kernel is older than 3.4 on 32-bit ARM. Always
// false on other architectures, and false when the kernel release cannot be
// read or parsed.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major;
    int minor;
    if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
361
362// Utility to close down the Zygote socket file descriptors while
363// the child is still running as root with Zygote's privileges.  Each
364// descriptor (if any) is closed via dup2(), replacing it with a valid
365// (open) descriptor to /dev/null.
366
367static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) {
368  if (!fdsToClose) {
369    return;
370  }
371  jsize count = env->GetArrayLength(fdsToClose);
372  ScopedIntArrayRO ar(env, fdsToClose);
373  if (ar.get() == NULL) {
374      RuntimeAbort(env, __LINE__, "Bad fd array");
375  }
376  jsize i;
377  int devnull;
378  for (i = 0; i < count; i++) {
379    devnull = open("/dev/null", O_RDWR);
380    if (devnull < 0) {
381      ALOGE("Failed to open /dev/null: %s", strerror(errno));
382      RuntimeAbort(env, __LINE__, "Failed to open /dev/null");
383      continue;
384    }
385    ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno));
386    if (dup2(devnull, ar[i]) < 0) {
387      ALOGE("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno));
388      RuntimeAbort(env, __LINE__, "Failed dup2()");
389    }
390    close(devnull);
391  }
392}
393
394void SetThreadName(const char* thread_name) {
395  bool hasAt = false;
396  bool hasDot = false;
397  const char* s = thread_name;
398  while (*s) {
399    if (*s == '.') {
400      hasDot = true;
401    } else if (*s == '@') {
402      hasAt = true;
403    }
404    s++;
405  }
406  const int len = s - thread_name;
407  if (len < 15 || hasAt || !hasDot) {
408    s = thread_name;
409  } else {
410    s = thread_name + len - 15;
411  }
412  // pthread_setname_np fails rather than truncating long strings.
413  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
414  strlcpy(buf, s, sizeof(buf)-1);
415  errno = pthread_setname_np(pthread_self(), buf);
416  if (errno != 0) {
417    ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno));
418  }
419}
420
#ifdef ENABLE_SCHED_BOOST
// Writes the initial task load for processes forked from this one to
// /proc/<pid>/sched_init_task_load: "100" when |boost| is true, "0"
// otherwise. Failures are logged and otherwise ignored.
static void SetForkLoad(bool boost) {
  // set scheduler knob to boost forked processes
  pid_t currentPid = getpid();
  // fits at most "/proc/XXXXXXX/sched_init_task_load\0"
  char schedPath[35];
  // pid_t is signed, so format it as a signed int.
  snprintf(schedPath, sizeof(schedPath), "/proc/%d/sched_init_task_load",
           static_cast<int>(currentPid));
  int schedBoostFile = open(schedPath, O_WRONLY);
  if (schedBoostFile < 0) {
    ALOGW("Unable to set zygote scheduler boost");
    return;
  }

  // The value is written together with its terminating NUL byte.
  const char* value = boost ? "100" : "0";
  const size_t length = strlen(value) + 1;
  const ssize_t written = TEMP_FAILURE_RETRY(write(schedBoostFile, value, length));
  if (written != static_cast<ssize_t>(length)) {
    ALOGW("Unable to write zygote scheduler boost: %s",
          written < 0 ? strerror(errno) : "short write");
  }
  close(schedBoostFile);
}
#endif
441
442// The list of open zygote file descriptors.
443static FileDescriptorTable* gOpenFdTable = NULL;
444
445// Utility routine to fork zygote and specialize the child process.
446static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
447                                     jint debug_flags, jobjectArray javaRlimits,
448                                     jlong permittedCapabilities, jlong effectiveCapabilities,
449                                     jint mount_external,
450                                     jstring java_se_info, jstring java_se_name,
451                                     bool is_system_server, jintArray fdsToClose,
452                                     jstring instructionSet, jstring dataDir) {
453  SetSigChldHandler();
454
455#ifdef ENABLE_SCHED_BOOST
456  SetForkLoad(true);
457#endif
458
459  // Close any logging related FDs before we start evaluating the list of
460  // file descriptors.
461  __android_log_close();
462
463  // If this is the first fork for this zygote, create the open FD table.
464  // If it isn't, we just need to check whether the list of open files has
465  // changed (and it shouldn't in the normal case).
466  if (gOpenFdTable == NULL) {
467    gOpenFdTable = FileDescriptorTable::Create();
468    if (gOpenFdTable == NULL) {
469      RuntimeAbort(env, __LINE__, "Unable to construct file descriptor table.");
470    }
471  } else if (!gOpenFdTable->Restat()) {
472    RuntimeAbort(env, __LINE__, "Unable to restat file descriptor table.");
473  }
474
475  pid_t pid = fork();
476
477  if (pid == 0) {
478    // The child process.
479    gMallocLeakZygoteChild = 1;
480
481    // Clean up any descriptors which must be closed immediately
482    DetachDescriptors(env, fdsToClose);
483
484    // Re-open all remaining open file descriptors so that they aren't shared
485    // with the zygote across a fork.
486    if (!gOpenFdTable->ReopenOrDetach()) {
487      RuntimeAbort(env, __LINE__, "Unable to reopen whitelisted descriptors.");
488    }
489
490    // Keep capabilities across UID change, unless we're staying root.
491    if (uid != 0) {
492      EnableKeepCapabilities(env);
493    }
494
495    DropCapabilitiesBoundingSet(env);
496
497    bool use_native_bridge = !is_system_server && (instructionSet != NULL)
498        && android::NativeBridgeAvailable();
499    if (use_native_bridge) {
500      ScopedUtfChars isa_string(env, instructionSet);
501      use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
502    }
503    if (use_native_bridge && dataDir == NULL) {
504      // dataDir should never be null if we need to use a native bridge.
505      // In general, dataDir will never be null for normal applications. It can only happen in
506      // special cases (for isolated processes which are not associated with any app). These are
507      // launched by the framework and should not be emulated anyway.
508      use_native_bridge = false;
509      ALOGW("Native bridge will not be used because dataDir == NULL.");
510    }
511
512    if (!MountEmulatedStorage(uid, mount_external, use_native_bridge)) {
513      ALOGW("Failed to mount emulated storage: %s", strerror(errno));
514      if (errno == ENOTCONN || errno == EROFS) {
515        // When device is actively encrypting, we get ENOTCONN here
516        // since FUSE was mounted before the framework restarted.
517        // When encrypted device is booting, we get EROFS since
518        // FUSE hasn't been created yet by init.
519        // In either case, continue without external storage.
520      } else {
521        RuntimeAbort(env, __LINE__, "Cannot continue without emulated storage");
522      }
523    }
524
525    if (!is_system_server) {
526        int rc = createProcessGroup(uid, getpid());
527        if (rc != 0) {
528            if (rc == -EROFS) {
529                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
530            } else {
531                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
532            }
533        }
534    }
535
536    SetGids(env, javaGids);
537
538    SetRLimits(env, javaRlimits);
539
540    if (use_native_bridge) {
541      ScopedUtfChars isa_string(env, instructionSet);
542      ScopedUtfChars data_dir(env, dataDir);
543      android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
544    }
545
546    int rc = setresgid(gid, gid, gid);
547    if (rc == -1) {
548      ALOGE("setresgid(%d) failed: %s", gid, strerror(errno));
549      RuntimeAbort(env, __LINE__, "setresgid failed");
550    }
551
552    rc = setresuid(uid, uid, uid);
553    if (rc == -1) {
554      ALOGE("setresuid(%d) failed: %s", uid, strerror(errno));
555      RuntimeAbort(env, __LINE__, "setresuid failed");
556    }
557
558    if (NeedsNoRandomizeWorkaround()) {
559        // Work around ARM kernel ASLR lossage (http://b/5817320).
560        int old_personality = personality(0xffffffff);
561        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
562        if (new_personality == -1) {
563            ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
564        }
565    }
566
567    SetCapabilities(env, permittedCapabilities, effectiveCapabilities);
568
569    SetSchedulerPolicy(env);
570
571    const char* se_info_c_str = NULL;
572    ScopedUtfChars* se_info = NULL;
573    if (java_se_info != NULL) {
574        se_info = new ScopedUtfChars(env, java_se_info);
575        se_info_c_str = se_info->c_str();
576        if (se_info_c_str == NULL) {
577          RuntimeAbort(env, __LINE__, "se_info_c_str == NULL");
578        }
579    }
580    const char* se_name_c_str = NULL;
581    ScopedUtfChars* se_name = NULL;
582    if (java_se_name != NULL) {
583        se_name = new ScopedUtfChars(env, java_se_name);
584        se_name_c_str = se_name->c_str();
585        if (se_name_c_str == NULL) {
586          RuntimeAbort(env, __LINE__, "se_name_c_str == NULL");
587        }
588    }
589    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
590    if (rc == -1) {
591      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
592            is_system_server, se_info_c_str, se_name_c_str);
593      RuntimeAbort(env, __LINE__, "selinux_android_setcontext failed");
594    }
595
596    // Make it easier to debug audit logs by setting the main thread's name to the
597    // nice name rather than "app_process".
598    if (se_info_c_str == NULL && is_system_server) {
599      se_name_c_str = "system_server";
600    }
601    if (se_info_c_str != NULL) {
602      SetThreadName(se_name_c_str);
603    }
604
605    delete se_info;
606    delete se_name;
607
608    UnsetSigChldHandler();
609
610    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, debug_flags,
611                              is_system_server, instructionSet);
612    if (env->ExceptionCheck()) {
613      RuntimeAbort(env, __LINE__, "Error calling post fork hooks.");
614    }
615  } else if (pid > 0) {
616    // the parent process
617
618#ifdef ENABLE_SCHED_BOOST
619    // unset scheduler knob
620    SetForkLoad(false);
621#endif
622
623  }
624  return pid;
625}
626}  // anonymous namespace
627
628namespace android {
629
630static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
631        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
632        jint debug_flags, jobjectArray rlimits,
633        jint mount_external, jstring se_info, jstring se_name,
634        jintArray fdsToClose, jstring instructionSet, jstring appDataDir) {
635    jlong capabilities = 0;
636
637    // Grant CAP_WAKE_ALARM to the Bluetooth process.
638    // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client.
639    // TODO: consider making such functionality an RPC to netd.
640    if (multiuser_get_app_id(uid) == AID_BLUETOOTH) {
641      capabilities |= (1LL << CAP_WAKE_ALARM);
642      capabilities |= (1LL << CAP_NET_RAW);
643      capabilities |= (1LL << CAP_NET_BIND_SERVICE);
644    }
645
646    // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock"
647    bool gid_wakelock_found = false;
648    if (gid == AID_WAKELOCK) {
649      gid_wakelock_found = true;
650    } else if (gids != NULL) {
651      jsize gids_num = env->GetArrayLength(gids);
652      ScopedIntArrayRO ar(env, gids);
653      if (ar.get() == NULL) {
654        RuntimeAbort(env, __LINE__, "Bad gids array");
655      }
656      for (int i = 0; i < gids_num; i++) {
657        if (ar[i] == AID_WAKELOCK) {
658          gid_wakelock_found = true;
659          break;
660        }
661      }
662    }
663    if (gid_wakelock_found) {
664      capabilities |= (1LL << CAP_BLOCK_SUSPEND);
665    }
666
667    return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags,
668            rlimits, capabilities, capabilities, mount_external, se_info,
669            se_name, false, fdsToClose, instructionSet, appDataDir);
670}
671
672static jint com_android_internal_os_Zygote_nativeForkSystemServer(
673        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
674        jint debug_flags, jobjectArray rlimits, jlong permittedCapabilities,
675        jlong effectiveCapabilities) {
676  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
677                                      debug_flags, rlimits,
678                                      permittedCapabilities, effectiveCapabilities,
679                                      MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL,
680                                      NULL, NULL);
681  if (pid > 0) {
682      // The zygote process checks whether the child process has died or not.
683      ALOGI("System server process %d has been created", pid);
684      gSystemServerPid = pid;
685      // There is a slight window that the system server process has crashed
686      // but it went unnoticed because we haven't published its pid yet. So
687      // we recheck here just to make sure that all is well.
688      int status;
689      if (waitpid(pid, &status, WNOHANG) == pid) {
690          ALOGE("System server process %d has died. Restarting Zygote!", pid);
691          RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!");
692      }
693  }
694  return pid;
695}
696
697static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) {
698    // Zygote process unmount root storage space initially before every child processes are forked.
699    // Every forked child processes (include SystemServer) only mount their own root storage space
700    // and no need unmount storage operation in MountEmulatedStorage method.
701    // Zygote process does not utilize root storage spaces and unshares its mount namespace below.
702
703    // See storage config details at http://source.android.com/tech/storage/
704    // Create private mount namespace shared by all children
705    if (unshare(CLONE_NEWNS) == -1) {
706        RuntimeAbort(env, __LINE__, "Failed to unshare()");
707        return;
708    }
709
710    // Mark rootfs as being a slave so that changes from default
711    // namespace only flow into our children.
712    if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
713        RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE");
714        return;
715    }
716
717    // Create a staging tmpfs that is shared by our children; they will
718    // bind mount storage into their respective private namespaces, which
719    // are isolated from each other.
720    const char* target_base = getenv("EMULATED_STORAGE_TARGET");
721    if (target_base != nullptr) {
722#define STRINGIFY_UID(x) __STRING(x)
723        if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
724                  "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) {
725            ALOGE("Failed to mount tmpfs to %s", target_base);
726            RuntimeAbort(env, __LINE__, "Failed to mount tmpfs");
727            return;
728        }
729#undef STRINGIFY_UID
730    }
731
732    UnmountTree("/storage");
733}
734
735static const JNINativeMethod gMethods[] = {
736    { "nativeForkAndSpecialize",
737      "(II[II[[IILjava/lang/String;Ljava/lang/String;[ILjava/lang/String;Ljava/lang/String;)I",
738      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
739    { "nativeForkSystemServer", "(II[II[[IJJ)I",
740      (void *) com_android_internal_os_Zygote_nativeForkSystemServer },
741    { "nativeUnmountStorageOnInit", "()V",
742      (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit }
743};
744
745int register_com_android_internal_os_Zygote(JNIEnv* env) {
746  gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName));
747  gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks",
748                                                   "(IZLjava/lang/String;)V");
749
750  return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods));
751}
752}  // namespace android
753
754