com_android_internal_os_Zygote.cpp revision 629dc1801331ce89e8ee0ff7ee5dcde1d7512417
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <grp.h>
24#include <fcntl.h>
25#include <paths.h>
26#include <signal.h>
27#include <stdlib.h>
28#include <unistd.h>
29#include <sys/capability.h>
30#include <sys/personality.h>
31#include <sys/prctl.h>
32#include <sys/resource.h>
33#include <sys/stat.h>
34#include <sys/types.h>
35#include <sys/utsname.h>
36#include <sys/wait.h>
37
38
39#include <cutils/fs.h>
40#include <cutils/multiuser.h>
41#include <cutils/sched_policy.h>
42#include <private/android_filesystem_config.h>
43#include <utils/String8.h>
44#include <selinux/android.h>
45#include <processgroup/processgroup.h>
46#include <inttypes.h>
47
48#include "android_runtime/AndroidRuntime.h"
49#include "JNIHelp.h"
50#include "ScopedLocalRef.h"
51#include "ScopedPrimitiveArray.h"
52#include "ScopedUtfChars.h"
53
54namespace {
55
56using android::String8;
57
58static pid_t gSystemServerPid = 0;
59
60static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
61static jclass gZygoteClass;
62static jmethodID gCallPostForkChildHooks;
63
// External-storage mount modes handed down from the Java layer.
// The numeric values must match those in com.android.internal.os.Zygote.
enum MountExternalKind {
  // Nothing is mounted; MountEmulatedStorage succeeds immediately.
  MOUNT_EXTERNAL_NONE = 0,
  // Not handled by MountEmulatedStorage (reported as unsupported there).
  MOUNT_EXTERNAL_SINGLEUSER = 1,
  // Bind-mount only the calling user's directory.
  MOUNT_EXTERNAL_MULTIUSER = 2,
  // Bind-mount the whole emulated storage tree for all users.
  MOUNT_EXTERNAL_MULTIUSER_ALL = 3,
};
71
72static void RuntimeAbort(JNIEnv* env) {
73  env->FatalError("RuntimeAbort");
74}
75
76// This signal handler is for zygote mode, since the zygote must reap its children
77static void SigChldHandler(int /*signal_number*/) {
78  pid_t pid;
79  int status;
80
81  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
82     // Log process-death status that we care about.  In general it is
83     // not safe to call LOG(...) from a signal handler because of
84     // possible reentrancy.  However, we know a priori that the
85     // current implementation of LOG() is safe to call from a SIGCHLD
86     // handler in the zygote process.  If the LOG() implementation
87     // changes its locking strategy or its use of syscalls within the
88     // lazy-init critical section, its use here may become unsafe.
89    if (WIFEXITED(status)) {
90      if (WEXITSTATUS(status)) {
91        ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
92      }
93    } else if (WIFSIGNALED(status)) {
94      if (WTERMSIG(status) != SIGKILL) {
95        ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
96      }
97      if (WCOREDUMP(status)) {
98        ALOGI("Process %d dumped core.", pid);
99      }
100    }
101
102    // If the just-crashed process is the system_server, bring down zygote
103    // so that it is restarted by init and system server will be restarted
104    // from there.
105    if (pid == gSystemServerPid) {
106      ALOGE("Exit zygote because system server (%d) has terminated");
107      kill(getpid(), SIGKILL);
108    }
109  }
110
111  // Note that we shouldn't consider ECHILD an error because
112  // the secondary zygote might have no children left to wait for.
113  if (pid < 0 && errno != ECHILD) {
114    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
115  }
116}
117
118// Configures the SIGCHLD handler for the zygote process. This is configured
119// very late, because earlier in the runtime we may fork() and exec()
120// other processes, and we want to waitpid() for those rather than
121// have them be harvested immediately.
122//
123// This ends up being called repeatedly before each fork(), but there's
124// no real harm in that.
125static void SetSigChldHandler() {
126  struct sigaction sa;
127  memset(&sa, 0, sizeof(sa));
128  sa.sa_handler = SigChldHandler;
129
130  int err = sigaction(SIGCHLD, &sa, NULL);
131  if (err < 0) {
132    ALOGW("Error setting SIGCHLD handler: %d", errno);
133  }
134}
135
136// Sets the SIGCHLD handler back to default behavior in zygote children.
137static void UnsetSigChldHandler() {
138  struct sigaction sa;
139  memset(&sa, 0, sizeof(sa));
140  sa.sa_handler = SIG_DFL;
141
142  int err = sigaction(SIGCHLD, &sa, NULL);
143  if (err < 0) {
144    ALOGW("Error unsetting SIGCHLD handler: %d", errno);
145  }
146}
147
148// Calls POSIX setgroups() using the int[] object as an argument.
149// A NULL argument is tolerated.
150static void SetGids(JNIEnv* env, jintArray javaGids) {
151  if (javaGids == NULL) {
152    return;
153  }
154
155  ScopedIntArrayRO gids(env, javaGids);
156  if (gids.get() == NULL) {
157      RuntimeAbort(env);
158  }
159  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
160  if (rc == -1) {
161    ALOGE("setgroups failed");
162    RuntimeAbort(env);
163  }
164}
165
166// Sets the resource limits via setrlimit(2) for the values in the
167// two-dimensional array of integers that's passed in. The second dimension
168// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
169// treated as an empty array.
170static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) {
171  if (javaRlimits == NULL) {
172    return;
173  }
174
175  rlimit rlim;
176  memset(&rlim, 0, sizeof(rlim));
177
178  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
179    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
180    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
181    if (javaRlimit.size() != 3) {
182      ALOGE("rlimits array must have a second dimension of size 3");
183      RuntimeAbort(env);
184    }
185
186    rlim.rlim_cur = javaRlimit[1];
187    rlim.rlim_max = javaRlimit[2];
188
189    int rc = setrlimit(javaRlimit[0], &rlim);
190    if (rc == -1) {
191      ALOGE("setrlimit(%d, {%d, %d}) failed", javaRlimit[0], rlim.rlim_cur, rlim.rlim_max);
192      RuntimeAbort(env);
193    }
194  }
195}
196
197// The debug malloc library needs to know whether it's the zygote or a child.
198extern "C" int gMallocLeakZygoteChild;
199
200static void EnableKeepCapabilities(JNIEnv* env) {
201  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
202  if (rc == -1) {
203    ALOGE("prctl(PR_SET_KEEPCAPS) failed");
204    RuntimeAbort(env);
205  }
206}
207
208static void DropCapabilitiesBoundingSet(JNIEnv* env) {
209  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
210    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
211    if (rc == -1) {
212      if (errno == EINVAL) {
213        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
214              "your kernel is compiled with file capabilities support");
215      } else {
216        ALOGE("prctl(PR_CAPBSET_DROP) failed");
217        RuntimeAbort(env);
218      }
219    }
220  }
221}
222
223static void SetCapabilities(JNIEnv* env, int64_t permitted, int64_t effective) {
224  __user_cap_header_struct capheader;
225  memset(&capheader, 0, sizeof(capheader));
226  capheader.version = _LINUX_CAPABILITY_VERSION_3;
227  capheader.pid = 0;
228
229  __user_cap_data_struct capdata[2];
230  memset(&capdata, 0, sizeof(capdata));
231  capdata[0].effective = effective;
232  capdata[1].effective = effective >> 32;
233  capdata[0].permitted = permitted;
234  capdata[1].permitted = permitted >> 32;
235
236  if (capset(&capheader, &capdata[0]) == -1) {
237    ALOGE("capset(%lld, %lld) failed", permitted, effective);
238    RuntimeAbort(env);
239  }
240}
241
242static void SetSchedulerPolicy(JNIEnv* env) {
243  errno = -set_sched_policy(0, SP_DEFAULT);
244  if (errno != 0) {
245    ALOGE("set_sched_policy(0, SP_DEFAULT) failed");
246    RuntimeAbort(env);
247  }
248}
249
250// Create a private mount namespace and bind mount appropriate emulated
251// storage for the given user.
252static bool MountEmulatedStorage(uid_t uid, jint mount_mode) {
253  if (mount_mode == MOUNT_EXTERNAL_NONE) {
254    return true;
255  }
256
257  // See storage config details at http://source.android.com/tech/storage/
258  userid_t user_id = multiuser_get_user_id(uid);
259
260  // Create a second private mount namespace for our process
261  if (unshare(CLONE_NEWNS) == -1) {
262      ALOGW("Failed to unshare(): %d", errno);
263      return false;
264  }
265
266  // Create bind mounts to expose external storage
267  if (mount_mode == MOUNT_EXTERNAL_MULTIUSER || mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) {
268    // These paths must already be created by init.rc
269    const char* source = getenv("EMULATED_STORAGE_SOURCE");
270    const char* target = getenv("EMULATED_STORAGE_TARGET");
271    const char* legacy = getenv("EXTERNAL_STORAGE");
272    if (source == NULL || target == NULL || legacy == NULL) {
273      ALOGW("Storage environment undefined; unable to provide external storage");
274      return false;
275    }
276
277    // Prepare source paths
278
279    // /mnt/shell/emulated/0
280    const String8 source_user(String8::format("%s/%d", source, user_id));
281    // /storage/emulated/0
282    const String8 target_user(String8::format("%s/%d", target, user_id));
283
284    if (fs_prepare_dir(source_user.string(), 0000, 0, 0) == -1
285        || fs_prepare_dir(target_user.string(), 0000, 0, 0) == -1) {
286      return false;
287    }
288
289    if (mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) {
290      // Mount entire external storage tree for all users
291      if (TEMP_FAILURE_RETRY(mount(source, target, NULL, MS_BIND, NULL)) == -1) {
292        ALOGW("Failed to mount %s to %s :%d", source, target, errno);
293        return false;
294      }
295    } else {
296      // Only mount user-specific external storage
297      if (TEMP_FAILURE_RETRY(
298              mount(source_user.string(), target_user.string(), NULL, MS_BIND, NULL)) == -1) {
299        ALOGW("Failed to mount %s to %s: %d", source_user.string(), target_user.string(), errno);
300        return false;
301      }
302    }
303
304    if (fs_prepare_dir(legacy, 0000, 0, 0) == -1) {
305        return false;
306    }
307
308    // Finally, mount user-specific path into place for legacy users
309    if (TEMP_FAILURE_RETRY(
310            mount(target_user.string(), legacy, NULL, MS_BIND | MS_REC, NULL)) == -1) {
311      ALOGW("Failed to mount %s to %s: %d", target_user.string(), legacy, errno);
312      return false;
313    }
314  } else {
315    ALOGW("Mount mode %d unsupported", mount_mode);
316    return false;
317  }
318
319  return true;
320}
321
// Reports whether the ADDR_NO_RANDOMIZE personality workaround is needed:
// only on 32-bit ARM builds running a kernel older than 3.4. If the kernel
// release cannot be obtained or parsed, the answer is false.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major;
    int minor;
    if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
341
342// Utility to close down the Zygote socket file descriptors while
343// the child is still running as root with Zygote's privileges.  Each
344// descriptor (if any) is closed via dup2(), replacing it with a valid
345// (open) descriptor to /dev/null.
346
347static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) {
348  if (!fdsToClose) {
349    return;
350  }
351  jsize count = env->GetArrayLength(fdsToClose);
352  jint *ar = env->GetIntArrayElements(fdsToClose, 0);
353  if (!ar) {
354      ALOGE("Bad fd array");
355      RuntimeAbort(env);
356  }
357  jsize i;
358  int devnull;
359  for (i = 0; i < count; i++) {
360    devnull = open("/dev/null", O_RDWR);
361    if (devnull < 0) {
362      ALOGE("Failed to open /dev/null");
363      RuntimeAbort(env);
364      continue;
365    }
366    ALOGV("Switching descriptor %d to /dev/null: %d", ar[i], errno);
367    if (dup2(devnull, ar[i]) < 0) {
368      ALOGE("Failed dup2() on descriptor %d", ar[i]);
369      RuntimeAbort(env);
370    }
371    close(devnull);
372  }
373}
374
375void SetThreadName(const char* thread_name) {
376  bool hasAt = false;
377  bool hasDot = false;
378  const char* s = thread_name;
379  while (*s) {
380    if (*s == '.') {
381      hasDot = true;
382    } else if (*s == '@') {
383      hasAt = true;
384    }
385    s++;
386  }
387  const int len = s - thread_name;
388  if (len < 15 || hasAt || !hasDot) {
389    s = thread_name;
390  } else {
391    s = thread_name + len - 15;
392  }
393  // pthread_setname_np fails rather than truncating long strings.
394  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
395  strlcpy(buf, s, sizeof(buf)-1);
396  errno = pthread_setname_np(pthread_self(), buf);
397  if (errno != 0) {
398    ALOGW("Unable to set the name of current thread to '%s'", buf);
399  }
400}
401
// Temporary timing check.
// Returns the current CLOCK_MONOTONIC reading in whole milliseconds.
uint64_t MsTime() {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);

  uint64_t millis = ts.tv_sec;
  millis *= UINT64_C(1000);
  millis += ts.tv_nsec / UINT64_C(1000000);
  return millis;
}
408
409
410void ckTime(uint64_t start, const char* where) {
411  uint64_t now = MsTime();
412  if ((now-start) > 1000) {
413    // If we are taking more than a second, log about it.
414    ALOGW("Slow operation: %"PRIu64" ms in %s", (uint64_t)(now-start), where);
415  }
416}
417
418// Utility routine to fork zygote and specialize the child process.
419static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
420                                     jint debug_flags, jobjectArray javaRlimits,
421                                     jlong permittedCapabilities, jlong effectiveCapabilities,
422                                     jint mount_external,
423                                     jstring java_se_info, jstring java_se_name,
424                                     bool is_system_server, jintArray fdsToClose) {
425  uint64_t start = MsTime();
426  SetSigChldHandler();
427  ckTime(start, "ForkAndSpecializeCommon:SetSigChldHandler");
428
429  pid_t pid = fork();
430
431  if (pid == 0) {
432    // The child process.
433    gMallocLeakZygoteChild = 1;
434
435
436    // Clean up any descriptors which must be closed immediately
437    DetachDescriptors(env, fdsToClose);
438
439    ckTime(start, "ForkAndSpecializeCommon:Fork and detach");
440
441    // Keep capabilities across UID change, unless we're staying root.
442    if (uid != 0) {
443      EnableKeepCapabilities(env);
444    }
445
446    DropCapabilitiesBoundingSet(env);
447
448    if (!MountEmulatedStorage(uid, mount_external)) {
449      ALOGW("Failed to mount emulated storage: %d", errno);
450      if (errno == ENOTCONN || errno == EROFS) {
451        // When device is actively encrypting, we get ENOTCONN here
452        // since FUSE was mounted before the framework restarted.
453        // When encrypted device is booting, we get EROFS since
454        // FUSE hasn't been created yet by init.
455        // In either case, continue without external storage.
456      } else {
457        ALOGE("Cannot continue without emulated storage");
458        RuntimeAbort(env);
459      }
460    }
461
462    if (!is_system_server) {
463        int rc = createProcessGroup(uid, getpid());
464        if (rc != 0) {
465            if (rc == -EROFS) {
466                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
467            } else {
468                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
469            }
470        }
471    }
472
473    SetGids(env, javaGids);
474
475    SetRLimits(env, javaRlimits);
476
477    int rc = setresgid(gid, gid, gid);
478    if (rc == -1) {
479      ALOGE("setresgid(%d) failed", gid);
480      RuntimeAbort(env);
481    }
482
483    rc = setresuid(uid, uid, uid);
484    if (rc == -1) {
485      ALOGE("setresuid(%d) failed", uid);
486      RuntimeAbort(env);
487    }
488
489    if (NeedsNoRandomizeWorkaround()) {
490        // Work around ARM kernel ASLR lossage (http://b/5817320).
491        int old_personality = personality(0xffffffff);
492        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
493        if (new_personality == -1) {
494            ALOGW("personality(%d) failed", new_personality);
495        }
496    }
497
498    SetCapabilities(env, permittedCapabilities, effectiveCapabilities);
499
500    SetSchedulerPolicy(env);
501
502    const char* se_info_c_str = NULL;
503    ScopedUtfChars* se_info = NULL;
504    if (java_se_info != NULL) {
505        se_info = new ScopedUtfChars(env, java_se_info);
506        se_info_c_str = se_info->c_str();
507        if (se_info_c_str == NULL) {
508          ALOGE("se_info_c_str == NULL");
509          RuntimeAbort(env);
510        }
511    }
512    const char* se_name_c_str = NULL;
513    ScopedUtfChars* se_name = NULL;
514    if (java_se_name != NULL) {
515        se_name = new ScopedUtfChars(env, java_se_name);
516        se_name_c_str = se_name->c_str();
517        if (se_name_c_str == NULL) {
518          ALOGE("se_name_c_str == NULL");
519          RuntimeAbort(env);
520        }
521    }
522    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
523    if (rc == -1) {
524      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
525            is_system_server, se_info_c_str, se_name_c_str);
526      RuntimeAbort(env);
527    }
528
529    // Make it easier to debug audit logs by setting the main thread's name to the
530    // nice name rather than "app_process".
531    if (se_info_c_str == NULL && is_system_server) {
532      se_name_c_str = "system_server";
533    }
534    if (se_info_c_str != NULL) {
535      SetThreadName(se_name_c_str);
536    }
537
538    delete se_info;
539    delete se_name;
540
541    UnsetSigChldHandler();
542
543    ckTime(start, "ForkAndSpecializeCommon:child process setup");
544
545    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, debug_flags);
546    ckTime(start, "ForkAndSpecializeCommon:PostForkChildHooks returns");
547    if (env->ExceptionCheck()) {
548      ALOGE("Error calling post fork hooks.");
549      RuntimeAbort(env);
550    }
551  } else if (pid > 0) {
552    // the parent process
553  }
554  return pid;
555}
556}  // anonymous namespace
557
558namespace android {
559
560static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
561        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
562        jint debug_flags, jobjectArray rlimits,
563        jint mount_external, jstring se_info, jstring se_name,
564        jintArray fdsToClose) {
565    // Grant CAP_WAKE_ALARM to the Bluetooth process.
566    jlong capabilities = 0;
567    if (uid == AID_BLUETOOTH) {
568        capabilities |= (1LL << CAP_WAKE_ALARM);
569    }
570
571    return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags,
572            rlimits, capabilities, capabilities, mount_external, se_info,
573            se_name, false, fdsToClose);
574}
575
576static jint com_android_internal_os_Zygote_nativeForkSystemServer(
577        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
578        jint debug_flags, jobjectArray rlimits, jlong permittedCapabilities,
579        jlong effectiveCapabilities) {
580  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
581                                      debug_flags, rlimits,
582                                      permittedCapabilities, effectiveCapabilities,
583                                      MOUNT_EXTERNAL_NONE, NULL, NULL, true, NULL);
584  if (pid > 0) {
585      // The zygote process checks whether the child process has died or not.
586      ALOGI("System server process %d has been created", pid);
587      gSystemServerPid = pid;
588      // There is a slight window that the system server process has crashed
589      // but it went unnoticed because we haven't published its pid yet. So
590      // we recheck here just to make sure that all is well.
591      int status;
592      if (waitpid(pid, &status, WNOHANG) == pid) {
593          ALOGE("System server process %d has died. Restarting Zygote!", pid);
594          RuntimeAbort(env);
595      }
596  }
597  return pid;
598}
599
600static JNINativeMethod gMethods[] = {
601    { "nativeForkAndSpecialize", "(II[II[[IILjava/lang/String;Ljava/lang/String;[I)I",
602      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
603    { "nativeForkSystemServer", "(II[II[[IJJ)I",
604      (void *) com_android_internal_os_Zygote_nativeForkSystemServer }
605};
606
607int register_com_android_internal_os_Zygote(JNIEnv* env) {
608  gZygoteClass = (jclass) env->NewGlobalRef(env->FindClass(kZygoteClassName));
609  if (gZygoteClass == NULL) {
610    RuntimeAbort(env);
611  }
612  gCallPostForkChildHooks = env->GetStaticMethodID(gZygoteClass, "callPostForkChildHooks", "(I)V");
613
614  return AndroidRuntime::registerNativeMethods(env, "com/android/internal/os/Zygote",
615      gMethods, NELEM(gMethods));
616}
617}  // namespace android
618
619