com_android_internal_os_Zygote.cpp revision a103ebed6e9a44a406b31636791d2970c882ec44
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <grp.h>
24#include <fcntl.h>
25#include <paths.h>
26#include <signal.h>
27#include <stdlib.h>
28#include <unistd.h>
29#include <sys/capability.h>
30#include <sys/personality.h>
31#include <sys/prctl.h>
32#include <sys/resource.h>
33#include <sys/stat.h>
34#include <sys/types.h>
35#include <sys/utsname.h>
36#include <sys/wait.h>
37
38
39#include <cutils/fs.h>
40#include <cutils/multiuser.h>
41#include <cutils/sched_policy.h>
42#include <private/android_filesystem_config.h>
43#include <utils/String8.h>
44#include <selinux/android.h>
45#include <processgroup/processgroup.h>
46
47#include "android_runtime/AndroidRuntime.h"
48#include "JNIHelp.h"
49#include "ScopedLocalRef.h"
50#include "ScopedPrimitiveArray.h"
51#include "ScopedUtfChars.h"
52
53#include "nativebridge/native_bridge.h"
54
55namespace {
56
57using android::String8;
58
59static pid_t gSystemServerPid = 0;
60
61static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
62static jclass gZygoteClass;
63static jmethodID gCallPostForkChildHooks;
64
// External-storage mount modes requested by the Java layer.
// Must match values in com.android.internal.os.Zygote.
enum MountExternalKind {
  // No external storage is mounted for the process.
  MOUNT_EXTERNAL_NONE = 0,
  // Not handled by MountEmulatedStorage() in this file (reported as unsupported).
  MOUNT_EXTERNAL_SINGLEUSER = 1,
  // Bind-mount only the owning user's emulated storage.
  MOUNT_EXTERNAL_MULTIUSER = 2,
  // Bind-mount the emulated storage tree of all users.
  MOUNT_EXTERNAL_MULTIUSER_ALL = 3,
};
72
73static void RuntimeAbort(JNIEnv* env) {
74  env->FatalError("RuntimeAbort");
75}
76
77// This signal handler is for zygote mode, since the zygote must reap its children
78static void SigChldHandler(int /*signal_number*/) {
79  pid_t pid;
80  int status;
81
82  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
83     // Log process-death status that we care about.  In general it is
84     // not safe to call LOG(...) from a signal handler because of
85     // possible reentrancy.  However, we know a priori that the
86     // current implementation of LOG() is safe to call from a SIGCHLD
87     // handler in the zygote process.  If the LOG() implementation
88     // changes its locking strategy or its use of syscalls within the
89     // lazy-init critical section, its use here may become unsafe.
90    if (WIFEXITED(status)) {
91      if (WEXITSTATUS(status)) {
92        ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
93      }
94    } else if (WIFSIGNALED(status)) {
95      if (WTERMSIG(status) != SIGKILL) {
96        ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
97      }
98      if (WCOREDUMP(status)) {
99        ALOGI("Process %d dumped core.", pid);
100      }
101    }
102
103    // If the just-crashed process is the system_server, bring down zygote
104    // so that it is restarted by init and system server will be restarted
105    // from there.
106    if (pid == gSystemServerPid) {
107      ALOGE("Exit zygote because system server (%d) has terminated");
108      kill(getpid(), SIGKILL);
109    }
110  }
111
112  // Note that we shouldn't consider ECHILD an error because
113  // the secondary zygote might have no children left to wait for.
114  if (pid < 0 && errno != ECHILD) {
115    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
116  }
117}
118
119// Configures the SIGCHLD handler for the zygote process. This is configured
120// very late, because earlier in the runtime we may fork() and exec()
121// other processes, and we want to waitpid() for those rather than
122// have them be harvested immediately.
123//
124// This ends up being called repeatedly before each fork(), but there's
125// no real harm in that.
126static void SetSigChldHandler() {
127  struct sigaction sa;
128  memset(&sa, 0, sizeof(sa));
129  sa.sa_handler = SigChldHandler;
130
131  int err = sigaction(SIGCHLD, &sa, NULL);
132  if (err < 0) {
133    ALOGW("Error setting SIGCHLD handler: %d", errno);
134  }
135}
136
137// Sets the SIGCHLD handler back to default behavior in zygote children.
138static void UnsetSigChldHandler() {
139  struct sigaction sa;
140  memset(&sa, 0, sizeof(sa));
141  sa.sa_handler = SIG_DFL;
142
143  int err = sigaction(SIGCHLD, &sa, NULL);
144  if (err < 0) {
145    ALOGW("Error unsetting SIGCHLD handler: %d", errno);
146  }
147}
148
149// Calls POSIX setgroups() using the int[] object as an argument.
150// A NULL argument is tolerated.
151static void SetGids(JNIEnv* env, jintArray javaGids) {
152  if (javaGids == NULL) {
153    return;
154  }
155
156  ScopedIntArrayRO gids(env, javaGids);
157  if (gids.get() == NULL) {
158      RuntimeAbort(env);
159  }
160  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
161  if (rc == -1) {
162    ALOGE("setgroups failed");
163    RuntimeAbort(env);
164  }
165}
166
167// Sets the resource limits via setrlimit(2) for the values in the
168// two-dimensional array of integers that's passed in. The second dimension
169// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
170// treated as an empty array.
171static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) {
172  if (javaRlimits == NULL) {
173    return;
174  }
175
176  rlimit rlim;
177  memset(&rlim, 0, sizeof(rlim));
178
179  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
180    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
181    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
182    if (javaRlimit.size() != 3) {
183      ALOGE("rlimits array must have a second dimension of size 3");
184      RuntimeAbort(env);
185    }
186
187    rlim.rlim_cur = javaRlimit[1];
188    rlim.rlim_max = javaRlimit[2];
189
190    int rc = setrlimit(javaRlimit[0], &rlim);
191    if (rc == -1) {
192      ALOGE("setrlimit(%d, {%d, %d}) failed", javaRlimit[0], rlim.rlim_cur, rlim.rlim_max);
193      RuntimeAbort(env);
194    }
195  }
196}
197
198// The debug malloc library needs to know whether it's the zygote or a child.
199extern "C" int gMallocLeakZygoteChild;
200
201static void EnableKeepCapabilities(JNIEnv* env) {
202  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
203  if (rc == -1) {
204    ALOGE("prctl(PR_SET_KEEPCAPS) failed");
205    RuntimeAbort(env);
206  }
207}
208
209static void DropCapabilitiesBoundingSet(JNIEnv* env) {
210  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
211    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
212    if (rc == -1) {
213      if (errno == EINVAL) {
214        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
215              "your kernel is compiled with file capabilities support");
216      } else {
217        ALOGE("prctl(PR_CAPBSET_DROP) failed");
218        RuntimeAbort(env);
219      }
220    }
221  }
222}
223
// Installs the given permitted and effective capability sets on the calling
// process via capset(). Each 64-bit mask is split into the two 32-bit words
// of the version-3 capability ABI (low word in capdata[0], high word in
// capdata[1]); the inheritable set is left empty. Aborts the runtime on
// failure.
static void SetCapabilities(JNIEnv* env, int64_t permitted, int64_t effective) {
  __user_cap_header_struct capheader;
  memset(&capheader, 0, sizeof(capheader));
  capheader.version = _LINUX_CAPABILITY_VERSION_3;
  capheader.pid = 0;  // 0 == the calling process

  __user_cap_data_struct capdata[2];
  memset(&capdata, 0, sizeof(capdata));
  capdata[0].effective = effective;
  capdata[1].effective = effective >> 32;
  capdata[0].permitted = permitted;
  capdata[1].permitted = permitted >> 32;

  if (capset(&capheader, &capdata[0]) == -1) {
    // int64_t is 'long' on LP64 targets, so cast to the type %lld expects.
    ALOGE("capset(%lld, %lld) failed", static_cast<long long>(permitted),
          static_cast<long long>(effective));
    RuntimeAbort(env);
  }
}
242
243static void SetSchedulerPolicy(JNIEnv* env) {
244  errno = -set_sched_policy(0, SP_DEFAULT);
245  if (errno != 0) {
246    ALOGE("set_sched_policy(0, SP_DEFAULT) failed");
247    RuntimeAbort(env);
248  }
249}
250
251// Create a private mount namespace and bind mount appropriate emulated
252// storage for the given user.
253static bool MountEmulatedStorage(uid_t uid, jint mount_mode, bool force_mount_namespace) {
254  if (mount_mode == MOUNT_EXTERNAL_NONE && !force_mount_namespace) {
255    return true;
256  }
257
258  // Create a second private mount namespace for our process
259  if (unshare(CLONE_NEWNS) == -1) {
260      ALOGW("Failed to unshare(): %d", errno);
261      return false;
262  }
263
264  if (mount_mode == MOUNT_EXTERNAL_NONE) {
265    return true;
266  }
267
268  // See storage config details at http://source.android.com/tech/storage/
269  userid_t user_id = multiuser_get_user_id(uid);
270
271  // Create bind mounts to expose external storage
272  if (mount_mode == MOUNT_EXTERNAL_MULTIUSER || mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) {
273    // These paths must already be created by init.rc
274    const char* source = getenv("EMULATED_STORAGE_SOURCE");
275    const char* target = getenv("EMULATED_STORAGE_TARGET");
276    const char* legacy = getenv("EXTERNAL_STORAGE");
277    if (source == NULL || target == NULL || legacy == NULL) {
278      ALOGW("Storage environment undefined; unable to provide external storage");
279      return false;
280    }
281
282    // Prepare source paths
283
284    // /mnt/shell/emulated/0
285    const String8 source_user(String8::format("%s/%d", source, user_id));
286    // /storage/emulated/0
287    const String8 target_user(String8::format("%s/%d", target, user_id));
288
289    if (fs_prepare_dir(source_user.string(), 0000, 0, 0) == -1
290        || fs_prepare_dir(target_user.string(), 0000, 0, 0) == -1) {
291      return false;
292    }
293
294    if (mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) {
295      // Mount entire external storage tree for all users
296      if (TEMP_FAILURE_RETRY(mount(source, target, NULL, MS_BIND, NULL)) == -1) {
297        ALOGW("Failed to mount %s to %s :%d", source, target, errno);
298        return false;
299      }
300    } else {
301      // Only mount user-specific external storage
302      if (TEMP_FAILURE_RETRY(
303              mount(source_user.string(), target_user.string(), NULL, MS_BIND, NULL)) == -1) {
304        ALOGW("Failed to mount %s to %s: %d", source_user.string(), target_user.string(), errno);
305        return false;
306      }
307    }
308
309    if (fs_prepare_dir(legacy, 0000, 0, 0) == -1) {
310        return false;
311    }
312
313    // Finally, mount user-specific path into place for legacy users
314    if (TEMP_FAILURE_RETRY(
315            mount(target_user.string(), legacy, NULL, MS_BIND | MS_REC, NULL)) == -1) {
316      ALOGW("Failed to mount %s to %s: %d", target_user.string(), legacy, errno);
317      return false;
318    }
319  } else {
320    ALOGW("Mount mode %d unsupported", mount_mode);
321    return false;
322  }
323
324  return true;
325}
326
// Reports whether the running kernel needs the ADDR_NO_RANDOMIZE personality
// workaround. Only 32-bit ARM builds can answer true, and only when the
// kernel release parses as a version older than 3.4. Any failure to determine
// the version yields false.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int major;
    int minor;
    if (sscanf(kernel_info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (major != 3) {
        return major < 3;
    }
    return minor < 4;
#else
    return false;
#endif
}
346
347// Utility to close down the Zygote socket file descriptors while
348// the child is still running as root with Zygote's privileges.  Each
349// descriptor (if any) is closed via dup2(), replacing it with a valid
350// (open) descriptor to /dev/null.
351
352static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) {
353  if (!fdsToClose) {
354    return;
355  }
356  jsize count = env->GetArrayLength(fdsToClose);
357  jint *ar = env->GetIntArrayElements(fdsToClose, 0);
358  if (!ar) {
359      ALOGE("Bad fd array");
360      RuntimeAbort(env);
361  }
362  jsize i;
363  int devnull;
364  for (i = 0; i < count; i++) {
365    devnull = open("/dev/null", O_RDWR);
366    if (devnull < 0) {
367      ALOGE("Failed to open /dev/null");
368      RuntimeAbort(env);
369      continue;
370    }
371    ALOGV("Switching descriptor %d to /dev/null: %d", ar[i], errno);
372    if (dup2(devnull, ar[i]) < 0) {
373      ALOGE("Failed dup2() on descriptor %d", ar[i]);
374      RuntimeAbort(env);
375    }
376    close(devnull);
377  }
378}
379
380void SetThreadName(const char* thread_name) {
381  bool hasAt = false;
382  bool hasDot = false;
383  const char* s = thread_name;
384  while (*s) {
385    if (*s == '.') {
386      hasDot = true;
387    } else if (*s == '@') {
388      hasAt = true;
389    }
390    s++;
391  }
392  const int len = s - thread_name;
393  if (len < 15 || hasAt || !hasDot) {
394    s = thread_name;
395  } else {
396    s = thread_name + len - 15;
397  }
398  // pthread_setname_np fails rather than truncating long strings.
399  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
400  strlcpy(buf, s, sizeof(buf)-1);
401  errno = pthread_setname_np(pthread_self(), buf);
402  if (errno != 0) {
403    ALOGW("Unable to set the name of current thread to '%s'", buf);
404  }
405}
406
407// Utility routine to fork zygote and specialize the child process.
408static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
409                                     jint debug_flags, jobjectArray javaRlimits,
410                                     jlong permittedCapabilities, jlong effectiveCapabilities,
411                                     jint mount_external,
412                                     jstring java_se_info, jstring java_se_name,
413                                     bool is_system_server, jintArray fdsToClose,
414                                     jstring instructionSet, jstring dataDir) {
415  SetSigChldHandler();
416
417  pid_t pid = fork();
418
419  if (pid == 0) {
420    // The child process.
421    gMallocLeakZygoteChild = 1;
422
423    // Clean up any descriptors which must be closed immediately
424    DetachDescriptors(env, fdsToClose);
425
426    // Keep capabilities across UID change, unless we're staying root.
427    if (uid != 0) {
428      EnableKeepCapabilities(env);
429    }
430
431    DropCapabilitiesBoundingSet(env);
432
433    bool need_native_bridge = false;
434    if (instructionSet != NULL) {
435      ScopedUtfChars isa_string(env, instructionSet);
436      need_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
437    }
438
439    if (!MountEmulatedStorage(uid, mount_external, need_native_bridge)) {
440      ALOGW("Failed to mount emulated storage: %d", errno);
441      if (errno == ENOTCONN || errno == EROFS) {
442        // When device is actively encrypting, we get ENOTCONN here
443        // since FUSE was mounted before the framework restarted.
444        // When encrypted device is booting, we get EROFS since
445        // FUSE hasn't been created yet by init.
446        // In either case, continue without external storage.
447      } else {
448        ALOGE("Cannot continue without emulated storage");
449        RuntimeAbort(env);
450      }
451    }
452
453    if (!is_system_server) {
454        int rc = createProcessGroup(uid, getpid());
455        if (rc != 0) {
456            if (rc == -EROFS) {
457                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
458            } else {
459                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
460            }
461        }
462    }
463
464    SetGids(env, javaGids);
465
466    SetRLimits(env, javaRlimits);
467
468    if (!is_system_server && need_native_bridge) {
469      // Set the environment for the apps running with native bridge.
470      ScopedUtfChars isa_string(env, instructionSet);  // Known non-null because of need_native_...
471      if (dataDir == NULL) {
472        android::PreInitializeNativeBridge(NULL, isa_string.c_str());
473      } else {
474        ScopedUtfChars data_dir(env, dataDir);
475        android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
476      }
477    }
478
479    int rc = setresgid(gid, gid, gid);
480    if (rc == -1) {
481      ALOGE("setresgid(%d) failed", gid);
482      RuntimeAbort(env);
483    }
484
485    rc = setresuid(uid, uid, uid);
486    if (rc == -1) {
487      ALOGE("setresuid(%d) failed", uid);
488      RuntimeAbort(env);
489    }
490
491    if (NeedsNoRandomizeWorkaround()) {
492        // Work around ARM kernel ASLR lossage (http://b/5817320).
493        int old_personality = personality(0xffffffff);
494        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
495        if (new_personality == -1) {
496            ALOGW("personality(%d) failed", new_personality);
497        }
498    }
499
500    SetCapabilities(env, permittedCapabilities, effectiveCapabilities);
501
502    SetSchedulerPolicy(env);
503
504    const char* se_info_c_str = NULL;
505    ScopedUtfChars* se_info = NULL;
506    if (java_se_info != NULL) {
507        se_info = new ScopedUtfChars(env, java_se_info);
508        se_info_c_str = se_info->c_str();
509        if (se_info_c_str == NULL) {
510          ALOGE("se_info_c_str == NULL");
511          RuntimeAbort(env);
512        }
513    }
514    const char* se_name_c_str = NULL;
515    ScopedUtfChars* se_name = NULL;
516    if (java_se_name != NULL) {
517        se_name = new ScopedUtfChars(env, java_se_name);
518        se_name_c_str = se_name->c_str();
519        if (se_name_c_str == NULL) {
520          ALOGE("se_name_c_str == NULL");
521          RuntimeAbort(env);
522        }
523    }
524    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
525    if (rc == -1) {
526      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
527            is_system_server, se_info_c_str, se_name_c_str);
528      RuntimeAbort(env);
529    }
530
531    // Make it easier to debug audit logs by setting the main thread's name to the
532    // nice name rather than "app_process".
533    if (se_info_c_str == NULL && is_system_server) {
534      se_name_c_str = "system_server";
535    }
536    if (se_info_c_str != NULL) {
537      SetThreadName(se_name_c_str);
538    }
539
540    delete se_info;
541    delete se_name;
542
543    UnsetSigChldHandler();
544
545    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, debug_flags,
546                              is_system_server ? NULL : instructionSet);
547    if (env->ExceptionCheck()) {
548      ALOGE("Error calling post fork hooks.");
549      RuntimeAbort(env);
550    }
551  } else if (pid > 0) {
552    // the parent process
553  }
554  return pid;
555}
556}  // anonymous namespace
557
558namespace android {
559
// JNI entry point for Zygote.nativeForkAndSpecialize(): forks an application
// process. The same capability mask is used for both the permitted and the
// effective set. Returns the result of ForkAndSpecializeCommon().
static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
        jint debug_flags, jobjectArray rlimits,
        jint mount_external, jstring se_info, jstring se_name,
        jintArray fdsToClose, jstring instructionSet, jstring appDataDir) {
    // Grant CAP_WAKE_ALARM to the Bluetooth process; everyone else gets none.
    const jlong capabilities = (uid == AID_BLUETOOTH) ? (1LL << CAP_WAKE_ALARM) : 0;
    const bool is_system_server = false;

    return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags,
            rlimits, capabilities, capabilities, mount_external, se_info,
            se_name, is_system_server, fdsToClose, instructionSet, appDataDir);
}
575
576static jint com_android_internal_os_Zygote_nativeForkSystemServer(
577        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
578        jint debug_flags, jobjectArray rlimits, jlong permittedCapabilities,
579        jlong effectiveCapabilities) {
580  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
581                                      debug_flags, rlimits,
582                                      permittedCapabilities, effectiveCapabilities,
583                                      MOUNT_EXTERNAL_NONE, NULL, NULL, true, NULL,
584                                      NULL, NULL);
585  if (pid > 0) {
586      // The zygote process checks whether the child process has died or not.
587      ALOGI("System server process %d has been created", pid);
588      gSystemServerPid = pid;
589      // There is a slight window that the system server process has crashed
590      // but it went unnoticed because we haven't published its pid yet. So
591      // we recheck here just to make sure that all is well.
592      int status;
593      if (waitpid(pid, &status, WNOHANG) == pid) {
594          ALOGE("System server process %d has died. Restarting Zygote!", pid);
595          RuntimeAbort(env);
596      }
597  }
598  return pid;
599}
600
601static JNINativeMethod gMethods[] = {
602    { "nativeForkAndSpecialize",
603      "(II[II[[IILjava/lang/String;Ljava/lang/String;[ILjava/lang/String;Ljava/lang/String;)I",
604      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
605    { "nativeForkSystemServer", "(II[II[[IJJ)I",
606      (void *) com_android_internal_os_Zygote_nativeForkSystemServer }
607};
608
609int register_com_android_internal_os_Zygote(JNIEnv* env) {
610  gZygoteClass = (jclass) env->NewGlobalRef(env->FindClass(kZygoteClassName));
611  if (gZygoteClass == NULL) {
612    RuntimeAbort(env);
613  }
614  gCallPostForkChildHooks = env->GetStaticMethodID(gZygoteClass, "callPostForkChildHooks",
615                                                   "(ILjava/lang/String;)V");
616
617  return AndroidRuntime::registerNativeMethods(env, "com/android/internal/os/Zygote",
618      gMethods, NELEM(gMethods));
619}
620}  // namespace android
621
622