1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Zygote"
18
19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
20#include <sys/mount.h>
21#include <linux/fs.h>
22
23#include <grp.h>
24#include <fcntl.h>
25#include <paths.h>
26#include <signal.h>
27#include <stdlib.h>
28#include <unistd.h>
29#include <sys/capability.h>
30#include <sys/personality.h>
31#include <sys/prctl.h>
32#include <sys/resource.h>
33#include <sys/stat.h>
34#include <sys/types.h>
35#include <sys/utsname.h>
36#include <sys/wait.h>
37
38
39#include <cutils/fs.h>
40#include <cutils/multiuser.h>
41#include <cutils/sched_policy.h>
42#include <private/android_filesystem_config.h>
43#include <utils/String8.h>
44#include <selinux/android.h>
45#include <processgroup/processgroup.h>
46#include <inttypes.h>
47
48#include "android_runtime/AndroidRuntime.h"
49#include "JNIHelp.h"
50#include "ScopedLocalRef.h"
51#include "ScopedPrimitiveArray.h"
52#include "ScopedUtfChars.h"
53
54#include "nativebridge/native_bridge.h"
55
56namespace {
57
58using android::String8;
59
// Pid of the forked system server, or 0 until one has been forked. Written
// by nativeForkSystemServer and compared against reaped pids in
// SigChldHandler.
static pid_t gSystemServerPid = 0;

// JNI name of the Java class whose native methods are registered below.
static const char kZygoteClassName[] = "com/android/internal/os/Zygote";
// Global reference to the Zygote class; set once during registration.
static jclass gZygoteClass;
// Method ID of the static Zygote.callPostForkChildHooks(int, String) method,
// invoked in the child at the end of ForkAndSpecializeCommon.
static jmethodID gCallPostForkChildHooks;
65
// External storage mount modes passed down from Java.
// Must match values in com.android.internal.os.Zygote.
enum MountExternalKind {
  // No external storage is mounted.
  MOUNT_EXTERNAL_NONE = 0,
  // Not handled by MountEmulatedStorage, which reports it as unsupported.
  MOUNT_EXTERNAL_SINGLEUSER = 1,
  // Only the calling user's emulated storage directory is bind-mounted.
  MOUNT_EXTERNAL_MULTIUSER = 2,
  // The whole emulated storage tree (all users) is bind-mounted.
  MOUNT_EXTERNAL_MULTIUSER_ALL = 3,
};
73
74static void RuntimeAbort(JNIEnv* env) {
75  env->FatalError("RuntimeAbort");
76}
77
78// This signal handler is for zygote mode, since the zygote must reap its children
79static void SigChldHandler(int /*signal_number*/) {
80  pid_t pid;
81  int status;
82
83  while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
84     // Log process-death status that we care about.  In general it is
85     // not safe to call LOG(...) from a signal handler because of
86     // possible reentrancy.  However, we know a priori that the
87     // current implementation of LOG() is safe to call from a SIGCHLD
88     // handler in the zygote process.  If the LOG() implementation
89     // changes its locking strategy or its use of syscalls within the
90     // lazy-init critical section, its use here may become unsafe.
91    if (WIFEXITED(status)) {
92      if (WEXITSTATUS(status)) {
93        ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status));
94      }
95    } else if (WIFSIGNALED(status)) {
96      if (WTERMSIG(status) != SIGKILL) {
97        ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status));
98      }
99      if (WCOREDUMP(status)) {
100        ALOGI("Process %d dumped core.", pid);
101      }
102    }
103
104    // If the just-crashed process is the system_server, bring down zygote
105    // so that it is restarted by init and system server will be restarted
106    // from there.
107    if (pid == gSystemServerPid) {
108      ALOGE("Exit zygote because system server (%d) has terminated");
109      kill(getpid(), SIGKILL);
110    }
111  }
112
113  // Note that we shouldn't consider ECHILD an error because
114  // the secondary zygote might have no children left to wait for.
115  if (pid < 0 && errno != ECHILD) {
116    ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno));
117  }
118}
119
120// Configures the SIGCHLD handler for the zygote process. This is configured
121// very late, because earlier in the runtime we may fork() and exec()
122// other processes, and we want to waitpid() for those rather than
123// have them be harvested immediately.
124//
125// This ends up being called repeatedly before each fork(), but there's
126// no real harm in that.
127static void SetSigChldHandler() {
128  struct sigaction sa;
129  memset(&sa, 0, sizeof(sa));
130  sa.sa_handler = SigChldHandler;
131
132  int err = sigaction(SIGCHLD, &sa, NULL);
133  if (err < 0) {
134    ALOGW("Error setting SIGCHLD handler: %d", errno);
135  }
136}
137
138// Sets the SIGCHLD handler back to default behavior in zygote children.
139static void UnsetSigChldHandler() {
140  struct sigaction sa;
141  memset(&sa, 0, sizeof(sa));
142  sa.sa_handler = SIG_DFL;
143
144  int err = sigaction(SIGCHLD, &sa, NULL);
145  if (err < 0) {
146    ALOGW("Error unsetting SIGCHLD handler: %d", errno);
147  }
148}
149
150// Calls POSIX setgroups() using the int[] object as an argument.
151// A NULL argument is tolerated.
152static void SetGids(JNIEnv* env, jintArray javaGids) {
153  if (javaGids == NULL) {
154    return;
155  }
156
157  ScopedIntArrayRO gids(env, javaGids);
158  if (gids.get() == NULL) {
159      RuntimeAbort(env);
160  }
161  int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0]));
162  if (rc == -1) {
163    ALOGE("setgroups failed");
164    RuntimeAbort(env);
165  }
166}
167
168// Sets the resource limits via setrlimit(2) for the values in the
169// two-dimensional array of integers that's passed in. The second dimension
170// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is
171// treated as an empty array.
172static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) {
173  if (javaRlimits == NULL) {
174    return;
175  }
176
177  rlimit rlim;
178  memset(&rlim, 0, sizeof(rlim));
179
180  for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) {
181    ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i));
182    ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get()));
183    if (javaRlimit.size() != 3) {
184      ALOGE("rlimits array must have a second dimension of size 3");
185      RuntimeAbort(env);
186    }
187
188    rlim.rlim_cur = javaRlimit[1];
189    rlim.rlim_max = javaRlimit[2];
190
191    int rc = setrlimit(javaRlimit[0], &rlim);
192    if (rc == -1) {
193      ALOGE("setrlimit(%d, {%d, %d}) failed", javaRlimit[0], rlim.rlim_cur, rlim.rlim_max);
194      RuntimeAbort(env);
195    }
196  }
197}
198
// The debug malloc library needs to know whether it's the zygote or a child.
// Defined outside this file; set to 1 in the child right after fork() in
// ForkAndSpecializeCommon.
extern "C" int gMallocLeakZygoteChild;
201
202static void EnableKeepCapabilities(JNIEnv* env) {
203  int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0);
204  if (rc == -1) {
205    ALOGE("prctl(PR_SET_KEEPCAPS) failed");
206    RuntimeAbort(env);
207  }
208}
209
210static void DropCapabilitiesBoundingSet(JNIEnv* env) {
211  for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) {
212    int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
213    if (rc == -1) {
214      if (errno == EINVAL) {
215        ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify "
216              "your kernel is compiled with file capabilities support");
217      } else {
218        ALOGE("prctl(PR_CAPBSET_DROP) failed");
219        RuntimeAbort(env);
220      }
221    }
222  }
223}
224
225static void SetCapabilities(JNIEnv* env, int64_t permitted, int64_t effective) {
226  __user_cap_header_struct capheader;
227  memset(&capheader, 0, sizeof(capheader));
228  capheader.version = _LINUX_CAPABILITY_VERSION_3;
229  capheader.pid = 0;
230
231  __user_cap_data_struct capdata[2];
232  memset(&capdata, 0, sizeof(capdata));
233  capdata[0].effective = effective;
234  capdata[1].effective = effective >> 32;
235  capdata[0].permitted = permitted;
236  capdata[1].permitted = permitted >> 32;
237
238  if (capset(&capheader, &capdata[0]) == -1) {
239    ALOGE("capset(%lld, %lld) failed", permitted, effective);
240    RuntimeAbort(env);
241  }
242}
243
244static void SetSchedulerPolicy(JNIEnv* env) {
245  errno = -set_sched_policy(0, SP_DEFAULT);
246  if (errno != 0) {
247    ALOGE("set_sched_policy(0, SP_DEFAULT) failed");
248    RuntimeAbort(env);
249  }
250}
251
252// Create a private mount namespace and bind mount appropriate emulated
253// storage for the given user.
254static bool MountEmulatedStorage(uid_t uid, jint mount_mode, bool force_mount_namespace) {
255  if (mount_mode == MOUNT_EXTERNAL_NONE && !force_mount_namespace) {
256    return true;
257  }
258
259  // Create a second private mount namespace for our process
260  if (unshare(CLONE_NEWNS) == -1) {
261      ALOGW("Failed to unshare(): %d", errno);
262      return false;
263  }
264
265  if (mount_mode == MOUNT_EXTERNAL_NONE) {
266    return true;
267  }
268
269  // See storage config details at http://source.android.com/tech/storage/
270  userid_t user_id = multiuser_get_user_id(uid);
271
272  // Create bind mounts to expose external storage
273  if (mount_mode == MOUNT_EXTERNAL_MULTIUSER || mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) {
274    // These paths must already be created by init.rc
275    const char* source = getenv("EMULATED_STORAGE_SOURCE");
276    const char* target = getenv("EMULATED_STORAGE_TARGET");
277    const char* legacy = getenv("EXTERNAL_STORAGE");
278    if (source == NULL || target == NULL || legacy == NULL) {
279      ALOGW("Storage environment undefined; unable to provide external storage");
280      return false;
281    }
282
283    // Prepare source paths
284
285    // /mnt/shell/emulated/0
286    const String8 source_user(String8::format("%s/%d", source, user_id));
287    // /storage/emulated/0
288    const String8 target_user(String8::format("%s/%d", target, user_id));
289
290    if (fs_prepare_dir(source_user.string(), 0000, 0, 0) == -1
291        || fs_prepare_dir(target_user.string(), 0000, 0, 0) == -1) {
292      return false;
293    }
294
295    if (mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) {
296      // Mount entire external storage tree for all users
297      if (TEMP_FAILURE_RETRY(mount(source, target, NULL, MS_BIND, NULL)) == -1) {
298        ALOGW("Failed to mount %s to %s :%d", source, target, errno);
299        return false;
300      }
301    } else {
302      // Only mount user-specific external storage
303      if (TEMP_FAILURE_RETRY(
304              mount(source_user.string(), target_user.string(), NULL, MS_BIND, NULL)) == -1) {
305        ALOGW("Failed to mount %s to %s: %d", source_user.string(), target_user.string(), errno);
306        return false;
307      }
308    }
309
310    if (fs_prepare_dir(legacy, 0000, 0, 0) == -1) {
311        return false;
312    }
313
314    // Finally, mount user-specific path into place for legacy users
315    if (TEMP_FAILURE_RETRY(
316            mount(target_user.string(), legacy, NULL, MS_BIND | MS_REC, NULL)) == -1) {
317      ALOGW("Failed to mount %s to %s: %d", target_user.string(), legacy, errno);
318      return false;
319    }
320  } else {
321    ALOGW("Mount mode %d unsupported", mount_mode);
322    return false;
323  }
324
325  return true;
326}
327
// Reports whether the running kernel needs the ADDR_NO_RANDOMIZE workaround.
//
// Only 32-bit ARM builds can return true, and only when the kernel release
// reported by uname() parses as a version older than 3.4. If uname() fails or
// the release string does not start with "<major>.<minor>", returns false.
static bool NeedsNoRandomizeWorkaround() {
#if defined(__arm__)
    struct utsname kernel_info;
    if (uname(&kernel_info) == -1) {
        return false;
    }

    int version_major;
    int version_minor;
    const int parsed = sscanf(kernel_info.release, "%d.%d", &version_major, &version_minor);
    if (parsed != 2) {
        return false;
    }

    // Kernels before 3.4.* need the workaround.
    if (version_major < 3) {
        return true;
    }
    return (version_major == 3) && (version_minor < 4);
#else
    return false;
#endif
}
347
348// Utility to close down the Zygote socket file descriptors while
349// the child is still running as root with Zygote's privileges.  Each
350// descriptor (if any) is closed via dup2(), replacing it with a valid
351// (open) descriptor to /dev/null.
352
353static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) {
354  if (!fdsToClose) {
355    return;
356  }
357  jsize count = env->GetArrayLength(fdsToClose);
358  jint *ar = env->GetIntArrayElements(fdsToClose, 0);
359  if (!ar) {
360      ALOGE("Bad fd array");
361      RuntimeAbort(env);
362  }
363  jsize i;
364  int devnull;
365  for (i = 0; i < count; i++) {
366    devnull = open("/dev/null", O_RDWR);
367    if (devnull < 0) {
368      ALOGE("Failed to open /dev/null");
369      RuntimeAbort(env);
370      continue;
371    }
372    ALOGV("Switching descriptor %d to /dev/null: %d", ar[i], errno);
373    if (dup2(devnull, ar[i]) < 0) {
374      ALOGE("Failed dup2() on descriptor %d", ar[i]);
375      RuntimeAbort(env);
376    }
377    close(devnull);
378  }
379}
380
381void SetThreadName(const char* thread_name) {
382  bool hasAt = false;
383  bool hasDot = false;
384  const char* s = thread_name;
385  while (*s) {
386    if (*s == '.') {
387      hasDot = true;
388    } else if (*s == '@') {
389      hasAt = true;
390    }
391    s++;
392  }
393  const int len = s - thread_name;
394  if (len < 15 || hasAt || !hasDot) {
395    s = thread_name;
396  } else {
397    s = thread_name + len - 15;
398  }
399  // pthread_setname_np fails rather than truncating long strings.
400  char buf[16];       // MAX_TASK_COMM_LEN=16 is hard-coded into bionic
401  strlcpy(buf, s, sizeof(buf)-1);
402  errno = pthread_setname_np(pthread_self(), buf);
403  if (errno != 0) {
404    ALOGW("Unable to set the name of current thread to '%s'", buf);
405  }
406}
407
// Temporary timing check.
// Returns the current CLOCK_MONOTONIC reading converted to milliseconds.
uint64_t MsTime() {
  timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);

  const uint64_t from_seconds = static_cast<uint64_t>(ts.tv_sec) * UINT64_C(1000);
  const uint64_t from_nanos = ts.tv_nsec / UINT64_C(1000000);
  return from_seconds + from_nanos;
}
414
415
416void ckTime(uint64_t start, const char* where) {
417  uint64_t now = MsTime();
418  if ((now-start) > 1000) {
419    // If we are taking more than a second, log about it.
420    ALOGW("Slow operation: %"PRIu64" ms in %s", (uint64_t)(now-start), where);
421  }
422}
423
424// Utility routine to fork zygote and specialize the child process.
425static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
426                                     jint debug_flags, jobjectArray javaRlimits,
427                                     jlong permittedCapabilities, jlong effectiveCapabilities,
428                                     jint mount_external,
429                                     jstring java_se_info, jstring java_se_name,
430                                     bool is_system_server, jintArray fdsToClose,
431                                     jstring instructionSet, jstring dataDir) {
432  uint64_t start = MsTime();
433  SetSigChldHandler();
434  ckTime(start, "ForkAndSpecializeCommon:SetSigChldHandler");
435
436  pid_t pid = fork();
437
438  if (pid == 0) {
439    // The child process.
440    gMallocLeakZygoteChild = 1;
441
442
443    // Clean up any descriptors which must be closed immediately
444    DetachDescriptors(env, fdsToClose);
445
446    ckTime(start, "ForkAndSpecializeCommon:Fork and detach");
447
448    // Keep capabilities across UID change, unless we're staying root.
449    if (uid != 0) {
450      EnableKeepCapabilities(env);
451    }
452
453    DropCapabilitiesBoundingSet(env);
454
455    bool need_native_bridge = false;
456    if (instructionSet != NULL) {
457      ScopedUtfChars isa_string(env, instructionSet);
458      need_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
459    }
460
461    if (!MountEmulatedStorage(uid, mount_external, need_native_bridge)) {
462      ALOGW("Failed to mount emulated storage: %d", errno);
463      if (errno == ENOTCONN || errno == EROFS) {
464        // When device is actively encrypting, we get ENOTCONN here
465        // since FUSE was mounted before the framework restarted.
466        // When encrypted device is booting, we get EROFS since
467        // FUSE hasn't been created yet by init.
468        // In either case, continue without external storage.
469      } else {
470        ALOGE("Cannot continue without emulated storage");
471        RuntimeAbort(env);
472      }
473    }
474
475    if (!is_system_server) {
476        int rc = createProcessGroup(uid, getpid());
477        if (rc != 0) {
478            if (rc == -EROFS) {
479                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
480            } else {
481                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
482            }
483        }
484    }
485
486    SetGids(env, javaGids);
487
488    SetRLimits(env, javaRlimits);
489
490    if (!is_system_server && need_native_bridge) {
491      // Set the environment for the apps running with native bridge.
492      ScopedUtfChars isa_string(env, instructionSet);  // Known non-null because of need_native_...
493      if (dataDir == NULL) {
494        android::PreInitializeNativeBridge(NULL, isa_string.c_str());
495      } else {
496        ScopedUtfChars data_dir(env, dataDir);
497        android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
498      }
499    }
500
501    int rc = setresgid(gid, gid, gid);
502    if (rc == -1) {
503      ALOGE("setresgid(%d) failed", gid);
504      RuntimeAbort(env);
505    }
506
507    rc = setresuid(uid, uid, uid);
508    if (rc == -1) {
509      ALOGE("setresuid(%d) failed", uid);
510      RuntimeAbort(env);
511    }
512
513    if (NeedsNoRandomizeWorkaround()) {
514        // Work around ARM kernel ASLR lossage (http://b/5817320).
515        int old_personality = personality(0xffffffff);
516        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
517        if (new_personality == -1) {
518            ALOGW("personality(%d) failed", new_personality);
519        }
520    }
521
522    SetCapabilities(env, permittedCapabilities, effectiveCapabilities);
523
524    SetSchedulerPolicy(env);
525
526    const char* se_info_c_str = NULL;
527    ScopedUtfChars* se_info = NULL;
528    if (java_se_info != NULL) {
529        se_info = new ScopedUtfChars(env, java_se_info);
530        se_info_c_str = se_info->c_str();
531        if (se_info_c_str == NULL) {
532          ALOGE("se_info_c_str == NULL");
533          RuntimeAbort(env);
534        }
535    }
536    const char* se_name_c_str = NULL;
537    ScopedUtfChars* se_name = NULL;
538    if (java_se_name != NULL) {
539        se_name = new ScopedUtfChars(env, java_se_name);
540        se_name_c_str = se_name->c_str();
541        if (se_name_c_str == NULL) {
542          ALOGE("se_name_c_str == NULL");
543          RuntimeAbort(env);
544        }
545    }
546    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
547    if (rc == -1) {
548      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
549            is_system_server, se_info_c_str, se_name_c_str);
550      RuntimeAbort(env);
551    }
552
553    // Make it easier to debug audit logs by setting the main thread's name to the
554    // nice name rather than "app_process".
555    if (se_info_c_str == NULL && is_system_server) {
556      se_name_c_str = "system_server";
557    }
558    if (se_info_c_str != NULL) {
559      SetThreadName(se_name_c_str);
560    }
561
562    delete se_info;
563    delete se_name;
564
565    UnsetSigChldHandler();
566
567    ckTime(start, "ForkAndSpecializeCommon:child process setup");
568
569    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, debug_flags,
570                              is_system_server ? NULL : instructionSet);
571    ckTime(start, "ForkAndSpecializeCommon:PostForkChildHooks returns");
572    if (env->ExceptionCheck()) {
573      ALOGE("Error calling post fork hooks.");
574      RuntimeAbort(env);
575    }
576  } else if (pid > 0) {
577    // the parent process
578  }
579  return pid;
580}
581}  // anonymous namespace
582
583namespace android {
584
585static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
586        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
587        jint debug_flags, jobjectArray rlimits,
588        jint mount_external, jstring se_info, jstring se_name,
589        jintArray fdsToClose, jstring instructionSet, jstring appDataDir) {
590    // Grant CAP_WAKE_ALARM to the Bluetooth process.
591    jlong capabilities = 0;
592    if (uid == AID_BLUETOOTH) {
593        capabilities |= (1LL << CAP_WAKE_ALARM);
594    }
595
596    return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags,
597            rlimits, capabilities, capabilities, mount_external, se_info,
598            se_name, false, fdsToClose, instructionSet, appDataDir);
599}
600
601static jint com_android_internal_os_Zygote_nativeForkSystemServer(
602        JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids,
603        jint debug_flags, jobjectArray rlimits, jlong permittedCapabilities,
604        jlong effectiveCapabilities) {
605  pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids,
606                                      debug_flags, rlimits,
607                                      permittedCapabilities, effectiveCapabilities,
608                                      MOUNT_EXTERNAL_NONE, NULL, NULL, true, NULL,
609                                      NULL, NULL);
610  if (pid > 0) {
611      // The zygote process checks whether the child process has died or not.
612      ALOGI("System server process %d has been created", pid);
613      gSystemServerPid = pid;
614      // There is a slight window that the system server process has crashed
615      // but it went unnoticed because we haven't published its pid yet. So
616      // we recheck here just to make sure that all is well.
617      int status;
618      if (waitpid(pid, &status, WNOHANG) == pid) {
619          ALOGE("System server process %d has died. Restarting Zygote!", pid);
620          RuntimeAbort(env);
621      }
622  }
623  return pid;
624}
625
// Table of native methods registered on the Zygote class. Each entry is
// { Java method name, JNI type signature, native implementation }.
static JNINativeMethod gMethods[] = {
    // (int uid, int gid, int[] gids, int debugFlags, int[][] rlimits,
    //  int mountExternal, String seInfo, String seName, int[] fdsToClose,
    //  String instructionSet, String appDataDir) -> int
    { "nativeForkAndSpecialize",
      "(II[II[[IILjava/lang/String;Ljava/lang/String;[ILjava/lang/String;Ljava/lang/String;)I",
      (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize },
    // (int uid, int gid, int[] gids, int debugFlags, int[][] rlimits,
    //  long permittedCapabilities, long effectiveCapabilities) -> int
    { "nativeForkSystemServer", "(II[II[[IJJ)I",
      (void *) com_android_internal_os_Zygote_nativeForkSystemServer }
};
633
634int register_com_android_internal_os_Zygote(JNIEnv* env) {
635  gZygoteClass = (jclass) env->NewGlobalRef(env->FindClass(kZygoteClassName));
636  if (gZygoteClass == NULL) {
637    RuntimeAbort(env);
638  }
639  gCallPostForkChildHooks = env->GetStaticMethodID(gZygoteClass, "callPostForkChildHooks",
640                                                   "(ILjava/lang/String;)V");
641
642  return AndroidRuntime::registerNativeMethods(env, "com/android/internal/os/Zygote",
643      gMethods, NELEM(gMethods));
644}
645}  // namespace android
646
647