com_android_internal_os_Zygote.cpp revision 629dc1801331ce89e8ee0ff7ee5dcde1d7512417
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "Zygote" 18 19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc 20#include <sys/mount.h> 21#include <linux/fs.h> 22 23#include <grp.h> 24#include <fcntl.h> 25#include <paths.h> 26#include <signal.h> 27#include <stdlib.h> 28#include <unistd.h> 29#include <sys/capability.h> 30#include <sys/personality.h> 31#include <sys/prctl.h> 32#include <sys/resource.h> 33#include <sys/stat.h> 34#include <sys/types.h> 35#include <sys/utsname.h> 36#include <sys/wait.h> 37 38 39#include <cutils/fs.h> 40#include <cutils/multiuser.h> 41#include <cutils/sched_policy.h> 42#include <private/android_filesystem_config.h> 43#include <utils/String8.h> 44#include <selinux/android.h> 45#include <processgroup/processgroup.h> 46#include <inttypes.h> 47 48#include "android_runtime/AndroidRuntime.h" 49#include "JNIHelp.h" 50#include "ScopedLocalRef.h" 51#include "ScopedPrimitiveArray.h" 52#include "ScopedUtfChars.h" 53 54namespace { 55 56using android::String8; 57 58static pid_t gSystemServerPid = 0; 59 60static const char kZygoteClassName[] = "com/android/internal/os/Zygote"; 61static jclass gZygoteClass; 62static jmethodID gCallPostForkChildHooks; 63 64// Must match values in com.android.internal.os.Zygote. 65enum MountExternalKind { 66 MOUNT_EXTERNAL_NONE = 0, 67 MOUNT_EXTERNAL_SINGLEUSER = 1, 68 MOUNT_EXTERNAL_MULTIUSER = 2, 69 MOUNT_EXTERNAL_MULTIUSER_ALL = 3, 70}; 71 72static void RuntimeAbort(JNIEnv* env) { 73 env->FatalError("RuntimeAbort"); 74} 75 76// This signal handler is for zygote mode, since the zygote must reap its children 77static void SigChldHandler(int /*signal_number*/) { 78 pid_t pid; 79 int status; 80 81 while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { 82 // Log process-death status that we care about. In general it is 83 // not safe to call LOG(...) from a signal handler because of 84 // possible reentrancy. However, we know a priori that the 85 // current implementation of LOG() is safe to call from a SIGCHLD 86 // handler in the zygote process. If the LOG() implementation 87 // changes its locking strategy or its use of syscalls within the 88 // lazy-init critical section, its use here may become unsafe. 89 if (WIFEXITED(status)) { 90 if (WEXITSTATUS(status)) { 91 ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status)); 92 } 93 } else if (WIFSIGNALED(status)) { 94 if (WTERMSIG(status) != SIGKILL) { 95 ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status)); 96 } 97 if (WCOREDUMP(status)) { 98 ALOGI("Process %d dumped core.", pid); 99 } 100 } 101 102 // If the just-crashed process is the system_server, bring down zygote 103 // so that it is restarted by init and system server will be restarted 104 // from there. 105 if (pid == gSystemServerPid) { 106 ALOGE("Exit zygote because system server (%d) has terminated"); 107 kill(getpid(), SIGKILL); 108 } 109 } 110 111 // Note that we shouldn't consider ECHILD an error because 112 // the secondary zygote might have no children left to wait for. 113 if (pid < 0 && errno != ECHILD) { 114 ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno)); 115 } 116} 117 118// Configures the SIGCHLD handler for the zygote process. This is configured 119// very late, because earlier in the runtime we may fork() and exec() 120// other processes, and we want to waitpid() for those rather than 121// have them be harvested immediately. 122// 123// This ends up being called repeatedly before each fork(), but there's 124// no real harm in that. 125static void SetSigChldHandler() { 126 struct sigaction sa; 127 memset(&sa, 0, sizeof(sa)); 128 sa.sa_handler = SigChldHandler; 129 130 int err = sigaction(SIGCHLD, &sa, NULL); 131 if (err < 0) { 132 ALOGW("Error setting SIGCHLD handler: %d", errno); 133 } 134} 135 136// Sets the SIGCHLD handler back to default behavior in zygote children. 137static void UnsetSigChldHandler() { 138 struct sigaction sa; 139 memset(&sa, 0, sizeof(sa)); 140 sa.sa_handler = SIG_DFL; 141 142 int err = sigaction(SIGCHLD, &sa, NULL); 143 if (err < 0) { 144 ALOGW("Error unsetting SIGCHLD handler: %d", errno); 145 } 146} 147 148// Calls POSIX setgroups() using the int[] object as an argument. 149// A NULL argument is tolerated. 150static void SetGids(JNIEnv* env, jintArray javaGids) { 151 if (javaGids == NULL) { 152 return; 153 } 154 155 ScopedIntArrayRO gids(env, javaGids); 156 if (gids.get() == NULL) { 157 RuntimeAbort(env); 158 } 159 int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0])); 160 if (rc == -1) { 161 ALOGE("setgroups failed"); 162 RuntimeAbort(env); 163 } 164} 165 166// Sets the resource limits via setrlimit(2) for the values in the 167// two-dimensional array of integers that's passed in. The second dimension 168// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is 169// treated as an empty array. 170static void SetRLimits(JNIEnv* env, jobjectArray javaRlimits) { 171 if (javaRlimits == NULL) { 172 return; 173 } 174 175 rlimit rlim; 176 memset(&rlim, 0, sizeof(rlim)); 177 178 for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) { 179 ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i)); 180 ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get())); 181 if (javaRlimit.size() != 3) { 182 ALOGE("rlimits array must have a second dimension of size 3"); 183 RuntimeAbort(env); 184 } 185 186 rlim.rlim_cur = javaRlimit[1]; 187 rlim.rlim_max = javaRlimit[2]; 188 189 int rc = setrlimit(javaRlimit[0], &rlim); 190 if (rc == -1) { 191 ALOGE("setrlimit(%d, {%d, %d}) failed", javaRlimit[0], rlim.rlim_cur, rlim.rlim_max); 192 RuntimeAbort(env); 193 } 194 } 195} 196 197// The debug malloc library needs to know whether it's the zygote or a child. 198extern "C" int gMallocLeakZygoteChild; 199 200static void EnableKeepCapabilities(JNIEnv* env) { 201 int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0); 202 if (rc == -1) { 203 ALOGE("prctl(PR_SET_KEEPCAPS) failed"); 204 RuntimeAbort(env); 205 } 206} 207 208static void DropCapabilitiesBoundingSet(JNIEnv* env) { 209 for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) { 210 int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0); 211 if (rc == -1) { 212 if (errno == EINVAL) { 213 ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify " 214 "your kernel is compiled with file capabilities support"); 215 } else { 216 ALOGE("prctl(PR_CAPBSET_DROP) failed"); 217 RuntimeAbort(env); 218 } 219 } 220 } 221} 222 223static void SetCapabilities(JNIEnv* env, int64_t permitted, int64_t effective) { 224 __user_cap_header_struct capheader; 225 memset(&capheader, 0, sizeof(capheader)); 226 capheader.version = _LINUX_CAPABILITY_VERSION_3; 227 capheader.pid = 0; 228 229 __user_cap_data_struct capdata[2]; 230 memset(&capdata, 0, sizeof(capdata)); 231 capdata[0].effective = effective; 232 capdata[1].effective = effective >> 32; 233 capdata[0].permitted = permitted; 234 capdata[1].permitted = permitted >> 32; 235 236 if (capset(&capheader, &capdata[0]) == -1) { 237 ALOGE("capset(%lld, %lld) failed", permitted, effective); 238 RuntimeAbort(env); 239 } 240} 241 242static void SetSchedulerPolicy(JNIEnv* env) { 243 errno = -set_sched_policy(0, SP_DEFAULT); 244 if (errno != 0) { 245 ALOGE("set_sched_policy(0, SP_DEFAULT) failed"); 246 RuntimeAbort(env); 247 } 248} 249 250// Create a private mount namespace and bind mount appropriate emulated 251// storage for the given user. 252static bool MountEmulatedStorage(uid_t uid, jint mount_mode) { 253 if (mount_mode == MOUNT_EXTERNAL_NONE) { 254 return true; 255 } 256 257 // See storage config details at http://source.android.com/tech/storage/ 258 userid_t user_id = multiuser_get_user_id(uid); 259 260 // Create a second private mount namespace for our process 261 if (unshare(CLONE_NEWNS) == -1) { 262 ALOGW("Failed to unshare(): %d", errno); 263 return false; 264 } 265 266 // Create bind mounts to expose external storage 267 if (mount_mode == MOUNT_EXTERNAL_MULTIUSER || mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) { 268 // These paths must already be created by init.rc 269 const char* source = getenv("EMULATED_STORAGE_SOURCE"); 270 const char* target = getenv("EMULATED_STORAGE_TARGET"); 271 const char* legacy = getenv("EXTERNAL_STORAGE"); 272 if (source == NULL || target == NULL || legacy == NULL) { 273 ALOGW("Storage environment undefined; unable to provide external storage"); 274 return false; 275 } 276 277 // Prepare source paths 278 279 // /mnt/shell/emulated/0 280 const String8 source_user(String8::format("%s/%d", source, user_id)); 281 // /storage/emulated/0 282 const String8 target_user(String8::format("%s/%d", target, user_id)); 283 284 if (fs_prepare_dir(source_user.string(), 0000, 0, 0) == -1 285 || fs_prepare_dir(target_user.string(), 0000, 0, 0) == -1) { 286 return false; 287 } 288 289 if (mount_mode == MOUNT_EXTERNAL_MULTIUSER_ALL) { 290 // Mount entire external storage tree for all users 291 if (TEMP_FAILURE_RETRY(mount(source, target, NULL, MS_BIND, NULL)) == -1) { 292 ALOGW("Failed to mount %s to %s :%d", source, target, errno); 293 return false; 294 } 295 } else { 296 // Only mount user-specific external storage 297 if (TEMP_FAILURE_RETRY( 298 mount(source_user.string(), target_user.string(), NULL, MS_BIND, NULL)) == -1) { 299 ALOGW("Failed to mount %s to %s: %d", source_user.string(), target_user.string(), errno); 300 return false; 301 } 302 } 303 304 if (fs_prepare_dir(legacy, 0000, 0, 0) == -1) { 305 return false; 306 } 307 308 // Finally, mount user-specific path into place for legacy users 309 if (TEMP_FAILURE_RETRY( 310 mount(target_user.string(), legacy, NULL, MS_BIND | MS_REC, NULL)) == -1) { 311 ALOGW("Failed to mount %s to %s: %d", target_user.string(), legacy, errno); 312 return false; 313 } 314 } else { 315 ALOGW("Mount mode %d unsupported", mount_mode); 316 return false; 317 } 318 319 return true; 320} 321 322static bool NeedsNoRandomizeWorkaround() { 323#if !defined(__arm__) 324 return false; 325#else 326 int major; 327 int minor; 328 struct utsname uts; 329 if (uname(&uts) == -1) { 330 return false; 331 } 332 333 if (sscanf(uts.release, "%d.%d", &major, &minor) != 2) { 334 return false; 335 } 336 337 // Kernels before 3.4.* need the workaround. 338 return (major < 3) || ((major == 3) && (minor < 4)); 339#endif 340} 341 342// Utility to close down the Zygote socket file descriptors while 343// the child is still running as root with Zygote's privileges. Each 344// descriptor (if any) is closed via dup2(), replacing it with a valid 345// (open) descriptor to /dev/null. 346 347static void DetachDescriptors(JNIEnv* env, jintArray fdsToClose) { 348 if (!fdsToClose) { 349 return; 350 } 351 jsize count = env->GetArrayLength(fdsToClose); 352 jint *ar = env->GetIntArrayElements(fdsToClose, 0); 353 if (!ar) { 354 ALOGE("Bad fd array"); 355 RuntimeAbort(env); 356 } 357 jsize i; 358 int devnull; 359 for (i = 0; i < count; i++) { 360 devnull = open("/dev/null", O_RDWR); 361 if (devnull < 0) { 362 ALOGE("Failed to open /dev/null"); 363 RuntimeAbort(env); 364 continue; 365 } 366 ALOGV("Switching descriptor %d to /dev/null: %d", ar[i], errno); 367 if (dup2(devnull, ar[i]) < 0) { 368 ALOGE("Failed dup2() on descriptor %d", ar[i]); 369 RuntimeAbort(env); 370 } 371 close(devnull); 372 } 373} 374 375void SetThreadName(const char* thread_name) { 376 bool hasAt = false; 377 bool hasDot = false; 378 const char* s = thread_name; 379 while (*s) { 380 if (*s == '.') { 381 hasDot = true; 382 } else if (*s == '@') { 383 hasAt = true; 384 } 385 s++; 386 } 387 const int len = s - thread_name; 388 if (len < 15 || hasAt || !hasDot) { 389 s = thread_name; 390 } else { 391 s = thread_name + len - 15; 392 } 393 // pthread_setname_np fails rather than truncating long strings. 394 char buf[16]; // MAX_TASK_COMM_LEN=16 is hard-coded into bionic 395 strlcpy(buf, s, sizeof(buf)-1); 396 errno = pthread_setname_np(pthread_self(), buf); 397 if (errno != 0) { 398 ALOGW("Unable to set the name of current thread to '%s'", buf); 399 } 400} 401 402 // Temporary timing check. 403uint64_t MsTime() { 404 timespec now; 405 clock_gettime(CLOCK_MONOTONIC, &now); 406 return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000) + now.tv_nsec / UINT64_C(1000000); 407} 408 409 410void ckTime(uint64_t start, const char* where) { 411 uint64_t now = MsTime(); 412 if ((now-start) > 1000) { 413 // If we are taking more than a second, log about it. 414 ALOGW("Slow operation: %"PRIu64" ms in %s", (uint64_t)(now-start), where); 415 } 416} 417 418// Utility routine to fork zygote and specialize the child process. 419static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids, 420 jint debug_flags, jobjectArray javaRlimits, 421 jlong permittedCapabilities, jlong effectiveCapabilities, 422 jint mount_external, 423 jstring java_se_info, jstring java_se_name, 424 bool is_system_server, jintArray fdsToClose) { 425 uint64_t start = MsTime(); 426 SetSigChldHandler(); 427 ckTime(start, "ForkAndSpecializeCommon:SetSigChldHandler"); 428 429 pid_t pid = fork(); 430 431 if (pid == 0) { 432 // The child process. 433 gMallocLeakZygoteChild = 1; 434 435 436 // Clean up any descriptors which must be closed immediately 437 DetachDescriptors(env, fdsToClose); 438 439 ckTime(start, "ForkAndSpecializeCommon:Fork and detach"); 440 441 // Keep capabilities across UID change, unless we're staying root. 442 if (uid != 0) { 443 EnableKeepCapabilities(env); 444 } 445 446 DropCapabilitiesBoundingSet(env); 447 448 if (!MountEmulatedStorage(uid, mount_external)) { 449 ALOGW("Failed to mount emulated storage: %d", errno); 450 if (errno == ENOTCONN || errno == EROFS) { 451 // When device is actively encrypting, we get ENOTCONN here 452 // since FUSE was mounted before the framework restarted. 453 // When encrypted device is booting, we get EROFS since 454 // FUSE hasn't been created yet by init. 455 // In either case, continue without external storage. 456 } else { 457 ALOGE("Cannot continue without emulated storage"); 458 RuntimeAbort(env); 459 } 460 } 461 462 if (!is_system_server) { 463 int rc = createProcessGroup(uid, getpid()); 464 if (rc != 0) { 465 if (rc == -EROFS) { 466 ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?"); 467 } else { 468 ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc)); 469 } 470 } 471 } 472 473 SetGids(env, javaGids); 474 475 SetRLimits(env, javaRlimits); 476 477 int rc = setresgid(gid, gid, gid); 478 if (rc == -1) { 479 ALOGE("setresgid(%d) failed", gid); 480 RuntimeAbort(env); 481 } 482 483 rc = setresuid(uid, uid, uid); 484 if (rc == -1) { 485 ALOGE("setresuid(%d) failed", uid); 486 RuntimeAbort(env); 487 } 488 489 if (NeedsNoRandomizeWorkaround()) { 490 // Work around ARM kernel ASLR lossage (http://b/5817320). 491 int old_personality = personality(0xffffffff); 492 int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE); 493 if (new_personality == -1) { 494 ALOGW("personality(%d) failed", new_personality); 495 } 496 } 497 498 SetCapabilities(env, permittedCapabilities, effectiveCapabilities); 499 500 SetSchedulerPolicy(env); 501 502 const char* se_info_c_str = NULL; 503 ScopedUtfChars* se_info = NULL; 504 if (java_se_info != NULL) { 505 se_info = new ScopedUtfChars(env, java_se_info); 506 se_info_c_str = se_info->c_str(); 507 if (se_info_c_str == NULL) { 508 ALOGE("se_info_c_str == NULL"); 509 RuntimeAbort(env); 510 } 511 } 512 const char* se_name_c_str = NULL; 513 ScopedUtfChars* se_name = NULL; 514 if (java_se_name != NULL) { 515 se_name = new ScopedUtfChars(env, java_se_name); 516 se_name_c_str = se_name->c_str(); 517 if (se_name_c_str == NULL) { 518 ALOGE("se_name_c_str == NULL"); 519 RuntimeAbort(env); 520 } 521 } 522 rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str); 523 if (rc == -1) { 524 ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid, 525 is_system_server, se_info_c_str, se_name_c_str); 526 RuntimeAbort(env); 527 } 528 529 // Make it easier to debug audit logs by setting the main thread's name to the 530 // nice name rather than "app_process". 531 if (se_info_c_str == NULL && is_system_server) { 532 se_name_c_str = "system_server"; 533 } 534 if (se_info_c_str != NULL) { 535 SetThreadName(se_name_c_str); 536 } 537 538 delete se_info; 539 delete se_name; 540 541 UnsetSigChldHandler(); 542 543 ckTime(start, "ForkAndSpecializeCommon:child process setup"); 544 545 env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, debug_flags); 546 ckTime(start, "ForkAndSpecializeCommon:PostForkChildHooks returns"); 547 if (env->ExceptionCheck()) { 548 ALOGE("Error calling post fork hooks."); 549 RuntimeAbort(env); 550 } 551 } else if (pid > 0) { 552 // the parent process 553 } 554 return pid; 555} 556} // anonymous namespace 557 558namespace android { 559 560static jint com_android_internal_os_Zygote_nativeForkAndSpecialize( 561 JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, 562 jint debug_flags, jobjectArray rlimits, 563 jint mount_external, jstring se_info, jstring se_name, 564 jintArray fdsToClose) { 565 // Grant CAP_WAKE_ALARM to the Bluetooth process. 566 jlong capabilities = 0; 567 if (uid == AID_BLUETOOTH) { 568 capabilities |= (1LL << CAP_WAKE_ALARM); 569 } 570 571 return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags, 572 rlimits, capabilities, capabilities, mount_external, se_info, 573 se_name, false, fdsToClose); 574} 575 576static jint com_android_internal_os_Zygote_nativeForkSystemServer( 577 JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids, 578 jint debug_flags, jobjectArray rlimits, jlong permittedCapabilities, 579 jlong effectiveCapabilities) { 580 pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids, 581 debug_flags, rlimits, 582 permittedCapabilities, effectiveCapabilities, 583 MOUNT_EXTERNAL_NONE, NULL, NULL, true, NULL); 584 if (pid > 0) { 585 // The zygote process checks whether the child process has died or not. 586 ALOGI("System server process %d has been created", pid); 587 gSystemServerPid = pid; 588 // There is a slight window that the system server process has crashed 589 // but it went unnoticed because we haven't published its pid yet. So 590 // we recheck here just to make sure that all is well. 591 int status; 592 if (waitpid(pid, &status, WNOHANG) == pid) { 593 ALOGE("System server process %d has died. Restarting Zygote!", pid); 594 RuntimeAbort(env); 595 } 596 } 597 return pid; 598} 599 600static JNINativeMethod gMethods[] = { 601 { "nativeForkAndSpecialize", "(II[II[[IILjava/lang/String;Ljava/lang/String;[I)I", 602 (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize }, 603 { "nativeForkSystemServer", "(II[II[[IJJ)I", 604 (void *) com_android_internal_os_Zygote_nativeForkSystemServer } 605}; 606 607int register_com_android_internal_os_Zygote(JNIEnv* env) { 608 gZygoteClass = (jclass) env->NewGlobalRef(env->FindClass(kZygoteClassName)); 609 if (gZygoteClass == NULL) { 610 RuntimeAbort(env); 611 } 612 gCallPostForkChildHooks = env->GetStaticMethodID(gZygoteClass, "callPostForkChildHooks", "(I)V"); 613 614 return AndroidRuntime::registerNativeMethods(env, "com/android/internal/os/Zygote", 615 gMethods, NELEM(gMethods)); 616} 617} // namespace android 618 619