1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "Zygote" 18 19// sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc 20#include <sys/mount.h> 21#include <linux/fs.h> 22 23#include <list> 24#include <sstream> 25#include <string> 26 27#include <fcntl.h> 28#include <grp.h> 29#include <inttypes.h> 30#include <malloc.h> 31#include <mntent.h> 32#include <paths.h> 33#include <signal.h> 34#include <stdlib.h> 35#include <sys/capability.h> 36#include <sys/cdefs.h> 37#include <sys/personality.h> 38#include <sys/prctl.h> 39#include <sys/resource.h> 40#include <sys/stat.h> 41#include <sys/time.h> 42#include <sys/types.h> 43#include <sys/utsname.h> 44#include <sys/wait.h> 45#include <unistd.h> 46 47#include "android-base/logging.h" 48#include <android-base/file.h> 49#include <android-base/stringprintf.h> 50#include <cutils/fs.h> 51#include <cutils/multiuser.h> 52#include <cutils/sched_policy.h> 53#include <private/android_filesystem_config.h> 54#include <utils/String8.h> 55#include <selinux/android.h> 56#include <seccomp_policy.h> 57#include <processgroup/processgroup.h> 58 59#include "core_jni_helpers.h" 60#include <nativehelper/JNIHelp.h> 61#include <nativehelper/ScopedLocalRef.h> 62#include <nativehelper/ScopedPrimitiveArray.h> 63#include <nativehelper/ScopedUtfChars.h> 64#include "fd_utils.h" 65 66#include "nativebridge/native_bridge.h" 67 68namespace { 69 70using android::String8; 71using android::base::StringPrintf; 72using android::base::WriteStringToFile; 73 74#define CREATE_ERROR(...) StringPrintf("%s:%d: ", __FILE__, __LINE__). \ 75 append(StringPrintf(__VA_ARGS__)) 76 77static pid_t gSystemServerPid = 0; 78 79static const char kZygoteClassName[] = "com/android/internal/os/Zygote"; 80static jclass gZygoteClass; 81static jmethodID gCallPostForkChildHooks; 82 83static bool g_is_security_enforced = true; 84 85// Must match values in com.android.internal.os.Zygote. 86enum MountExternalKind { 87 MOUNT_EXTERNAL_NONE = 0, 88 MOUNT_EXTERNAL_DEFAULT = 1, 89 MOUNT_EXTERNAL_READ = 2, 90 MOUNT_EXTERNAL_WRITE = 3, 91}; 92 93static void RuntimeAbort(JNIEnv* env, int line, const char* msg) { 94 std::ostringstream oss; 95 oss << __FILE__ << ":" << line << ": " << msg; 96 env->FatalError(oss.str().c_str()); 97} 98 99// This signal handler is for zygote mode, since the zygote must reap its children 100static void SigChldHandler(int /*signal_number*/) { 101 pid_t pid; 102 int status; 103 104 // It's necessary to save and restore the errno during this function. 105 // Since errno is stored per thread, changing it here modifies the errno 106 // on the thread on which this signal handler executes. If a signal occurs 107 // between a call and an errno check, it's possible to get the errno set 108 // here. 109 // See b/23572286 for extra information. 110 int saved_errno = errno; 111 112 while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { 113 // Log process-death status that we care about. In general it is 114 // not safe to call LOG(...) from a signal handler because of 115 // possible reentrancy. However, we know a priori that the 116 // current implementation of LOG() is safe to call from a SIGCHLD 117 // handler in the zygote process. If the LOG() implementation 118 // changes its locking strategy or its use of syscalls within the 119 // lazy-init critical section, its use here may become unsafe. 120 if (WIFEXITED(status)) { 121 ALOGI("Process %d exited cleanly (%d)", pid, WEXITSTATUS(status)); 122 } else if (WIFSIGNALED(status)) { 123 ALOGI("Process %d exited due to signal (%d)", pid, WTERMSIG(status)); 124 if (WCOREDUMP(status)) { 125 ALOGI("Process %d dumped core.", pid); 126 } 127 } 128 129 // If the just-crashed process is the system_server, bring down zygote 130 // so that it is restarted by init and system server will be restarted 131 // from there. 132 if (pid == gSystemServerPid) { 133 ALOGE("Exit zygote because system server (%d) has terminated", pid); 134 kill(getpid(), SIGKILL); 135 } 136 } 137 138 // Note that we shouldn't consider ECHILD an error because 139 // the secondary zygote might have no children left to wait for. 140 if (pid < 0 && errno != ECHILD) { 141 ALOGW("Zygote SIGCHLD error in waitpid: %s", strerror(errno)); 142 } 143 144 errno = saved_errno; 145} 146 147// Configures the SIGCHLD/SIGHUP handlers for the zygote process. This is 148// configured very late, because earlier in the runtime we may fork() and 149// exec() other processes, and we want to waitpid() for those rather than 150// have them be harvested immediately. 151// 152// Ignore SIGHUP because all processes forked by the zygote are in the same 153// process group as the zygote and we don't want to be notified if we become 154// an orphaned group and have one or more stopped processes. This is not a 155// theoretical concern : 156// - we can become an orphaned group if one of our direct descendants forks 157// and is subsequently killed before its children. 158// - crash_dump routinely STOPs the process it's tracing. 159// 160// See issues b/71965619 and b/25567761 for further details. 161// 162// This ends up being called repeatedly before each fork(), but there's 163// no real harm in that. 164static void SetSignalHandlers() { 165 struct sigaction sig_chld = {}; 166 sig_chld.sa_handler = SigChldHandler; 167 168 if (sigaction(SIGCHLD, &sig_chld, NULL) < 0) { 169 ALOGW("Error setting SIGCHLD handler: %s", strerror(errno)); 170 } 171 172 struct sigaction sig_hup = {}; 173 sig_hup.sa_handler = SIG_IGN; 174 if (sigaction(SIGHUP, &sig_hup, NULL) < 0) { 175 ALOGW("Error setting SIGHUP handler: %s", strerror(errno)); 176 } 177} 178 179// Sets the SIGCHLD handler back to default behavior in zygote children. 180static void UnsetChldSignalHandler() { 181 struct sigaction sa; 182 memset(&sa, 0, sizeof(sa)); 183 sa.sa_handler = SIG_DFL; 184 185 if (sigaction(SIGCHLD, &sa, NULL) < 0) { 186 ALOGW("Error unsetting SIGCHLD handler: %s", strerror(errno)); 187 } 188} 189 190// Calls POSIX setgroups() using the int[] object as an argument. 191// A NULL argument is tolerated. 192static bool SetGids(JNIEnv* env, jintArray javaGids, std::string* error_msg) { 193 if (javaGids == NULL) { 194 return true; 195 } 196 197 ScopedIntArrayRO gids(env, javaGids); 198 if (gids.get() == NULL) { 199 *error_msg = CREATE_ERROR("Getting gids int array failed"); 200 return false; 201 } 202 int rc = setgroups(gids.size(), reinterpret_cast<const gid_t*>(&gids[0])); 203 if (rc == -1) { 204 *error_msg = CREATE_ERROR("setgroups failed: %s, gids.size=%zu", strerror(errno), gids.size()); 205 return false; 206 } 207 208 return true; 209} 210 211// Sets the resource limits via setrlimit(2) for the values in the 212// two-dimensional array of integers that's passed in. The second dimension 213// contains a tuple of length 3: (resource, rlim_cur, rlim_max). NULL is 214// treated as an empty array. 215static bool SetRLimits(JNIEnv* env, jobjectArray javaRlimits, std::string* error_msg) { 216 if (javaRlimits == NULL) { 217 return true; 218 } 219 220 rlimit rlim; 221 memset(&rlim, 0, sizeof(rlim)); 222 223 for (int i = 0; i < env->GetArrayLength(javaRlimits); ++i) { 224 ScopedLocalRef<jobject> javaRlimitObject(env, env->GetObjectArrayElement(javaRlimits, i)); 225 ScopedIntArrayRO javaRlimit(env, reinterpret_cast<jintArray>(javaRlimitObject.get())); 226 if (javaRlimit.size() != 3) { 227 *error_msg = CREATE_ERROR("rlimits array must have a second dimension of size 3"); 228 return false; 229 } 230 231 rlim.rlim_cur = javaRlimit[1]; 232 rlim.rlim_max = javaRlimit[2]; 233 234 int rc = setrlimit(javaRlimit[0], &rlim); 235 if (rc == -1) { 236 *error_msg = CREATE_ERROR("setrlimit(%d, {%ld, %ld}) failed", javaRlimit[0], rlim.rlim_cur, 237 rlim.rlim_max); 238 return false; 239 } 240 } 241 242 return true; 243} 244 245// The debug malloc library needs to know whether it's the zygote or a child. 246extern "C" int gMallocLeakZygoteChild; 247 248static void PreApplicationInit() { 249 // The child process sets this to indicate it's not the zygote. 250 gMallocLeakZygoteChild = 1; 251 252 // Set the jemalloc decay time to 1. 253 mallopt(M_DECAY_TIME, 1); 254} 255 256static void SetUpSeccompFilter(uid_t uid) { 257 if (!g_is_security_enforced) { 258 ALOGI("seccomp disabled by setenforce 0"); 259 return; 260 } 261 262 // Apply system or app filter based on uid. 263 if (uid >= AID_APP_START) { 264 set_app_seccomp_filter(); 265 } else { 266 set_system_seccomp_filter(); 267 } 268} 269 270static bool EnableKeepCapabilities(std::string* error_msg) { 271 int rc = prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0); 272 if (rc == -1) { 273 *error_msg = CREATE_ERROR("prctl(PR_SET_KEEPCAPS) failed: %s", strerror(errno)); 274 return false; 275 } 276 return true; 277} 278 279static bool DropCapabilitiesBoundingSet(std::string* error_msg) { 280 for (int i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0; i++) { 281 int rc = prctl(PR_CAPBSET_DROP, i, 0, 0, 0); 282 if (rc == -1) { 283 if (errno == EINVAL) { 284 ALOGE("prctl(PR_CAPBSET_DROP) failed with EINVAL. Please verify " 285 "your kernel is compiled with file capabilities support"); 286 } else { 287 *error_msg = CREATE_ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %s", i, strerror(errno)); 288 return false; 289 } 290 } 291 } 292 return true; 293} 294 295static bool SetInheritable(uint64_t inheritable, std::string* error_msg) { 296 __user_cap_header_struct capheader; 297 memset(&capheader, 0, sizeof(capheader)); 298 capheader.version = _LINUX_CAPABILITY_VERSION_3; 299 capheader.pid = 0; 300 301 __user_cap_data_struct capdata[2]; 302 if (capget(&capheader, &capdata[0]) == -1) { 303 *error_msg = CREATE_ERROR("capget failed: %s", strerror(errno)); 304 return false; 305 } 306 307 capdata[0].inheritable = inheritable; 308 capdata[1].inheritable = inheritable >> 32; 309 310 if (capset(&capheader, &capdata[0]) == -1) { 311 *error_msg = CREATE_ERROR("capset(inh=%" PRIx64 ") failed: %s", inheritable, strerror(errno)); 312 return false; 313 } 314 315 return true; 316} 317 318static bool SetCapabilities(uint64_t permitted, uint64_t effective, uint64_t inheritable, 319 std::string* error_msg) { 320 __user_cap_header_struct capheader; 321 memset(&capheader, 0, sizeof(capheader)); 322 capheader.version = _LINUX_CAPABILITY_VERSION_3; 323 capheader.pid = 0; 324 325 __user_cap_data_struct capdata[2]; 326 memset(&capdata, 0, sizeof(capdata)); 327 capdata[0].effective = effective; 328 capdata[1].effective = effective >> 32; 329 capdata[0].permitted = permitted; 330 capdata[1].permitted = permitted >> 32; 331 capdata[0].inheritable = inheritable; 332 capdata[1].inheritable = inheritable >> 32; 333 334 if (capset(&capheader, &capdata[0]) == -1) { 335 *error_msg = CREATE_ERROR("capset(perm=%" PRIx64 ", eff=%" PRIx64 ", inh=%" PRIx64 ") " 336 "failed: %s", permitted, effective, inheritable, strerror(errno)); 337 return false; 338 } 339 return true; 340} 341 342static bool SetSchedulerPolicy(std::string* error_msg) { 343 errno = -set_sched_policy(0, SP_DEFAULT); 344 if (errno != 0) { 345 *error_msg = CREATE_ERROR("set_sched_policy(0, SP_DEFAULT) failed: %s", strerror(errno)); 346 return false; 347 } 348 return true; 349} 350 351static int UnmountTree(const char* path) { 352 size_t path_len = strlen(path); 353 354 FILE* fp = setmntent("/proc/mounts", "r"); 355 if (fp == NULL) { 356 ALOGE("Error opening /proc/mounts: %s", strerror(errno)); 357 return -errno; 358 } 359 360 // Some volumes can be stacked on each other, so force unmount in 361 // reverse order to give us the best chance of success. 362 std::list<std::string> toUnmount; 363 mntent* mentry; 364 while ((mentry = getmntent(fp)) != NULL) { 365 if (strncmp(mentry->mnt_dir, path, path_len) == 0) { 366 toUnmount.push_front(std::string(mentry->mnt_dir)); 367 } 368 } 369 endmntent(fp); 370 371 for (auto path : toUnmount) { 372 if (umount2(path.c_str(), MNT_DETACH)) { 373 ALOGW("Failed to unmount %s: %s", path.c_str(), strerror(errno)); 374 } 375 } 376 return 0; 377} 378 379// Create a private mount namespace and bind mount appropriate emulated 380// storage for the given user. 381static bool MountEmulatedStorage(uid_t uid, jint mount_mode, 382 bool force_mount_namespace, std::string* error_msg) { 383 // See storage config details at http://source.android.com/tech/storage/ 384 385 String8 storageSource; 386 if (mount_mode == MOUNT_EXTERNAL_DEFAULT) { 387 storageSource = "/mnt/runtime/default"; 388 } else if (mount_mode == MOUNT_EXTERNAL_READ) { 389 storageSource = "/mnt/runtime/read"; 390 } else if (mount_mode == MOUNT_EXTERNAL_WRITE) { 391 storageSource = "/mnt/runtime/write"; 392 } else if (!force_mount_namespace) { 393 // Sane default of no storage visible 394 return true; 395 } 396 397 // Create a second private mount namespace for our process 398 if (unshare(CLONE_NEWNS) == -1) { 399 *error_msg = CREATE_ERROR("Failed to unshare(): %s", strerror(errno)); 400 return false; 401 } 402 403 // Handle force_mount_namespace with MOUNT_EXTERNAL_NONE. 404 if (mount_mode == MOUNT_EXTERNAL_NONE) { 405 return true; 406 } 407 408 if (TEMP_FAILURE_RETRY(mount(storageSource.string(), "/storage", 409 NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) { 410 *error_msg = CREATE_ERROR("Failed to mount %s to /storage: %s", 411 storageSource.string(), 412 strerror(errno)); 413 return false; 414 } 415 416 // Mount user-specific symlink helper into place 417 userid_t user_id = multiuser_get_user_id(uid); 418 const String8 userSource(String8::format("/mnt/user/%d", user_id)); 419 if (fs_prepare_dir(userSource.string(), 0751, 0, 0) == -1) { 420 *error_msg = CREATE_ERROR("fs_prepare_dir failed on %s", userSource.string()); 421 return false; 422 } 423 if (TEMP_FAILURE_RETRY(mount(userSource.string(), "/storage/self", 424 NULL, MS_BIND, NULL)) == -1) { 425 *error_msg = CREATE_ERROR("Failed to mount %s to /storage/self: %s", 426 userSource.string(), 427 strerror(errno)); 428 return false; 429 } 430 431 return true; 432} 433 434static bool NeedsNoRandomizeWorkaround() { 435#if !defined(__arm__) 436 return false; 437#else 438 int major; 439 int minor; 440 struct utsname uts; 441 if (uname(&uts) == -1) { 442 return false; 443 } 444 445 if (sscanf(uts.release, "%d.%d", &major, &minor) != 2) { 446 return false; 447 } 448 449 // Kernels before 3.4.* need the workaround. 450 return (major < 3) || ((major == 3) && (minor < 4)); 451#endif 452} 453 454// Utility to close down the Zygote socket file descriptors while 455// the child is still running as root with Zygote's privileges. Each 456// descriptor (if any) is closed via dup2(), replacing it with a valid 457// (open) descriptor to /dev/null. 458 459static bool DetachDescriptors(JNIEnv* env, jintArray fdsToClose, std::string* error_msg) { 460 if (!fdsToClose) { 461 return true; 462 } 463 jsize count = env->GetArrayLength(fdsToClose); 464 ScopedIntArrayRO ar(env, fdsToClose); 465 if (ar.get() == NULL) { 466 *error_msg = "Bad fd array"; 467 return false; 468 } 469 jsize i; 470 int devnull; 471 for (i = 0; i < count; i++) { 472 devnull = open("/dev/null", O_RDWR); 473 if (devnull < 0) { 474 *error_msg = std::string("Failed to open /dev/null: ").append(strerror(errno)); 475 return false; 476 } 477 ALOGV("Switching descriptor %d to /dev/null: %s", ar[i], strerror(errno)); 478 if (dup2(devnull, ar[i]) < 0) { 479 *error_msg = StringPrintf("Failed dup2() on descriptor %d: %s", ar[i], strerror(errno)); 480 return false; 481 } 482 close(devnull); 483 } 484 return true; 485} 486 487void SetThreadName(const char* thread_name) { 488 bool hasAt = false; 489 bool hasDot = false; 490 const char* s = thread_name; 491 while (*s) { 492 if (*s == '.') { 493 hasDot = true; 494 } else if (*s == '@') { 495 hasAt = true; 496 } 497 s++; 498 } 499 const int len = s - thread_name; 500 if (len < 15 || hasAt || !hasDot) { 501 s = thread_name; 502 } else { 503 s = thread_name + len - 15; 504 } 505 // pthread_setname_np fails rather than truncating long strings. 506 char buf[16]; // MAX_TASK_COMM_LEN=16 is hard-coded into bionic 507 strlcpy(buf, s, sizeof(buf)-1); 508 errno = pthread_setname_np(pthread_self(), buf); 509 if (errno != 0) { 510 ALOGW("Unable to set the name of current thread to '%s': %s", buf, strerror(errno)); 511 } 512 // Update base::logging default tag. 513 android::base::SetDefaultTag(buf); 514} 515 516// The list of open zygote file descriptors. 517static FileDescriptorTable* gOpenFdTable = NULL; 518 519static bool FillFileDescriptorVector(JNIEnv* env, 520 jintArray java_fds, 521 std::vector<int>* fds, 522 std::string* error_msg) { 523 CHECK(fds != nullptr); 524 if (java_fds != nullptr) { 525 ScopedIntArrayRO ar(env, java_fds); 526 if (ar.get() == nullptr) { 527 *error_msg = "Bad fd array"; 528 return false; 529 } 530 fds->reserve(ar.size()); 531 for (size_t i = 0; i < ar.size(); ++i) { 532 fds->push_back(ar[i]); 533 } 534 } 535 return true; 536} 537 538// Utility routine to fork zygote and specialize the child process. 539static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids, 540 jint runtime_flags, jobjectArray javaRlimits, 541 jlong permittedCapabilities, jlong effectiveCapabilities, 542 jint mount_external, 543 jstring java_se_info, jstring java_se_name, 544 bool is_system_server, jintArray fdsToClose, 545 jintArray fdsToIgnore, bool is_child_zygote, 546 jstring instructionSet, jstring dataDir) { 547 SetSignalHandlers(); 548 549 sigset_t sigchld; 550 sigemptyset(&sigchld); 551 sigaddset(&sigchld, SIGCHLD); 552 553 auto fail_fn = [env, java_se_name, is_system_server](const std::string& msg) 554 __attribute__ ((noreturn)) { 555 const char* se_name_c_str = nullptr; 556 std::unique_ptr<ScopedUtfChars> se_name; 557 if (java_se_name != nullptr) { 558 se_name.reset(new ScopedUtfChars(env, java_se_name)); 559 se_name_c_str = se_name->c_str(); 560 } 561 if (se_name_c_str == nullptr && is_system_server) { 562 se_name_c_str = "system_server"; 563 } 564 const std::string& error_msg = (se_name_c_str == nullptr) 565 ? msg 566 : StringPrintf("(%s) %s", se_name_c_str, msg.c_str()); 567 env->FatalError(error_msg.c_str()); 568 __builtin_unreachable(); 569 }; 570 571 // Temporarily block SIGCHLD during forks. The SIGCHLD handler might 572 // log, which would result in the logging FDs we close being reopened. 573 // This would cause failures because the FDs are not whitelisted. 574 // 575 // Note that the zygote process is single threaded at this point. 576 if (sigprocmask(SIG_BLOCK, &sigchld, nullptr) == -1) { 577 fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno))); 578 } 579 580 // Close any logging related FDs before we start evaluating the list of 581 // file descriptors. 582 __android_log_close(); 583 584 std::string error_msg; 585 586 // If this is the first fork for this zygote, create the open FD table. 587 // If it isn't, we just need to check whether the list of open files has 588 // changed (and it shouldn't in the normal case). 589 std::vector<int> fds_to_ignore; 590 if (!FillFileDescriptorVector(env, fdsToIgnore, &fds_to_ignore, &error_msg)) { 591 fail_fn(error_msg); 592 } 593 if (gOpenFdTable == NULL) { 594 gOpenFdTable = FileDescriptorTable::Create(fds_to_ignore, &error_msg); 595 if (gOpenFdTable == NULL) { 596 fail_fn(error_msg); 597 } 598 } else if (!gOpenFdTable->Restat(fds_to_ignore, &error_msg)) { 599 fail_fn(error_msg); 600 } 601 602 pid_t pid = fork(); 603 604 if (pid == 0) { 605 PreApplicationInit(); 606 607 // Clean up any descriptors which must be closed immediately 608 if (!DetachDescriptors(env, fdsToClose, &error_msg)) { 609 fail_fn(error_msg); 610 } 611 612 // Re-open all remaining open file descriptors so that they aren't shared 613 // with the zygote across a fork. 614 if (!gOpenFdTable->ReopenOrDetach(&error_msg)) { 615 fail_fn(error_msg); 616 } 617 618 if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) { 619 fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno))); 620 } 621 622 // Keep capabilities across UID change, unless we're staying root. 623 if (uid != 0) { 624 if (!EnableKeepCapabilities(&error_msg)) { 625 fail_fn(error_msg); 626 } 627 } 628 629 if (!SetInheritable(permittedCapabilities, &error_msg)) { 630 fail_fn(error_msg); 631 } 632 if (!DropCapabilitiesBoundingSet(&error_msg)) { 633 fail_fn(error_msg); 634 } 635 636 bool use_native_bridge = !is_system_server && (instructionSet != NULL) 637 && android::NativeBridgeAvailable(); 638 if (use_native_bridge) { 639 ScopedUtfChars isa_string(env, instructionSet); 640 use_native_bridge = android::NeedsNativeBridge(isa_string.c_str()); 641 } 642 if (use_native_bridge && dataDir == NULL) { 643 // dataDir should never be null if we need to use a native bridge. 644 // In general, dataDir will never be null for normal applications. It can only happen in 645 // special cases (for isolated processes which are not associated with any app). These are 646 // launched by the framework and should not be emulated anyway. 647 use_native_bridge = false; 648 ALOGW("Native bridge will not be used because dataDir == NULL."); 649 } 650 651 if (!MountEmulatedStorage(uid, mount_external, use_native_bridge, &error_msg)) { 652 ALOGW("Failed to mount emulated storage: %s (%s)", error_msg.c_str(), strerror(errno)); 653 if (errno == ENOTCONN || errno == EROFS) { 654 // When device is actively encrypting, we get ENOTCONN here 655 // since FUSE was mounted before the framework restarted. 656 // When encrypted device is booting, we get EROFS since 657 // FUSE hasn't been created yet by init. 658 // In either case, continue without external storage. 659 } else { 660 fail_fn(error_msg); 661 } 662 } 663 664 // If this zygote isn't root, it won't be able to create a process group, 665 // since the directory is owned by root. 666 if (!is_system_server && getuid() == 0) { 667 int rc = createProcessGroup(uid, getpid()); 668 if (rc != 0) { 669 if (rc == -EROFS) { 670 ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?"); 671 } else { 672 ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc)); 673 } 674 } 675 } 676 677 std::string error_msg; 678 if (!SetGids(env, javaGids, &error_msg)) { 679 fail_fn(error_msg); 680 } 681 682 if (!SetRLimits(env, javaRlimits, &error_msg)) { 683 fail_fn(error_msg); 684 } 685 686 if (use_native_bridge) { 687 ScopedUtfChars isa_string(env, instructionSet); 688 ScopedUtfChars data_dir(env, dataDir); 689 android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str()); 690 } 691 692 int rc = setresgid(gid, gid, gid); 693 if (rc == -1) { 694 fail_fn(CREATE_ERROR("setresgid(%d) failed: %s", gid, strerror(errno))); 695 } 696 697 // Must be called when the new process still has CAP_SYS_ADMIN, in this case, before changing 698 // uid from 0, which clears capabilities. The other alternative is to call 699 // prctl(PR_SET_NO_NEW_PRIVS, 1) afterward, but that breaks SELinux domain transition (see 700 // b/71859146). As the result, privileged syscalls used below still need to be accessible in 701 // app process. 702 SetUpSeccompFilter(uid); 703 704 rc = setresuid(uid, uid, uid); 705 if (rc == -1) { 706 fail_fn(CREATE_ERROR("setresuid(%d) failed: %s", uid, strerror(errno))); 707 } 708 709 if (NeedsNoRandomizeWorkaround()) { 710 // Work around ARM kernel ASLR lossage (http://b/5817320). 711 int old_personality = personality(0xffffffff); 712 int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE); 713 if (new_personality == -1) { 714 ALOGW("personality(%d) failed: %s", new_personality, strerror(errno)); 715 } 716 } 717 718 if (!SetCapabilities(permittedCapabilities, effectiveCapabilities, permittedCapabilities, 719 &error_msg)) { 720 fail_fn(error_msg); 721 } 722 723 if (!SetSchedulerPolicy(&error_msg)) { 724 fail_fn(error_msg); 725 } 726 727 const char* se_info_c_str = NULL; 728 ScopedUtfChars* se_info = NULL; 729 if (java_se_info != NULL) { 730 se_info = new ScopedUtfChars(env, java_se_info); 731 se_info_c_str = se_info->c_str(); 732 if (se_info_c_str == NULL) { 733 fail_fn("se_info_c_str == NULL"); 734 } 735 } 736 const char* se_name_c_str = NULL; 737 ScopedUtfChars* se_name = NULL; 738 if (java_se_name != NULL) { 739 se_name = new ScopedUtfChars(env, java_se_name); 740 se_name_c_str = se_name->c_str(); 741 if (se_name_c_str == NULL) { 742 fail_fn("se_name_c_str == NULL"); 743 } 744 } 745 rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str); 746 if (rc == -1) { 747 fail_fn(CREATE_ERROR("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid, 748 is_system_server, se_info_c_str, se_name_c_str)); 749 } 750 751 // Make it easier to debug audit logs by setting the main thread's name to the 752 // nice name rather than "app_process". 753 if (se_name_c_str == NULL && is_system_server) { 754 se_name_c_str = "system_server"; 755 } 756 if (se_name_c_str != NULL) { 757 SetThreadName(se_name_c_str); 758 } 759 760 delete se_info; 761 delete se_name; 762 763 // Unset the SIGCHLD handler, but keep ignoring SIGHUP (rationale in SetSignalHandlers). 764 UnsetChldSignalHandler(); 765 766 env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, runtime_flags, 767 is_system_server, is_child_zygote, instructionSet); 768 if (env->ExceptionCheck()) { 769 fail_fn("Error calling post fork hooks."); 770 } 771 } else if (pid > 0) { 772 // the parent process 773 774 // We blocked SIGCHLD prior to a fork, we unblock it here. 775 if (sigprocmask(SIG_UNBLOCK, &sigchld, nullptr) == -1) { 776 fail_fn(CREATE_ERROR("sigprocmask(SIG_SETMASK, { SIGCHLD }) failed: %s", strerror(errno))); 777 } 778 } 779 return pid; 780} 781 782static uint64_t GetEffectiveCapabilityMask(JNIEnv* env) { 783 __user_cap_header_struct capheader; 784 memset(&capheader, 0, sizeof(capheader)); 785 capheader.version = _LINUX_CAPABILITY_VERSION_3; 786 capheader.pid = 0; 787 788 __user_cap_data_struct capdata[2]; 789 if (capget(&capheader, &capdata[0]) == -1) { 790 ALOGE("capget failed: %s", strerror(errno)); 791 RuntimeAbort(env, __LINE__, "capget failed"); 792 } 793 794 return capdata[0].effective | 795 (static_cast<uint64_t>(capdata[1].effective) << 32); 796} 797} // anonymous namespace 798 799namespace android { 800 801static void com_android_internal_os_Zygote_nativeSecurityInit(JNIEnv*, jclass) { 802 // security_getenforce is not allowed on app process. Initialize and cache the value before 803 // zygote forks. 804 g_is_security_enforced = security_getenforce(); 805} 806 807static void com_android_internal_os_Zygote_nativePreApplicationInit(JNIEnv*, jclass) { 808 PreApplicationInit(); 809} 810 811static jint com_android_internal_os_Zygote_nativeForkAndSpecialize( 812 JNIEnv* env, jclass, jint uid, jint gid, jintArray gids, 813 jint runtime_flags, jobjectArray rlimits, 814 jint mount_external, jstring se_info, jstring se_name, 815 jintArray fdsToClose, jintArray fdsToIgnore, jboolean is_child_zygote, 816 jstring instructionSet, jstring appDataDir) { 817 jlong capabilities = 0; 818 819 // Grant CAP_WAKE_ALARM to the Bluetooth process. 820 // Additionally, allow bluetooth to open packet sockets so it can start the DHCP client. 821 // Grant CAP_SYS_NICE to allow Bluetooth to set RT priority for 822 // audio-related threads. 823 // TODO: consider making such functionality an RPC to netd. 824 if (multiuser_get_app_id(uid) == AID_BLUETOOTH) { 825 capabilities |= (1LL << CAP_WAKE_ALARM); 826 capabilities |= (1LL << CAP_NET_RAW); 827 capabilities |= (1LL << CAP_NET_BIND_SERVICE); 828 capabilities |= (1LL << CAP_SYS_NICE); 829 } 830 831 // Grant CAP_BLOCK_SUSPEND to processes that belong to GID "wakelock" 832 bool gid_wakelock_found = false; 833 if (gid == AID_WAKELOCK) { 834 gid_wakelock_found = true; 835 } else if (gids != NULL) { 836 jsize gids_num = env->GetArrayLength(gids); 837 ScopedIntArrayRO ar(env, gids); 838 if (ar.get() == NULL) { 839 RuntimeAbort(env, __LINE__, "Bad gids array"); 840 } 841 for (int i = 0; i < gids_num; i++) { 842 if (ar[i] == AID_WAKELOCK) { 843 gid_wakelock_found = true; 844 break; 845 } 846 } 847 } 848 if (gid_wakelock_found) { 849 capabilities |= (1LL << CAP_BLOCK_SUSPEND); 850 } 851 852 // If forking a child zygote process, that zygote will need to be able to change 853 // the UID and GID of processes it forks, as well as drop those capabilities. 854 if (is_child_zygote) { 855 capabilities |= (1LL << CAP_SETUID); 856 capabilities |= (1LL << CAP_SETGID); 857 capabilities |= (1LL << CAP_SETPCAP); 858 } 859 860 // Containers run without some capabilities, so drop any caps that are not 861 // available. 862 capabilities &= GetEffectiveCapabilityMask(env); 863 864 return ForkAndSpecializeCommon(env, uid, gid, gids, runtime_flags, 865 rlimits, capabilities, capabilities, mount_external, se_info, 866 se_name, false, fdsToClose, fdsToIgnore, is_child_zygote == JNI_TRUE, 867 instructionSet, appDataDir); 868} 869 870static jint com_android_internal_os_Zygote_nativeForkSystemServer( 871 JNIEnv* env, jclass, uid_t uid, gid_t gid, jintArray gids, 872 jint runtime_flags, jobjectArray rlimits, jlong permittedCapabilities, 873 jlong effectiveCapabilities) { 874 pid_t pid = ForkAndSpecializeCommon(env, uid, gid, gids, 875 runtime_flags, rlimits, 876 permittedCapabilities, effectiveCapabilities, 877 MOUNT_EXTERNAL_DEFAULT, NULL, NULL, true, NULL, 878 NULL, false, NULL, NULL); 879 if (pid > 0) { 880 // The zygote process checks whether the child process has died or not. 881 ALOGI("System server process %d has been created", pid); 882 gSystemServerPid = pid; 883 // There is a slight window that the system server process has crashed 884 // but it went unnoticed because we haven't published its pid yet. So 885 // we recheck here just to make sure that all is well. 886 int status; 887 if (waitpid(pid, &status, WNOHANG) == pid) { 888 ALOGE("System server process %d has died. Restarting Zygote!", pid); 889 RuntimeAbort(env, __LINE__, "System server process has died. Restarting Zygote!"); 890 } 891 892 // Assign system_server to the correct memory cgroup. 893 // Not all devices mount /dev/memcg so check for the file first 894 // to avoid unnecessarily printing errors and denials in the logs. 895 if (!access("/dev/memcg/system/tasks", F_OK) && 896 !WriteStringToFile(StringPrintf("%d", pid), "/dev/memcg/system/tasks")) { 897 ALOGE("couldn't write %d to /dev/memcg/system/tasks", pid); 898 } 899 } 900 return pid; 901} 902 903static void com_android_internal_os_Zygote_nativeAllowFileAcrossFork( 904 JNIEnv* env, jclass, jstring path) { 905 ScopedUtfChars path_native(env, path); 906 const char* path_cstr = path_native.c_str(); 907 if (!path_cstr) { 908 RuntimeAbort(env, __LINE__, "path_cstr == NULL"); 909 } 910 FileDescriptorWhitelist::Get()->Allow(path_cstr); 911} 912 913static void com_android_internal_os_Zygote_nativeUnmountStorageOnInit(JNIEnv* env, jclass) { 914 // Zygote process unmount root storage space initially before every child processes are forked. 915 // Every forked child processes (include SystemServer) only mount their own root storage space 916 // and no need unmount storage operation in MountEmulatedStorage method. 917 // Zygote process does not utilize root storage spaces and unshares its mount namespace below. 918 919 // See storage config details at http://source.android.com/tech/storage/ 920 // Create private mount namespace shared by all children 921 if (unshare(CLONE_NEWNS) == -1) { 922 RuntimeAbort(env, __LINE__, "Failed to unshare()"); 923 return; 924 } 925 926 // Mark rootfs as being a slave so that changes from default 927 // namespace only flow into our children. 928 if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) { 929 RuntimeAbort(env, __LINE__, "Failed to mount() rootfs as MS_SLAVE"); 930 return; 931 } 932 933 // Create a staging tmpfs that is shared by our children; they will 934 // bind mount storage into their respective private namespaces, which 935 // are isolated from each other. 936 const char* target_base = getenv("EMULATED_STORAGE_TARGET"); 937 if (target_base != nullptr) { 938#define STRINGIFY_UID(x) __STRING(x) 939 if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV, 940 "uid=0,gid=" STRINGIFY_UID(AID_SDCARD_R) ",mode=0751") == -1) { 941 ALOGE("Failed to mount tmpfs to %s", target_base); 942 RuntimeAbort(env, __LINE__, "Failed to mount tmpfs"); 943 return; 944 } 945#undef STRINGIFY_UID 946 } 947 948 UnmountTree("/storage"); 949} 950 951static const JNINativeMethod gMethods[] = { 952 { "nativeSecurityInit", "()V", 953 (void *) com_android_internal_os_Zygote_nativeSecurityInit }, 954 { "nativeForkAndSpecialize", 955 "(II[II[[IILjava/lang/String;Ljava/lang/String;[I[IZLjava/lang/String;Ljava/lang/String;)I", 956 (void *) com_android_internal_os_Zygote_nativeForkAndSpecialize }, 957 { "nativeForkSystemServer", "(II[II[[IJJ)I", 958 (void *) com_android_internal_os_Zygote_nativeForkSystemServer }, 959 { "nativeAllowFileAcrossFork", "(Ljava/lang/String;)V", 960 (void *) com_android_internal_os_Zygote_nativeAllowFileAcrossFork }, 961 { "nativeUnmountStorageOnInit", "()V", 962 (void *) com_android_internal_os_Zygote_nativeUnmountStorageOnInit }, 963 { "nativePreApplicationInit", "()V", 964 (void *) com_android_internal_os_Zygote_nativePreApplicationInit } 965}; 966 967int register_com_android_internal_os_Zygote(JNIEnv* env) { 968 gZygoteClass = MakeGlobalRefOrDie(env, FindClassOrDie(env, kZygoteClassName)); 969 gCallPostForkChildHooks = GetStaticMethodIDOrDie(env, gZygoteClass, "callPostForkChildHooks", 970 "(IZZLjava/lang/String;)V"); 971 972 return RegisterMethodsOrDie(env, "com/android/internal/os/Zygote", gMethods, NELEM(gMethods)); 973} 974} // namespace android 975