1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox 6 7#include "common/sandbox.h" 8 9#define _GNU_SOURCE 10#include <asm/unistd.h> 11#include <errno.h> 12#include <fcntl.h> 13#include <limits.h> 14#include <sched.h> 15#include <signal.h> 16#include <stdarg.h> 17#include <stdbool.h> 18#include <stdint.h> 19#include <stdio.h> 20#include <stdlib.h> 21#include <string.h> 22#include <sys/prctl.h> 23#include <sys/resource.h> 24#include <sys/socket.h> 25#include <sys/stat.h> 26#include <sys/time.h> 27#include <sys/types.h> 28#include <sys/vfs.h> 29#include <sys/wait.h> 30#include <unistd.h> 31 32#include "linux_util.h" 33#include "process_util.h" 34#include "common/suid_unsafe_environment_variables.h" 35 36#if !defined(CLONE_NEWPID) 37#define CLONE_NEWPID 0x20000000 38#endif 39#if !defined(CLONE_NEWNET) 40#define CLONE_NEWNET 0x40000000 41#endif 42 43static bool DropRoot(); 44 45#define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x) 46 47static void FatalError(const char *msg, ...) 48 __attribute__((noreturn, format(printf, 1, 2))); 49 50static void FatalError(const char *msg, ...) { 51 va_list ap; 52 va_start(ap, msg); 53 54 vfprintf(stderr, msg, ap); 55 fprintf(stderr, ": %s\n", strerror(errno)); 56 fflush(stderr); 57 va_end(ap); 58 _exit(1); 59} 60 61static void ExitWithErrorSignalHandler(int signal) { 62 const char msg[] = "\nThe setuid sandbox got signaled, exiting.\n"; 63 if (-1 == write(2, msg, sizeof(msg) - 1)) { 64 // Do nothing. 65 } 66 67 _exit(1); 68} 69 70// We will chroot() to the helper's /proc/self directory. Anything there will 71// not exist anymore if we make sure to wait() for the helper. 72// 73// /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty 74// even if the helper survives as a zombie. 75// 76// There is very little reason to use fdinfo/ instead of fd/ but we are 77// paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/ 78#define SAFE_DIR "/proc/self/fdinfo" 79#define SAFE_DIR2 "/proc/self/fd" 80 81static bool SpawnChrootHelper() { 82 int sv[2]; 83 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 84 perror("socketpair"); 85 return false; 86 } 87 88 char *safedir = NULL; 89 struct stat sdir_stat; 90 if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) 91 safedir = SAFE_DIR; 92 else 93 if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) 94 safedir = SAFE_DIR2; 95 else { 96 fprintf(stderr, "Could not find %s\n", SAFE_DIR2); 97 return false; 98 } 99 100 const pid_t pid = syscall( 101 __NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0); 102 103 if (pid == -1) { 104 perror("clone"); 105 close(sv[0]); 106 close(sv[1]); 107 return false; 108 } 109 110 if (pid == 0) { 111 // We share our files structure with an untrusted process. As a security in 112 // depth measure, we make sure that we can't open anything by mistake. 113 // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT 114 115 const struct rlimit nofile = {0, 0}; 116 if (setrlimit(RLIMIT_NOFILE, &nofile)) 117 FatalError("Setting RLIMIT_NOFILE"); 118 119 if (close(sv[1])) 120 FatalError("close"); 121 122 // wait for message 123 char msg; 124 ssize_t bytes; 125 do { 126 bytes = read(sv[0], &msg, 1); 127 } while (bytes == -1 && errno == EINTR); 128 129 if (bytes == 0) 130 _exit(0); 131 if (bytes != 1) 132 FatalError("read"); 133 134 // do chrooting 135 if (msg != kMsgChrootMe) 136 FatalError("Unknown message from sandboxed process"); 137 138 // sanity check 139 if (chdir(safedir)) 140 FatalError("Cannot chdir into /proc/ directory"); 141 142 if (chroot(safedir)) 143 FatalError("Cannot chroot into /proc/ directory"); 144 145 if (chdir("/")) 146 FatalError("Cannot chdir to / after chroot"); 147 148 const char reply = kMsgChrootSuccessful; 149 do { 150 bytes = write(sv[0], &reply, 1); 151 } while (bytes == -1 && errno == EINTR); 152 153 if (bytes != 1) 154 FatalError("Writing reply"); 155 156 _exit(0); 157 // We now become a zombie. /proc/self/fd(info) is now an empty dir and we 158 // are chrooted there. 159 // Our (unprivileged) parent should not even be able to open "." or "/" 160 // since they would need to pass the ptrace() check. If our parent wait() 161 // for us, our root directory will completely disappear. 162 } 163 164 if (close(sv[0])) { 165 close(sv[1]); 166 perror("close"); 167 return false; 168 } 169 170 // In the parent process, we install an environment variable containing the 171 // number of the file descriptor. 172 char desc_str[64]; 173 int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]); 174 if (printed < 0 || printed >= (int)sizeof(desc_str)) { 175 fprintf(stderr, "Failed to snprintf\n"); 176 return false; 177 } 178 179 if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) { 180 perror("setenv"); 181 close(sv[1]); 182 return false; 183 } 184 185 // We also install an environment variable containing the pid of the child 186 char helper_pid_str[64]; 187 printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid); 188 if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) { 189 fprintf(stderr, "Failed to snprintf\n"); 190 return false; 191 } 192 193 if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) { 194 perror("setenv"); 195 close(sv[1]); 196 return false; 197 } 198 199 return true; 200} 201 202// Block until child_pid exits, then exit. Try to preserve the exit code. 203static void WaitForChildAndExit(pid_t child_pid) { 204 int exit_code = -1; 205 siginfo_t reaped_child_info; 206 207 // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager 208 // when things are hanging. 209 // Here, the current process is going to waitid() and _exit(), so there is no 210 // point in generating a crash report. The child process is the one 211 // blocking us. 212 if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) { 213 FatalError("Failed to change signal handler"); 214 } 215 216 int wait_ret = 217 HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED)); 218 219 if (!wait_ret && reaped_child_info.si_pid == child_pid) { 220 if (reaped_child_info.si_code == CLD_EXITED) { 221 exit_code = reaped_child_info.si_status; 222 } else { 223 // Exit with code 0 if the child got signaled. 224 exit_code = 0; 225 } 226 } 227 _exit(exit_code); 228} 229 230static bool MoveToNewNamespaces() { 231 // These are the sets of flags which we'll try, in order. 232 const int kCloneExtraFlags[] = { 233 CLONE_NEWPID | CLONE_NEWNET, 234 CLONE_NEWPID, 235 }; 236 237 // We need to close kZygoteIdFd before the child can continue. We use this 238 // socketpair to tell the child when to continue; 239 int sync_fds[2]; 240 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) { 241 FatalError("Failed to create a socketpair"); 242 } 243 244 for (size_t i = 0; 245 i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]); 246 i++) { 247 pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0); 248 249 if (pid > 0) { 250 if (!DropRoot()) { 251 FatalError("Could not drop privileges"); 252 } else { 253 if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD)) 254 FatalError("Could not close socketpair"); 255 // The kZygoteIdFd needs to be closed in the parent before 256 // Zygote gets started. 257 if (close(kZygoteIdFd)) 258 FatalError("close"); 259 // Tell our child to continue 260 if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1) 261 FatalError("send"); 262 if (close(sync_fds[1])) 263 FatalError("close"); 264 // We want to keep a full process tree and we don't want our childs to 265 // be reparented to (the outer PID namespace) init. So we wait for it. 266 WaitForChildAndExit(pid); 267 } 268 // NOTREACHED 269 FatalError("Not reached"); 270 } 271 272 if (pid == 0) { 273 if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR)) 274 FatalError("Could not close socketpair"); 275 276 // Wait for the parent to confirm it closed kZygoteIdFd before we 277 // continue 278 char should_continue; 279 if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1) 280 FatalError("Read on socketpair"); 281 if (close(sync_fds[0])) 282 FatalError("close"); 283 284 if (kCloneExtraFlags[i] & CLONE_NEWPID) { 285 setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */); 286 } else { 287 unsetenv(kSandboxPIDNSEnvironmentVarName); 288 } 289 290 if (kCloneExtraFlags[i] & CLONE_NEWNET) { 291 setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */); 292 } else { 293 unsetenv(kSandboxNETNSEnvironmentVarName); 294 } 295 296 break; 297 } 298 299 if (errno != EINVAL) { 300 perror("Failed to move to new PID namespace"); 301 return false; 302 } 303 } 304 305 // If the system doesn't support NEWPID then we carry on anyway. 306 return true; 307} 308 309static bool DropRoot() { 310 if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) { 311 perror("prctl(PR_SET_DUMPABLE)"); 312 return false; 313 } 314 315 if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) { 316 perror("Still dumpable after prctl(PR_SET_DUMPABLE)"); 317 return false; 318 } 319 320 gid_t rgid, egid, sgid; 321 if (getresgid(&rgid, &egid, &sgid)) { 322 perror("getresgid"); 323 return false; 324 } 325 326 if (setresgid(rgid, rgid, rgid)) { 327 perror("setresgid"); 328 return false; 329 } 330 331 uid_t ruid, euid, suid; 332 if (getresuid(&ruid, &euid, &suid)) { 333 perror("getresuid"); 334 return false; 335 } 336 337 if (setresuid(ruid, ruid, ruid)) { 338 perror("setresuid"); 339 return false; 340 } 341 342 return true; 343} 344 345static bool SetupChildEnvironment() { 346 unsigned i; 347 348 // ld.so may have cleared several environment variables because we are SUID. 349 // However, the child process might need them so zygote_host_linux.cc saves a 350 // copy in SANDBOX_$x. This is safe because we have dropped root by this 351 // point, so we can only exec a binary with the permissions of the user who 352 // ran us in the first place. 353 354 for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) { 355 const char* const envvar = kSUIDUnsafeEnvironmentVariables[i]; 356 char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar); 357 if (!saved_envvar) 358 return false; 359 360 const char* const value = getenv(saved_envvar); 361 if (value) { 362 setenv(envvar, value, 1 /* overwrite */); 363 unsetenv(saved_envvar); 364 } 365 366 free(saved_envvar); 367 } 368 369 return true; 370} 371 372bool CheckAndExportApiVersion() { 373 // Check the environment to see if a specific API version was requested. 374 // assume version 0 if none. 375 long api_number = -1; 376 char *api_string = getenv(kSandboxEnvironmentApiRequest); 377 if (!api_string) { 378 api_number = 0; 379 } else { 380 errno = 0; 381 char* endptr = NULL; 382 api_number = strtol(api_string, &endptr, 10); 383 if (!endptr || *endptr || errno != 0) 384 return false; 385 } 386 387 // Warn only for now. 388 if (api_number != kSUIDSandboxApiNumber) { 389 fprintf(stderr, "The setuid sandbox provides API version %ld, " 390 "but you need %ld\n" 391 "Please read " 392 "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment." 393 "\n\n", 394 kSUIDSandboxApiNumber, 395 api_number); 396 } 397 398 // Export our version so that the sandboxed process can verify it did not 399 // use an old sandbox. 400 char version_string[64]; 401 snprintf(version_string, sizeof(version_string), "%ld", 402 kSUIDSandboxApiNumber); 403 if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) { 404 perror("setenv"); 405 return false; 406 } 407 408 return true; 409} 410 411int main(int argc, char **argv) { 412 if (argc <= 1) { 413 if (argc <= 0) { 414 return 1; 415 } 416 417 fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]); 418 return 1; 419 } 420 421 // Allow someone to query our API version 422 if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) { 423 printf("%ld\n", kSUIDSandboxApiNumber); 424 return 0; 425 } 426 427 // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces() 428 // below, then the zygote and all the renderers are in an alternate PID 429 // namespace and do not know their real PIDs. As such, they report the wrong 430 // PIDs to the task manager. 431 // 432 // To fix this, when the zygote spawns a new renderer, it gives the renderer 433 // a dummy socket, which has a unique inode number. Then it asks the sandbox 434 // host to find the PID of the process holding that fd by searching /proc. 435 // 436 // Since the zygote and renderers are all spawned by this setuid executable, 437 // their entries in /proc are owned by root and only readable by root. In 438 // order to search /proc for the fd we want, this setuid executable has to 439 // double as a helper and perform the search. The code block below does this 440 // when you call it with --find-inode INODE_NUMBER. 441 if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) { 442 pid_t pid; 443 char* endptr = NULL; 444 errno = 0; 445 ino_t inode = strtoull(argv[2], &endptr, 10); 446 if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0) 447 return 1; 448 if (!FindProcessHoldingSocket(&pid, inode)) 449 return 1; 450 printf("%d\n", pid); 451 return 0; 452 } 453 // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers 454 // because those files are owned by root. So we need another helper here. 455 if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) { 456 char* endptr = NULL; 457 long score; 458 errno = 0; 459 unsigned long pid_ul = strtoul(argv[2], &endptr, 10); 460 if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0) 461 return 1; 462 pid_t pid = pid_ul; 463 endptr = NULL; 464 errno = 0; 465 score = strtol(argv[3], &endptr, 10); 466 if (score == LONG_MAX || score == LONG_MIN || 467 !endptr || *endptr || errno != 0) 468 return 1; 469 return AdjustOOMScore(pid, score); 470 } 471 472 // Protect the core setuid sandbox functionality with an API version 473 if (!CheckAndExportApiVersion()) { 474 return 1; 475 } 476 477 if (!MoveToNewNamespaces()) 478 return 1; 479 if (!SpawnChrootHelper()) 480 return 1; 481 if (!DropRoot()) 482 return 1; 483 if (!SetupChildEnvironment()) 484 return 1; 485 486 execv(argv[1], &argv[1]); 487 FatalError("execv failed"); 488 489 return 1; 490} 491