1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox 6 7#include "common/sandbox.h" 8 9#define _GNU_SOURCE 10#include <asm/unistd.h> 11#include <errno.h> 12#include <fcntl.h> 13#include <limits.h> 14#include <sched.h> 15#include <signal.h> 16#include <stdarg.h> 17#include <stdbool.h> 18#include <stdint.h> 19#include <stdio.h> 20#include <stdlib.h> 21#include <string.h> 22#include <sys/prctl.h> 23#include <sys/resource.h> 24#include <sys/socket.h> 25#include <sys/stat.h> 26#include <sys/time.h> 27#include <sys/types.h> 28#include <sys/vfs.h> 29#include <sys/wait.h> 30#include <unistd.h> 31 32#include "linux_util.h" 33#include "process_util.h" 34#include "common/suid_unsafe_environment_variables.h" 35 36#if !defined(CLONE_NEWPID) 37#define CLONE_NEWPID 0x20000000 38#endif 39#if !defined(CLONE_NEWNET) 40#define CLONE_NEWNET 0x40000000 41#endif 42 43static bool DropRoot(); 44 45#define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x) 46 47static void FatalError(const char *msg, ...) 48 __attribute__((noreturn, format(printf, 1, 2))); 49 50static void FatalError(const char *msg, ...) { 51 va_list ap; 52 va_start(ap, msg); 53 54 vfprintf(stderr, msg, ap); 55 fprintf(stderr, ": %s\n", strerror(errno)); 56 fflush(stderr); 57 va_end(ap); 58 _exit(1); 59} 60 61// We will chroot() to the helper's /proc/self directory. Anything there will 62// not exist anymore if we make sure to wait() for the helper. 63// 64// /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty 65// even if the helper survives as a zombie. 66// 67// There is very little reason to use fdinfo/ instead of fd/ but we are 68// paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/ 69#define SAFE_DIR "/proc/self/fdinfo" 70#define SAFE_DIR2 "/proc/self/fd" 71 72static bool SpawnChrootHelper() { 73 int sv[2]; 74 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { 75 perror("socketpair"); 76 return false; 77 } 78 79 char *safedir = NULL; 80 struct stat sdir_stat; 81 if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) 82 safedir = SAFE_DIR; 83 else 84 if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode)) 85 safedir = SAFE_DIR2; 86 else { 87 fprintf(stderr, "Could not find %s\n", SAFE_DIR2); 88 return false; 89 } 90 91 const pid_t pid = syscall( 92 __NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0); 93 94 if (pid == -1) { 95 perror("clone"); 96 close(sv[0]); 97 close(sv[1]); 98 return false; 99 } 100 101 if (pid == 0) { 102 // We share our files structure with an untrusted process. As a security in 103 // depth measure, we make sure that we can't open anything by mistake. 104 // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT 105 106 const struct rlimit nofile = {0, 0}; 107 if (setrlimit(RLIMIT_NOFILE, &nofile)) 108 FatalError("Setting RLIMIT_NOFILE"); 109 110 if (close(sv[1])) 111 FatalError("close"); 112 113 // wait for message 114 char msg; 115 ssize_t bytes; 116 do { 117 bytes = read(sv[0], &msg, 1); 118 } while (bytes == -1 && errno == EINTR); 119 120 if (bytes == 0) 121 _exit(0); 122 if (bytes != 1) 123 FatalError("read"); 124 125 // do chrooting 126 if (msg != kMsgChrootMe) 127 FatalError("Unknown message from sandboxed process"); 128 129 // sanity check 130 if (chdir(safedir)) 131 FatalError("Cannot chdir into /proc/ directory"); 132 133 if (chroot(safedir)) 134 FatalError("Cannot chroot into /proc/ directory"); 135 136 if (chdir("/")) 137 FatalError("Cannot chdir to / after chroot"); 138 139 const char reply = kMsgChrootSuccessful; 140 do { 141 bytes = write(sv[0], &reply, 1); 142 } while (bytes == -1 && errno == EINTR); 143 144 if (bytes != 1) 145 FatalError("Writing reply"); 146 147 _exit(0); 148 // We now become a zombie. /proc/self/fd(info) is now an empty dir and we 149 // are chrooted there. 150 // Our (unprivileged) parent should not even be able to open "." or "/" 151 // since they would need to pass the ptrace() check. If our parent wait() 152 // for us, our root directory will completely disappear. 153 } 154 155 if (close(sv[0])) { 156 close(sv[1]); 157 perror("close"); 158 return false; 159 } 160 161 // In the parent process, we install an environment variable containing the 162 // number of the file descriptor. 163 char desc_str[64]; 164 int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]); 165 if (printed < 0 || printed >= (int)sizeof(desc_str)) { 166 fprintf(stderr, "Failed to snprintf\n"); 167 return false; 168 } 169 170 if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) { 171 perror("setenv"); 172 close(sv[1]); 173 return false; 174 } 175 176 // We also install an environment variable containing the pid of the child 177 char helper_pid_str[64]; 178 printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid); 179 if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) { 180 fprintf(stderr, "Failed to snprintf\n"); 181 return false; 182 } 183 184 if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) { 185 perror("setenv"); 186 close(sv[1]); 187 return false; 188 } 189 190 return true; 191} 192 193// Block until child_pid exits, then exit. Try to preserve the exit code. 194static void WaitForChildAndExit(pid_t child_pid) { 195 int exit_code = -1; 196 siginfo_t reaped_child_info; 197 198 int wait_ret = 199 HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED)); 200 201 if (!wait_ret && reaped_child_info.si_pid == child_pid) { 202 if (reaped_child_info.si_code == CLD_EXITED) { 203 exit_code = reaped_child_info.si_status; 204 } else { 205 // Exit with code 0 if the child got signaled. 206 exit_code = 0; 207 } 208 } 209 _exit(exit_code); 210} 211 212static bool MoveToNewNamespaces() { 213 // These are the sets of flags which we'll try, in order. 214 const int kCloneExtraFlags[] = { 215 CLONE_NEWPID | CLONE_NEWNET, 216 CLONE_NEWPID, 217 }; 218 219 // We need to close kZygoteIdFd before the child can continue. We use this 220 // socketpair to tell the child when to continue; 221 int sync_fds[2]; 222 if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) { 223 FatalError("Failed to create a socketpair"); 224 } 225 226 for (size_t i = 0; 227 i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]); 228 i++) { 229 pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0); 230 231 if (pid > 0) { 232 if (!DropRoot()) { 233 FatalError("Could not drop privileges"); 234 } else { 235 if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD)) 236 FatalError("Could not close socketpair"); 237 // The kZygoteIdFd needs to be closed in the parent before 238 // Zygote gets started. 239 if (close(kZygoteIdFd)) 240 FatalError("close"); 241 // Tell our child to continue 242 if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1) 243 FatalError("send"); 244 if (close(sync_fds[1])) 245 FatalError("close"); 246 // We want to keep a full process tree and we don't want our childs to 247 // be reparented to (the outer PID namespace) init. So we wait for it. 248 WaitForChildAndExit(pid); 249 } 250 // NOTREACHED 251 FatalError("Not reached"); 252 } 253 254 if (pid == 0) { 255 if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR)) 256 FatalError("Could not close socketpair"); 257 258 // Wait for the parent to confirm it closed kZygoteIdFd before we 259 // continue 260 char should_continue; 261 if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1) 262 FatalError("Read on socketpair"); 263 if (close(sync_fds[0])) 264 FatalError("close"); 265 266 if (kCloneExtraFlags[i] & CLONE_NEWPID) { 267 setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */); 268 } else { 269 unsetenv(kSandboxPIDNSEnvironmentVarName); 270 } 271 272 if (kCloneExtraFlags[i] & CLONE_NEWNET) { 273 setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */); 274 } else { 275 unsetenv(kSandboxNETNSEnvironmentVarName); 276 } 277 278 break; 279 } 280 281 if (errno != EINVAL) { 282 perror("Failed to move to new PID namespace"); 283 return false; 284 } 285 } 286 287 // If the system doesn't support NEWPID then we carry on anyway. 288 return true; 289} 290 291static bool DropRoot() { 292 if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) { 293 perror("prctl(PR_SET_DUMPABLE)"); 294 return false; 295 } 296 297 if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) { 298 perror("Still dumpable after prctl(PR_SET_DUMPABLE)"); 299 return false; 300 } 301 302 gid_t rgid, egid, sgid; 303 if (getresgid(&rgid, &egid, &sgid)) { 304 perror("getresgid"); 305 return false; 306 } 307 308 if (setresgid(rgid, rgid, rgid)) { 309 perror("setresgid"); 310 return false; 311 } 312 313 uid_t ruid, euid, suid; 314 if (getresuid(&ruid, &euid, &suid)) { 315 perror("getresuid"); 316 return false; 317 } 318 319 if (setresuid(ruid, ruid, ruid)) { 320 perror("setresuid"); 321 return false; 322 } 323 324 return true; 325} 326 327static bool SetupChildEnvironment() { 328 unsigned i; 329 330 // ld.so may have cleared several environment variables because we are SUID. 331 // However, the child process might need them so zygote_host_linux.cc saves a 332 // copy in SANDBOX_$x. This is safe because we have dropped root by this 333 // point, so we can only exec a binary with the permissions of the user who 334 // ran us in the first place. 335 336 for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) { 337 const char* const envvar = kSUIDUnsafeEnvironmentVariables[i]; 338 char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar); 339 if (!saved_envvar) 340 return false; 341 342 const char* const value = getenv(saved_envvar); 343 if (value) { 344 setenv(envvar, value, 1 /* overwrite */); 345 unsetenv(saved_envvar); 346 } 347 348 free(saved_envvar); 349 } 350 351 return true; 352} 353 354bool CheckAndExportApiVersion() { 355 // Check the environment to see if a specific API version was requested. 356 // assume version 0 if none. 357 long api_number = -1; 358 char *api_string = getenv(kSandboxEnvironmentApiRequest); 359 if (!api_string) { 360 api_number = 0; 361 } else { 362 errno = 0; 363 char* endptr = NULL; 364 api_number = strtol(api_string, &endptr, 10); 365 if (!endptr || *endptr || errno != 0) 366 return false; 367 } 368 369 // Warn only for now. 370 if (api_number != kSUIDSandboxApiNumber) { 371 fprintf(stderr, "The setuid sandbox provides API version %ld, " 372 "but you need %ld\n" 373 "Please read " 374 "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment." 375 "\n\n", 376 kSUIDSandboxApiNumber, 377 api_number); 378 } 379 380 // Export our version so that the sandboxed process can verify it did not 381 // use an old sandbox. 382 char version_string[64]; 383 snprintf(version_string, sizeof(version_string), "%ld", 384 kSUIDSandboxApiNumber); 385 if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) { 386 perror("setenv"); 387 return false; 388 } 389 390 return true; 391} 392 393int main(int argc, char **argv) { 394 if (argc <= 1) { 395 if (argc <= 0) { 396 return 1; 397 } 398 399 fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]); 400 return 1; 401 } 402 403 // Allow someone to query our API version 404 if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) { 405 printf("%ld\n", kSUIDSandboxApiNumber); 406 return 0; 407 } 408 409 // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces() 410 // below, then the zygote and all the renderers are in an alternate PID 411 // namespace and do not know their real PIDs. As such, they report the wrong 412 // PIDs to the task manager. 413 // 414 // To fix this, when the zygote spawns a new renderer, it gives the renderer 415 // a dummy socket, which has a unique inode number. Then it asks the sandbox 416 // host to find the PID of the process holding that fd by searching /proc. 417 // 418 // Since the zygote and renderers are all spawned by this setuid executable, 419 // their entries in /proc are owned by root and only readable by root. In 420 // order to search /proc for the fd we want, this setuid executable has to 421 // double as a helper and perform the search. The code block below does this 422 // when you call it with --find-inode INODE_NUMBER. 423 if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) { 424 pid_t pid; 425 char* endptr = NULL; 426 errno = 0; 427 ino_t inode = strtoull(argv[2], &endptr, 10); 428 if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0) 429 return 1; 430 if (!FindProcessHoldingSocket(&pid, inode)) 431 return 1; 432 printf("%d\n", pid); 433 return 0; 434 } 435 // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers 436 // because those files are owned by root. So we need another helper here. 437 if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) { 438 char* endptr = NULL; 439 long score; 440 errno = 0; 441 unsigned long pid_ul = strtoul(argv[2], &endptr, 10); 442 if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0) 443 return 1; 444 pid_t pid = pid_ul; 445 endptr = NULL; 446 errno = 0; 447 score = strtol(argv[3], &endptr, 10); 448 if (score == LONG_MAX || score == LONG_MIN || 449 !endptr || *endptr || errno != 0) 450 return 1; 451 return AdjustOOMScore(pid, score); 452 } 453#if defined(OS_CHROMEOS) 454 if (argc == 3 && (0 == strcmp(argv[1], kAdjustLowMemMarginSwitch))) { 455 char* endptr = NULL; 456 errno = 0; 457 unsigned long margin_mb = strtoul(argv[2], &endptr, 10); 458 if (!endptr || *endptr || errno != 0) 459 return 1; 460 return AdjustLowMemoryMargin(margin_mb); 461 } 462#endif 463 464 // Protect the core setuid sandbox functionality with an API version 465 if (!CheckAndExportApiVersion()) { 466 return 1; 467 } 468 469 if (!MoveToNewNamespaces()) 470 return 1; 471 if (!SpawnChrootHelper()) 472 return 1; 473 if (!DropRoot()) 474 return 1; 475 if (!SetupChildEnvironment()) 476 return 1; 477 478 execv(argv[1], &argv[1]); 479 FatalError("execv failed"); 480 481 return 1; 482} 483