1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox
6
7#include "common/sandbox.h"
8
9#define _GNU_SOURCE
10#include <asm/unistd.h>
11#include <errno.h>
12#include <fcntl.h>
13#include <limits.h>
14#include <sched.h>
15#include <signal.h>
16#include <stdarg.h>
17#include <stdbool.h>
18#include <stdint.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/prctl.h>
23#include <sys/resource.h>
24#include <sys/socket.h>
25#include <sys/stat.h>
26#include <sys/time.h>
27#include <sys/types.h>
28#include <sys/vfs.h>
29#include <sys/wait.h>
30#include <unistd.h>
31
32#include "linux_util.h"
33#include "process_util.h"
34#include "common/suid_unsafe_environment_variables.h"
35
// Older kernel headers may not define the namespace clone flags, so provide
// literal fallbacks for the two that this file uses.
#if !defined(CLONE_NEWPID)
#define CLONE_NEWPID 0x20000000
#endif
#if !defined(CLONE_NEWNET)
#define CLONE_NEWNET 0x40000000
#endif

// Forward declaration: MoveToNewNamespaces() calls DropRoot() before its
// definition appears in this file.
static bool DropRoot(void);

// HANDLE_EINTR(x) evaluates the expression |x| again and again for as long as
// it yields -1 with errno == EINTR, and produces the final result as a long.
//
// glibc's TEMP_FAILURE_RETRY is used when it is available. It is only defined
// when _GNU_SOURCE is in effect before the first system header is processed,
// so an equivalent fallback is provided for the case where it is missing.
#if defined(TEMP_FAILURE_RETRY)
#define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
#else
#define HANDLE_EINTR(x)                                      \
  (__extension__({                                           \
    long handle_eintr_result_;                               \
    do {                                                     \
      handle_eintr_result_ = (long)(x);                      \
    } while (handle_eintr_result_ == -1L && errno == EINTR); \
    handle_eintr_result_;                                    \
  }))
#endif
46
// Reports a fatal error and terminates the process.
//
// Writes the printf-style message to stderr, followed by ": " and the
// strerror() text for the value errno had when the function was entered, then
// calls _exit(1). Never returns.
//
//   msg: printf-style format string; the remaining arguments are its operands.
static void FatalError(const char *msg, ...)
    __attribute__((noreturn, format(printf, 1, 2)));

static void FatalError(const char *msg, ...) {
  // Capture errno before touching stdio: the calls below are allowed to
  // modify it, and the value worth reporting is the one the caller left.
  const int saved_errno = errno;

  va_list ap;
  va_start(ap, msg);
  vfprintf(stderr, msg, ap);
  va_end(ap);

  fprintf(stderr, ": %s\n", strerror(saved_errno));
  fflush(stderr);
  _exit(1);
}
60
// Signal handler that announces on stderr that the sandbox was signaled and
// then terminates the process with exit status 1. The signal number itself is
// not inspected.
static void ExitWithErrorSignalHandler(int signal) {
  const char notice[] = "\nThe setuid sandbox got signaled, exiting.\n";

  // The outcome of the write is deliberately discarded: we exit either way.
  const ssize_t written = write(STDERR_FILENO, notice, sizeof(notice) - 1);
  (void)written;
  (void)signal;

  _exit(1);
}
69
70// We will chroot() to the helper's /proc/self directory. Anything there will
71// not exist anymore if we make sure to wait() for the helper.
72//
73// /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
74// even if the helper survives as a zombie.
75//
76// There is very little reason to use fdinfo/ instead of fd/ but we are
77// paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
78#define SAFE_DIR "/proc/self/fdinfo"
79#define SAFE_DIR2 "/proc/self/fd"
80
81static bool SpawnChrootHelper() {
82  int sv[2];
83  if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
84    perror("socketpair");
85    return false;
86  }
87
88  char *safedir = NULL;
89  struct stat sdir_stat;
90  if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
91    safedir = SAFE_DIR;
92  else
93    if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
94      safedir = SAFE_DIR2;
95    else {
96      fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
97      return false;
98    }
99
100  const pid_t pid = syscall(
101      __NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
102
103  if (pid == -1) {
104    perror("clone");
105    close(sv[0]);
106    close(sv[1]);
107    return false;
108  }
109
110  if (pid == 0) {
111    // We share our files structure with an untrusted process. As a security in
112    // depth measure, we make sure that we can't open anything by mistake.
113    // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
114
115    const struct rlimit nofile = {0, 0};
116    if (setrlimit(RLIMIT_NOFILE, &nofile))
117      FatalError("Setting RLIMIT_NOFILE");
118
119    if (close(sv[1]))
120      FatalError("close");
121
122    // wait for message
123    char msg;
124    ssize_t bytes;
125    do {
126      bytes = read(sv[0], &msg, 1);
127    } while (bytes == -1 && errno == EINTR);
128
129    if (bytes == 0)
130      _exit(0);
131    if (bytes != 1)
132      FatalError("read");
133
134    // do chrooting
135    if (msg != kMsgChrootMe)
136      FatalError("Unknown message from sandboxed process");
137
138    // sanity check
139    if (chdir(safedir))
140      FatalError("Cannot chdir into /proc/ directory");
141
142    if (chroot(safedir))
143      FatalError("Cannot chroot into /proc/ directory");
144
145    if (chdir("/"))
146      FatalError("Cannot chdir to / after chroot");
147
148    const char reply = kMsgChrootSuccessful;
149    do {
150      bytes = write(sv[0], &reply, 1);
151    } while (bytes == -1 && errno == EINTR);
152
153    if (bytes != 1)
154      FatalError("Writing reply");
155
156    _exit(0);
157    // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
158    // are chrooted there.
159    // Our (unprivileged) parent should not even be able to open "." or "/"
160    // since they would need to pass the ptrace() check. If our parent wait()
161    // for us, our root directory will completely disappear.
162  }
163
164  if (close(sv[0])) {
165    close(sv[1]);
166    perror("close");
167    return false;
168  }
169
170  // In the parent process, we install an environment variable containing the
171  // number of the file descriptor.
172  char desc_str[64];
173  int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
174  if (printed < 0 || printed >= (int)sizeof(desc_str)) {
175    fprintf(stderr, "Failed to snprintf\n");
176    return false;
177  }
178
179  if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
180    perror("setenv");
181    close(sv[1]);
182    return false;
183  }
184
185  // We also install an environment variable containing the pid of the child
186  char helper_pid_str[64];
187  printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
188  if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
189    fprintf(stderr, "Failed to snprintf\n");
190    return false;
191  }
192
193  if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
194    perror("setenv");
195    close(sv[1]);
196    return false;
197  }
198
199  return true;
200}
201
202// Block until child_pid exits, then exit. Try to preserve the exit code.
203static void WaitForChildAndExit(pid_t child_pid) {
204  int exit_code = -1;
205  siginfo_t reaped_child_info;
206
207  // Don't "Core" on SIGABRT. SIGABRT is sent by the Chrome OS session manager
208  // when things are hanging.
209  // Here, the current process is going to waitid() and _exit(), so there is no
210  // point in generating a crash report. The child process is the one
211  // blocking us.
212  if (signal(SIGABRT, ExitWithErrorSignalHandler) == SIG_ERR) {
213    FatalError("Failed to change signal handler");
214  }
215
216  int wait_ret =
217    HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
218
219  if (!wait_ret && reaped_child_info.si_pid == child_pid) {
220    if (reaped_child_info.si_code == CLD_EXITED) {
221      exit_code = reaped_child_info.si_status;
222    } else {
223      // Exit with code 0 if the child got signaled.
224      exit_code = 0;
225    }
226  }
227  _exit(exit_code);
228}
229
230static bool MoveToNewNamespaces() {
231  // These are the sets of flags which we'll try, in order.
232  const int kCloneExtraFlags[] = {
233    CLONE_NEWPID | CLONE_NEWNET,
234    CLONE_NEWPID,
235  };
236
237  // We need to close kZygoteIdFd before the child can continue. We use this
238  // socketpair to tell the child when to continue;
239  int sync_fds[2];
240  if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
241    FatalError("Failed to create a socketpair");
242  }
243
244  for (size_t i = 0;
245       i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
246       i++) {
247    pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
248
249    if (pid > 0) {
250      if (!DropRoot()) {
251        FatalError("Could not drop privileges");
252      } else {
253        if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
254          FatalError("Could not close socketpair");
255        // The kZygoteIdFd needs to be closed in the parent before
256        // Zygote gets started.
257        if (close(kZygoteIdFd))
258          FatalError("close");
259        // Tell our child to continue
260        if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
261          FatalError("send");
262        if (close(sync_fds[1]))
263          FatalError("close");
264        // We want to keep a full process tree and we don't want our childs to
265        // be reparented to (the outer PID namespace) init. So we wait for it.
266        WaitForChildAndExit(pid);
267      }
268      // NOTREACHED
269      FatalError("Not reached");
270    }
271
272    if (pid == 0) {
273      if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
274        FatalError("Could not close socketpair");
275
276      // Wait for the parent to confirm it closed kZygoteIdFd before we
277      // continue
278      char should_continue;
279      if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
280        FatalError("Read on socketpair");
281      if (close(sync_fds[0]))
282        FatalError("close");
283
284      if (kCloneExtraFlags[i] & CLONE_NEWPID) {
285        setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
286      } else {
287        unsetenv(kSandboxPIDNSEnvironmentVarName);
288      }
289
290      if (kCloneExtraFlags[i] & CLONE_NEWNET) {
291        setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
292      } else {
293        unsetenv(kSandboxNETNSEnvironmentVarName);
294      }
295
296      break;
297    }
298
299    if (errno != EINVAL) {
300      perror("Failed to move to new PID namespace");
301      return false;
302    }
303  }
304
305  // If the system doesn't support NEWPID then we carry on anyway.
306  return true;
307}
308
// Gives up the privileges gained from being setuid/setgid.
//
// The process is first marked non-dumpable, then the effective and saved
// group ids are set to the real group id, and finally the effective and saved
// user ids are set to the real user id (groups first, while we still have the
// privilege to change them).
//
// Returns true on success. On failure a diagnostic is printed to stderr and
// false is returned; the process may then be only partially de-privileged.
static bool DropRoot(void) {
  if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) {
    perror("prctl(PR_SET_DUMPABLE)");
    return false;
  }

  // Double-check that the flag really is cleared. A negative result is a
  // failed call (errno is meaningful); a positive one is a successful call
  // reporting that we are still dumpable (errno is not meaningful).
  const int dumpable = prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
  if (dumpable < 0) {
    perror("prctl(PR_GET_DUMPABLE)");
    return false;
  }
  if (dumpable != 0) {
    fprintf(stderr, "Still dumpable after prctl(PR_SET_DUMPABLE)\n");
    return false;
  }

  gid_t rgid, egid, sgid;
  if (getresgid(&rgid, &egid, &sgid)) {
    perror("getresgid");
    return false;
  }

  if (setresgid(rgid, rgid, rgid)) {
    perror("setresgid");
    return false;
  }

  uid_t ruid, euid, suid;
  if (getresuid(&ruid, &euid, &suid)) {
    perror("getresuid");
    return false;
  }

  if (setresuid(ruid, ruid, ruid)) {
    perror("setresuid");
    return false;
  }

  return true;
}
344
345static bool SetupChildEnvironment() {
346  unsigned i;
347
348  // ld.so may have cleared several environment variables because we are SUID.
349  // However, the child process might need them so zygote_host_linux.cc saves a
350  // copy in SANDBOX_$x. This is safe because we have dropped root by this
351  // point, so we can only exec a binary with the permissions of the user who
352  // ran us in the first place.
353
354  for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
355    const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
356    char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
357    if (!saved_envvar)
358      return false;
359
360    const char* const value = getenv(saved_envvar);
361    if (value) {
362      setenv(envvar, value, 1 /* overwrite */);
363      unsetenv(saved_envvar);
364    }
365
366    free(saved_envvar);
367  }
368
369  return true;
370}
371
372bool CheckAndExportApiVersion() {
373  // Check the environment to see if a specific API version was requested.
374  // assume version 0 if none.
375  long api_number = -1;
376  char *api_string = getenv(kSandboxEnvironmentApiRequest);
377  if (!api_string) {
378    api_number = 0;
379  } else {
380    errno = 0;
381    char* endptr = NULL;
382    api_number = strtol(api_string, &endptr, 10);
383    if (!endptr || *endptr || errno != 0)
384      return false;
385  }
386
387  // Warn only for now.
388  if (api_number != kSUIDSandboxApiNumber) {
389    fprintf(stderr, "The setuid sandbox provides API version %ld, "
390      "but you need %ld\n"
391      "Please read "
392      "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment."
393      "\n\n",
394      kSUIDSandboxApiNumber,
395      api_number);
396  }
397
398  // Export our version so that the sandboxed process can verify it did not
399  // use an old sandbox.
400  char version_string[64];
401  snprintf(version_string, sizeof(version_string), "%ld",
402           kSUIDSandboxApiNumber);
403  if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
404    perror("setenv");
405    return false;
406  }
407
408  return true;
409}
410
411int main(int argc, char **argv) {
412  if (argc <= 1) {
413    if (argc <= 0) {
414      return 1;
415    }
416
417    fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
418    return 1;
419  }
420
421  // Allow someone to query our API version
422  if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
423    printf("%ld\n", kSUIDSandboxApiNumber);
424    return 0;
425  }
426
427  // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces()
428  // below, then the zygote and all the renderers are in an alternate PID
429  // namespace and do not know their real PIDs. As such, they report the wrong
430  // PIDs to the task manager.
431  //
432  // To fix this, when the zygote spawns a new renderer, it gives the renderer
433  // a dummy socket, which has a unique inode number. Then it asks the sandbox
434  // host to find the PID of the process holding that fd by searching /proc.
435  //
436  // Since the zygote and renderers are all spawned by this setuid executable,
437  // their entries in /proc are owned by root and only readable by root. In
438  // order to search /proc for the fd we want, this setuid executable has to
439  // double as a helper and perform the search. The code block below does this
440  // when you call it with --find-inode INODE_NUMBER.
441  if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) {
442    pid_t pid;
443    char* endptr = NULL;
444    errno = 0;
445    ino_t inode = strtoull(argv[2], &endptr, 10);
446    if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0)
447      return 1;
448    if (!FindProcessHoldingSocket(&pid, inode))
449      return 1;
450    printf("%d\n", pid);
451    return 0;
452  }
453  // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers
454  // because those files are owned by root. So we need another helper here.
455  if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
456    char* endptr = NULL;
457    long score;
458    errno = 0;
459    unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
460    if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
461      return 1;
462    pid_t pid = pid_ul;
463    endptr = NULL;
464    errno = 0;
465    score = strtol(argv[3], &endptr, 10);
466    if (score == LONG_MAX || score == LONG_MIN ||
467        !endptr || *endptr || errno != 0)
468      return 1;
469    return AdjustOOMScore(pid, score);
470  }
471
472  // Protect the core setuid sandbox functionality with an API version
473  if (!CheckAndExportApiVersion()) {
474    return 1;
475  }
476
477  if (!MoveToNewNamespaces())
478    return 1;
479  if (!SpawnChrootHelper())
480    return 1;
481  if (!DropRoot())
482    return 1;
483  if (!SetupChildEnvironment())
484    return 1;
485
486  execv(argv[1], &argv[1]);
487  FatalError("execv failed");
488
489  return 1;
490}
491