1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox
6
7#include "common/sandbox.h"
8
9#define _GNU_SOURCE
10#include <asm/unistd.h>
11#include <errno.h>
12#include <fcntl.h>
13#include <limits.h>
14#include <sched.h>
15#include <signal.h>
16#include <stdarg.h>
17#include <stdbool.h>
18#include <stdint.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/prctl.h>
23#include <sys/resource.h>
24#include <sys/socket.h>
25#include <sys/stat.h>
26#include <sys/time.h>
27#include <sys/types.h>
28#include <sys/vfs.h>
29#include <sys/wait.h>
30#include <unistd.h>
31
32#include "linux_util.h"
33#include "process_util.h"
34#include "common/suid_unsafe_environment_variables.h"
35
// Fallback values for the clone(2) namespace flags, used only when the
// system headers included above do not already define them.
#if !defined(CLONE_NEWPID)
#define CLONE_NEWPID 0x20000000
#endif
#if !defined(CLONE_NEWNET)
#define CLONE_NEWNET 0x40000000
#endif

// Forward declaration: MoveToNewNamespaces() calls DropRoot() before its
// definition appears. Declared with (void) so that this is a real prototype
// and calls with stray arguments are diagnosed.
static bool DropRoot(void);

// Evaluates |x|, retrying for as long as it fails with EINTR.
#define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
46
// Prints a printf-style message to stderr, followed by ": " and the
// strerror() text for the errno value that was current when FatalError() was
// entered, then terminates the process with exit status 1. Never returns.
static void FatalError(const char *msg, ...)
    __attribute__((noreturn, format(printf, 1, 2)));

static void FatalError(const char *msg, ...) {
  // Capture errno first: the stdio calls below are allowed to modify it, and
  // we want to report the failure that brought us here.
  const int saved_errno = errno;

  va_list ap;
  va_start(ap, msg);
  vfprintf(stderr, msg, ap);
  va_end(ap);

  fprintf(stderr, ": %s\n", strerror(saved_errno));
  fflush(stderr);
  // _exit() rather than exit(): skip atexit handlers and stdio teardown.
  _exit(1);
}
60
61// We will chroot() to the helper's /proc/self directory. Anything there will
62// not exist anymore if we make sure to wait() for the helper.
63//
64// /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
65// even if the helper survives as a zombie.
66//
67// There is very little reason to use fdinfo/ instead of fd/ but we are
68// paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
69#define SAFE_DIR "/proc/self/fdinfo"
70#define SAFE_DIR2 "/proc/self/fd"
71
72static bool SpawnChrootHelper() {
73  int sv[2];
74  if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
75    perror("socketpair");
76    return false;
77  }
78
79  char *safedir = NULL;
80  struct stat sdir_stat;
81  if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
82    safedir = SAFE_DIR;
83  else
84    if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
85      safedir = SAFE_DIR2;
86    else {
87      fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
88      return false;
89    }
90
91  const pid_t pid = syscall(
92      __NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
93
94  if (pid == -1) {
95    perror("clone");
96    close(sv[0]);
97    close(sv[1]);
98    return false;
99  }
100
101  if (pid == 0) {
102    // We share our files structure with an untrusted process. As a security in
103    // depth measure, we make sure that we can't open anything by mistake.
104    // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
105
106    const struct rlimit nofile = {0, 0};
107    if (setrlimit(RLIMIT_NOFILE, &nofile))
108      FatalError("Setting RLIMIT_NOFILE");
109
110    if (close(sv[1]))
111      FatalError("close");
112
113    // wait for message
114    char msg;
115    ssize_t bytes;
116    do {
117      bytes = read(sv[0], &msg, 1);
118    } while (bytes == -1 && errno == EINTR);
119
120    if (bytes == 0)
121      _exit(0);
122    if (bytes != 1)
123      FatalError("read");
124
125    // do chrooting
126    if (msg != kMsgChrootMe)
127      FatalError("Unknown message from sandboxed process");
128
129    // sanity check
130    if (chdir(safedir))
131      FatalError("Cannot chdir into /proc/ directory");
132
133    if (chroot(safedir))
134      FatalError("Cannot chroot into /proc/ directory");
135
136    if (chdir("/"))
137      FatalError("Cannot chdir to / after chroot");
138
139    const char reply = kMsgChrootSuccessful;
140    do {
141      bytes = write(sv[0], &reply, 1);
142    } while (bytes == -1 && errno == EINTR);
143
144    if (bytes != 1)
145      FatalError("Writing reply");
146
147    _exit(0);
148    // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
149    // are chrooted there.
150    // Our (unprivileged) parent should not even be able to open "." or "/"
151    // since they would need to pass the ptrace() check. If our parent wait()
152    // for us, our root directory will completely disappear.
153  }
154
155  if (close(sv[0])) {
156    close(sv[1]);
157    perror("close");
158    return false;
159  }
160
161  // In the parent process, we install an environment variable containing the
162  // number of the file descriptor.
163  char desc_str[64];
164  int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
165  if (printed < 0 || printed >= (int)sizeof(desc_str)) {
166    fprintf(stderr, "Failed to snprintf\n");
167    return false;
168  }
169
170  if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
171    perror("setenv");
172    close(sv[1]);
173    return false;
174  }
175
176  // We also install an environment variable containing the pid of the child
177  char helper_pid_str[64];
178  printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
179  if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
180    fprintf(stderr, "Failed to snprintf\n");
181    return false;
182  }
183
184  if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
185    perror("setenv");
186    close(sv[1]);
187    return false;
188  }
189
190  return true;
191}
192
193// Block until child_pid exits, then exit. Try to preserve the exit code.
194static void WaitForChildAndExit(pid_t child_pid) {
195  int exit_code = -1;
196  siginfo_t reaped_child_info;
197
198  int wait_ret =
199    HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
200
201  if (!wait_ret && reaped_child_info.si_pid == child_pid) {
202    if (reaped_child_info.si_code == CLD_EXITED) {
203      exit_code = reaped_child_info.si_status;
204    } else {
205      // Exit with code 0 if the child got signaled.
206      exit_code = 0;
207    }
208  }
209  _exit(exit_code);
210}
211
212static bool MoveToNewNamespaces() {
213  // These are the sets of flags which we'll try, in order.
214  const int kCloneExtraFlags[] = {
215    CLONE_NEWPID | CLONE_NEWNET,
216    CLONE_NEWPID,
217  };
218
219  // We need to close kZygoteIdFd before the child can continue. We use this
220  // socketpair to tell the child when to continue;
221  int sync_fds[2];
222  if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
223    FatalError("Failed to create a socketpair");
224  }
225
226  for (size_t i = 0;
227       i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
228       i++) {
229    pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);
230
231    if (pid > 0) {
232      if (!DropRoot()) {
233        FatalError("Could not drop privileges");
234      } else {
235        if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
236          FatalError("Could not close socketpair");
237        // The kZygoteIdFd needs to be closed in the parent before
238        // Zygote gets started.
239        if (close(kZygoteIdFd))
240          FatalError("close");
241        // Tell our child to continue
242        if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
243          FatalError("send");
244        if (close(sync_fds[1]))
245          FatalError("close");
246        // We want to keep a full process tree and we don't want our childs to
247        // be reparented to (the outer PID namespace) init. So we wait for it.
248        WaitForChildAndExit(pid);
249      }
250      // NOTREACHED
251      FatalError("Not reached");
252    }
253
254    if (pid == 0) {
255      if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
256        FatalError("Could not close socketpair");
257
258      // Wait for the parent to confirm it closed kZygoteIdFd before we
259      // continue
260      char should_continue;
261      if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
262        FatalError("Read on socketpair");
263      if (close(sync_fds[0]))
264        FatalError("close");
265
266      if (kCloneExtraFlags[i] & CLONE_NEWPID) {
267        setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
268      } else {
269        unsetenv(kSandboxPIDNSEnvironmentVarName);
270      }
271
272      if (kCloneExtraFlags[i] & CLONE_NEWNET) {
273        setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
274      } else {
275        unsetenv(kSandboxNETNSEnvironmentVarName);
276      }
277
278      break;
279    }
280
281    if (errno != EINVAL) {
282      perror("Failed to move to new PID namespace");
283      return false;
284    }
285  }
286
287  // If the system doesn't support NEWPID then we carry on anyway.
288  return true;
289}
290
// Gives up elevated credentials for the rest of this process's life.
//
// Steps, in order: clear the dumpable flag and confirm it reads back as 0,
// set the real/effective/saved gid to the real gid, then set the
// real/effective/saved uid to the real uid. The first step that fails is
// reported with perror() and the remaining steps are skipped.
//
// Returns true when every step succeeded, false otherwise.
static bool DropRoot() {
  gid_t real_gid, effective_gid, saved_gid;
  uid_t real_uid, effective_uid, saved_uid;
  const char *failed_step = NULL;

  // Each branch runs only if all earlier calls returned 0, so the syscall
  // order is fixed and failed_step names the first call that failed.
  if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0)
    failed_step = "prctl(PR_SET_DUMPABLE)";
  else if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0) != 0)
    failed_step = "Still dumpable after prctl(PR_SET_DUMPABLE)";
  else if (getresgid(&real_gid, &effective_gid, &saved_gid) != 0)
    failed_step = "getresgid";
  else if (setresgid(real_gid, real_gid, real_gid) != 0)
    failed_step = "setresgid";
  else if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0)
    failed_step = "getresuid";
  else if (setresuid(real_uid, real_uid, real_uid) != 0)
    failed_step = "setresuid";

  if (failed_step != NULL) {
    perror(failed_step);
    return false;
  }

  return true;
}
326
327static bool SetupChildEnvironment() {
328  unsigned i;
329
330  // ld.so may have cleared several environment variables because we are SUID.
331  // However, the child process might need them so zygote_host_linux.cc saves a
332  // copy in SANDBOX_$x. This is safe because we have dropped root by this
333  // point, so we can only exec a binary with the permissions of the user who
334  // ran us in the first place.
335
336  for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
337    const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
338    char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
339    if (!saved_envvar)
340      return false;
341
342    const char* const value = getenv(saved_envvar);
343    if (value) {
344      setenv(envvar, value, 1 /* overwrite */);
345      unsetenv(saved_envvar);
346    }
347
348    free(saved_envvar);
349  }
350
351  return true;
352}
353
354bool CheckAndExportApiVersion() {
355  // Check the environment to see if a specific API version was requested.
356  // assume version 0 if none.
357  long api_number = -1;
358  char *api_string = getenv(kSandboxEnvironmentApiRequest);
359  if (!api_string) {
360    api_number = 0;
361  } else {
362    errno = 0;
363    char* endptr = NULL;
364    api_number = strtol(api_string, &endptr, 10);
365    if (!endptr || *endptr || errno != 0)
366      return false;
367  }
368
369  // Warn only for now.
370  if (api_number != kSUIDSandboxApiNumber) {
371    fprintf(stderr, "The setuid sandbox provides API version %ld, "
372      "but you need %ld\n"
373      "Please read "
374      "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment."
375      "\n\n",
376      kSUIDSandboxApiNumber,
377      api_number);
378  }
379
380  // Export our version so that the sandboxed process can verify it did not
381  // use an old sandbox.
382  char version_string[64];
383  snprintf(version_string, sizeof(version_string), "%ld",
384           kSUIDSandboxApiNumber);
385  if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
386    perror("setenv");
387    return false;
388  }
389
390  return true;
391}
392
393int main(int argc, char **argv) {
394  if (argc <= 1) {
395    if (argc <= 0) {
396      return 1;
397    }
398
399    fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
400    return 1;
401  }
402
403  // Allow someone to query our API version
404  if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
405    printf("%ld\n", kSUIDSandboxApiNumber);
406    return 0;
407  }
408
409  // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces()
410  // below, then the zygote and all the renderers are in an alternate PID
411  // namespace and do not know their real PIDs. As such, they report the wrong
412  // PIDs to the task manager.
413  //
414  // To fix this, when the zygote spawns a new renderer, it gives the renderer
415  // a dummy socket, which has a unique inode number. Then it asks the sandbox
416  // host to find the PID of the process holding that fd by searching /proc.
417  //
418  // Since the zygote and renderers are all spawned by this setuid executable,
419  // their entries in /proc are owned by root and only readable by root. In
420  // order to search /proc for the fd we want, this setuid executable has to
421  // double as a helper and perform the search. The code block below does this
422  // when you call it with --find-inode INODE_NUMBER.
423  if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) {
424    pid_t pid;
425    char* endptr = NULL;
426    errno = 0;
427    ino_t inode = strtoull(argv[2], &endptr, 10);
428    if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0)
429      return 1;
430    if (!FindProcessHoldingSocket(&pid, inode))
431      return 1;
432    printf("%d\n", pid);
433    return 0;
434  }
435  // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers
436  // because those files are owned by root. So we need another helper here.
437  if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
438    char* endptr = NULL;
439    long score;
440    errno = 0;
441    unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
442    if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
443      return 1;
444    pid_t pid = pid_ul;
445    endptr = NULL;
446    errno = 0;
447    score = strtol(argv[3], &endptr, 10);
448    if (score == LONG_MAX || score == LONG_MIN ||
449        !endptr || *endptr || errno != 0)
450      return 1;
451    return AdjustOOMScore(pid, score);
452  }
453#if defined(OS_CHROMEOS)
454  if (argc == 3 && (0 == strcmp(argv[1], kAdjustLowMemMarginSwitch))) {
455    char* endptr = NULL;
456    errno = 0;
457    unsigned long margin_mb = strtoul(argv[2], &endptr, 10);
458    if (!endptr || *endptr || errno != 0)
459      return 1;
460    return AdjustLowMemoryMargin(margin_mb);
461  }
462#endif
463
464  // Protect the core setuid sandbox functionality with an API version
465  if (!CheckAndExportApiVersion()) {
466    return 1;
467  }
468
469  if (!MoveToNewNamespaces())
470    return 1;
471  if (!SpawnChrootHelper())
472    return 1;
473  if (!DropRoot())
474    return 1;
475  if (!SetupChildEnvironment())
476    return 1;
477
478  execv(argv[1], &argv[1]);
479  FatalError("execv failed");
480
481  return 1;
482}
483