1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sandbox/linux/services/credentials.h"
6
7#include <dirent.h>
8#include <errno.h>
9#include <fcntl.h>
10#include <signal.h>
11#include <stdio.h>
12#include <sys/capability.h>
13#include <sys/stat.h>
14#include <sys/syscall.h>
15#include <sys/types.h>
16#include <sys/wait.h>
17#include <unistd.h>
18
19#include "base/basictypes.h"
20#include "base/bind.h"
21#include "base/logging.h"
22#include "base/posix/eintr_wrapper.h"
23#include "base/strings/string_number_conversions.h"
24#include "base/template_util.h"
25#include "base/third_party/valgrind/valgrind.h"
26#include "base/threading/thread.h"
27
28namespace {
29
30bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
31
32struct CapFreeDeleter {
33  inline void operator()(cap_t cap) const {
34    int ret = cap_free(cap);
35    CHECK_EQ(0, ret);
36  }
37};
38
39// Wrapper to manage libcap2's cap_t type.
40typedef scoped_ptr<typeof(*((cap_t)0)), CapFreeDeleter> ScopedCap;
41
42struct CapTextFreeDeleter {
43  inline void operator()(char* cap_text) const {
44    int ret = cap_free(cap_text);
45    CHECK_EQ(0, ret);
46  }
47};
48
49// Wrapper to manage the result from libcap2's cap_from_text().
50typedef scoped_ptr<char, CapTextFreeDeleter> ScopedCapText;
51
52struct FILECloser {
53  inline void operator()(FILE* f) const {
54    DCHECK(f);
55    PCHECK(0 == fclose(f));
56  }
57};
58
59// Don't use ScopedFILE in base since it doesn't check fclose().
60// TODO(jln): fix base/.
61typedef scoped_ptr<FILE, FILECloser> ScopedFILE;
62
63struct DIRCloser {
64  void operator()(DIR* d) const {
65    DCHECK(d);
66    PCHECK(0 == closedir(d));
67  }
68};
69
70typedef scoped_ptr<DIR, DIRCloser> ScopedDIR;
71
72COMPILE_ASSERT((base::is_same<uid_t, gid_t>::value), UidAndGidAreSameType);
73// generic_id_t can be used for either uid_t or gid_t.
74typedef uid_t generic_id_t;
75
76// Write a uid or gid mapping from |id| to |id| in |map_file|.
77bool WriteToIdMapFile(const char* map_file, generic_id_t id) {
78  ScopedFILE f(fopen(map_file, "w"));
79  PCHECK(f);
80  const uid_t inside_id = id;
81  const uid_t outside_id = id;
82  int num = fprintf(f.get(), "%d %d 1\n", inside_id, outside_id);
83  if (num < 0) return false;
84  // Manually call fflush() to catch permission failures.
85  int ret = fflush(f.get());
86  if (ret) {
87    VLOG(1) << "Could not write to id map file";
88    return false;
89  }
90  return true;
91}
92
93// Checks that the set of RES-uids and the set of RES-gids have
94// one element each and return that element in |resuid| and |resgid|
95// respectively. It's ok to pass NULL as one or both of the ids.
96bool GetRESIds(uid_t* resuid, gid_t* resgid) {
97  uid_t ruid, euid, suid;
98  gid_t rgid, egid, sgid;
99  PCHECK(getresuid(&ruid, &euid, &suid) == 0);
100  PCHECK(getresgid(&rgid, &egid, &sgid) == 0);
101  const bool uids_are_equal = (ruid == euid) && (ruid == suid);
102  const bool gids_are_equal = (rgid == egid) && (rgid == sgid);
103  if (!uids_are_equal || !gids_are_equal) return false;
104  if (resuid) *resuid = euid;
105  if (resgid) *resgid = egid;
106  return true;
107}
108
109// chroot() and chdir() to /proc/<tid>/fdinfo.
110void ChrootToThreadFdInfo(base::PlatformThreadId tid, bool* result) {
111  DCHECK(result);
112  *result = false;
113
114  COMPILE_ASSERT((base::is_same<base::PlatformThreadId, int>::value),
115                 TidIsAnInt);
116  const std::string current_thread_fdinfo = "/proc/" +
117      base::IntToString(tid) + "/fdinfo/";
118
119  // Make extra sure that /proc/<tid>/fdinfo is unique to the thread.
120  CHECK(0 == unshare(CLONE_FILES));
121  int chroot_ret = chroot(current_thread_fdinfo.c_str());
122  if (chroot_ret) {
123    PLOG(ERROR) << "Could not chroot";
124    return;
125  }
126
127  // CWD is essentially an implicit file descriptor, so be careful to not leave
128  // it behind.
129  PCHECK(0 == chdir("/"));
130
131  *result = true;
132  return;
133}
134
135// chroot() to an empty dir that is "safe". To be safe, it must not contain
136// any subdirectory (chroot-ing there would allow a chroot escape) and it must
137// be impossible to create an empty directory there.
138// We achieve this by doing the following:
139// 1. We create a new thread, which will create a new /proc/<tid>/ directory
140// 2. We chroot to /proc/<tid>/fdinfo/
141// This is already "safe", since fdinfo/ does not contain another directory and
142// one cannot create another directory there.
143// 3. The thread dies
144// After (3) happens, the directory is not available anymore in /proc.
145bool ChrootToSafeEmptyDir() {
146  base::Thread chrooter("sandbox_chrooter");
147  if (!chrooter.Start()) return false;
148  bool is_chrooted = false;
149  chrooter.message_loop()->PostTask(FROM_HERE,
150      base::Bind(&ChrootToThreadFdInfo, chrooter.thread_id(), &is_chrooted));
151  // Make sure our task has run before committing the return value.
152  chrooter.Stop();
153  return is_chrooted;
154}
155
156// CHECK() that an attempt to move to a new user namespace raised an expected
157// errno.
158void CheckCloneNewUserErrno(int error) {
159  // EPERM can happen if already in a chroot. EUSERS if too many nested
160  // namespaces are used. EINVAL for kernels that don't support the feature.
161  // Valgrind will ENOSYS unshare().
162  PCHECK(error == EPERM || error == EUSERS || error == EINVAL ||
163         error == ENOSYS);
164}
165
166}  // namespace.
167
168namespace sandbox {
169
170Credentials::Credentials() {
171}
172
173Credentials::~Credentials() {
174}
175
176int Credentials::CountOpenFds(int proc_fd) {
177  DCHECK_LE(0, proc_fd);
178  int proc_self_fd = openat(proc_fd, "self/fd", O_DIRECTORY | O_RDONLY);
179  PCHECK(0 <= proc_self_fd);
180
181  // Ownership of proc_self_fd is transferred here, it must not be closed
182  // or modified afterwards except via dir.
183  ScopedDIR dir(fdopendir(proc_self_fd));
184  CHECK(dir);
185
186  int count = 0;
187  struct dirent e;
188  struct dirent* de;
189  while (!readdir_r(dir.get(), &e, &de) && de) {
190    if (strcmp(e.d_name, ".") == 0 || strcmp(e.d_name, "..") == 0) {
191      continue;
192    }
193
194    int fd_num;
195    CHECK(base::StringToInt(e.d_name, &fd_num));
196    if (fd_num == proc_fd || fd_num == proc_self_fd) {
197      continue;
198    }
199
200    ++count;
201  }
202  return count;
203}
204
205bool Credentials::HasOpenDirectory(int proc_fd) {
206  int proc_self_fd = -1;
207  if (proc_fd >= 0) {
208    proc_self_fd = openat(proc_fd, "self/fd", O_DIRECTORY | O_RDONLY);
209  } else {
210    proc_self_fd = openat(AT_FDCWD, "/proc/self/fd", O_DIRECTORY | O_RDONLY);
211    if (proc_self_fd < 0) {
212      // If this process has been chrooted (eg into /proc/self/fdinfo) then
213      // the new root dir will not have directory listing permissions for us
214      // (hence EACCES).  And if we do have this permission, then /proc won't
215      // exist anyway (hence ENOENT).
216      DPCHECK(errno == EACCES || errno == ENOENT)
217        << "Unexpected failure when trying to open /proc/self/fd: ("
218        << errno << ") " << strerror(errno);
219
220      // If not available, guess false.
221      return false;
222    }
223  }
224  PCHECK(0 <= proc_self_fd);
225
226  // Ownership of proc_self_fd is transferred here, it must not be closed
227  // or modified afterwards except via dir.
228  ScopedDIR dir(fdopendir(proc_self_fd));
229  CHECK(dir);
230
231  struct dirent e;
232  struct dirent* de;
233  while (!readdir_r(dir.get(), &e, &de) && de) {
234    if (strcmp(e.d_name, ".") == 0 || strcmp(e.d_name, "..") == 0) {
235      continue;
236    }
237
238    int fd_num;
239    CHECK(base::StringToInt(e.d_name, &fd_num));
240    if (fd_num == proc_fd || fd_num == proc_self_fd) {
241      continue;
242    }
243
244    struct stat s;
245    // It's OK to use proc_self_fd here, fstatat won't modify it.
246    CHECK(fstatat(proc_self_fd, e.d_name, &s, 0) == 0);
247    if (S_ISDIR(s.st_mode)) {
248      return true;
249    }
250  }
251
252  // No open unmanaged directories found.
253  return false;
254}
255
256bool Credentials::DropAllCapabilities() {
257  ScopedCap cap(cap_init());
258  CHECK(cap);
259  PCHECK(0 == cap_set_proc(cap.get()));
260  // We never let this function fail.
261  return true;
262}
263
264bool Credentials::HasAnyCapability() const {
265  ScopedCap current_cap(cap_get_proc());
266  CHECK(current_cap);
267  ScopedCap empty_cap(cap_init());
268  CHECK(empty_cap);
269  return cap_compare(current_cap.get(), empty_cap.get()) != 0;
270}
271
272scoped_ptr<std::string> Credentials::GetCurrentCapString() const {
273  ScopedCap current_cap(cap_get_proc());
274  CHECK(current_cap);
275  ScopedCapText cap_text(cap_to_text(current_cap.get(), NULL));
276  CHECK(cap_text);
277  return scoped_ptr<std::string> (new std::string(cap_text.get()));
278}
279
280// static
281bool Credentials::SupportsNewUserNS() {
282  // Valgrind will let clone(2) pass-through, but doesn't support unshare(),
283  // so always consider UserNS unsupported there.
284  if (IsRunningOnValgrind()) {
285    return false;
286  }
287
288  // This is roughly a fork().
289  const pid_t pid = syscall(__NR_clone, CLONE_NEWUSER | SIGCHLD, 0, 0, 0);
290
291  if (pid == -1) {
292    CheckCloneNewUserErrno(errno);
293    return false;
294  }
295
296  // The parent process could have had threads. In the child, these threads
297  // have disappeared. Make sure to not do anything in the child, as this is a
298  // fragile execution environment.
299  if (pid == 0) {
300    _exit(0);
301  }
302
303  // Always reap the child.
304  siginfo_t infop;
305  PCHECK(0 == HANDLE_EINTR(waitid(P_PID, pid, &infop, WEXITED)));
306
307  // clone(2) succeeded, we can use CLONE_NEWUSER.
308  return true;
309}
310
311bool Credentials::MoveToNewUserNS() {
312  uid_t uid;
313  gid_t gid;
314  if (!GetRESIds(&uid, &gid)) {
315    // If all the uids (or gids) are not equal to each other, the security
316    // model will most likely confuse the caller, abort.
317    DVLOG(1) << "uids or gids differ!";
318    return false;
319  }
320  int ret = unshare(CLONE_NEWUSER);
321  if (ret) {
322    const int unshare_errno = errno;
323    VLOG(1) << "Looks like unprivileged CLONE_NEWUSER may not be available "
324            << "on this kernel.";
325    CheckCloneNewUserErrno(unshare_errno);
326    return false;
327  }
328
329  // The current {r,e,s}{u,g}id is now an overflow id (c.f.
330  // /proc/sys/kernel/overflowuid). Setup the uid and gid maps.
331  DCHECK(GetRESIds(NULL, NULL));
332  const char kGidMapFile[] = "/proc/self/gid_map";
333  const char kUidMapFile[] = "/proc/self/uid_map";
334  CHECK(WriteToIdMapFile(kGidMapFile, gid));
335  CHECK(WriteToIdMapFile(kUidMapFile, uid));
336  DCHECK(GetRESIds(NULL, NULL));
337  return true;
338}
339
340bool Credentials::DropFileSystemAccess() {
341  // Chrooting to a safe empty dir will only be safe if no directory file
342  // descriptor is available to the process.
343  DCHECK(!HasOpenDirectory(-1));
344  return ChrootToSafeEmptyDir();
345}
346
347}  // namespace sandbox.
348