crash_handler_host_linux.cc revision 731df977c0511bca2206b5f333555b1205ff1f43
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/crash_handler_host_linux.h"
6
7#include <stdint.h>
8#include <stdlib.h>
9#include <sys/socket.h>
10#include <sys/syscall.h>
11#include <sys/types.h>
12#include <unistd.h>
13
14#include "base/eintr_wrapper.h"
15#include "base/file_path.h"
16#include "base/format_macros.h"
17#include "base/linux_util.h"
18#include "base/logging.h"
19#include "base/message_loop.h"
20#include "base/path_service.h"
21#include "base/rand_util.h"
22#include "base/string_util.h"
23#include "base/task.h"
24#include "base/thread.h"
25#include "breakpad/src/client/linux/handler/exception_handler.h"
26#include "breakpad/src/client/linux/minidump_writer/linux_dumper.h"
27#include "breakpad/src/client/linux/minidump_writer/minidump_writer.h"
28#include "chrome/app/breakpad_linux.h"
29#include "chrome/browser/browser_thread.h"
30#include "chrome/common/chrome_paths.h"
31#include "chrome/common/env_vars.h"
32
33using google_breakpad::ExceptionHandler;
34
35namespace {
36
37// Handles the crash dump and frees the allocated BreakpadInfo struct.
38void CrashDumpTask(BreakpadInfo* info) {
39  HandleCrashDump(*info);
40  delete[] info->filename;
41  delete[] info->process_type;
42  delete[] info->crash_url;
43  delete[] info->guid;
44  delete[] info->distro;
45  delete info;
46}
47
48}  // namespace
49
// Since classes derived from CrashHandlerHostLinux are singletons, an instance
// is only destroyed at the end of the process's lifetime, which is greater in
// span than the lifetime of the IO message loop.
DISABLE_RUNNABLE_METHOD_REFCOUNT(CrashHandlerHostLinux);
54
55CrashHandlerHostLinux::CrashHandlerHostLinux() {
56  int fds[2];
57  // We use SOCK_SEQPACKET rather than SOCK_DGRAM to prevent the process from
58  // sending datagrams to other sockets on the system. The sandbox may prevent
59  // the process from calling socket() to create new sockets, but it'll still
60  // inherit some sockets. With PF_UNIX+SOCK_DGRAM, it can call sendmsg to send
61  // a datagram to any (abstract) socket on the same system. With
62  // SOCK_SEQPACKET, this is prevented.
63  CHECK_EQ(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds), 0);
64  static const int on = 1;
65
66  // Enable passcred on the server end of the socket
67  CHECK_EQ(setsockopt(fds[1], SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)), 0);
68
69  process_socket_ = fds[0];
70  browser_socket_ = fds[1];
71
72  BrowserThread::PostTask(
73      BrowserThread::IO, FROM_HERE,
74      NewRunnableMethod(this, &CrashHandlerHostLinux::Init));
75}
76
77CrashHandlerHostLinux::~CrashHandlerHostLinux() {
78  HANDLE_EINTR(close(process_socket_));
79  HANDLE_EINTR(close(browser_socket_));
80
81  // If we are quitting and there are crash dumps in the queue, discard them.
82  uploader_thread_->message_loop()->QuitNow();
83}
84
85void CrashHandlerHostLinux::Init() {
86  MessageLoopForIO* ml = MessageLoopForIO::current();
87  CHECK(ml->WatchFileDescriptor(
88      browser_socket_, true /* persistent */,
89      MessageLoopForIO::WATCH_READ,
90      &file_descriptor_watcher_, this));
91  ml->AddDestructionObserver(this);
92}
93
94void CrashHandlerHostLinux::InitCrashUploaderThread() {
95  SetProcessType();
96  uploader_thread_.reset(
97      new base::Thread(std::string(process_type_ + "_crash_uploader").c_str()));
98  uploader_thread_->Start();
99}
100
101void CrashHandlerHostLinux::OnFileCanWriteWithoutBlocking(int fd) {
102  DCHECK(false);
103}
104
105void CrashHandlerHostLinux::OnFileCanReadWithoutBlocking(int fd) {
106  DCHECK_EQ(fd, browser_socket_);
107
108  // A process has crashed and has signaled us by writing a datagram
109  // to the death signal socket. The datagram contains the crash context needed
110  // for writing the minidump as well as a file descriptor and a credentials
111  // block so that they can't lie about their pid.
112
113  // The length of the control message:
114  static const unsigned kControlMsgSize =
115      CMSG_SPACE(2*sizeof(int)) + CMSG_SPACE(sizeof(struct ucred));
116  // The length of the regular payload:
117  static const unsigned kCrashContextSize =
118      sizeof(ExceptionHandler::CrashContext);
119
120  struct msghdr msg = {0};
121  struct iovec iov[6];
122  char crash_context[kCrashContextSize];
123  char* guid = new char[kGuidSize + 1];
124  char* crash_url = new char[kMaxActiveURLSize + 1];
125  char* distro = new char[kDistroSize + 1];
126  char* tid_buf_addr = NULL;
127  int tid_fd = -1;
128  char control[kControlMsgSize];
129  const ssize_t expected_msg_size = sizeof(crash_context) +
130      kGuidSize + 1 +
131      kMaxActiveURLSize + 1 +
132      kDistroSize + 1 +
133      sizeof(tid_buf_addr) + sizeof(tid_fd);
134
135  iov[0].iov_base = crash_context;
136  iov[0].iov_len = sizeof(crash_context);
137  iov[1].iov_base = guid;
138  iov[1].iov_len = kGuidSize + 1;
139  iov[2].iov_base = crash_url;
140  iov[2].iov_len = kMaxActiveURLSize + 1;
141  iov[3].iov_base = distro;
142  iov[3].iov_len = kDistroSize + 1;
143  iov[4].iov_base = &tid_buf_addr;
144  iov[4].iov_len = sizeof(tid_buf_addr);
145  iov[5].iov_base = &tid_fd;
146  iov[5].iov_len = sizeof(tid_fd);
147  msg.msg_iov = iov;
148  msg.msg_iovlen = 6;
149  msg.msg_control = control;
150  msg.msg_controllen = kControlMsgSize;
151
152  const ssize_t msg_size = HANDLE_EINTR(recvmsg(browser_socket_, &msg, 0));
153  if (msg_size != expected_msg_size) {
154    LOG(ERROR) << "Error reading from death signal socket. Crash dumping"
155               << " is disabled."
156               << " msg_size:" << msg_size
157               << " errno:" << errno;
158    file_descriptor_watcher_.StopWatchingFileDescriptor();
159    return;
160  }
161
162  if (msg.msg_controllen != kControlMsgSize ||
163      msg.msg_flags & ~MSG_TRUNC) {
164    LOG(ERROR) << "Received death signal message with the wrong size;"
165               << " msg.msg_controllen:" << msg.msg_controllen
166               << " msg.msg_flags:" << msg.msg_flags
167               << " kCrashContextSize:" << kCrashContextSize
168               << " kControlMsgSize:" << kControlMsgSize;
169    return;
170  }
171
172  // Walk the control payload an extract the file descriptor and validated pid.
173  pid_t crashing_pid = -1;
174  int partner_fd = -1;
175  int signal_fd = -1;
176  for (struct cmsghdr *hdr = CMSG_FIRSTHDR(&msg); hdr;
177       hdr = CMSG_NXTHDR(&msg, hdr)) {
178    if (hdr->cmsg_level != SOL_SOCKET)
179      continue;
180    if (hdr->cmsg_type == SCM_RIGHTS) {
181      const unsigned len = hdr->cmsg_len -
182          (((uint8_t*)CMSG_DATA(hdr)) - (uint8_t*)hdr);
183      DCHECK_EQ(len % sizeof(int), 0u);
184      const unsigned num_fds = len / sizeof(int);
185      if (num_fds != 2) {
186        // A nasty process could try and send us too many descriptors and
187        // force a leak.
188        LOG(ERROR) << "Death signal contained wrong number of descriptors;"
189                   << " num_fds:" << num_fds;
190        for (unsigned i = 0; i < num_fds; ++i)
191          HANDLE_EINTR(close(reinterpret_cast<int*>(CMSG_DATA(hdr))[i]));
192        return;
193      } else {
194        partner_fd = reinterpret_cast<int*>(CMSG_DATA(hdr))[0];
195        signal_fd = reinterpret_cast<int*>(CMSG_DATA(hdr))[1];
196      }
197    } else if (hdr->cmsg_type == SCM_CREDENTIALS) {
198      const struct ucred *cred =
199          reinterpret_cast<struct ucred*>(CMSG_DATA(hdr));
200      crashing_pid = cred->pid;
201    }
202  }
203
204  if (crashing_pid == -1 || partner_fd == -1 || signal_fd == -1) {
205    LOG(ERROR) << "Death signal message didn't contain all expected control"
206               << " messages";
207    if (partner_fd >= 0)
208      HANDLE_EINTR(close(partner_fd));
209    if (signal_fd >= 0)
210      HANDLE_EINTR(close(signal_fd));
211    return;
212  }
213
214  // Kernel bug workaround (broken in 2.6.30 at least):
215  // The kernel doesn't translate PIDs in SCM_CREDENTIALS across PID
216  // namespaces. Thus |crashing_pid| might be garbage from our point of view.
217  // In the future we can remove this workaround, but we have to wait a couple
218  // of years to be sure that it's worked its way out into the world.
219
220  // The crashing process closes its copy of the signal_fd immediately after
221  // calling sendmsg(). We can thus not reliably look for with with
222  // FindProcessHoldingSocket(). But by necessity, it has to keep the
223  // partner_fd open until the crashdump is complete.
224  uint64_t inode_number;
225  if (!base::FileDescriptorGetInode(&inode_number, partner_fd)) {
226    LOG(WARNING) << "Failed to get inode number for passed socket";
227    HANDLE_EINTR(close(partner_fd));
228    HANDLE_EINTR(close(signal_fd));
229    return;
230  }
231  HANDLE_EINTR(close(partner_fd));
232
233  pid_t actual_crashing_pid = -1;
234  if (!base::FindProcessHoldingSocket(&actual_crashing_pid, inode_number)) {
235    LOG(WARNING) << "Failed to find process holding other end of crash reply "
236                    "socket";
237    HANDLE_EINTR(close(signal_fd));
238    return;
239  }
240
241  if (actual_crashing_pid != crashing_pid) {
242    crashing_pid = actual_crashing_pid;
243
244    // The crashing TID set inside the compromised context via sys_gettid()
245    // in ExceptionHandler::HandleSignal is also wrong and needs to be
246    // translated.
247    //
248    // We expect the crashing thread to be in sys_read(), waiting for use to
249    // write to |signal_fd|. Most newer kernels where we have the different pid
250    // namespaces also have /proc/[pid]/syscall, so we can look through
251    // |actual_crashing_pid|'s thread group and find the thread that's in the
252    // read syscall with the right arguments.
253
254    std::string expected_syscall_data;
255    // /proc/[pid]/syscall is formatted as follows:
256    // syscall_number arg1 ... arg6 sp pc
257    // but we just check syscall_number through arg3.
258    StringAppendF(&expected_syscall_data, "%d 0x%x %p 0x1 ",
259                  SYS_read, tid_fd, tid_buf_addr);
260    pid_t crashing_tid =
261        base::FindThreadIDWithSyscall(crashing_pid, expected_syscall_data);
262    if (crashing_tid == -1) {
263      // We didn't find the thread we want. Maybe it didn't reach sys_read()
264      // yet, or the kernel doesn't support /proc/[pid]/syscall or the thread
265      // went away.  We'll just take a guess here and assume the crashing
266      // thread is the thread group leader.
267      crashing_tid = crashing_pid;
268    }
269
270    ExceptionHandler::CrashContext* bad_context =
271        reinterpret_cast<ExceptionHandler::CrashContext*>(crash_context);
272    bad_context->tid = crashing_tid;
273  }
274
275  bool upload = true;
276  FilePath dumps_path("/tmp");
277  PathService::Get(base::DIR_TEMP, &dumps_path);
278  if (getenv(env_vars::kHeadless)) {
279    upload = false;
280    PathService::Get(chrome::DIR_CRASH_DUMPS, &dumps_path);
281  }
282  const uint64 rand = base::RandUint64();
283  const std::string minidump_filename =
284      StringPrintf("%s/chromium-%s-minidump-%016" PRIx64 ".dmp",
285                   dumps_path.value().c_str(), process_type_.c_str(), rand);
286  if (!google_breakpad::WriteMinidump(minidump_filename.c_str(),
287                                      crashing_pid, crash_context,
288                                      kCrashContextSize)) {
289    LOG(ERROR) << "Failed to write crash dump for pid " << crashing_pid;
290    HANDLE_EINTR(close(signal_fd));
291  }
292
293  // Send the done signal to the process: it can exit now.
294  memset(&msg, 0, sizeof(msg));
295  struct iovec done_iov;
296  done_iov.iov_base = const_cast<char*>("\x42");
297  done_iov.iov_len = 1;
298  msg.msg_iov = &done_iov;
299  msg.msg_iovlen = 1;
300
301  HANDLE_EINTR(sendmsg(signal_fd, &msg, MSG_DONTWAIT | MSG_NOSIGNAL));
302  HANDLE_EINTR(close(signal_fd));
303
304  // Sanitize the string data a bit more
305  guid[kGuidSize] = crash_url[kMaxActiveURLSize] = distro[kDistroSize] = 0;
306
307  BreakpadInfo* info = new BreakpadInfo;
308
309  char* minidump_filename_str = new char[minidump_filename.length() + 1];
310  minidump_filename.copy(minidump_filename_str, minidump_filename.length());
311  minidump_filename_str[minidump_filename.length()] = '\0';
312  info->filename = minidump_filename_str;
313
314  info->process_type_length = process_type_.length();
315  char* process_type_str = new char[info->process_type_length + 1];
316  process_type_.copy(process_type_str, info->process_type_length);
317  process_type_str[info->process_type_length] = '\0';
318  info->process_type = process_type_str;
319
320  info->crash_url_length = strlen(crash_url);
321  info->crash_url = crash_url;
322
323  info->guid_length = strlen(guid);
324  info->guid = guid;
325
326  info->distro_length = strlen(distro);
327  info->distro = distro;
328
329  info->upload = upload;
330
331  uploader_thread_->message_loop()->PostTask(
332      FROM_HERE,
333      NewRunnableFunction(&CrashDumpTask, info));
334}
335
336void CrashHandlerHostLinux::WillDestroyCurrentMessageLoop() {
337  file_descriptor_watcher_.StopWatchingFileDescriptor();
338}
339
340PluginCrashHandlerHostLinux::PluginCrashHandlerHostLinux() {
341  InitCrashUploaderThread();
342}
343
344PluginCrashHandlerHostLinux::~PluginCrashHandlerHostLinux() {
345}
346
347void PluginCrashHandlerHostLinux::SetProcessType() {
348  process_type_ = "plugin";
349}
350
351RendererCrashHandlerHostLinux::RendererCrashHandlerHostLinux() {
352  InitCrashUploaderThread();
353}
354
355RendererCrashHandlerHostLinux::~RendererCrashHandlerHostLinux() {
356}
357
358void RendererCrashHandlerHostLinux::SetProcessType() {
359  process_type_ = "renderer";
360}
361