user_collector.cc revision 294d5d1eeed1634800e0a52fe1bcf7418ac5d958
1// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "crash-reporter/user_collector.h"
6
#include <errno.h>
#include <grp.h>  // For struct group.
#include <pcrecpp.h>
#include <pwd.h>  // For struct passwd.
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>  // For getpwuid_r, getgrnam_r, WEXITSTATUS.
#include <unistd.h>

#include <limits>
#include <string>
#include <vector>
15
16#include "base/file_util.h"
17#include "base/logging.h"
18#include "base/string_split.h"
19#include "base/string_util.h"
20#include "chromeos/process.h"
21#include "chromeos/syslog_logging.h"
22#include "gflags/gflags.h"
23
// The flag definitions below are bracketed by pragmas that switch
// -Wstrict-aliasing off and then back on as an error, presumably because
// the gflags DEFINE_* macros expand to code that trips that diagnostic.
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
// Test hook: makes RunCoreToMinidump() omit core2md's minidump argument.
DEFINE_bool(core2md_failure, false, "Core2md failure test");
// Test hook: makes GetCreatedCrashDirectory() fail immediately.
DEFINE_bool(directory_failure, false, "Spool directory failure test");
// When non-empty, HandleCrash() ignores every crash whose executable name
// differs from this value; the value "none" ignores all crashes.
DEFINE_string(filter_in, "",
              "Ignore all crashes but this for testing");
#pragma GCC diagnostic error "-Wstrict-aliasing"
30
// Signature recorded as the "sig" metadata value on the report that
// EnqueueCollectionErrorLog() writes when crash collection itself fails.
// The same string is also passed to GetLogContents() for that report.
static const char kCollectionErrorSignature[] =
    "crash_reporter-user-collection";
// This procfs file is used to cause kernel core file writing to
// instead pipe the core file into a user space process.  See
// core(5) man page.
static const char kCorePatternFile[] = "/proc/sys/kernel/core_pattern";
static const char kCorePipeLimitFile[] = "/proc/sys/kernel/core_pipe_limit";
// Set core_pipe_limit to 4 so that we can catch a few unrelated concurrent
// crashes, but finite to avoid infinitely recursing on crash handling.
static const char kCorePipeLimit[] = "4";
// Program that RunCoreToMinidump() executes to turn a core file into a
// minidump.
static const char kCoreToMinidumpConverterPath[] = "/usr/bin/core2md";

// Configuration file handed to GetLogContents() when gathering logs.
static const char kDefaultLogConfig[] = "/etc/crash_reporter_logs.conf";

// Line prefixes for use as the |prefix| argument of GetIdFromStatus().
// kUserId matches the "Uid:" line of /proc/<pid>/status; kGroupId
// presumably matches the analogous "Gid:" line (it is not used in this
// file).
const char *UserCollector::kUserId = "Uid:\t";
const char *UserCollector::kGroupId = "Gid:\t";
47
// Sets up the defaults: diagnostics generation is off, the core pattern and
// core pipe limit paths point at the procfs files named by kCorePatternFile
// and kCorePipeLimitFile, and the collector stays marked as uninitialized
// until Initialize() is called.
UserCollector::UserCollector()
    : generate_diagnostics_(false),
      core_pattern_file_(kCorePatternFile),
      core_pipe_limit_file_(kCorePipeLimitFile),
      initialized_(false) {
}
54
55void UserCollector::Initialize(
56    UserCollector::CountCrashFunction count_crash_function,
57    const std::string &our_path,
58    UserCollector::IsFeedbackAllowedFunction is_feedback_allowed_function,
59    bool generate_diagnostics) {
60  CrashCollector::Initialize(count_crash_function,
61                             is_feedback_allowed_function);
62  our_path_ = our_path;
63  initialized_ = true;
64  generate_diagnostics_ = generate_diagnostics;
65}
66
67UserCollector::~UserCollector() {
68}
69
70std::string UserCollector::GetPattern(bool enabled) const {
71  if (enabled) {
72    // Combine the three crash attributes into one parameter to try to reduce
73    // the size of the invocation line for crash_reporter since the kernel
74    // has a fixed-sized (128B) buffer that it will truncate into.  Note that
75    // the kernel does not support quoted arguments in core_pattern.
76    return StringPrintf("|%s --user=%%p:%%s:%%e", our_path_.c_str());
77  } else {
78    return "core";
79  }
80}
81
82bool UserCollector::SetUpInternal(bool enabled) {
83  CHECK(initialized_);
84  LOG(INFO) << (enabled ? "Enabling" : "Disabling") << " user crash handling";
85
86  if (file_util::WriteFile(FilePath(core_pipe_limit_file_),
87                           kCorePipeLimit,
88                           strlen(kCorePipeLimit)) !=
89      static_cast<int>(strlen(kCorePipeLimit))) {
90    LOG(ERROR) << "Unable to write " << core_pipe_limit_file_;
91    return false;
92  }
93  std::string pattern = GetPattern(enabled);
94  if (file_util::WriteFile(FilePath(core_pattern_file_),
95                           pattern.c_str(),
96                           pattern.length()) !=
97      static_cast<int>(pattern.length())) {
98    LOG(ERROR) << "Unable to write " << core_pattern_file_;
99    return false;
100  }
101  return true;
102}
103
104FilePath UserCollector::GetProcessPath(pid_t pid) {
105  return FilePath(StringPrintf("/proc/%d", pid));
106}
107
108bool UserCollector::GetSymlinkTarget(const FilePath &symlink,
109                                     FilePath *target) {
110  int max_size = 32;
111  scoped_array<char> buffer;
112  while (true) {
113    buffer.reset(new char[max_size + 1]);
114    ssize_t size = readlink(symlink.value().c_str(), buffer.get(), max_size);
115    if (size < 0) {
116      int saved_errno = errno;
117      LOG(ERROR) << "Readlink failed on " << symlink.value() << " with "
118                 << saved_errno;
119      return false;
120    }
121    buffer[size] = 0;
122    if (size == max_size) {
123      // Avoid overflow when doubling.
124      if (max_size * 2 > max_size) {
125        max_size *= 2;
126        continue;
127      } else {
128        return false;
129      }
130    }
131    break;
132  }
133
134  *target = FilePath(buffer.get());
135  return true;
136}
137
138bool UserCollector::GetExecutableBaseNameFromPid(uid_t pid,
139                                                 std::string *base_name) {
140  FilePath target;
141  FilePath process_path = GetProcessPath(pid);
142  FilePath exe_path = process_path.Append("exe");
143  if (!GetSymlinkTarget(exe_path, &target)) {
144    LOG(INFO) << "GetSymlinkTarget failed - Path " << process_path.value()
145              << " DirectoryExists: "
146              << file_util::DirectoryExists(process_path);
147    // Try to further diagnose exe readlink failure cause.
148    struct stat buf;
149    int stat_result = stat(exe_path.value().c_str(), &buf);
150    int saved_errno = errno;
151    if (stat_result < 0) {
152      LOG(INFO) << "stat " << exe_path.value() << " failed: " << stat_result
153                << " " << saved_errno;
154    } else {
155      LOG(INFO) << "stat " << exe_path.value() << " succeeded: st_mode="
156                << buf.st_mode;
157    }
158    return false;
159  }
160  *base_name = target.BaseName().value();
161  return true;
162}
163
164bool UserCollector::GetIdFromStatus(const char *prefix,
165                                    IdKind kind,
166                                    const std::string &status_contents,
167                                    int *id) {
168  // From fs/proc/array.c:task_state(), this file contains:
169  // \nUid:\t<uid>\t<euid>\t<suid>\t<fsuid>\n
170  std::vector<std::string> status_lines;
171  base::SplitString(status_contents, '\n', &status_lines);
172  std::vector<std::string>::iterator line_iterator;
173  for (line_iterator = status_lines.begin();
174       line_iterator != status_lines.end();
175       ++line_iterator) {
176    if (line_iterator->find(prefix) == 0)
177      break;
178  }
179  if (line_iterator == status_lines.end()) {
180    return false;
181  }
182  std::string id_substring = line_iterator->substr(strlen(prefix),
183                                                   std::string::npos);
184  std::vector<std::string> ids;
185  base::SplitString(id_substring, '\t', &ids);
186  if (ids.size() != kIdMax || kind < 0 || kind >= kIdMax) {
187    return false;
188  }
189  const char *number = ids[kind].c_str();
190  char *end_number = NULL;
191  *id = strtol(number, &end_number, 10);
192  if (*end_number != '\0')
193    return false;
194  return true;
195}
196
197void UserCollector::EnqueueCollectionErrorLog(pid_t pid,
198                                              const std::string &exec) {
199  FilePath crash_path;
200  LOG(INFO) << "Writing conversion problems as separate crash report.";
201  if (!GetCreatedCrashDirectoryByEuid(0, &crash_path, NULL)) {
202    LOG(ERROR) << "Could not even get log directory; out of space?";
203    return;
204  }
205  std::string dump_basename = FormatDumpBasename(exec, time(NULL), pid);
206  std::string error_log = chromeos::GetLog();
207  FilePath diag_log_path = GetCrashPath(crash_path, dump_basename, "diaglog");
208  if (GetLogContents(FilePath(kDefaultLogConfig), kCollectionErrorSignature,
209                     diag_log_path)) {
210    // We load the contents of diag_log into memory and append it to
211    // the error log.  We cannot just append to files because we need
212    // to always create new files to prevent attack.
213    std::string diag_log_contents;
214    file_util::ReadFileToString(diag_log_path, &diag_log_contents);
215    error_log.append(diag_log_contents);
216    file_util::Delete(diag_log_path, false);
217  }
218  FilePath log_path = GetCrashPath(crash_path, dump_basename, "log");
219  FilePath meta_path = GetCrashPath(crash_path, dump_basename, "meta");
220  // We must use WriteNewFile instead of file_util::WriteFile as we do
221  // not want to write with root access to a symlink that an attacker
222  // might have created.
223  WriteNewFile(log_path, error_log.data(), error_log.length());
224  AddCrashMetaData("sig", kCollectionErrorSignature);
225  WriteCrashMetaData(meta_path, exec, log_path.value());
226}
227
228bool UserCollector::CopyOffProcFiles(pid_t pid,
229                                     const FilePath &container_dir) {
230  if (!file_util::CreateDirectory(container_dir)) {
231    LOG(ERROR) << "Could not create " << container_dir.value().c_str();
232    return false;
233  }
234  FilePath process_path = GetProcessPath(pid);
235  if (!file_util::PathExists(process_path)) {
236    LOG(ERROR) << "Path " << process_path.value() << " does not exist";
237    return false;
238  }
239  static const char *proc_files[] = {
240    "auxv",
241    "cmdline",
242    "environ",
243    "maps",
244    "status"
245  };
246  for (unsigned i = 0; i < arraysize(proc_files); ++i) {
247    if (!file_util::CopyFile(process_path.Append(proc_files[i]),
248                             container_dir.Append(proc_files[i]))) {
249      LOG(ERROR) << "Could not copy " << proc_files[i] << " file";
250      return false;
251    }
252  }
253  return true;
254}
255
256bool UserCollector::GetCreatedCrashDirectory(pid_t pid,
257                                             FilePath *crash_file_path,
258                                             bool *out_of_capacity) {
259  FilePath process_path = GetProcessPath(pid);
260  std::string status;
261  if (FLAGS_directory_failure) {
262    LOG(ERROR) << "Purposefully failing to create spool directory";
263    return false;
264  }
265  if (!file_util::ReadFileToString(process_path.Append("status"),
266                                   &status)) {
267    LOG(ERROR) << "Could not read status file";
268    LOG(INFO) << "Path " << process_path.value() << " DirectoryExists: "
269              << file_util::DirectoryExists(process_path);
270    return false;
271  }
272  int process_euid;
273  if (!GetIdFromStatus(kUserId, kIdEffective, status, &process_euid)) {
274    LOG(ERROR) << "Could not find euid in status file";
275    return false;
276  }
277  if (!GetCreatedCrashDirectoryByEuid(process_euid,
278                                      crash_file_path,
279                                      out_of_capacity)) {
280    LOG(ERROR) << "Could not create crash directory";
281    return false;
282  }
283  return true;
284}
285
286bool UserCollector::CopyStdinToCoreFile(const FilePath &core_path) {
287  // Copy off all stdin to a core file.
288  FilePath stdin_path("/dev/fd/0");
289  if (file_util::CopyFile(stdin_path, core_path)) {
290    return true;
291  }
292
293  LOG(ERROR) << "Could not write core file";
294  // If the file system was full, make sure we remove any remnants.
295  file_util::Delete(core_path, false);
296  return false;
297}
298
299bool UserCollector::RunCoreToMinidump(const FilePath &core_path,
300                                      const FilePath &procfs_directory,
301                                      const FilePath &minidump_path,
302                                      const FilePath &temp_directory) {
303  FilePath output_path = temp_directory.Append("output");
304  chromeos::ProcessImpl core2md;
305  core2md.RedirectOutput(output_path.value());
306  core2md.AddArg(kCoreToMinidumpConverterPath);
307  core2md.AddArg(core_path.value());
308  core2md.AddArg(procfs_directory.value());
309
310  if (!FLAGS_core2md_failure) {
311    core2md.AddArg(minidump_path.value());
312  } else {
313    // To test how core2md errors are propagaged, cause an error
314    // by forgetting a required argument.
315  }
316
317  int errorlevel = core2md.Run();
318
319  std::string output;
320  file_util::ReadFileToString(output_path, &output);
321  if (errorlevel != 0) {
322    LOG(ERROR) << "Problem during " << kCoreToMinidumpConverterPath
323               << " [result=" << errorlevel << "]: " << output;
324    return false;
325  }
326
327  if (!file_util::PathExists(minidump_path)) {
328    LOG(ERROR) << "Minidump file " << minidump_path.value()
329               << " was not created";
330    return false;
331  }
332  return true;
333}
334
335bool UserCollector::ConvertCoreToMinidump(pid_t pid,
336                                          const FilePath &container_dir,
337                                          const FilePath &core_path,
338                                          const FilePath &minidump_path) {
339  if (!CopyOffProcFiles(pid, container_dir)) {
340    return false;
341  }
342
343  if (!CopyStdinToCoreFile(core_path)) {
344    return false;
345  }
346
347  bool conversion_result = RunCoreToMinidump(
348      core_path,
349      container_dir,  // procfs directory
350      minidump_path,
351      container_dir);  // temporary directory
352
353  if (conversion_result) {
354    LOG(INFO) << "Stored minidump to " << minidump_path.value();
355  }
356
357  return conversion_result;
358}
359
360bool UserCollector::ConvertAndEnqueueCrash(int pid,
361                                           const std::string &exec,
362                                           bool *out_of_capacity) {
363  FilePath crash_path;
364  if (!GetCreatedCrashDirectory(pid, &crash_path, out_of_capacity)) {
365    LOG(ERROR) << "Unable to find/create process-specific crash path";
366    return false;
367  }
368
369  // Directory like /tmp/crash_reporter/1234 which contains the
370  // procfs entries and other temporary files used during conversion.
371  FilePath container_dir(StringPrintf("/tmp/crash_reporter/%d", pid));
372  // Delete a pre-existing directory from crash reporter that may have
373  // been left around for diagnostics from a failed conversion attempt.
374  // If we don't, existing files can cause forking to fail.
375  file_util::Delete(container_dir, true);
376  std::string dump_basename = FormatDumpBasename(exec, time(NULL), pid);
377  FilePath core_path = GetCrashPath(crash_path, dump_basename, "core");
378  FilePath meta_path = GetCrashPath(crash_path, dump_basename, "meta");
379  FilePath minidump_path = GetCrashPath(crash_path, dump_basename, "dmp");
380  FilePath log_path = GetCrashPath(crash_path, dump_basename, "log");
381
382  if (GetLogContents(FilePath(kDefaultLogConfig), exec, log_path))
383    AddCrashMetaData("log", log_path.value());
384
385  if (!ConvertCoreToMinidump(pid, container_dir, core_path,
386                            minidump_path)) {
387    LOG(INFO) << "Leaving core file at " << core_path.value()
388              << " due to conversion error";
389    return false;
390  }
391
392  // Here we commit to sending this file.  We must not return false
393  // after this point or we will generate a log report as well as a
394  // crash report.
395  WriteCrashMetaData(meta_path,
396                     exec,
397                     minidump_path.value());
398
399  if (!IsDeveloperImage()) {
400    file_util::Delete(core_path, false);
401  } else {
402    LOG(INFO) << "Leaving core file at " << core_path.value()
403              << " due to developer image";
404  }
405
406  file_util::Delete(container_dir, true);
407  return true;
408}
409
410bool UserCollector::ParseCrashAttributes(const std::string &crash_attributes,
411                                         pid_t *pid, int *signal,
412                                         std::string *kernel_supplied_name) {
413  pcrecpp::RE re("(\\d+):(\\d+):(.*)");
414  return re.FullMatch(crash_attributes, pid, signal, kernel_supplied_name);
415}
416
417bool UserCollector::ShouldDump(bool has_owner_consent,
418                               bool is_developer,
419                               bool handle_chrome_crashes,
420                               const std::string &exec,
421                               std::string *reason) {
422  reason->clear();
423
424  // Treat Chrome crashes as if the user opted-out.  We stop counting Chrome
425  // crashes towards user crashes, so user crashes really mean non-Chrome
426  // user-space crashes.
427  if ((exec == "chrome" || exec == "supplied_chrome") &&
428      !handle_chrome_crashes) {
429    *reason = "ignoring - chrome crash";
430    return false;
431  }
432
433  // For developer builds, we always want to keep the crash reports unless
434  // we're testing the crash facilities themselves.  This overrides
435  // feedback.  Crash sending still obeys consent.
436  if (is_developer) {
437    *reason = "developer build - not testing - always dumping";
438    return true;
439  }
440
441  if (!has_owner_consent) {
442    *reason = "ignoring - no consent";
443    return false;
444  }
445
446  *reason = "handling";
447  return true;
448}
449
450bool UserCollector::HandleCrash(const std::string &crash_attributes,
451                                const char *force_exec) {
452  CHECK(initialized_);
453  int pid = 0;
454  int signal = 0;
455  std::string kernel_supplied_name;
456
457  if (!ParseCrashAttributes(crash_attributes, &pid, &signal,
458                            &kernel_supplied_name)) {
459    LOG(ERROR) << "Invalid parameter: --user=" <<  crash_attributes;
460    return false;
461  }
462
463  std::string exec;
464  if (force_exec) {
465    exec.assign(force_exec);
466  } else if (!GetExecutableBaseNameFromPid(pid, &exec)) {
467    // If we cannot find the exec name, use the kernel supplied name.
468    // We don't always use the kernel's since it truncates the name to
469    // 16 characters.
470    exec = StringPrintf("supplied_%s", kernel_supplied_name.c_str());
471  }
472
473  // Allow us to test the crash reporting mechanism successfully even if
474  // other parts of the system crash.
475  if (!FLAGS_filter_in.empty() &&
476      (FLAGS_filter_in == "none" ||
477       FLAGS_filter_in != exec)) {
478    // We use a different format message to make it more obvious in tests
479    // which crashes are test generated and which are real.
480    LOG(WARNING) << "Ignoring crash from " << exec << "[" << pid << "] while "
481                 << "filter_in=" << FLAGS_filter_in << ".";
482    return true;
483  }
484
485  std::string reason;
486  bool dump = ShouldDump(is_feedback_allowed_function_(),
487                         IsDeveloperImage(),
488                         ShouldHandleChromeCrashes(),
489                         exec,
490                         &reason);
491
492  LOG(WARNING) << "Received crash notification for " << exec << "[" << pid
493               << "] sig " << signal << " (" << reason << ")";
494
495  if (dump) {
496    count_crash_function_();
497
498    if (generate_diagnostics_) {
499      bool out_of_capacity = false;
500      bool convert_and_enqueue_result =
501          ConvertAndEnqueueCrash(pid, exec, &out_of_capacity);
502      if (!convert_and_enqueue_result) {
503        if (!out_of_capacity)
504          EnqueueCollectionErrorLog(pid, exec);
505        return false;
506      }
507    }
508  }
509
510  return true;
511}
512