1/*
2 * Copyright 2016, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <unordered_map>
#include <utility>
29
30#include <event2/event.h>
31#include <event2/listener.h>
32#include <event2/thread.h>
33
34#include <android-base/logging.h>
35#include <android-base/properties.h>
36#include <android-base/stringprintf.h>
37#include <android-base/unique_fd.h>
38#include <cutils/sockets.h>
39
40#include "debuggerd/handler.h"
41#include "dump_type.h"
42#include "protocol.h"
43#include "util.h"
44
45#include "intercept_manager.h"
46
47using android::base::GetIntProperty;
48using android::base::StringPrintf;
49using android::base::unique_fd;
50
51static InterceptManager* intercept_manager;
52
53enum CrashStatus {
54  kCrashStatusRunning,
55  kCrashStatusQueued,
56};
57
58// Ownership of Crash is a bit messy.
59// It's either owned by an active event that must have a timeout, or owned by
60// queued_requests, in the case that multiple crashes come in at the same time.
61struct Crash {
62  ~Crash() { event_free(crash_event); }
63
64  unique_fd crash_fd;
65  pid_t crash_pid;
66  event* crash_event = nullptr;
67  std::string crash_path;
68
69  DebuggerdDumpType crash_type;
70};
71
72class CrashQueue {
73 public:
74  CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
75             size_t max_concurrent_dumps)
76      : file_name_prefix_(file_name_prefix),
77        dir_path_(dir_path),
78        dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
79        max_artifacts_(max_artifacts),
80        next_artifact_(0),
81        max_concurrent_dumps_(max_concurrent_dumps),
82        num_concurrent_dumps_(0) {
83    if (dir_fd_ == -1) {
84      PLOG(FATAL) << "failed to open directory: " << dir_path;
85    }
86
87    // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
88    // same filename could be handed out to multiple processes.
89    CHECK(max_artifacts_ > max_concurrent_dumps_);
90
91    find_oldest_artifact();
92  }
93
94  static CrashQueue* for_crash(const Crash* crash) {
95    return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
96  }
97
98  static CrashQueue* for_tombstones() {
99    static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
100                            GetIntProperty("tombstoned.max_tombstone_count", 10),
101                            1 /* max_concurrent_dumps */);
102    return &queue;
103  }
104
105  static CrashQueue* for_anrs() {
106    static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
107                            GetIntProperty("tombstoned.max_anr_count", 64),
108                            4 /* max_concurrent_dumps */);
109    return &queue;
110  }
111
112  std::pair<unique_fd, std::string> get_output() {
113    unique_fd result;
114    std::string file_name = StringPrintf("%s%02d", file_name_prefix_.c_str(), next_artifact_);
115
116    // Unlink and create the file, instead of using O_TRUNC, to avoid two processes
117    // interleaving their output in case we ever get into that situation.
118    if (unlinkat(dir_fd_, file_name.c_str(), 0) != 0 && errno != ENOENT) {
119      PLOG(FATAL) << "failed to unlink tombstone at " << dir_path_ << "/" << file_name;
120    }
121
122    result.reset(openat(dir_fd_, file_name.c_str(),
123                        O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640));
124    if (result == -1) {
125      PLOG(FATAL) << "failed to create tombstone at " << dir_path_ << "/" << file_name;
126    }
127
128    next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
129    return {std::move(result), dir_path_ + "/" + file_name};
130  }
131
132  bool maybe_enqueue_crash(Crash* crash) {
133    if (num_concurrent_dumps_ == max_concurrent_dumps_) {
134      queued_requests_.push_back(crash);
135      return true;
136    }
137
138    return false;
139  }
140
141  void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
142    while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
143      Crash* next_crash = queued_requests_.front();
144      queued_requests_.pop_front();
145      handler(next_crash);
146    }
147  }
148
149  void on_crash_started() { ++num_concurrent_dumps_; }
150
151  void on_crash_completed() { --num_concurrent_dumps_; }
152
153 private:
154  void find_oldest_artifact() {
155    size_t oldest_tombstone = 0;
156    time_t oldest_time = std::numeric_limits<time_t>::max();
157
158    for (size_t i = 0; i < max_artifacts_; ++i) {
159      std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
160      struct stat st;
161      if (stat(path.c_str(), &st) != 0) {
162        if (errno == ENOENT) {
163          oldest_tombstone = i;
164          break;
165        } else {
166          PLOG(ERROR) << "failed to stat " << path;
167          continue;
168        }
169      }
170
171      if (st.st_mtime < oldest_time) {
172        oldest_tombstone = i;
173        oldest_time = st.st_mtime;
174      }
175    }
176
177    next_artifact_ = oldest_tombstone;
178  }
179
180  const std::string file_name_prefix_;
181
182  const std::string dir_path_;
183  const int dir_fd_;
184
185  const size_t max_artifacts_;
186  int next_artifact_;
187
188  const size_t max_concurrent_dumps_;
189  size_t num_concurrent_dumps_;
190
191  std::deque<Crash*> queued_requests_;
192
193  DISALLOW_COPY_AND_ASSIGN(CrashQueue);
194};
195
196// Whether java trace dumps are produced via tombstoned.
197static constexpr bool kJavaTraceDumpsEnabled = true;
198
199// Forward declare the callbacks so they can be placed in a sensible order.
200static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
201static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
202static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
203
204static void perform_request(Crash* crash) {
205  unique_fd output_fd;
206  if (!intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd)) {
207    std::tie(output_fd, crash->crash_path) = CrashQueue::for_crash(crash)->get_output();
208  }
209
210  TombstonedCrashPacket response = {
211    .packet_type = CrashPacketType::kPerformDump
212  };
213  ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
214  if (rc == -1) {
215    PLOG(WARNING) << "failed to send response to CrashRequest";
216    goto fail;
217  } else if (rc != sizeof(response)) {
218    PLOG(WARNING) << "crash socket write returned short";
219    goto fail;
220  } else {
221    // TODO: Make this configurable by the interceptor?
222    struct timeval timeout = { 10, 0 };
223
224    event_base* base = event_get_base(crash->crash_event);
225    event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
226                 crash_completed_cb, crash);
227    event_add(crash->crash_event, &timeout);
228  }
229
230  CrashQueue::for_crash(crash)->on_crash_started();
231  return;
232
233fail:
234  delete crash;
235}
236
237static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
238                            void*) {
239  event_base* base = evconnlistener_get_base(listener);
240  Crash* crash = new Crash();
241
242  // TODO: Make sure that only java crashes come in on the java socket
243  // and only native crashes on the native socket.
244  struct timeval timeout = { 1, 0 };
245  event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
246  crash->crash_fd.reset(sockfd);
247  crash->crash_event = crash_event;
248  event_add(crash_event, &timeout);
249}
250
251static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
252  ssize_t rc;
253  Crash* crash = static_cast<Crash*>(arg);
254
255  TombstonedCrashPacket request = {};
256
257  if ((ev & EV_TIMEOUT) != 0) {
258    LOG(WARNING) << "crash request timed out";
259    goto fail;
260  } else if ((ev & EV_READ) == 0) {
261    LOG(WARNING) << "tombstoned received unexpected event from crash socket";
262    goto fail;
263  }
264
265  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
266  if (rc == -1) {
267    PLOG(WARNING) << "failed to read from crash socket";
268    goto fail;
269  } else if (rc != sizeof(request)) {
270    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
271                 << sizeof(request) << ")";
272    goto fail;
273  }
274
275  if (request.packet_type != CrashPacketType::kDumpRequest) {
276    LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received  "
277                 << StringPrintf("%#2hhX", request.packet_type);
278    goto fail;
279  }
280
281  crash->crash_type = request.packet.dump_request.dump_type;
282  if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
283    LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
284    goto fail;
285  }
286
287  if (crash->crash_type != kDebuggerdJavaBacktrace) {
288    crash->crash_pid = request.packet.dump_request.pid;
289  } else {
290    // Requests for java traces are sent from untrusted processes, so we
291    // must not trust the PID sent down with the request. Instead, we ask the
292    // kernel.
293    ucred cr = {};
294    socklen_t len = sizeof(cr);
295    int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
296    if (ret != 0) {
297      PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
298      goto fail;
299    }
300
301    crash->crash_pid = cr.pid;
302  }
303
304  LOG(INFO) << "received crash request for pid " << crash->crash_pid;
305
306  if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(crash)) {
307    LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
308  } else {
309    perform_request(crash);
310  }
311
312  return;
313
314fail:
315  delete crash;
316}
317
318static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
319  ssize_t rc;
320  Crash* crash = static_cast<Crash*>(arg);
321  TombstonedCrashPacket request = {};
322
323  CrashQueue::for_crash(crash)->on_crash_completed();
324
325  if ((ev & EV_READ) == 0) {
326    goto fail;
327  }
328
329  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
330  if (rc == -1) {
331    PLOG(WARNING) << "failed to read from crash socket";
332    goto fail;
333  } else if (rc != sizeof(request)) {
334    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
335                 << sizeof(request) << ")";
336    goto fail;
337  }
338
339  if (request.packet_type != CrashPacketType::kCompletedDump) {
340    LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
341                 << uint32_t(request.packet_type);
342    goto fail;
343  }
344
345  if (!crash->crash_path.empty()) {
346    if (crash->crash_type == kDebuggerdJavaBacktrace) {
347      LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << crash->crash_path;
348    } else {
349      // NOTE: Several tools parse this log message to figure out where the
350      // tombstone associated with a given native crash was written. Any changes
351      // to this message must be carefully considered.
352      LOG(ERROR) << "Tombstone written to: " << crash->crash_path;
353    }
354  }
355
356fail:
357  CrashQueue* queue = CrashQueue::for_crash(crash);
358  delete crash;
359
360  // If there's something queued up, let them proceed.
361  queue->maybe_dequeue_crashes(perform_request);
362}
363
364int main(int, char* []) {
365  umask(0137);
366
367  // Don't try to connect to ourselves if we crash.
368  struct sigaction action = {};
369  action.sa_handler = [](int signal) {
370    LOG(ERROR) << "received fatal signal " << signal;
371    _exit(1);
372  };
373  debuggerd_register_handlers(&action);
374
375  int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
376  int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
377
378  if (intercept_socket == -1 || crash_socket == -1) {
379    PLOG(FATAL) << "failed to get socket from init";
380  }
381
382  evutil_make_socket_nonblocking(intercept_socket);
383  evutil_make_socket_nonblocking(crash_socket);
384
385  event_base* base = event_base_new();
386  if (!base) {
387    LOG(FATAL) << "failed to create event_base";
388  }
389
390  intercept_manager = new InterceptManager(base, intercept_socket);
391
392  evconnlistener* tombstone_listener = evconnlistener_new(
393      base, crash_accept_cb, CrashQueue::for_tombstones(), -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
394  if (!tombstone_listener) {
395    LOG(FATAL) << "failed to create evconnlistener for tombstones.";
396  }
397
398  if (kJavaTraceDumpsEnabled) {
399    const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
400    if (java_trace_socket == -1) {
401      PLOG(FATAL) << "failed to get socket from init";
402    }
403
404    evutil_make_socket_nonblocking(java_trace_socket);
405    evconnlistener* java_trace_listener = evconnlistener_new(
406        base, crash_accept_cb, CrashQueue::for_anrs(), -1, LEV_OPT_CLOSE_ON_FREE, java_trace_socket);
407    if (!java_trace_listener) {
408      LOG(FATAL) << "failed to create evconnlistener for java traces.";
409    }
410  }
411
412  LOG(INFO) << "tombstoned successfully initialized";
413  event_base_dispatch(base);
414}
415