debuggerd.cpp revision e901c1bf8ab7455ccb013244959045d4cddc124b
1/*
2 * Copyright 2006, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <stdio.h>
18#include <errno.h>
19#include <signal.h>
20#include <pthread.h>
21#include <stdarg.h>
22#include <fcntl.h>
23#include <sys/types.h>
24#include <dirent.h>
25#include <time.h>
26
27#include <sys/ptrace.h>
28#include <sys/wait.h>
29#include <elf.h>
30#include <sys/stat.h>
31#include <sys/poll.h>
32
33#include <log/logger.h>
34
35#include <cutils/sockets.h>
36#include <cutils/properties.h>
37#include <cutils/debugger.h>
38
39#include <linux/input.h>
40
41#include <private/android_filesystem_config.h>
42
43#include "backtrace.h"
44#include "getevent.h"
45#include "tombstone.h"
46#include "utility.h"
47
48struct debugger_request_t {
49  debugger_action_t action;
50  pid_t pid, tid;
51  uid_t uid, gid;
52  uintptr_t abort_msg_address;
53  int32_t original_si_code;
54};
55
56static void wait_for_user_action(pid_t pid) {
57  // Find out the name of the process that crashed.
58  char path[64];
59  snprintf(path, sizeof(path), "/proc/%d/exe", pid);
60
61  char exe[PATH_MAX];
62  int count;
63  if ((count = readlink(path, exe, sizeof(exe) - 1)) == -1) {
64    ALOGE("readlink('%s') failed: %s", path, strerror(errno));
65    strlcpy(exe, "unknown", sizeof(exe));
66  } else {
67    exe[count] = '\0';
68  }
69
70  // Turn "/system/bin/app_process" into "app_process".
71  // gdbserver doesn't cope with full paths (though we should fix that
72  // and remove this).
73  char* name = strrchr(exe, '/');
74  if (name == NULL) {
75    name = exe; // No '/' found.
76  } else {
77    ++name; // Skip the '/'.
78  }
79
80  // Explain how to attach the debugger.
81  ALOGI("********************************************************\n"
82        "* Process %d has been suspended while crashing.\n"
83        "* To attach gdbserver for a gdb connection on port 5039\n"
84        "* and start gdbclient:\n"
85        "*\n"
86        "*     gdbclient %s :5039 %d\n"
87        "*\n"
88        "* Wait for gdb to start, then press the VOLUME DOWN key\n"
89        "* to let the process continue crashing.\n"
90        "********************************************************\n",
91        pid, name, pid);
92
93  // Wait for VOLUME DOWN.
94  if (init_getevent() == 0) {
95    while (true) {
96      input_event e;
97      if (get_event(&e, -1) == 0) {
98        if (e.type == EV_KEY && e.code == KEY_VOLUMEDOWN && e.value == 0) {
99          break;
100        }
101      }
102    }
103    uninit_getevent();
104  }
105
106  ALOGI("debuggerd resuming process %d", pid);
107}
108
// Looks up the thread group id, uid and gid of thread |tid| by scanning
// /proc/<tid>/status for the "Tgid:", "Uid:" and "Gid:" lines.
//
// On success stores the first number of each line in |*out_pid|, |*out_uid|
// and |*out_gid| and returns 0. Returns -1 if the status file cannot be
// opened or if any of the three lines is missing; in the latter case some of
// the outputs may already have been written.
static int get_process_info(pid_t tid, pid_t* out_pid, uid_t* out_uid, uid_t* out_gid) {
  enum { HAVE_TGID = 1, HAVE_UID = 2, HAVE_GID = 4, HAVE_ALL = 7 };
  static const char kTgidTag[] = "Tgid:\t";
  static const char kUidTag[] = "Uid:\t";
  static const char kGidTag[] = "Gid:\t";
  const size_t kTgidLen = sizeof(kTgidTag) - 1;
  const size_t kUidLen = sizeof(kUidTag) - 1;
  const size_t kGidLen = sizeof(kGidTag) - 1;

  char status_path[64];
  snprintf(status_path, sizeof(status_path), "/proc/%d/status", tid);

  FILE* status_file = fopen(status_path, "r");
  if (status_file == NULL) {
    return -1;
  }

  int seen = 0;
  char buf[1024];
  while (fgets(buf, sizeof(buf), status_file) != NULL) {
    const size_t buf_len = strlen(buf);
    // Each tag must be followed by at least one more character.
    if (buf_len > kTgidLen && memcmp(buf, kTgidTag, kTgidLen) == 0) {
      *out_pid = atoi(buf + kTgidLen);
      seen |= HAVE_TGID;
    } else if (buf_len > kUidLen && memcmp(buf, kUidTag, kUidLen) == 0) {
      *out_uid = atoi(buf + kUidLen);
      seen |= HAVE_UID;
    } else if (buf_len > kGidLen && memcmp(buf, kGidTag, kGidLen) == 0) {
      *out_gid = atoi(buf + kGidLen);
      seen |= HAVE_GID;
    }
  }
  fclose(status_file);

  if (seen != HAVE_ALL) {
    return -1;
  }
  return 0;
}
136
137static int read_request(int fd, debugger_request_t* out_request) {
138  ucred cr;
139  socklen_t len = sizeof(cr);
140  int status = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
141  if (status != 0) {
142    ALOGE("cannot get credentials\n");
143    return -1;
144  }
145
146  ALOGV("reading tid\n");
147  fcntl(fd, F_SETFL, O_NONBLOCK);
148
149  pollfd pollfds[1];
150  pollfds[0].fd = fd;
151  pollfds[0].events = POLLIN;
152  pollfds[0].revents = 0;
153  status = TEMP_FAILURE_RETRY(poll(pollfds, 1, 3000));
154  if (status != 1) {
155    ALOGE("timed out reading tid (from pid=%d uid=%d)\n", cr.pid, cr.uid);
156    return -1;
157  }
158
159  debugger_msg_t msg;
160  memset(&msg, 0, sizeof(msg));
161  status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg)));
162  if (status < 0) {
163    ALOGE("read failure? %s (pid=%d uid=%d)\n", strerror(errno), cr.pid, cr.uid);
164    return -1;
165  }
166  if (status != sizeof(debugger_msg_t)) {
167    ALOGE("invalid crash request of size %d (from pid=%d uid=%d)\n", status, cr.pid, cr.uid);
168    return -1;
169  }
170
171  out_request->action = msg.action;
172  out_request->tid = msg.tid;
173  out_request->pid = cr.pid;
174  out_request->uid = cr.uid;
175  out_request->gid = cr.gid;
176  out_request->abort_msg_address = msg.abort_msg_address;
177  out_request->original_si_code = msg.original_si_code;
178
179  if (msg.action == DEBUGGER_ACTION_CRASH) {
180    // Ensure that the tid reported by the crashing process is valid.
181    char buf[64];
182    struct stat s;
183    snprintf(buf, sizeof buf, "/proc/%d/task/%d", out_request->pid, out_request->tid);
184    if (stat(buf, &s)) {
185      ALOGE("tid %d does not exist in pid %d. ignoring debug request\n",
186          out_request->tid, out_request->pid);
187      return -1;
188    }
189  } else if (cr.uid == 0
190            || (cr.uid == AID_SYSTEM && msg.action == DEBUGGER_ACTION_DUMP_BACKTRACE)) {
191    // Only root or system can ask us to attach to any process and dump it explicitly.
192    // However, system is only allowed to collect backtraces but cannot dump tombstones.
193    status = get_process_info(out_request->tid, &out_request->pid,
194                              &out_request->uid, &out_request->gid);
195    if (status < 0) {
196      ALOGE("tid %d does not exist. ignoring explicit dump request\n", out_request->tid);
197      return -1;
198    }
199  } else {
200    // No one else is allowed to dump arbitrary processes.
201    return -1;
202  }
203  return 0;
204}
205
206static bool should_attach_gdb(debugger_request_t* request) {
207  if (request->action == DEBUGGER_ACTION_CRASH) {
208    char value[PROPERTY_VALUE_MAX];
209    property_get("debug.db.uid", value, "-1");
210    int debug_uid = atoi(value);
211    return debug_uid >= 0 && request->uid <= (uid_t)debug_uid;
212  }
213  return false;
214}
215
216static void handle_request(int fd) {
217  ALOGV("handle_request(%d)\n", fd);
218
219  debugger_request_t request;
220  memset(&request, 0, sizeof(request));
221  int status = read_request(fd, &request);
222  if (!status) {
223    ALOGV("BOOM: pid=%d uid=%d gid=%d tid=%d\n",
224         request.pid, request.uid, request.gid, request.tid);
225
226    // At this point, the thread that made the request is blocked in
227    // a read() call.  If the thread has crashed, then this gives us
228    // time to PTRACE_ATTACH to it before it has a chance to really fault.
229    //
230    // The PTRACE_ATTACH sends a SIGSTOP to the target process, but it
231    // won't necessarily have stopped by the time ptrace() returns.  (We
232    // currently assume it does.)  We write to the file descriptor to
233    // ensure that it can run as soon as we call PTRACE_CONT below.
234    // See details in bionic/libc/linker/debugger.c, in function
235    // debugger_signal_handler().
236    if (ptrace(PTRACE_ATTACH, request.tid, 0, 0)) {
237      ALOGE("ptrace attach failed: %s\n", strerror(errno));
238    } else {
239      bool detach_failed = false;
240      bool attach_gdb = should_attach_gdb(&request);
241      if (TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1) {
242        ALOGE("failed responding to client: %s\n", strerror(errno));
243      } else {
244        char* tombstone_path = NULL;
245
246        if (request.action == DEBUGGER_ACTION_CRASH) {
247          close(fd);
248          fd = -1;
249        }
250
251        int total_sleep_time_usec = 0;
252        for (;;) {
253          int signal = wait_for_signal(request.tid, &total_sleep_time_usec);
254          if (signal < 0) {
255            break;
256          }
257
258          switch (signal) {
259            case SIGSTOP:
260              if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
261                ALOGV("stopped -- dumping to tombstone\n");
262                tombstone_path = engrave_tombstone(request.pid, request.tid,
263                                                   signal, request.original_si_code,
264                                                   request.abort_msg_address, true,
265                                                   &detach_failed, &total_sleep_time_usec);
266              } else if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) {
267                ALOGV("stopped -- dumping to fd\n");
268                dump_backtrace(fd, -1, request.pid, request.tid, &detach_failed,
269                               &total_sleep_time_usec);
270              } else {
271                ALOGV("stopped -- continuing\n");
272                status = ptrace(PTRACE_CONT, request.tid, 0, 0);
273                if (status) {
274                  ALOGE("ptrace continue failed: %s\n", strerror(errno));
275                }
276                continue; // loop again
277              }
278              break;
279
280            case SIGABRT:
281            case SIGBUS:
282            case SIGFPE:
283            case SIGILL:
284            case SIGPIPE:
285            case SIGSEGV:
286#ifdef SIGSTKFLT
287            case SIGSTKFLT:
288#endif
289            case SIGTRAP:
290              ALOGV("stopped -- fatal signal\n");
291              // Send a SIGSTOP to the process to make all of
292              // the non-signaled threads stop moving.  Without
293              // this we get a lot of "ptrace detach failed:
294              // No such process".
295              kill(request.pid, SIGSTOP);
296              // don't dump sibling threads when attaching to GDB because it
297              // makes the process less reliable, apparently...
298              tombstone_path = engrave_tombstone(request.pid, request.tid,
299                                                 signal, request.original_si_code,
300                                                 request.abort_msg_address, !attach_gdb,
301                                                 &detach_failed, &total_sleep_time_usec);
302              break;
303
304            default:
305              ALOGE("process stopped due to unexpected signal %d\n", signal);
306              break;
307          }
308          break;
309        }
310
311        if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
312          if (tombstone_path) {
313            write(fd, tombstone_path, strlen(tombstone_path));
314          }
315          close(fd);
316          fd = -1;
317        }
318        free(tombstone_path);
319      }
320
321      ALOGV("detaching\n");
322      if (attach_gdb) {
323        // stop the process so we can debug
324        kill(request.pid, SIGSTOP);
325
326        // detach so we can attach gdbserver
327        if (ptrace(PTRACE_DETACH, request.tid, 0, 0)) {
328          ALOGE("ptrace detach from %d failed: %s\n", request.tid, strerror(errno));
329          detach_failed = true;
330        }
331
332        // if debug.db.uid is set, its value indicates if we should wait
333        // for user action for the crashing process.
334        // in this case, we log a message and turn the debug LED on
335        // waiting for a gdb connection (for instance)
336        wait_for_user_action(request.pid);
337      } else {
338        // just detach
339        if (ptrace(PTRACE_DETACH, request.tid, 0, 0)) {
340          ALOGE("ptrace detach from %d failed: %s\n", request.tid, strerror(errno));
341          detach_failed = true;
342        }
343      }
344
345      // resume stopped process (so it can crash in peace).
346      kill(request.pid, SIGCONT);
347
348      // If we didn't successfully detach, we're still the parent, and the
349      // actual parent won't receive a death notification via wait(2).  At this point
350      // there's not much we can do about that.
351      if (detach_failed) {
352        ALOGE("debuggerd committing suicide to free the zombie!\n");
353        kill(getpid(), SIGKILL);
354      }
355    }
356
357  }
358  if (fd >= 0) {
359    close(fd);
360  }
361}
362
363static int do_server() {
364  // debuggerd crashes can't be reported to debuggerd.
365  // Reset all of the crash handlers.
366  signal(SIGABRT, SIG_DFL);
367  signal(SIGBUS, SIG_DFL);
368  signal(SIGFPE, SIG_DFL);
369  signal(SIGILL, SIG_DFL);
370  signal(SIGSEGV, SIG_DFL);
371#ifdef SIGSTKFLT
372  signal(SIGSTKFLT, SIG_DFL);
373#endif
374  signal(SIGTRAP, SIG_DFL);
375
376  // Ignore failed writes to closed sockets
377  signal(SIGPIPE, SIG_IGN);
378
379  int logsocket = socket_local_client("logd", ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_DGRAM);
380  if (logsocket < 0) {
381    logsocket = -1;
382  } else {
383    fcntl(logsocket, F_SETFD, FD_CLOEXEC);
384  }
385
386  struct sigaction act;
387  act.sa_handler = SIG_DFL;
388  sigemptyset(&act.sa_mask);
389  sigaddset(&act.sa_mask,SIGCHLD);
390  act.sa_flags = SA_NOCLDWAIT;
391  sigaction(SIGCHLD, &act, 0);
392
393  int s = socket_local_server(DEBUGGER_SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM);
394  if (s < 0)
395    return 1;
396  fcntl(s, F_SETFD, FD_CLOEXEC);
397
398  ALOGI("debuggerd: " __DATE__ " " __TIME__ "\n");
399
400  for (;;) {
401    sockaddr addr;
402    socklen_t alen = sizeof(addr);
403
404    ALOGV("waiting for connection\n");
405    int fd = accept(s, &addr, &alen);
406    if (fd < 0) {
407      ALOGV("accept failed: %s\n", strerror(errno));
408      continue;
409    }
410
411    fcntl(fd, F_SETFD, FD_CLOEXEC);
412
413    handle_request(fd);
414  }
415  return 0;
416}
417
418static int do_explicit_dump(pid_t tid, bool dump_backtrace) {
419  fprintf(stdout, "Sending request to dump task %d.\n", tid);
420
421  if (dump_backtrace) {
422    fflush(stdout);
423    if (dump_backtrace_to_file(tid, fileno(stdout)) < 0) {
424      fputs("Error dumping backtrace.\n", stderr);
425      return 1;
426    }
427  } else {
428    char tombstone_path[PATH_MAX];
429    if (dump_tombstone(tid, tombstone_path, sizeof(tombstone_path)) < 0) {
430      fputs("Error dumping tombstone.\n", stderr);
431      return 1;
432    }
433    fprintf(stderr, "Tombstone written to: %s\n", tombstone_path);
434  }
435  return 0;
436}
437
// Prints the command-line help text to stderr.
static void usage() {
  static const char kUsageText[] =
      "Usage: -b [<tid>]\n"
      "  -b dump backtrace to console, otherwise dump full tombstone file\n"
      "\n"
      "If tid specified, sends a request to debuggerd to dump that task.\n"
      "Otherwise, starts the debuggerd server.\n";
  fputs(kUsageText, stderr);
}
445
446int main(int argc, char** argv) {
447  if (argc == 1) {
448    return do_server();
449  }
450
451  bool dump_backtrace = false;
452  bool have_tid = false;
453  pid_t tid = 0;
454  for (int i = 1; i < argc; i++) {
455    if (!strcmp(argv[i], "-b")) {
456      dump_backtrace = true;
457    } else if (!have_tid) {
458      tid = atoi(argv[i]);
459      have_tid = true;
460    } else {
461      usage();
462      return 1;
463    }
464  }
465  if (!have_tid) {
466    usage();
467    return 1;
468  }
469  return do_explicit_dump(tid, dump_backtrace);
470}
471