debuggerd.cpp revision 787a2a697e0affb6e2c5785204e70a586d5696a0
1/* 2 * Copyright 2006, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <stdio.h> 18#include <errno.h> 19#include <signal.h> 20#include <pthread.h> 21#include <stdarg.h> 22#include <fcntl.h> 23#include <sys/types.h> 24#include <dirent.h> 25#include <time.h> 26 27#include <sys/ptrace.h> 28#include <sys/wait.h> 29#include <elf.h> 30#include <sys/stat.h> 31#include <sys/poll.h> 32 33#include <log/logger.h> 34 35#include <cutils/sockets.h> 36#include <cutils/properties.h> 37#include <cutils/debugger.h> 38 39#include <linux/input.h> 40 41#include <private/android_filesystem_config.h> 42 43#include "backtrace.h" 44#include "getevent.h" 45#include "tombstone.h" 46#include "utility.h" 47 48struct debugger_request_t { 49 debugger_action_t action; 50 pid_t pid, tid; 51 uid_t uid, gid; 52 uintptr_t abort_msg_address; 53 int32_t original_si_code; 54}; 55 56static void wait_for_user_action(pid_t pid) { 57 // Find out the name of the process that crashed. 58 char path[64]; 59 snprintf(path, sizeof(path), "/proc/%d/exe", pid); 60 61 char exe[PATH_MAX]; 62 int count; 63 if ((count = readlink(path, exe, sizeof(exe) - 1)) == -1) { 64 ALOGE("readlink('%s') failed: %s", path, strerror(errno)); 65 strlcpy(exe, "unknown", sizeof(exe)); 66 } else { 67 exe[count] = '\0'; 68 } 69 70 // Turn "/system/bin/app_process" into "app_process". 71 // gdbserver doesn't cope with full paths (though we should fix that 72 // and remove this). 73 char* name = strrchr(exe, '/'); 74 if (name == NULL) { 75 name = exe; // No '/' found. 76 } else { 77 ++name; // Skip the '/'. 78 } 79 80 // Explain how to attach the debugger. 81 ALOGI("********************************************************\n" 82 "* Process %d has been suspended while crashing.\n" 83 "* To attach gdbserver for a gdb connection on port 5039\n" 84 "* and start gdbclient:\n" 85 "*\n" 86 "* gdbclient %s :5039 %d\n" 87 "*\n" 88 "* Wait for gdb to start, then press the VOLUME DOWN key\n" 89 "* to let the process continue crashing.\n" 90 "********************************************************\n", 91 pid, name, pid); 92 93 // Wait for VOLUME DOWN. 94 if (init_getevent() == 0) { 95 while (true) { 96 input_event e; 97 if (get_event(&e, -1) == 0) { 98 if (e.type == EV_KEY && e.code == KEY_VOLUMEDOWN && e.value == 0) { 99 break; 100 } 101 } 102 } 103 uninit_getevent(); 104 } 105 106 ALOGI("debuggerd resuming process %d", pid); 107} 108 109static int get_process_info(pid_t tid, pid_t* out_pid, uid_t* out_uid, uid_t* out_gid) { 110 char path[64]; 111 snprintf(path, sizeof(path), "/proc/%d/status", tid); 112 113 FILE* fp = fopen(path, "r"); 114 if (!fp) { 115 return -1; 116 } 117 118 int fields = 0; 119 char line[1024]; 120 while (fgets(line, sizeof(line), fp)) { 121 size_t len = strlen(line); 122 if (len > 6 && !memcmp(line, "Tgid:\t", 6)) { 123 *out_pid = atoi(line + 6); 124 fields |= 1; 125 } else if (len > 5 && !memcmp(line, "Uid:\t", 5)) { 126 *out_uid = atoi(line + 5); 127 fields |= 2; 128 } else if (len > 5 && !memcmp(line, "Gid:\t", 5)) { 129 *out_gid = atoi(line + 5); 130 fields |= 4; 131 } 132 } 133 fclose(fp); 134 return fields == 7 ? 0 : -1; 135} 136 137static int read_request(int fd, debugger_request_t* out_request) { 138 ucred cr; 139 socklen_t len = sizeof(cr); 140 int status = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len); 141 if (status != 0) { 142 ALOGE("cannot get credentials\n"); 143 return -1; 144 } 145 146 ALOGV("reading tid\n"); 147 fcntl(fd, F_SETFL, O_NONBLOCK); 148 149 pollfd pollfds[1]; 150 pollfds[0].fd = fd; 151 pollfds[0].events = POLLIN; 152 pollfds[0].revents = 0; 153 status = TEMP_FAILURE_RETRY(poll(pollfds, 1, 3000)); 154 if (status != 1) { 155 ALOGE("timed out reading tid (from pid=%d uid=%d)\n", cr.pid, cr.uid); 156 return -1; 157 } 158 159 debugger_msg_t msg; 160 memset(&msg, 0, sizeof(msg)); 161 status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg))); 162 if (status < 0) { 163 ALOGE("read failure? %s (pid=%d uid=%d)\n", strerror(errno), cr.pid, cr.uid); 164 return -1; 165 } 166 if (status != sizeof(debugger_msg_t)) { 167 ALOGE("invalid crash request of size %d (from pid=%d uid=%d)\n", status, cr.pid, cr.uid); 168 return -1; 169 } 170 171 out_request->action = msg.action; 172 out_request->tid = msg.tid; 173 out_request->pid = cr.pid; 174 out_request->uid = cr.uid; 175 out_request->gid = cr.gid; 176 out_request->abort_msg_address = msg.abort_msg_address; 177 out_request->original_si_code = msg.original_si_code; 178 179 if (msg.action == DEBUGGER_ACTION_CRASH) { 180 // Ensure that the tid reported by the crashing process is valid. 181 char buf[64]; 182 struct stat s; 183 snprintf(buf, sizeof buf, "/proc/%d/task/%d", out_request->pid, out_request->tid); 184 if (stat(buf, &s)) { 185 ALOGE("tid %d does not exist in pid %d. ignoring debug request\n", 186 out_request->tid, out_request->pid); 187 return -1; 188 } 189 } else if (cr.uid == 0 190 || (cr.uid == AID_SYSTEM && msg.action == DEBUGGER_ACTION_DUMP_BACKTRACE)) { 191 // Only root or system can ask us to attach to any process and dump it explicitly. 192 // However, system is only allowed to collect backtraces but cannot dump tombstones. 193 status = get_process_info(out_request->tid, &out_request->pid, 194 &out_request->uid, &out_request->gid); 195 if (status < 0) { 196 ALOGE("tid %d does not exist. ignoring explicit dump request\n", out_request->tid); 197 return -1; 198 } 199 } else { 200 // No one else is allowed to dump arbitrary processes. 201 return -1; 202 } 203 return 0; 204} 205 206static bool should_attach_gdb(debugger_request_t* request) { 207 if (request->action == DEBUGGER_ACTION_CRASH) { 208 char value[PROPERTY_VALUE_MAX]; 209 property_get("debug.db.uid", value, "-1"); 210 int debug_uid = atoi(value); 211 return debug_uid >= 0 && request->uid <= (uid_t)debug_uid; 212 } 213 return false; 214} 215 216static void handle_request(int fd) { 217 ALOGV("handle_request(%d)\n", fd); 218 219 debugger_request_t request; 220 memset(&request, 0, sizeof(request)); 221 int status = read_request(fd, &request); 222 if (!status) { 223 ALOGV("BOOM: pid=%d uid=%d gid=%d tid=%d\n", 224 request.pid, request.uid, request.gid, request.tid); 225 226 // At this point, the thread that made the request is blocked in 227 // a read() call. If the thread has crashed, then this gives us 228 // time to PTRACE_ATTACH to it before it has a chance to really fault. 229 // 230 // The PTRACE_ATTACH sends a SIGSTOP to the target process, but it 231 // won't necessarily have stopped by the time ptrace() returns. (We 232 // currently assume it does.) We write to the file descriptor to 233 // ensure that it can run as soon as we call PTRACE_CONT below. 234 // See details in bionic/libc/linker/debugger.c, in function 235 // debugger_signal_handler(). 236 if (ptrace(PTRACE_ATTACH, request.tid, 0, 0)) { 237 ALOGE("ptrace attach failed: %s\n", strerror(errno)); 238 } else { 239 bool detach_failed = false; 240 bool attach_gdb = should_attach_gdb(&request); 241 if (TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1) { 242 ALOGE("failed responding to client: %s\n", strerror(errno)); 243 } else { 244 char* tombstone_path = NULL; 245 246 if (request.action == DEBUGGER_ACTION_CRASH) { 247 close(fd); 248 fd = -1; 249 } 250 251 int total_sleep_time_usec = 0; 252 for (;;) { 253 int signal = wait_for_signal(request.tid, &total_sleep_time_usec); 254 if (signal < 0) { 255 break; 256 } 257 258 switch (signal) { 259 case SIGSTOP: 260 if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) { 261 ALOGV("stopped -- dumping to tombstone\n"); 262 tombstone_path = engrave_tombstone(request.pid, request.tid, 263 signal, request.original_si_code, 264 request.abort_msg_address, true, 265 &detach_failed, &total_sleep_time_usec); 266 } else if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) { 267 ALOGV("stopped -- dumping to fd\n"); 268 dump_backtrace(fd, -1, request.pid, request.tid, &detach_failed, 269 &total_sleep_time_usec); 270 } else { 271 ALOGV("stopped -- continuing\n"); 272 status = ptrace(PTRACE_CONT, request.tid, 0, 0); 273 if (status) { 274 ALOGE("ptrace continue failed: %s\n", strerror(errno)); 275 } 276 continue; // loop again 277 } 278 break; 279 280 case SIGABRT: 281 case SIGBUS: 282 case SIGFPE: 283 case SIGILL: 284 case SIGPIPE: 285 case SIGSEGV: 286#ifdef SIGSTKFLT 287 case SIGSTKFLT: 288#endif 289 case SIGTRAP: 290 ALOGV("stopped -- fatal signal\n"); 291 // Send a SIGSTOP to the process to make all of 292 // the non-signaled threads stop moving. Without 293 // this we get a lot of "ptrace detach failed: 294 // No such process". 295 kill(request.pid, SIGSTOP); 296 // don't dump sibling threads when attaching to GDB because it 297 // makes the process less reliable, apparently... 298 tombstone_path = engrave_tombstone(request.pid, request.tid, 299 signal, request.original_si_code, 300 request.abort_msg_address, !attach_gdb, 301 &detach_failed, &total_sleep_time_usec); 302 break; 303 304 default: 305 ALOGE("process stopped due to unexpected signal %d\n", signal); 306 break; 307 } 308 break; 309 } 310 311 if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) { 312 if (tombstone_path) { 313 write(fd, tombstone_path, strlen(tombstone_path)); 314 } 315 close(fd); 316 fd = -1; 317 } 318 free(tombstone_path); 319 } 320 321 ALOGV("detaching\n"); 322 if (attach_gdb) { 323 // stop the process so we can debug 324 kill(request.pid, SIGSTOP); 325 326 // detach so we can attach gdbserver 327 if (ptrace(PTRACE_DETACH, request.tid, 0, 0)) { 328 ALOGE("ptrace detach from %d failed: %s\n", request.tid, strerror(errno)); 329 detach_failed = true; 330 } 331 332 // if debug.db.uid is set, its value indicates if we should wait 333 // for user action for the crashing process. 334 // in this case, we log a message and turn the debug LED on 335 // waiting for a gdb connection (for instance) 336 wait_for_user_action(request.pid); 337 } else { 338 // just detach 339 if (ptrace(PTRACE_DETACH, request.tid, 0, 0)) { 340 ALOGE("ptrace detach from %d failed: %s\n", request.tid, strerror(errno)); 341 detach_failed = true; 342 } 343 } 344 345 // resume stopped process (so it can crash in peace). 346 kill(request.pid, SIGCONT); 347 348 // If we didn't successfully detach, we're still the parent, and the 349 // actual parent won't receive a death notification via wait(2). At this point 350 // there's not much we can do about that. 351 if (detach_failed) { 352 ALOGE("debuggerd committing suicide to free the zombie!\n"); 353 kill(getpid(), SIGKILL); 354 } 355 } 356 357 } 358 if (fd >= 0) { 359 close(fd); 360 } 361} 362 363static int do_server() { 364 // debuggerd crashes can't be reported to debuggerd. 365 // Reset all of the crash handlers. 366 signal(SIGABRT, SIG_DFL); 367 signal(SIGBUS, SIG_DFL); 368 signal(SIGFPE, SIG_DFL); 369 signal(SIGILL, SIG_DFL); 370 signal(SIGSEGV, SIG_DFL); 371#ifdef SIGSTKFLT 372 signal(SIGSTKFLT, SIG_DFL); 373#endif 374 signal(SIGTRAP, SIG_DFL); 375 376 // Ignore failed writes to closed sockets 377 signal(SIGPIPE, SIG_IGN); 378 379 int logsocket = socket_local_client("logd", ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_DGRAM); 380 if (logsocket < 0) { 381 logsocket = -1; 382 } else { 383 fcntl(logsocket, F_SETFD, FD_CLOEXEC); 384 } 385 386 struct sigaction act; 387 act.sa_handler = SIG_DFL; 388 sigemptyset(&act.sa_mask); 389 sigaddset(&act.sa_mask,SIGCHLD); 390 act.sa_flags = SA_NOCLDWAIT; 391 sigaction(SIGCHLD, &act, 0); 392 393 int s = socket_local_server(DEBUGGER_SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM); 394 if (s < 0) 395 return 1; 396 fcntl(s, F_SETFD, FD_CLOEXEC); 397 398 ALOGI("debuggerd: " __DATE__ " " __TIME__ "\n"); 399 400 for (;;) { 401 sockaddr addr; 402 socklen_t alen = sizeof(addr); 403 404 ALOGV("waiting for connection\n"); 405 int fd = accept(s, &addr, &alen); 406 if (fd < 0) { 407 ALOGV("accept failed: %s\n", strerror(errno)); 408 continue; 409 } 410 411 fcntl(fd, F_SETFD, FD_CLOEXEC); 412 413 handle_request(fd); 414 } 415 return 0; 416} 417 418static int do_explicit_dump(pid_t tid, bool dump_backtrace) { 419 fprintf(stdout, "Sending request to dump task %d.\n", tid); 420 421 if (dump_backtrace) { 422 fflush(stdout); 423 if (dump_backtrace_to_file(tid, fileno(stdout)) < 0) { 424 fputs("Error dumping backtrace.\n", stderr); 425 return 1; 426 } 427 } else { 428 char tombstone_path[PATH_MAX]; 429 if (dump_tombstone(tid, tombstone_path, sizeof(tombstone_path)) < 0) { 430 fputs("Error dumping tombstone.\n", stderr); 431 return 1; 432 } 433 fprintf(stderr, "Tombstone written to: %s\n", tombstone_path); 434 } 435 return 0; 436} 437 438static void usage() { 439 fputs("Usage: -b [<tid>]\n" 440 " -b dump backtrace to console, otherwise dump full tombstone file\n" 441 "\n" 442 "If tid specified, sends a request to debuggerd to dump that task.\n" 443 "Otherwise, starts the debuggerd server.\n", stderr); 444} 445 446int main(int argc, char** argv) { 447 if (argc == 1) { 448 return do_server(); 449 } 450 451 bool dump_backtrace = false; 452 bool have_tid = false; 453 pid_t tid = 0; 454 for (int i = 1; i < argc; i++) { 455 if (!strcmp(argv[i], "-b")) { 456 dump_backtrace = true; 457 } else if (!have_tid) { 458 tid = atoi(argv[i]); 459 have_tid = true; 460 } else { 461 usage(); 462 return 1; 463 } 464 } 465 if (!have_tid) { 466 usage(); 467 return 1; 468 } 469 return do_explicit_dump(tid, dump_backtrace); 470} 471