1/*
2 * Copyright (c) 2015, The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *   * Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 *   * Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer
12 *     in the documentation and/or other materials provided with the
13 *     distribution.
14 *   * Neither the name of Google, Inc. nor the names of its contributors
15 *     may be used to endorse or promote products derived from this
16 *     software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <binder/IBinder.h>
33#include <binder/IServiceManager.h>
34#include <binder/Parcel.h>
35
36#include <ctime>
37#include <cutils/properties.h>
38#include <signal.h>
39#include <stdbool.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43
44#include <sys/resource.h>
45#include <sys/stat.h>
46#include <sys/time.h>
47#include <sys/types.h>
48#include <unistd.h>
49
50#include <utils/Log.h>
51#include <utils/String8.h>
52#include <utils/Trace.h>
53#include <zlib.h>
54
55using namespace android;
56
57#ifdef LOG_TAG
58#undef LOG_TAG
59#endif
60
61#define LOG_TAG "anrdaemon"
62
63static const int check_period = 1;              // in sec
64static const int tracing_check_period = 500000; // in micro sec
65static const int cpu_stat_entries = 7;          // number of cpu stat entries
66static const int min_buffer_size = 16;
67static const int max_buffer_size = 2048;
68static const char *min_buffer_size_str = "16";
69static const char *max_buffer_size_str = "2048";
70static const int time_buf_size = 20;
71static const int path_buf_size = 60;
72
73typedef struct cpu_stat {
74    unsigned long utime, ntime, stime, itime;
75    unsigned long iowtime, irqtime, sirqtime, steal;
76    unsigned long total;
77} cpu_stat_t;
78
79/*
80 * Logging on/off threshold.
81 * Uint: 0.01%; default to 99.90% cpu.
82 */
83static int idle_threshold = 10;
84
85static bool quit = false;
86static bool suspend= false;
87static bool dump_requested = false;
88static bool err = false;
89static char err_msg[100];
90static bool tracing = false;
91
92static const char *buf_size_kb = "2048";
93static const char *apps = "";
94static uint64_t tag = 0;
95
96static cpu_stat_t new_cpu;
97static cpu_stat_t old_cpu;
98
99/* Log certain kernel activity when enabled */
100static bool log_sched = false;
101static bool log_stack = false;
102static bool log_irq   = false;
103static bool log_sync  = false;
104static bool log_workq = false;
105
106/* Paths for debugfs controls*/
107static const char* dfs_trace_output_path =
108    "/d/tracing/trace";
109static const char* dfs_irq_path =
110    "/d/tracing/events/irq/enable";
111static const char* dfs_sync_path =
112    "/d/tracing/events/sync/enable";
113static const char* dfs_workq_path =
114    "/d/tracing/events/workqueue/enable";
115static const char* dfs_stack_path =
116    "/d/tracing/options/stacktrace";
117static const char* dfs_sched_switch_path =
118    "/d/tracing/events/sched/sched_switch/enable";
119static const char* dfs_sched_wakeup_path =
120    "/d/tracing/events/sched/sched_wakeup/enable";
121static const char* dfs_control_path =
122    "/d/tracing/tracing_on";
123static const char* dfs_buffer_size_path =
124    "/d/tracing/buffer_size_kb";
125static const char* dfs_tags_property = "debug.atrace.tags.enableflags";
126static const char* dfs_apps_property = "debug.atrace.app_cmdlines";
127
128/*
129 * Read accumulated cpu data from /proc/stat
130 */
131static void get_cpu_stat(cpu_stat_t *cpu) {
132    FILE *fp = NULL;
133    const char *params = "cpu  %lu %lu %lu %lu %lu %lu %lu %*d %*d %*d\n";
134
135    if ((fp = fopen("/proc/stat", "r")) == NULL) {
136        err = true;
137        sprintf(err_msg, "can't read from /proc/stat with errno %d", errno);
138    } else {
139        if (fscanf(fp, params, &cpu->utime, &cpu->ntime,
140                &cpu->stime, &cpu->itime, &cpu->iowtime, &cpu->irqtime,
141                &cpu->sirqtime) != cpu_stat_entries) {
142            /*
143             * If failed in getting status, new_cpu won't be updated and
144             * is_heavy_loaded() will return false.
145             */
146            ALOGE("Error in getting cpu status. Skipping this check.");
147            return;
148        }
149
150        cpu->total = cpu->utime + cpu->ntime + cpu->stime + cpu->itime
151            + cpu->iowtime + cpu->irqtime + cpu->sirqtime;
152
153        fclose(fp);
154    }
155}
156
157/*
158 * Calculate cpu usage in the past interval.
159 * If tracing is on, increase the idle threshold by 1.00% so that we do not
160 * turn on and off tracing frequently whe the cpu load is right close to
161 * threshold.
162 */
163static bool is_heavy_load(void) {
164    unsigned long diff_idle, diff_total;
165    int threshold = idle_threshold + (tracing?100:0);
166    get_cpu_stat(&new_cpu);
167    diff_idle = new_cpu.itime - old_cpu.itime;
168    diff_total = new_cpu.total - old_cpu.total;
169    old_cpu = new_cpu;
170    return (diff_idle * 10000 < diff_total * threshold);
171}
172
173/*
174 * Force the userland processes to refresh their property for logging.
175 */
176static void dfs_poke_binder(void) {
177    sp<IServiceManager> sm = defaultServiceManager();
178    Vector<String16> services = sm->listServices();
179    for (size_t i = 0; i < services.size(); i++) {
180        sp<IBinder> obj = sm->checkService(services[i]);
181        if (obj != NULL) {
182            Parcel data;
183            obj->transact(IBinder::SYSPROPS_TRANSACTION, data, NULL, 0);
184        }
185    }
186}
187
188/*
189 * Enable/disable a debugfs property by writing 0/1 to its path.
190 */
191static int dfs_enable(bool enable, const char* path) {
192    int fd = open(path, O_WRONLY);
193    if (fd == -1) {
194        err = true;
195        sprintf(err_msg, "Can't open %s. Error: %d", path, errno);
196        return -1;
197    }
198    const char* control = (enable?"1":"0");
199    ssize_t len = strlen(control);
200    int max_try = 10; // Fail if write was interrupted for 10 times
201    while (write(fd, control, len) != len) {
202        if (errno == EINTR && max_try-- > 0) {
203            usleep(100);
204            continue;
205        }
206
207        err = true;
208        sprintf(err_msg, "Error %d in writing to %s.", errno, path);
209    }
210    close(fd);
211    return (err?-1:0);
212}
213
214/*
215 * Set the userland tracing properties.
216 */
217static void dfs_set_property(uint64_t mtag, const char* mapp, bool enable) {
218    char buf[64];
219    snprintf(buf, 64, "%#" PRIx64, mtag);
220    if (property_set(dfs_tags_property, buf) < 0) {
221        err = true;
222        sprintf(err_msg, "Failed to set debug tags system properties.");
223    }
224
225    if (strlen(mapp) > 0
226            && property_set(dfs_apps_property, mapp) < 0) {
227        err = true;
228        sprintf(err_msg, "Failed to set debug applications.");
229    }
230
231    if (log_sched) {
232        dfs_enable(enable, dfs_sched_switch_path);
233        dfs_enable(enable, dfs_sched_wakeup_path);
234    }
235    if (log_stack) {
236        dfs_enable(enable, dfs_stack_path);
237    }
238    if (log_irq) {
239        dfs_enable(enable, dfs_irq_path);
240    }
241    if (log_sync) {
242        dfs_enable(enable, dfs_sync_path);
243    }
244    if (log_workq) {
245        dfs_enable(enable, dfs_workq_path);
246    }
247}
248
249/*
250 * Dump the log in a compressed format for systrace to visualize.
251 * Create a dump file "dump_of_anrdaemon.<current_time>" under /data/misc/anrd
252 */
253static void dump_trace()
254{
255    time_t now = time(0);
256    struct tm  tstruct;
257    char time_buf[time_buf_size];
258    char path_buf[path_buf_size];
259    const char* header = " done\nTRACE:\n";
260    ssize_t header_len = strlen(header);
261
262    ALOGI("Started to dump ANRdaemon trace.");
263
264    tstruct = *localtime(&now);
265    strftime(time_buf, time_buf_size, "%Y-%m-%d.%X", &tstruct);
266    snprintf(path_buf, path_buf_size, "/data/misc/anrd/dump_of_anrdaemon.%s", time_buf);
267    int output_fd = creat(path_buf, S_IRWXU);
268    if (output_fd == -1) {
269        ALOGE("Failed to create %s. Dump aborted.", path_buf);
270        return;
271    }
272
273    if (write(output_fd, header, strlen(header)) != header_len) {
274        ALOGE("Failed to write the header.");
275        close(output_fd);
276        return;
277    }
278
279    int trace_fd = open(dfs_trace_output_path, O_RDWR);
280    if (trace_fd == -1) {
281        ALOGE("Failed to open %s. Dump aborted.", dfs_trace_output_path);
282        close(output_fd);
283        return;
284    }
285
286    z_stream zs;
287    uint8_t *in, *out;
288    int result, flush;
289
290    memset(&zs, 0, sizeof(zs));
291    result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
292    if (result != Z_OK) {
293        ALOGE("error initializing zlib: %d\n", result);
294        close(trace_fd);
295        close(output_fd);
296        return;
297    }
298
299    const size_t bufSize = 64*1024;
300    in = (uint8_t*)malloc(bufSize);
301    out = (uint8_t*)malloc(bufSize);
302    flush = Z_NO_FLUSH;
303
304    zs.next_out = out;
305    zs.avail_out = bufSize;
306
307    do {
308        if (zs.avail_in == 0) {
309            result = read(trace_fd, in, bufSize);
310            if (result < 0) {
311                ALOGE("error reading trace: %s", strerror(errno));
312                result = Z_STREAM_END;
313                break;
314            } else if (result == 0) {
315                flush = Z_FINISH;
316            } else {
317                zs.next_in = in;
318                zs.avail_in = result;
319            }
320        }
321
322        if (zs.avail_out == 0) {
323            result = write(output_fd, out, bufSize);
324            if ((size_t)result < bufSize) {
325                ALOGE("error writing deflated trace: %s", strerror(errno));
326                result = Z_STREAM_END;
327                zs.avail_out = bufSize;
328                break;
329            }
330            zs.next_out = out;
331            zs.avail_out = bufSize;
332        }
333
334    } while ((result = deflate(&zs, flush)) == Z_OK);
335
336    if (result != Z_STREAM_END) {
337        ALOGE("error deflating trace: %s\n", zs.msg);
338    }
339
340    if (zs.avail_out < bufSize) {
341        size_t bytes = bufSize - zs.avail_out;
342        result = write(output_fd, out, bytes);
343        if ((size_t)result < bytes) {
344            ALOGE("error writing deflated trace: %s", strerror(errno));
345        }
346    }
347
348    result = deflateEnd(&zs);
349    if (result != Z_OK) {
350        ALOGE("error cleaning up zlib: %d\n", result);
351    }
352
353    free(in);
354    free(out);
355
356    close(trace_fd);
357    close(output_fd);
358
359    ALOGI("Finished dump. Output file stored at: %s", path_buf);
360}
361
362/*
363 * Start logging when cpu usage is high. Meanwhile, moniter the cpu usage and
364 * stop logging when it drops down.
365 */
366static void start_tracing(void) {
367    ALOGD("High cpu usage, start logging.");
368
369    if (dfs_enable(true, dfs_control_path) != 0) {
370        ALOGE("Failed to start tracing.");
371        return;
372    }
373    tracing = true;
374
375    /* Stop logging when cpu usage drops or the daemon is suspended.*/
376    do {
377        usleep(tracing_check_period);
378    } while (!suspend && !dump_requested && is_heavy_load());
379
380    if (dfs_enable(false, dfs_control_path) != 0) {
381        ALOGE("Failed to stop tracing.");
382        err = true;
383        return;
384    }
385    tracing = false;
386
387    if (suspend) {
388        ALOGI("trace stopped due to suspend. Send SIGCONT to resume.");
389    } else if (dump_requested) {
390        ALOGI("trace stopped due to dump request.");
391        dump_trace();
392        dump_requested = false;
393    } else {
394        ALOGD("Usage back to low, stop logging.");
395    }
396}
397
398/*
399 * Set the tracing log buffer size.
400 * Note the actual buffer size will be buf_size_kb * number of cores.
401 */
402static int set_tracing_buffer_size(void) {
403    int fd = open(dfs_buffer_size_path, O_WRONLY);
404    if (fd == -1) {
405        err = true;
406        sprintf(err_msg, "Can't open atrace buffer size file under /d/tracing.");
407        return -1;
408    }
409    ssize_t len = strlen(buf_size_kb);
410    if (write(fd, buf_size_kb, len) != len) {
411        err = true;
412        sprintf(err_msg, "Error in writing to atrace buffer size file.");
413    }
414    close(fd);
415    return (err?-1:0);
416
417}
418
419/*
420 * Main loop to moniter the cpu usage and decided whether to start logging.
421 */
422static void start(void) {
423    if ((set_tracing_buffer_size()) != 0)
424        return;
425
426    dfs_set_property(tag, apps, true);
427    dfs_poke_binder();
428
429    get_cpu_stat(&old_cpu);
430    sleep(check_period);
431
432    while (!quit && !err) {
433        if (!suspend && is_heavy_load()) {
434            /*
435             * Increase process priority to make sure we can stop logging when
436             * necessary and do not overwrite the buffer
437             */
438            setpriority(PRIO_PROCESS, 0, -20);
439            start_tracing();
440            setpriority(PRIO_PROCESS, 0, 0);
441        }
442        sleep(check_period);
443    }
444    return;
445}
446
447/*
448 * If trace is not running, dump trace right away.
449 * If trace is running, request to dump trace.
450 */
451static void request_dump_trace()
452{
453    if (!tracing) {
454        dump_trace();
455    } else if (!dump_requested) {
456        dump_requested = true;
457    }
458}
459
460static void handle_signal(int signo)
461{
462    switch (signo) {
463        case SIGQUIT:
464            suspend = true;
465            quit = true;
466            break;
467        case SIGSTOP:
468            suspend = true;
469            break;
470        case SIGCONT:
471            suspend = false;
472            break;
473        case SIGUSR1:
474            request_dump_trace();
475    }
476}
477
478/*
479 * Set the signal handler:
480 * SIGQUIT: Reset debugfs and tracing property and terminate the daemon.
481 * SIGSTOP: Stop logging and suspend the daemon.
482 * SIGCONT: Resume the daemon as normal.
483 * SIGUSR1: Dump the logging to a compressed format for systrace to visualize.
484 */
485static void register_sighandler(void)
486{
487    struct sigaction sa;
488    sigset_t block_mask;
489
490    sigemptyset(&block_mask);
491    sigaddset (&block_mask, SIGQUIT);
492    sigaddset (&block_mask, SIGSTOP);
493    sigaddset (&block_mask, SIGCONT);
494    sigaddset (&block_mask, SIGUSR1);
495
496    sa.sa_flags = 0;
497    sa.sa_mask = block_mask;
498    sa.sa_handler = handle_signal;
499    sigaction(SIGQUIT, &sa, NULL);
500    sigaction(SIGSTOP, &sa, NULL);
501    sigaction(SIGCONT, &sa, NULL);
502    sigaction(SIGUSR1, &sa, NULL);
503}
504
505static void show_help(void) {
506
507    fprintf(stderr, "usage: ANRdaemon [options] [categoris...]\n");
508    fprintf(stdout, "Options includes:\n"
509                    "   -a appname  enable app-level tracing for a comma "
510                       "separated list of cmdlines\n"
511                    "   -t N        cpu threshold for logging to start "
512                        "(uint = 0.01%%, min = 5000, max = 9999, default = 9990)\n"
513                    "   -s N        use a trace buffer size of N KB "
514                        "default to 2048KB\n"
515                    "   -h          show helps\n");
516    fprintf(stdout, "Categoris includes:\n"
517                    "   am         - activity manager\n"
518                    "   sm         - sync manager\n"
519                    "   input      - input\n"
520                    "   dalvik     - dalvik VM\n"
521                    "   audio      - Audio\n"
522                    "   gfx        - Graphics\n"
523                    "   rs         - RenderScript\n"
524                    "   hal        - Hardware Modules\n"
525                    "   irq        - kernel irq events\n"
526                    "   sched      - kernel scheduler activity\n"
527                    "   stack      - kernel stack\n"
528                    "   sync       - kernel sync activity\n"
529                    "   workq      - kernel work queues\n");
530    fprintf(stdout, "Control includes:\n"
531                    "   SIGQUIT: terminate the process\n"
532                    "   SIGSTOP: suspend all function of the daemon\n"
533                    "   SIGCONT: resume the normal function\n"
534                    "   SIGUSR1: dump the current logging in a compressed form\n");
535    exit(0);
536}
537
538static int get_options(int argc, char *argv[]) {
539    int opt = 0;
540    int threshold;
541    while ((opt = getopt(argc, argv, "a:s:t:h")) >= 0) {
542        switch(opt) {
543            case 'a':
544                apps = optarg;
545                break;
546            case 's':
547                if (atoi(optarg) > max_buffer_size)
548                    buf_size_kb = max_buffer_size_str;
549                else if (atoi(optarg) < min_buffer_size)
550                    buf_size_kb = min_buffer_size_str;
551                else
552                    buf_size_kb = optarg;
553                break;
554            case 't':
555                threshold = atoi(optarg);
556                if (threshold > 9999 || threshold < 5000) {
557                    fprintf(stderr, "logging threshold should be 5000-9999\n");
558                    return 1;
559                }
560                idle_threshold = 10000 - threshold;
561                break;
562            case 'h':
563                show_help();
564                break;
565            default:
566                fprintf(stderr, "Error in getting options.\n"
567                        "run \"%s -h\" for usage.\n", argv[0]);
568                return 1;
569        }
570    }
571
572    for (int i = optind; i < argc; i++) {
573        if (strcmp(argv[i], "am") == 0) {
574            tag |= ATRACE_TAG_ACTIVITY_MANAGER;
575        } else if (strcmp(argv[i], "input") == 0) {
576            tag |= ATRACE_TAG_INPUT;
577        } else if (strcmp(argv[i], "sm") == 0) {
578            tag |= ATRACE_TAG_SYNC_MANAGER;
579        } else if (strcmp(argv[i], "dalvik") == 0) {
580            tag |= ATRACE_TAG_DALVIK;
581        } else if (strcmp(argv[i], "gfx") == 0) {
582            tag |= ATRACE_TAG_GRAPHICS;
583        } else if (strcmp(argv[i], "audio") == 0) {
584            tag |= ATRACE_TAG_AUDIO;
585        } else if (strcmp(argv[i], "hal") == 0) {
586            tag |= ATRACE_TAG_HAL;
587        } else if (strcmp(argv[i], "rs") == 0) {
588            tag |= ATRACE_TAG_RS;
589        } else if (strcmp(argv[i], "sched") == 0) {
590            log_sched = true;
591        } else if (strcmp(argv[i], "stack") == 0) {
592            log_stack = true;
593        } else if (strcmp(argv[i], "workq") == 0) {
594            log_workq = true;
595        } else if (strcmp(argv[i], "irq") == 0) {
596            log_irq = true;
597        } else if (strcmp(argv[i], "sync") == 0) {
598            log_sync = true;
599        } else {
600            fprintf(stderr, "invalid category: %s\n"
601                    "run \"%s -h\" for usage.\n", argv[i], argv[0]);
602            return 1;
603        }
604    }
605
606    /* If nothing is enabled, don't run */
607    if (!tag && !log_sched && !log_stack && !log_workq && !log_irq && !log_sync) {
608        ALOGE("Specify at least one category to trace.");
609        return 1;
610    }
611
612    return 0;
613}
614
615int main(int argc, char *argv[])
616{
617    if(get_options(argc, argv) != 0)
618        return 1;
619
620    if (daemon(0, 0) != 0)
621        return 1;
622
623    register_sighandler();
624
625    /* Clear any the trace log file by overwrite it with a new file */
626    int fd = creat(dfs_trace_output_path, 0);
627    if (fd == -1) {
628        ALOGE("Faield to open and cleaup previous log");
629        return 1;
630    }
631    close(fd);
632
633    ALOGI("ANRdaemon starting");
634    start();
635
636    if (err)
637        ALOGE("ANRdaemon stopped due to Error: %s", err_msg);
638
639    ALOGI("ANRdaemon terminated.");
640
641    return (err?1:0);
642}
643