atrace.c revision b9314021ee1ccaa62eb115e8e0188f482a950f3b
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <errno.h>
18#include <fcntl.h>
19#include <signal.h>
20#include <stdarg.h>
21#include <stdbool.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <sys/sendfile.h>
25#include <time.h>
26#include <zlib.h>
27
28#define NELEM(x) ((int) (sizeof(x) / sizeof((x)[0])))
29
30/* Command line options */
31static int g_traceDurationSeconds = 5;
32static bool g_traceSchedSwitch = false;
33static bool g_traceCpuFrequency = false;
34static bool g_traceCpuIdle = false;
35static bool g_traceDisk = false;
36static bool g_traceGovernorLoad = false;
37static bool g_traceWorkqueue = false;
38static bool g_traceOverwrite = false;
39static int g_traceBufferSizeKB = 2048;
40static bool g_compress = false;
41
42/* Global state */
43static bool g_traceAborted = false;
44
45/* Sys file paths */
46static const char* k_traceClockPath =
47    "/sys/kernel/debug/tracing/trace_clock";
48
49static const char* k_traceBufferSizePath =
50    "/sys/kernel/debug/tracing/buffer_size_kb";
51
52static const char* k_tracingOverwriteEnablePath =
53    "/sys/kernel/debug/tracing/options/overwrite";
54
55static const char* k_schedSwitchEnablePath =
56    "/sys/kernel/debug/tracing/events/sched/sched_switch/enable";
57
58static const char* k_schedWakeupEnablePath =
59    "/sys/kernel/debug/tracing/events/sched/sched_wakeup/enable";
60
61static const char* k_cpuFreqEnablePath =
62    "/sys/kernel/debug/tracing/events/power/cpu_frequency/enable";
63
64static const char* k_cpuIdleEnablePath =
65    "/sys/kernel/debug/tracing/events/power/cpu_idle/enable";
66
67static const char* k_governorLoadEnablePath =
68    "/sys/kernel/debug/tracing/events/cpufreq_interactive/enable";
69
70static const char* k_workqueueEnablePath =
71    "/sys/kernel/debug/tracing/events/workqueue/enable";
72
73static const char* k_diskEnablePaths[] = {
74        "/sys/kernel/debug/tracing/events/ext4/ext4_sync_file_enter/enable",
75        "/sys/kernel/debug/tracing/events/ext4/ext4_sync_file_exit/enable",
76        "/sys/kernel/debug/tracing/events/block/block_rq_issue/enable",
77        "/sys/kernel/debug/tracing/events/block/block_rq_complete/enable",
78};
79
80static const char* k_tracingOnPath =
81    "/sys/kernel/debug/tracing/tracing_on";
82
83static const char* k_tracePath =
84    "/sys/kernel/debug/tracing/trace";
85
86static const char* k_traceMarkerPath =
87    "/sys/kernel/debug/tracing/trace_marker";
88
89// Write a string to a file, returning true if the write was successful.
90bool writeStr(const char* filename, const char* str)
91{
92    int fd = open(filename, O_WRONLY);
93    if (fd == -1) {
94        fprintf(stderr, "error opening %s: %s (%d)\n", filename,
95                strerror(errno), errno);
96        return false;
97    }
98
99    bool ok = true;
100    ssize_t len = strlen(str);
101    if (write(fd, str, len) != len) {
102        fprintf(stderr, "error writing to %s: %s (%d)\n", filename,
103                strerror(errno), errno);
104        ok = false;
105    }
106
107    close(fd);
108
109    return ok;
110}
111
112// Enable or disable a kernel option by writing a "1" or a "0" into a /sys file.
113static bool setKernelOptionEnable(const char* filename, bool enable)
114{
115    return writeStr(filename, enable ? "1" : "0");
116}
117
118// Enable or disable a collection of kernel options by writing a "1" or a "0" into each /sys file.
119static bool setMultipleKernelOptionsEnable(const char** filenames, size_t count, bool enable)
120{
121    bool result = true;
122    for (size_t i = 0; i < count; i++) {
123        result &= setKernelOptionEnable(filenames[i], enable);
124    }
125    return result;
126}
127
128// Enable or disable overwriting of the kernel trace buffers.  Disabling this
129// will cause tracing to stop once the trace buffers have filled up.
130static bool setTraceOverwriteEnable(bool enable)
131{
132    return setKernelOptionEnable(k_tracingOverwriteEnablePath, enable);
133}
134
135// Enable or disable tracing of the kernel scheduler switching.
136static bool setSchedSwitchTracingEnable(bool enable)
137{
138    bool ok = true;
139    ok &= setKernelOptionEnable(k_schedSwitchEnablePath, enable);
140    ok &= setKernelOptionEnable(k_schedWakeupEnablePath, enable);
141    return ok;
142}
143
144// Enable or disable tracing of the CPU clock frequency.
145static bool setCpuFrequencyTracingEnable(bool enable)
146{
147    return setKernelOptionEnable(k_cpuFreqEnablePath, enable);
148}
149
150// Enable or disable tracing of CPU idle events.
151static bool setCpuIdleTracingEnable(bool enable)
152{
153    return setKernelOptionEnable(k_cpuIdleEnablePath, enable);
154}
155
156// Enable or disable tracing of the interactive CPU frequency governor's idea of
157// the CPU load.
158static bool setGovernorLoadTracingEnable(bool enable)
159{
160    return setKernelOptionEnable(k_governorLoadEnablePath, enable);
161}
162
163// Enable or disable tracing of the kernel workqueues.
164static bool setWorkqueueTracingEnabled(bool enable)
165{
166    return setKernelOptionEnable(k_workqueueEnablePath, enable);
167}
168
169// Enable or disable tracing of disk I/O.
170static bool setDiskTracingEnabled(bool enable)
171{
172    return setMultipleKernelOptionsEnable(k_diskEnablePaths, NELEM(k_diskEnablePaths), enable);
173}
174
175// Enable or disable kernel tracing.
176static bool setTracingEnabled(bool enable)
177{
178    return setKernelOptionEnable(k_tracingOnPath, enable);
179}
180
181// Clear the contents of the kernel trace.
182static bool clearTrace()
183{
184    int traceFD = creat(k_tracePath, 0);
185    if (traceFD == -1) {
186        fprintf(stderr, "error truncating %s: %s (%d)\n", k_tracePath,
187                strerror(errno), errno);
188        return false;
189    }
190
191    close(traceFD);
192
193    return true;
194}
195
196// Set the size of the kernel's trace buffer in kilobytes.
197static bool setTraceBufferSizeKB(int size)
198{
199    char str[32] = "1";
200    int len;
201    if (size < 1) {
202        size = 1;
203    }
204    snprintf(str, 32, "%d", size);
205    return writeStr(k_traceBufferSizePath, str);
206}
207
208// Enable or disable the kernel's use of the global clock.  Disabling the global
209// clock will result in the kernel using a per-CPU local clock.
210static bool setGlobalClockEnable(bool enable)
211{
212    return writeStr(k_traceClockPath, enable ? "global" : "local");
213}
214
215// Check whether a file exists.
216static bool fileExists(const char* filename) {
217    return access(filename, F_OK) != -1;
218}
219
220// Enable tracing in the kernel.
221static bool startTrace(bool isRoot)
222{
223    bool ok = true;
224
225    // Set up the tracing options that don't require root.
226    ok &= setTraceOverwriteEnable(g_traceOverwrite);
227    ok &= setSchedSwitchTracingEnable(g_traceSchedSwitch);
228    ok &= setCpuFrequencyTracingEnable(g_traceCpuFrequency);
229    ok &= setCpuIdleTracingEnable(g_traceCpuIdle);
230    if (fileExists(k_governorLoadEnablePath) || g_traceGovernorLoad) {
231        ok &= setGovernorLoadTracingEnable(g_traceGovernorLoad);
232    }
233    ok &= setTraceBufferSizeKB(g_traceBufferSizeKB);
234    ok &= setGlobalClockEnable(true);
235
236    // Set up the tracing options that do require root.  The options that
237    // require root should have errored out earlier if we're not running as
238    // root.
239    if (isRoot) {
240        ok &= setWorkqueueTracingEnabled(g_traceWorkqueue);
241        ok &= setDiskTracingEnabled(g_traceDisk);
242    }
243
244    // Enable tracing.
245    ok &= setTracingEnabled(true);
246
247    if (!ok) {
248        fprintf(stderr, "error: unable to start trace\n");
249    }
250
251    return ok;
252}
253
254// Disable tracing in the kernel.
255static void stopTrace(bool isRoot)
256{
257    // Disable tracing.
258    setTracingEnabled(false);
259
260    // Set the options back to their defaults.
261    setTraceOverwriteEnable(true);
262    setSchedSwitchTracingEnable(false);
263    setCpuFrequencyTracingEnable(false);
264    if (fileExists(k_governorLoadEnablePath)) {
265        setGovernorLoadTracingEnable(false);
266    }
267    setGlobalClockEnable(false);
268
269    if (isRoot) {
270        setWorkqueueTracingEnabled(false);
271        setDiskTracingEnabled(false);
272    }
273
274    // Note that we can't reset the trace buffer size here because that would
275    // clear the trace before we've read it.
276}
277
278// Read the current kernel trace and write it to stdout.
279static void dumpTrace()
280{
281    int traceFD = open(k_tracePath, O_RDWR);
282    if (traceFD == -1) {
283        fprintf(stderr, "error opening %s: %s (%d)\n", k_tracePath,
284                strerror(errno), errno);
285        return;
286    }
287
288    if (g_compress) {
289        z_stream zs;
290        uint8_t *in, *out;
291        int result, flush;
292
293        bzero(&zs, sizeof(zs));
294        result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
295        if (result != Z_OK) {
296            fprintf(stderr, "error initializing zlib: %d\n", result);
297            close(traceFD);
298            return;
299        }
300
301        const size_t bufSize = 64*1024;
302        in = (uint8_t*)malloc(bufSize);
303        out = (uint8_t*)malloc(bufSize);
304        flush = Z_NO_FLUSH;
305
306        zs.next_out = out;
307        zs.avail_out = bufSize;
308
309        do {
310
311            if (zs.avail_in == 0) {
312                // More input is needed.
313                result = read(traceFD, in, bufSize);
314                if (result < 0) {
315                    fprintf(stderr, "error reading trace: %s (%d)\n",
316                            strerror(errno), errno);
317                    result = Z_STREAM_END;
318                    break;
319                } else if (result == 0) {
320                    flush = Z_FINISH;
321                } else {
322                    zs.next_in = in;
323                    zs.avail_in = result;
324                }
325            }
326
327            if (zs.avail_out == 0) {
328                // Need to write the output.
329                result = write(STDOUT_FILENO, out, bufSize);
330                if ((size_t)result < bufSize) {
331                    fprintf(stderr, "error writing deflated trace: %s (%d)\n",
332                            strerror(errno), errno);
333                    result = Z_STREAM_END; // skip deflate error message
334                    zs.avail_out = bufSize; // skip the final write
335                    break;
336                }
337                zs.next_out = out;
338                zs.avail_out = bufSize;
339            }
340
341        } while ((result = deflate(&zs, flush)) == Z_OK);
342
343        if (result != Z_STREAM_END) {
344            fprintf(stderr, "error deflating trace: %s\n", zs.msg);
345        }
346
347        if (zs.avail_out < bufSize) {
348            size_t bytes = bufSize - zs.avail_out;
349            result = write(STDOUT_FILENO, out, bytes);
350            if ((size_t)result < bytes) {
351                fprintf(stderr, "error writing deflated trace: %s (%d)\n",
352                        strerror(errno), errno);
353            }
354        }
355
356        result = deflateEnd(&zs);
357        if (result != Z_OK) {
358            fprintf(stderr, "error cleaning up zlib: %d\n", result);
359        }
360
361        free(in);
362        free(out);
363    } else {
364        ssize_t sent = 0;
365        while ((sent = sendfile(STDOUT_FILENO, traceFD, NULL, 64*1024*1024)) > 0);
366        if (sent == -1) {
367            fprintf(stderr, "error dumping trace: %s (%d)\n", strerror(errno),
368                    errno);
369        }
370    }
371
372    close(traceFD);
373}
374
375// Print the command usage help to stderr.
376static void showHelp(const char *cmd)
377{
378    fprintf(stderr, "usage: %s [options]\n", cmd);
379    fprintf(stderr, "options include:\n"
380                    "  -b N            use a trace buffer size of N KB\n"
381                    "  -c              trace into a circular buffer\n"
382                    "  -d              trace disk I/O\n"
383                    "  -f              trace CPU frequency changes\n"
384                    "  -l              trace CPU frequency governor load\n"
385                    "  -s              trace the kernel scheduler switches\n"
386                    "  -t N            trace for N seconds [defualt 5]\n"
387                    "  -w              trace the kernel workqueue\n"
388                    "  -z              compress the trace dump\n");
389}
390
391static void handleSignal(int signo) {
392    g_traceAborted = true;
393}
394
395static void registerSigHandler() {
396    struct sigaction sa;
397    sigemptyset(&sa.sa_mask);
398    sa.sa_flags = 0;
399    sa.sa_handler = handleSignal;
400    sigaction(SIGHUP, &sa, NULL);
401    sigaction(SIGINT, &sa, NULL);
402    sigaction(SIGQUIT, &sa, NULL);
403    sigaction(SIGTERM, &sa, NULL);
404}
405
406int main(int argc, char **argv)
407{
408    bool isRoot = (getuid() == 0);
409
410    if (argc == 2 && 0 == strcmp(argv[1], "--help")) {
411        showHelp(argv[0]);
412        exit(0);
413    }
414
415    for (;;) {
416        int ret;
417
418        ret = getopt(argc, argv, "b:cidflst:wz");
419
420        if (ret < 0) {
421            break;
422        }
423
424        switch(ret) {
425            case 'b':
426                g_traceBufferSizeKB = atoi(optarg);
427            break;
428
429            case 'c':
430                g_traceOverwrite = true;
431            break;
432
433            case 'i':
434                g_traceCpuIdle = true;
435            break;
436
437            case 'l':
438                g_traceGovernorLoad = true;
439            break;
440
441            case 'd':
442                if (!isRoot) {
443                    fprintf(stderr, "error: tracing disk activity requires root privileges\n");
444                    exit(1);
445                }
446                g_traceDisk = true;
447            break;
448
449            case 'f':
450                g_traceCpuFrequency = true;
451            break;
452
453            case 's':
454                g_traceSchedSwitch = true;
455            break;
456
457            case 't':
458                g_traceDurationSeconds = atoi(optarg);
459            break;
460
461            case 'w':
462                if (!isRoot) {
463                    fprintf(stderr, "error: tracing kernel work queues requires root privileges\n");
464                    exit(1);
465                }
466                g_traceWorkqueue = true;
467            break;
468
469            case 'z':
470                g_compress = true;
471            break;
472
473            default:
474                fprintf(stderr, "\n");
475                showHelp(argv[0]);
476                exit(-1);
477            break;
478        }
479    }
480
481    registerSigHandler();
482
483    bool ok = startTrace(isRoot);
484
485    if (ok) {
486        printf("capturing trace...");
487        fflush(stdout);
488
489        // We clear the trace after starting it because tracing gets enabled for
490        // each CPU individually in the kernel. Having the beginning of the trace
491        // contain entries from only one CPU can cause "begin" entries without a
492        // matching "end" entry to show up if a task gets migrated from one CPU to
493        // another.
494        ok = clearTrace();
495
496        if (ok) {
497            // Sleep to allow the trace to be captured.
498            struct timespec timeLeft;
499            timeLeft.tv_sec = g_traceDurationSeconds;
500            timeLeft.tv_nsec = 0;
501            do {
502                if (g_traceAborted) {
503                    break;
504                }
505            } while (nanosleep(&timeLeft, &timeLeft) == -1 && errno == EINTR);
506        }
507    }
508
509    // Stop the trace and restore the default settings.
510    stopTrace(isRoot);
511
512    if (ok) {
513        if (!g_traceAborted) {
514            printf(" done\nTRACE:\n");
515            fflush(stdout);
516            dumpTrace();
517        } else {
518            printf("\ntrace aborted.\n");
519            fflush(stdout);
520        }
521        clearTrace();
522    } else {
523        fprintf(stderr, "unable to start tracing\n");
524    }
525
526    // Reset the trace buffer size to 1.
527    setTraceBufferSizeKB(1);
528
529    return g_traceAborted ? 1 : 0;
530}
531