1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <errno.h>
18#include <fcntl.h>
19#include <signal.h>
20#include <stdarg.h>
21#include <stdbool.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <sys/sendfile.h>
25#include <time.h>
26#include <zlib.h>
27
28#define NELEM(x) ((int) (sizeof(x) / sizeof((x)[0])))
29
30/* Command line options */
31static int g_traceDurationSeconds = 5;
32static bool g_traceSchedSwitch = false;
33static bool g_traceFrequency = false;
34static bool g_traceBusUtilization = false;
35static bool g_traceCpuIdle = false;
36static bool g_traceDisk = false;
37static bool g_traceGovernorLoad = false;
38static bool g_traceSync = false;
39static bool g_traceWorkqueue = false;
40static bool g_traceOverwrite = false;
41static int g_traceBufferSizeKB = 2048;
42static bool g_compress = false;
43static bool g_nohup = false;
44static int g_initialSleepSecs = 0;
45
46/* Global state */
47static bool g_traceAborted = false;
48
49/* Sys file paths */
50static const char* k_traceClockPath =
51    "/sys/kernel/debug/tracing/trace_clock";
52
53static const char* k_traceBufferSizePath =
54    "/sys/kernel/debug/tracing/buffer_size_kb";
55
56static const char* k_tracingOverwriteEnablePath =
57    "/sys/kernel/debug/tracing/options/overwrite";
58
59static const char* k_schedSwitchEnablePath =
60    "/sys/kernel/debug/tracing/events/sched/sched_switch/enable";
61
62static const char* k_schedWakeupEnablePath =
63    "/sys/kernel/debug/tracing/events/sched/sched_wakeup/enable";
64
65static const char* k_memoryBusEnablePath =
66    "/sys/kernel/debug/tracing/events/memory_bus/enable";
67
68static const char* k_cpuFreqEnablePath =
69    "/sys/kernel/debug/tracing/events/power/cpu_frequency/enable";
70
71static const char *k_clockSetRateEnablePath =
72    "/sys/kernel/debug/tracing/events/power/clock_set_rate/enable";
73
74static const char* k_cpuIdleEnablePath =
75    "/sys/kernel/debug/tracing/events/power/cpu_idle/enable";
76
77static const char* k_governorLoadEnablePath =
78    "/sys/kernel/debug/tracing/events/cpufreq_interactive/enable";
79
80static const char* k_syncEnablePath =
81    "/sys/kernel/debug/tracing/events/sync/enable";
82
83static const char* k_workqueueEnablePath =
84    "/sys/kernel/debug/tracing/events/workqueue/enable";
85
86static const char* k_diskEnablePaths[] = {
87        "/sys/kernel/debug/tracing/events/ext4/ext4_sync_file_enter/enable",
88        "/sys/kernel/debug/tracing/events/ext4/ext4_sync_file_exit/enable",
89        "/sys/kernel/debug/tracing/events/block/block_rq_issue/enable",
90        "/sys/kernel/debug/tracing/events/block/block_rq_complete/enable",
91};
92
93static const char* k_tracingOnPath =
94    "/sys/kernel/debug/tracing/tracing_on";
95
96static const char* k_tracePath =
97    "/sys/kernel/debug/tracing/trace";
98
99static const char* k_traceMarkerPath =
100    "/sys/kernel/debug/tracing/trace_marker";
101
102// Check whether a file exists.
103static bool fileExists(const char* filename) {
104    return access(filename, F_OK) != -1;
105}
106
107// Write a string to a file, returning true if the write was successful.
108bool writeStr(const char* filename, const char* str)
109{
110    int fd = open(filename, O_WRONLY);
111    if (fd == -1) {
112        fprintf(stderr, "error opening %s: %s (%d)\n", filename,
113                strerror(errno), errno);
114        return false;
115    }
116
117    bool ok = true;
118    ssize_t len = strlen(str);
119    if (write(fd, str, len) != len) {
120        fprintf(stderr, "error writing to %s: %s (%d)\n", filename,
121                strerror(errno), errno);
122        ok = false;
123    }
124
125    close(fd);
126
127    return ok;
128}
129
130// Enable or disable a kernel option by writing a "1" or a "0" into a /sys file.
131static bool setKernelOptionEnable(const char* filename, bool enable)
132{
133    return writeStr(filename, enable ? "1" : "0");
134}
135
136// Enable or disable a collection of kernel options by writing a "1" or a "0" into each /sys file.
137static bool setMultipleKernelOptionsEnable(const char** filenames, size_t count, bool enable)
138{
139    bool result = true;
140    for (size_t i = 0; i < count; i++) {
141        result &= setKernelOptionEnable(filenames[i], enable);
142    }
143    return result;
144}
145
146// Enable or disable overwriting of the kernel trace buffers.  Disabling this
147// will cause tracing to stop once the trace buffers have filled up.
148static bool setTraceOverwriteEnable(bool enable)
149{
150    return setKernelOptionEnable(k_tracingOverwriteEnablePath, enable);
151}
152
153// Enable or disable tracing of the kernel scheduler switching.
154static bool setSchedSwitchTracingEnable(bool enable)
155{
156    bool ok = true;
157    ok &= setKernelOptionEnable(k_schedSwitchEnablePath, enable);
158    ok &= setKernelOptionEnable(k_schedWakeupEnablePath, enable);
159    return ok;
160}
161
162// Enable or disable tracing of the Bus utilization.
163static bool setBusUtilizationTracingEnable(bool enable)
164{
165    bool ok = true, oneSet = false;
166    // these can be platform specific so make sure that at least
167    // one succeeds.
168    if (fileExists(k_memoryBusEnablePath)) {
169        ok &= setKernelOptionEnable(k_memoryBusEnablePath, enable);
170        oneSet |= ok;
171    }
172    return ok && (oneSet || !enable);
173}
174
175// Enable or disable tracing of the CPU clock frequency.
176static bool setFrequencyTracingEnable(bool enable)
177{
178    bool ok = true;
179    ok &= setKernelOptionEnable(k_cpuFreqEnablePath, enable);
180    if (fileExists(k_clockSetRateEnablePath)) {
181        ok &= setKernelOptionEnable(k_clockSetRateEnablePath, enable);
182    }
183    return ok;
184}
185
186// Enable or disable tracing of CPU idle events.
187static bool setCpuIdleTracingEnable(bool enable)
188{
189    return setKernelOptionEnable(k_cpuIdleEnablePath, enable);
190}
191
192// Enable or disable tracing of the interactive CPU frequency governor's idea of
193// the CPU load.
194static bool setGovernorLoadTracingEnable(bool enable)
195{
196    bool ok = true;
197    if (fileExists(k_governorLoadEnablePath) || enable) {
198        ok &= setKernelOptionEnable(k_governorLoadEnablePath, enable);
199    }
200    return ok;
201}
202
203// Enable or disable tracing of sync timelines and waits.
204static bool setSyncTracingEnabled(bool enable)
205{
206    bool ok = true;
207    if (fileExists(k_syncEnablePath) || enable) {
208        ok &= setKernelOptionEnable(k_syncEnablePath, enable);
209    }
210    return ok;
211}
212
213// Enable or disable tracing of the kernel workqueues.
214static bool setWorkqueueTracingEnabled(bool enable)
215{
216    return setKernelOptionEnable(k_workqueueEnablePath, enable);
217}
218
219// Enable or disable tracing of disk I/O.
220static bool setDiskTracingEnabled(bool enable)
221{
222    return setMultipleKernelOptionsEnable(k_diskEnablePaths, NELEM(k_diskEnablePaths), enable);
223}
224
225// Enable or disable kernel tracing.
226static bool setTracingEnabled(bool enable)
227{
228    return setKernelOptionEnable(k_tracingOnPath, enable);
229}
230
231// Clear the contents of the kernel trace.
232static bool clearTrace()
233{
234    int traceFD = creat(k_tracePath, 0);
235    if (traceFD == -1) {
236        fprintf(stderr, "error truncating %s: %s (%d)\n", k_tracePath,
237                strerror(errno), errno);
238        return false;
239    }
240
241    close(traceFD);
242
243    return true;
244}
245
246// Set the size of the kernel's trace buffer in kilobytes.
247static bool setTraceBufferSizeKB(int size)
248{
249    char str[32] = "1";
250    int len;
251    if (size < 1) {
252        size = 1;
253    }
254    snprintf(str, 32, "%d", size);
255    return writeStr(k_traceBufferSizePath, str);
256}
257
258// Enable or disable the kernel's use of the global clock.  Disabling the global
259// clock will result in the kernel using a per-CPU local clock.
260static bool setGlobalClockEnable(bool enable)
261{
262    return writeStr(k_traceClockPath, enable ? "global" : "local");
263}
264
265// Enable tracing in the kernel.
266static bool startTrace(bool isRoot)
267{
268    bool ok = true;
269
270    // Set up the tracing options that don't require root.
271    ok &= setTraceOverwriteEnable(g_traceOverwrite);
272    ok &= setSchedSwitchTracingEnable(g_traceSchedSwitch);
273    ok &= setFrequencyTracingEnable(g_traceFrequency);
274    ok &= setCpuIdleTracingEnable(g_traceCpuIdle);
275    ok &= setGovernorLoadTracingEnable(g_traceGovernorLoad);
276    ok &= setTraceBufferSizeKB(g_traceBufferSizeKB);
277    ok &= setGlobalClockEnable(true);
278
279    // Set up the tracing options that do require root.  The options that
280    // require root should have errored out earlier if we're not running as
281    // root.
282    if (isRoot) {
283        ok &= setBusUtilizationTracingEnable(g_traceBusUtilization);
284        ok &= setSyncTracingEnabled(g_traceSync);
285        ok &= setWorkqueueTracingEnabled(g_traceWorkqueue);
286        ok &= setDiskTracingEnabled(g_traceDisk);
287    }
288
289    // Enable tracing.
290    ok &= setTracingEnabled(true);
291
292    if (!ok) {
293        fprintf(stderr, "error: unable to start trace\n");
294    }
295
296    return ok;
297}
298
299// Disable tracing in the kernel.
300static void stopTrace(bool isRoot)
301{
302    // Disable tracing.
303    setTracingEnabled(false);
304
305    // Set the options back to their defaults.
306    setTraceOverwriteEnable(true);
307    setSchedSwitchTracingEnable(false);
308    setFrequencyTracingEnable(false);
309    setGovernorLoadTracingEnable(false);
310    setGlobalClockEnable(false);
311
312    if (isRoot) {
313        setBusUtilizationTracingEnable(false);
314        setSyncTracingEnabled(false);
315        setWorkqueueTracingEnabled(false);
316        setDiskTracingEnabled(false);
317    }
318
319    // Note that we can't reset the trace buffer size here because that would
320    // clear the trace before we've read it.
321}
322
323// Read the current kernel trace and write it to stdout.
324static void dumpTrace()
325{
326    int traceFD = open(k_tracePath, O_RDWR);
327    if (traceFD == -1) {
328        fprintf(stderr, "error opening %s: %s (%d)\n", k_tracePath,
329                strerror(errno), errno);
330        return;
331    }
332
333    if (g_compress) {
334        z_stream zs;
335        uint8_t *in, *out;
336        int result, flush;
337
338        bzero(&zs, sizeof(zs));
339        result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
340        if (result != Z_OK) {
341            fprintf(stderr, "error initializing zlib: %d\n", result);
342            close(traceFD);
343            return;
344        }
345
346        const size_t bufSize = 64*1024;
347        in = (uint8_t*)malloc(bufSize);
348        out = (uint8_t*)malloc(bufSize);
349        flush = Z_NO_FLUSH;
350
351        zs.next_out = out;
352        zs.avail_out = bufSize;
353
354        do {
355
356            if (zs.avail_in == 0) {
357                // More input is needed.
358                result = read(traceFD, in, bufSize);
359                if (result < 0) {
360                    fprintf(stderr, "error reading trace: %s (%d)\n",
361                            strerror(errno), errno);
362                    result = Z_STREAM_END;
363                    break;
364                } else if (result == 0) {
365                    flush = Z_FINISH;
366                } else {
367                    zs.next_in = in;
368                    zs.avail_in = result;
369                }
370            }
371
372            if (zs.avail_out == 0) {
373                // Need to write the output.
374                result = write(STDOUT_FILENO, out, bufSize);
375                if ((size_t)result < bufSize) {
376                    fprintf(stderr, "error writing deflated trace: %s (%d)\n",
377                            strerror(errno), errno);
378                    result = Z_STREAM_END; // skip deflate error message
379                    zs.avail_out = bufSize; // skip the final write
380                    break;
381                }
382                zs.next_out = out;
383                zs.avail_out = bufSize;
384            }
385
386        } while ((result = deflate(&zs, flush)) == Z_OK);
387
388        if (result != Z_STREAM_END) {
389            fprintf(stderr, "error deflating trace: %s\n", zs.msg);
390        }
391
392        if (zs.avail_out < bufSize) {
393            size_t bytes = bufSize - zs.avail_out;
394            result = write(STDOUT_FILENO, out, bytes);
395            if ((size_t)result < bytes) {
396                fprintf(stderr, "error writing deflated trace: %s (%d)\n",
397                        strerror(errno), errno);
398            }
399        }
400
401        result = deflateEnd(&zs);
402        if (result != Z_OK) {
403            fprintf(stderr, "error cleaning up zlib: %d\n", result);
404        }
405
406        free(in);
407        free(out);
408    } else {
409        ssize_t sent = 0;
410        while ((sent = sendfile(STDOUT_FILENO, traceFD, NULL, 64*1024*1024)) > 0);
411        if (sent == -1) {
412            fprintf(stderr, "error dumping trace: %s (%d)\n", strerror(errno),
413                    errno);
414        }
415    }
416
417    close(traceFD);
418}
419
420// Print the command usage help to stderr.
421static void showHelp(const char *cmd)
422{
423    fprintf(stderr, "usage: %s [options]\n", cmd);
424    fprintf(stderr, "options include:\n"
425                    "  -b N            use a trace buffer size of N KB\n"
426                    "  -c              trace into a circular buffer\n"
427                    "  -d              trace disk I/O\n"
428                    "  -f              trace clock frequency changes\n"
429                    "  -l              trace CPU frequency governor load\n"
430                    "  -s              trace the kernel scheduler switches\n"
431                    "  -t N            trace for N seconds [defualt 5]\n"
432                    "  -u              trace bus utilization\n"
433                    "  -w              trace the kernel workqueue\n"
434                    "  -y              trace sync timelines and waits\n"
435                    "  -z              compress the trace dump\n");
436}
437
438static void handleSignal(int signo) {
439    if (!g_nohup) {
440        g_traceAborted = true;
441    }
442}
443
444static void registerSigHandler() {
445    struct sigaction sa;
446    sigemptyset(&sa.sa_mask);
447    sa.sa_flags = 0;
448    sa.sa_handler = handleSignal;
449    sigaction(SIGHUP, &sa, NULL);
450    sigaction(SIGINT, &sa, NULL);
451    sigaction(SIGQUIT, &sa, NULL);
452    sigaction(SIGTERM, &sa, NULL);
453}
454
455int main(int argc, char **argv)
456{
457    bool isRoot = (getuid() == 0);
458
459    if (argc == 2 && 0 == strcmp(argv[1], "--help")) {
460        showHelp(argv[0]);
461        exit(0);
462    }
463
464    for (;;) {
465        int ret;
466
467        ret = getopt(argc, argv, "b:cidflst:uwyznS:");
468
469        if (ret < 0) {
470            break;
471        }
472
473        switch(ret) {
474            case 'b':
475                g_traceBufferSizeKB = atoi(optarg);
476            break;
477
478            case 'c':
479                g_traceOverwrite = true;
480            break;
481
482            case 'i':
483                g_traceCpuIdle = true;
484            break;
485
486            case 'l':
487                g_traceGovernorLoad = true;
488            break;
489
490            case 'd':
491                if (!isRoot) {
492                    fprintf(stderr, "error: tracing disk activity requires root privileges\n");
493                    exit(1);
494                }
495                g_traceDisk = true;
496            break;
497
498            case 'f':
499                g_traceFrequency = true;
500            break;
501
502            case 'n':
503                g_nohup = true;
504                break;
505
506            case 's':
507                g_traceSchedSwitch = true;
508            break;
509
510            case 'S':
511                g_initialSleepSecs = atoi(optarg);
512            break;
513
514            case 't':
515                g_traceDurationSeconds = atoi(optarg);
516            break;
517
518            case 'u':
519                if (!isRoot) {
520                    fprintf(stderr, "error: tracing bus utilization requires root privileges\n");
521                    exit(1);
522                }
523                g_traceBusUtilization = true;
524            break;
525
526            case 'w':
527                if (!isRoot) {
528                    fprintf(stderr, "error: tracing kernel work queues requires root privileges\n");
529                    exit(1);
530                }
531                g_traceWorkqueue = true;
532            break;
533
534            case 'y':
535                if (!isRoot) {
536                    fprintf(stderr, "error: tracing sync requires root privileges\n");
537                    exit(1);
538                }
539                g_traceSync = true;
540            break;
541
542            case 'z':
543                g_compress = true;
544            break;
545
546            default:
547                fprintf(stderr, "\n");
548                showHelp(argv[0]);
549                exit(-1);
550            break;
551        }
552    }
553
554    registerSigHandler();
555
556    if (g_initialSleepSecs > 0) {
557        sleep(g_initialSleepSecs);
558    }
559
560    bool ok = startTrace(isRoot);
561
562    if (ok) {
563        printf("capturing trace...");
564        fflush(stdout);
565
566        // We clear the trace after starting it because tracing gets enabled for
567        // each CPU individually in the kernel. Having the beginning of the trace
568        // contain entries from only one CPU can cause "begin" entries without a
569        // matching "end" entry to show up if a task gets migrated from one CPU to
570        // another.
571        ok = clearTrace();
572
573        if (ok) {
574            // Sleep to allow the trace to be captured.
575            struct timespec timeLeft;
576            timeLeft.tv_sec = g_traceDurationSeconds;
577            timeLeft.tv_nsec = 0;
578            do {
579                if (g_traceAborted) {
580                    break;
581                }
582            } while (nanosleep(&timeLeft, &timeLeft) == -1 && errno == EINTR);
583        }
584    }
585
586    // Stop the trace and restore the default settings.
587    stopTrace(isRoot);
588
589    if (ok) {
590        if (!g_traceAborted) {
591            printf(" done\nTRACE:\n");
592            fflush(stdout);
593            dumpTrace();
594        } else {
595            printf("\ntrace aborted.\n");
596            fflush(stdout);
597        }
598        clearTrace();
599    } else {
600        fprintf(stderr, "unable to start tracing\n");
601    }
602
603    // Reset the trace buffer size to 1.
604    setTraceBufferSizeKB(1);
605
606    return g_traceAborted ? 1 : 0;
607}
608