Profile.cpp revision 949c3ec207a7720fb47f7b3ca1f84dfcfd70aaa9
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Android's method call profiling goodies.
 */
#include "Dalvik.h"
#include <interp/InterpDefs.h>

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <sys/mman.h>
#include <sched.h>
#include <errno.h>
#include <fcntl.h>

#include <cutils/open_memstream.h>

#ifdef HAVE_ANDROID_OS
# define UPDATE_MAGIC_PAGE      1
#endif

/*
 * File format:
 *  header
 *  record 0
 *  record 1
 *  ...
 *
 * Header format:
 *  u4  magic ('SLOW')
 *  u2  version
 *  u2  offset to data
 *  u8  start date/time in usec
 *  u2  record size in bytes (version >= 3 only)
 *  ... padding to 32 bytes
 *
 * Record format v1:
 *  u1  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *
 * Record format v2:
 *  u2  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *
 * Record format v3:
 *  u2  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *  u4  wall time since start, in usec (when clock == "dual" only)
 *
 * 32 bits of microseconds covers about 70 minutes.
 *
 * All values are stored in little-endian order.
 */
#define TRACE_REC_SIZE_SINGLE_CLOCK  10 // using v2
#define TRACE_REC_SIZE_DUAL_CLOCK    14 // using v3 with two timestamps
#define TRACE_MAGIC         0x574f4c53
#define TRACE_HEADER_LEN    32

#define FILL_PATTERN        0xeeeeeeee

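/*
 * Illustrative sketch, disabled from the build: one way an offline tool
 * might decode the fixed fields of a v2/v3 record, assuming the
 * little-endian layout documented above.  The struct and function names
 * here are invented for illustration; METHOD_ID and METHOD_ACTION come
 * from Profile.h, and the action codes (0=enter, 1=exit, 2=unroll) are
 * the ones described in dvmEmitEmulatorTrace below.
 */
#if 0
struct TraceRecordFields {
    u2 threadId;     /* record bytes 0-1, little-endian */
    u4 methodAddr;   /* METHOD_ID() portion of the packed method word */
    int action;      /* METHOD_ACTION() portion: 0=enter, 1=exit, 2=unroll */
};

static void sketchDecodeTraceRecord(const u1* rec, TraceRecordFields* out)
{
    /* bytes 2-5 pack the method pointer with the action in the low bits */
    u4 packed = rec[2] | (rec[3] << 8) | (rec[4] << 16) | ((u4) rec[5] << 24);
    out->threadId = (u2) (rec[0] | (rec[1] << 8));
    out->methodAddr = METHOD_ID(packed);
    out->action = (int) METHOD_ACTION(packed);
}
#endif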

/*
 * Returns true if the thread CPU clock should be used.
 */
static inline bool useThreadCpuClock() {
#if defined(HAVE_POSIX_CLOCKS)
    return gDvm.profilerClockSource != kProfilerClockSourceWall;
#else
    return false;
#endif
}

/*
 * Returns true if the wall clock should be used.
 */
static inline bool useWallClock() {
#if defined(HAVE_POSIX_CLOCKS)
    return gDvm.profilerClockSource != kProfilerClockSourceThreadCpu;
#else
    return true;
#endif
}

/*
 * Get the wall-clock date/time, in usec.
 */
static inline u8 getWallTimeInUsec()
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return tv.tv_sec * 1000000LL + tv.tv_usec;
}

#if defined(HAVE_POSIX_CLOCKS)
/*
 * Get the thread-cpu time, in usec.
 * We use this clock when we can because it enables us to track the time that
 * a thread spends running and not blocked.
 */
static inline u8 getThreadCpuTimeInUsec()
{
    struct timespec tm;

    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tm);
    if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
        LOGE("bad nsec: %ld", tm.tv_nsec);
        dvmAbort();
    }
    return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000;
}
#endif

/*
 * Get the clock used for stopwatch-like timing measurements on a single thread.
 */
static inline u8 getStopwatchClock()
{
#if defined(HAVE_POSIX_CLOCKS)
    return getThreadCpuTimeInUsec();
#else
    return getWallTimeInUsec();
#endif
}

/*
 * Write little-endian data.
 */
static inline void storeShortLE(u1* buf, u2 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
}
static inline void storeIntLE(u1* buf, u4 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
    *buf++ = (u1) (val >> 16);
    *buf++ = (u1) (val >> 24);
}
static inline void storeLongLE(u1* buf, u8 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
    *buf++ = (u1) (val >> 16);
    *buf++ = (u1) (val >> 24);
    *buf++ = (u1) (val >> 32);
    *buf++ = (u1) (val >> 40);
    *buf++ = (u1) (val >> 48);
    *buf++ = (u1) (val >> 56);
}
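
/*
 * For example, storeIntLE(buf, TRACE_MAGIC) emits the bytes
 * 0x53 0x4c 0x4f 0x57 -- "SLOW" -- which is how the magic value in the
 * header shows up readable in a hex dump of the trace file.
 */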

/*
 * Boot-time init.
 */
bool dvmProfilingStartup()
{
    /*
     * Initialize "dmtrace" method profiling.
     */
    memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace));
    dvmInitMutex(&gDvm.methodTrace.startStopLock);
    pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL);

    assert(!dvmCheckException(dvmThreadSelf()));

    /*
     * Allocate storage for instruction counters.
     */
    gDvm.executedInstrCounts = (int*) malloc(kNumPackedOpcodes * sizeof(int));
    if (gDvm.executedInstrCounts == NULL)
        return false;
    memset(gDvm.executedInstrCounts, 0, kNumPackedOpcodes * sizeof(int));

#ifdef UPDATE_MAGIC_PAGE
    /*
     * If we're running on the emulator, there's a magic page into which
     * we can put interpreted method information.  This allows interpreted
     * methods to show up in the emulator's code traces.
     *
     * We could key this off of the "ro.kernel.qemu" property, but there's
     * no real harm in doing this on a real device.
     */
    int fd = open("/dev/qemu_trace", O_RDWR);
    if (fd < 0) {
        LOGV("Unable to open /dev/qemu_trace");
    } else {
        gDvm.emulatorTracePage = mmap(0, SYSTEM_PAGE_SIZE, PROT_READ|PROT_WRITE,
                                      MAP_SHARED, fd, 0);
        close(fd);
        if (gDvm.emulatorTracePage == MAP_FAILED) {
            LOGE("Unable to mmap /dev/qemu_trace");
            gDvm.emulatorTracePage = NULL;
        } else {
            *(u4*) gDvm.emulatorTracePage = 0;
        }
    }
#else
    assert(gDvm.emulatorTracePage == NULL);
#endif

    return true;
}

/*
 * Free up profiling resources.
 */
void dvmProfilingShutdown()
{
#ifdef UPDATE_MAGIC_PAGE
    if (gDvm.emulatorTracePage != NULL)
        munmap(gDvm.emulatorTracePage, SYSTEM_PAGE_SIZE);
#endif
    free(gDvm.executedInstrCounts);
}

/*
 * Update the set of active profilers.
 */
static void updateActiveProfilers(ExecutionSubModes newMode, bool enable)
{
    int oldValue, newValue;

    // Update global count
    do {
        oldValue = gDvm.activeProfilers;
        newValue = oldValue + (enable ? 1 : -1);
        if (newValue < 0) {
            LOGE("Can't have %d active profilers", newValue);
            dvmAbort();
        }
    } while (android_atomic_release_cas(oldValue, newValue,
            &gDvm.activeProfilers) != 0);

    // Tell the threads
    if (enable) {
        dvmEnableAllSubMode(newMode);
    } else {
        dvmDisableAllSubMode(newMode);
    }

#if defined(WITH_JIT)
    dvmCompilerUpdateGlobalState();
#endif

    LOGD("+++ active profiler count now %d", newValue);
}


/*
 * Reset the "cpuClockBase" field in all threads.
 */
static void resetCpuClockBase()
{
    Thread* thread;

    dvmLockThreadList(NULL);
    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        thread->cpuClockBaseSet = false;
        thread->cpuClockBase = 0;
    }
    dvmUnlockThreadList();
}

/*
 * Dump the thread list to the specified file.
 */
static void dumpThreadList(FILE* fp) {
    dvmLockThreadList(NULL);
    for (Thread* thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        std::string threadName(dvmGetThreadName(thread));
        fprintf(fp, "%d\t%s\n", thread->threadId, threadName.c_str());
    }
    dvmUnlockThreadList();
}

/*
 * This is a dvmHashForeach callback.
 */
static int dumpMarkedMethods(void* vclazz, void* vfp)
{
    DexStringCache stringCache;
    ClassObject* clazz = (ClassObject*) vclazz;
    FILE* fp = (FILE*) vfp;
    Method* meth;
    char* name;
    int i;

    dexStringCacheInit(&stringCache);

    for (i = 0; i < clazz->virtualMethodCount; i++) {
        meth = &clazz->virtualMethods[i];
        if (meth->inProfile) {
            name = dvmDescriptorToName(meth->clazz->descriptor);
            fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
                name, meth->name,
                dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
                dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
            meth->inProfile = false;
            free(name);
        }
    }

    for (i = 0; i < clazz->directMethodCount; i++) {
        meth = &clazz->directMethods[i];
        if (meth->inProfile) {
            name = dvmDescriptorToName(meth->clazz->descriptor);
            fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
                name, meth->name,
                dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
                dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
            meth->inProfile = false;
            free(name);
        }
    }

    dexStringCacheRelease(&stringCache);

    return 0;
}

/*
 * Dump the list of "marked" methods to the specified file.
 */
static void dumpMethodList(FILE* fp)
{
    dvmHashTableLock(gDvm.loadedClasses);
    dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp);
    dvmHashTableUnlock(gDvm.loadedClasses);
}

/*
 * Start method tracing.  Method tracing is global to the VM (i.e. we
 * trace all threads).
 *
 * This opens the output file (if an already-open fd has not been supplied,
 * and we're not going directly to DDMS) and allocates the data buffer.
 * This takes ownership of the file descriptor, closing it on completion.
 *
 * On failure, we throw an exception and return.
 */
void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
    int flags, bool directToDdms)
{
    MethodTraceState* state = &gDvm.methodTrace;

    assert(bufferSize > 0);

    dvmLockMutex(&state->startStopLock);
    while (state->traceEnabled != 0) {
        LOGI("TRACE start requested, but already in progress; stopping");
        dvmUnlockMutex(&state->startStopLock);
        dvmMethodTraceStop();
        dvmLockMutex(&state->startStopLock);
    }
    /*
     * ENHANCEMENT: To trace just a single thread, modify the
     * following to take a Thread* argument, and set the appropriate
     * interpBreak flags only on the target thread.
     */
    updateActiveProfilers(kSubModeMethodTrace, true);
    LOGI("TRACE STARTED: '%s' %dKB", traceFileName, bufferSize / 1024);

    /*
     * Allocate storage and open files.
     *
     * We don't need to initialize the buffer, but doing so might remove
     * some fault overhead if the pages aren't mapped until touched.
     */
    state->buf = (u1*) malloc(bufferSize);
    if (state->buf == NULL) {
        dvmThrowInternalError("buffer alloc failed");
        goto fail;
    }
    if (!directToDdms) {
        if (traceFd < 0) {
            state->traceFile = fopen(traceFileName, "w");
        } else {
            state->traceFile = fdopen(traceFd, "w");
        }
        if (state->traceFile == NULL) {
            int err = errno;
            LOGE("Unable to open trace file '%s': %s",
                traceFileName, strerror(err));
            dvmThrowExceptionFmt(gDvm.exRuntimeException,
                "Unable to open trace file '%s': %s",
                traceFileName, strerror(err));
            goto fail;
        }
    }
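    /*
     * If we created a FILE* from traceFd above, it now owns the descriptor,
     * so clear our copy to keep the failure path from closing it twice.
     */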
    traceFd = -1;
    memset(state->buf, (char)FILL_PATTERN, bufferSize);

    state->directToDdms = directToDdms;
    state->bufferSize = bufferSize;
    state->overflow = false;

    /*
     * Enable alloc counts if we've been requested to do so.
     */
    state->flags = flags;
    if ((flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStartAllocCounting();

    /* reset our notion of the start time for all CPU threads */
    resetCpuClockBase();

    state->startWhen = getWallTimeInUsec();

    if (useThreadCpuClock() && useWallClock()) {
        state->traceVersion = 3;
        state->recordSize = TRACE_REC_SIZE_DUAL_CLOCK;
    } else {
        state->traceVersion = 2;
        state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
    }

    /*
     * Output the header.
     */
    memset(state->buf, 0, TRACE_HEADER_LEN);
    storeIntLE(state->buf + 0, TRACE_MAGIC);
    storeShortLE(state->buf + 4, state->traceVersion);
    storeShortLE(state->buf + 6, TRACE_HEADER_LEN);
    storeLongLE(state->buf + 8, state->startWhen);
    if (state->traceVersion >= 3) {
        storeShortLE(state->buf + 16, state->recordSize);
    }
    state->curOffset = TRACE_HEADER_LEN;

    /*
     * Set the "enabled" flag.  Once we do this, threads will wait to be
     * signaled before exiting, so we have to make sure we wake them up.
     */
    android_atomic_release_store(true, &state->traceEnabled);
    dvmUnlockMutex(&state->startStopLock);
    return;

fail:
    updateActiveProfilers(kSubModeMethodTrace, false);
    if (state->traceFile != NULL) {
        fclose(state->traceFile);
        state->traceFile = NULL;
    }
    if (state->buf != NULL) {
        free(state->buf);
        state->buf = NULL;
    }
    if (traceFd >= 0)
        close(traceFd);
    dvmUnlockMutex(&state->startStopLock);
}

/*
 * Run through the data buffer and pull out the methods that were visited.
 * Set a mark so that we know which ones to output.
 */
static void markTouchedMethods(int endOffset)
{
    u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN;
    u1* end = gDvm.methodTrace.buf + endOffset;
    size_t recordSize = gDvm.methodTrace.recordSize;
    unsigned int methodVal;
    Method* method;

    while (ptr < end) {
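        /* skip the u2 thread ID in bytes 0-1; bytes 2-5 hold method ID | action */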
        methodVal = ptr[2] | (ptr[3] << 8) | (ptr[4] << 16)
                    | (ptr[5] << 24);
        method = (Method*) METHOD_ID(methodVal);

        method->inProfile = true;
        ptr += recordSize;
    }
}

/*
 * Exercises the clocks in the same way they will be during profiling.
 */
static inline void measureClockOverhead()
{
#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        getThreadCpuTimeInUsec();
    }
#endif
    if (useWallClock()) {
        getWallTimeInUsec();
    }
}

/*
 * Compute the amount of overhead in a clock call, in nsec.
 *
 * This value is going to vary depending on what else is going on in the
 * system.  When examined across several runs a pattern should emerge.
 */
static u4 getClockOverhead()
{
    u8 calStart, calElapsed;
    int i;

    calStart = getStopwatchClock();
    for (i = 1000 * 4; i > 0; i--) {
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
    }

    calElapsed = getStopwatchClock() - calStart;
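    /*
     * 4000 iterations x 8 calls = 32,000 calls.  calElapsed is in usec, so
     * dividing by 8*4 both converts to nsec (x1000) and averages over the
     * 32,000 calls, yielding nsec per call.
     */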
    return (int) (calElapsed / (8*4));
}

/*
 * Returns "true" if method tracing is currently active.
 */
bool dvmIsMethodTraceActive()
{
    const MethodTraceState* state = &gDvm.methodTrace;
    return state->traceEnabled;
}

/*
 * Stop method tracing.  We write the buffer to disk and generate a key
 * file so we can interpret it.
 */
void dvmMethodTraceStop()
{
    MethodTraceState* state = &gDvm.methodTrace;
    u8 elapsed;

    /*
     * We need this to prevent somebody from starting a new trace while
     * we're in the process of stopping the old one.
     */
    dvmLockMutex(&state->startStopLock);

    if (!state->traceEnabled) {
        /* somebody already stopped it, or it was never started */
        LOGD("TRACE stop requested, but not running");
        dvmUnlockMutex(&state->startStopLock);
        return;
    } else {
        updateActiveProfilers(kSubModeMethodTrace, false);
    }

    /* compute elapsed time */
    elapsed = getWallTimeInUsec() - state->startWhen;

    /*
     * Globally disable it, and allow other threads to notice.  We want
     * to stall here for at least as long as dvmMethodTraceAdd needs
     * to finish.  There's no real risk though -- it will take a while to
     * write the data to disk, and we don't clear the buffer pointer until
     * after that completes.
     */
    state->traceEnabled = false;
    ANDROID_MEMBAR_FULL();
    sched_yield();
    usleep(250 * 1000);

    if ((state->flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStopAllocCounting();

    /*
     * It's possible under some circumstances for a thread to have advanced
     * the data pointer but not written the method value.  It's possible
     * (though less likely) for the data pointer to be advanced, or partial
     * data written, while we're doing work here.
     *
     * To avoid seeing partially-written data, we grab state->curOffset here,
     * and use our local copy from here on.  We then scan through what's
     * already written.  If we see the fill pattern in what should be the
     * method pointer, we cut things off early.  (If we don't, we'll fail
     * when we dereference the pointer.)
     *
     * There's a theoretical possibility of interrupting another thread
     * after it has partially written the method pointer, in which case
     * we'll likely crash when we dereference it.  The possibility of
     * this actually happening should be at or near zero.  Fixing it
     * completely could be done by writing the thread number last and
     * using a sentinel value to indicate a partially-written record,
     * but that requires memory barriers.
     */
    int finalCurOffset = state->curOffset;

    size_t recordSize = state->recordSize;
    if (finalCurOffset > TRACE_HEADER_LEN) {
        u4 fillVal = METHOD_ID(FILL_PATTERN);
        u1* scanPtr = state->buf + TRACE_HEADER_LEN;

        while (scanPtr < state->buf + finalCurOffset) {
            u4 methodVal = scanPtr[2] | (scanPtr[3] << 8) | (scanPtr[4] << 16)
                        | (scanPtr[5] << 24);
            if (METHOD_ID(methodVal) == fillVal) {
                u1* scanBase = state->buf + TRACE_HEADER_LEN;
                LOGW("Found unfilled record at %d (of %d)",
                    (scanPtr - scanBase) / recordSize,
                    (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
                finalCurOffset = scanPtr - state->buf;
                break;
            }

            scanPtr += recordSize;
        }
    }

    LOGI("TRACE STOPPED%s: writing %d records",
        state->overflow ? " (NOTE: overflowed buffer)" : "",
        (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
    if (gDvm.debuggerActive) {
        LOGW("WARNING: a debugger is active; method-tracing results "
             "will be skewed");
    }

    /*
     * Do a quick calibration test to see how expensive our clock call is.
     */
    u4 clockNsec = getClockOverhead();

    markTouchedMethods(finalCurOffset);

    char* memStreamPtr;
    size_t memStreamSize;
    if (state->directToDdms) {
        assert(state->traceFile == NULL);
        state->traceFile = open_memstream(&memStreamPtr, &memStreamSize);
        if (state->traceFile == NULL) {
            /* not expected */
            LOGE("Unable to open memstream");
            dvmAbort();
        }
    }
    assert(state->traceFile != NULL);

    fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR);
    fprintf(state->traceFile, "%d\n", state->traceVersion);
    fprintf(state->traceFile, "data-file-overflow=%s\n",
        state->overflow ? "true" : "false");
    if (useThreadCpuClock()) {
        if (useWallClock()) {
            fprintf(state->traceFile, "clock=dual\n");
        } else {
            fprintf(state->traceFile, "clock=thread-cpu\n");
        }
    } else {
        fprintf(state->traceFile, "clock=wall\n");
    }
    fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed);
    fprintf(state->traceFile, "num-method-calls=%d\n",
        (finalCurOffset - TRACE_HEADER_LEN) / state->recordSize);
    fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec);
    fprintf(state->traceFile, "vm=dalvik\n");
    if ((state->flags & TRACE_ALLOC_COUNTS) != 0) {
        fprintf(state->traceFile, "alloc-count=%d\n",
            gDvm.allocProf.allocCount);
        fprintf(state->traceFile, "alloc-size=%d\n",
            gDvm.allocProf.allocSize);
        fprintf(state->traceFile, "gc-count=%d\n",
            gDvm.allocProf.gcCount);
    }
    fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR);
    dumpThreadList(state->traceFile);
    fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR);
    dumpMethodList(state->traceFile);
    fprintf(state->traceFile, "%cend\n", TOKEN_CHAR);

    if (state->directToDdms) {
        /*
         * Data is in two places: memStreamPtr and state->buf.  Send
         * the whole thing to DDMS, wrapped in an MPSE packet.
         */
        fflush(state->traceFile);

        struct iovec iov[2];
        iov[0].iov_base = memStreamPtr;
        iov[0].iov_len = memStreamSize;
        iov[1].iov_base = state->buf;
        iov[1].iov_len = finalCurOffset;
        dvmDbgDdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
    } else {
        /* append the profiling data */
        if (fwrite(state->buf, finalCurOffset, 1, state->traceFile) != 1) {
            int err = errno;
            LOGE("trace fwrite(%d) failed: %s",
                finalCurOffset, strerror(err));
            dvmThrowExceptionFmt(gDvm.exRuntimeException,
                "Trace data write failed: %s", strerror(err));
        }
    }

    /* done! */
    free(state->buf);
    state->buf = NULL;
    fclose(state->traceFile);
    state->traceFile = NULL;

    /* wake any threads that were waiting for profiling to complete */
    dvmBroadcastCond(&state->threadExitCond);
    dvmUnlockMutex(&state->startStopLock);
}

/*
 * We just did something with a method.  Emit a record.
 *
 * Multiple threads may be banging on this all at once.  We use atomic ops
 * rather than mutexes for speed.
 */
void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
{
    MethodTraceState* state = &gDvm.methodTrace;
    u4 methodVal;
    int oldOffset, newOffset;
    u1* ptr;

    assert(method != NULL);

#if defined(HAVE_POSIX_CLOCKS)
    /*
     * We can only access the per-thread CPU clock from within the
     * thread, so we have to initialize the base time on the first use.
     * (Looks like pthread_getcpuclockid(thread, &id) will do what we
     * want, but it doesn't appear to be defined on the device.)
     */
    if (!self->cpuClockBaseSet) {
        self->cpuClockBase = getThreadCpuTimeInUsec();
        self->cpuClockBaseSet = true;
        //LOGI("thread base id=%d 0x%llx",
        //    self->threadId, self->cpuClockBase);
    }
#endif

    /*
     * Advance "curOffset" atomically.
     */
    do {
        oldOffset = state->curOffset;
        newOffset = oldOffset + state->recordSize;
        if (newOffset > state->bufferSize) {
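            /* buffer is full; drop the record (curOffset was never advanced) */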
            state->overflow = true;
            return;
        }
    } while (android_atomic_release_cas(oldOffset, newOffset,
            &state->curOffset) != 0);

    //assert(METHOD_ACTION((u4) method) == 0);

    methodVal = METHOD_COMBINE((u4) method, action);

    /*
     * Write data into "oldOffset".
     */
    ptr = state->buf + oldOffset;
    *ptr++ = (u1) self->threadId;
    *ptr++ = (u1) (self->threadId >> 8);
    *ptr++ = (u1) methodVal;
    *ptr++ = (u1) (methodVal >> 8);
    *ptr++ = (u1) (methodVal >> 16);
    *ptr++ = (u1) (methodVal >> 24);

#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        u4 cpuClockDiff = (u4) (getThreadCpuTimeInUsec() - self->cpuClockBase);
        *ptr++ = (u1) cpuClockDiff;
        *ptr++ = (u1) (cpuClockDiff >> 8);
        *ptr++ = (u1) (cpuClockDiff >> 16);
        *ptr++ = (u1) (cpuClockDiff >> 24);
    }
#endif

    if (useWallClock()) {
        u4 wallClockDiff = (u4) (getWallTimeInUsec() - state->startWhen);
        *ptr++ = (u1) wallClockDiff;
        *ptr++ = (u1) (wallClockDiff >> 8);
        *ptr++ = (u1) (wallClockDiff >> 16);
        *ptr++ = (u1) (wallClockDiff >> 24);
    }
}


/*
 * Register the METHOD_TRACE_ENTER action for the fast interpreter and
 * JIT'ed code.
 */
void dvmFastMethodTraceEnter(const Method* method, Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER);
    }
}

/*
 * Register the METHOD_TRACE_EXIT action for the fast interpreter and
 * JIT'ed code for methods. The about-to-return callee method can be
 * retrieved from self->interpSave.method.
 */
void dvmFastMethodTraceExit(Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        dvmMethodTraceAdd(self, self->interpSave.method,
                          METHOD_TRACE_EXIT);
    }
}

/*
 * Register the METHOD_TRACE_EXIT action for the fast interpreter and
 * JIT'ed code for JNI methods. The about-to-return JNI callee method is passed
 * in explicitly.  Also used for inline-execute.
 */
void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT);
    }
}

/*
 * We just did something with a method.  Emit a record by setting a value
 * in a magic memory location.
 */
void dvmEmitEmulatorTrace(const Method* method, int action)
{
#ifdef UPDATE_MAGIC_PAGE
    /*
     * We store the address of the Dalvik bytecodes to the memory-mapped
     * trace page for normal methods.  We also trace calls to native
     * functions by storing the address of the native function to the
     * trace page.
     * Abstract methods don't have any bytecodes, so we don't trace them.
     * (Abstract methods are never called, but in Dalvik they can be
     * because we do a "late trap" to a native method to generate the
     * abstract method exception.)
     */
    if (dvmIsAbstractMethod(method))
        return;

    u4* pMagic = (u4*) gDvm.emulatorTracePage;
    u4 addr;

    if (dvmIsNativeMethod(method)) {
        /*
         * The "action" parameter is one of:
         *   0 = ENTER
         *   1 = EXIT
         *   2 = UNROLL
         * To help the trace tools reconstruct the runtime stack containing
         * a mix of normal plus native methods, we add 4 to the action if this
         * is a native method.
         */
        action += 4;

        /*
         * Get the address of the native function.
         * This isn't the right address -- how do I get it?
         * Fortunately, the trace tools can get by without the address, but
         * it would be nice to fix this.
         */
        addr = (u4) method->nativeFunc;
    } else {
        /*
         * The dexlist output shows the &DexCode.insns offset value, which
         * is offset from the start of the base DEX header. Method.insns
         * is the absolute address, effectively offset from the start of
         * the optimized DEX header. We either need to return the
         * optimized DEX base file address offset by the right amount, or
         * take the "real" address and subtract off the size of the
         * optimized DEX header.
         *
         * Would be nice to factor this out at dexlist time, but we can't count
         * on having access to the correct optimized DEX file.
         */
        assert(method->insns != NULL);
        const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader;
        addr = (u4) method->insns - pOptHdr->dexOffset;
    }

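    /*
     * One u4 slot per action code (0-2 for bytecode methods, 4-6 for
     * native); the emulator watches stores to this page to build its trace.
     */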
    *(pMagic+action) = addr;
    LOGVV("Set %p = 0x%08x (%s.%s)",
        pMagic+action, addr, method->clazz->descriptor, method->name);
#endif
}

/*
 * The GC calls this when it's about to start.  We add a marker to the
 * trace output so the tool can exclude the GC cost from the results.
 */
void dvmMethodTraceGCBegin()
{
    TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceGcMethod);
}
void dvmMethodTraceGCEnd()
{
    TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceGcMethod);
}

/*
 * The class loader calls this when it's loading or initializing a class.
 */
void dvmMethodTraceClassPrepBegin()
{
    TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
}
void dvmMethodTraceClassPrepEnd()
{
    TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
}


/*
 * Enable emulator trace info.
 */
void dvmEmulatorTraceStart()
{
    /* If we could not map the emulator trace page, then do not enable tracing */
    if (gDvm.emulatorTracePage == NULL)
        return;

    /* in theory we should make this an atomic inc; in practice not important */
    gDvm.emulatorTraceEnableCount++;
    if (gDvm.emulatorTraceEnableCount == 1)
        LOGD("--- emulator method traces enabled");
    updateActiveProfilers(kSubModeEmulatorTrace, true);
}

/*
 * Disable emulator trace info.
 */
void dvmEmulatorTraceStop()
{
    if (gDvm.emulatorTraceEnableCount == 0) {
        LOGE("ERROR: emulator tracing not enabled");
        return;
    }
    /* in theory we should make this an atomic decrement; in practice not important */
    gDvm.emulatorTraceEnableCount--;
    if (gDvm.emulatorTraceEnableCount == 0)
        LOGD("--- emulator method traces disabled");
    updateActiveProfilers(kSubModeEmulatorTrace,
                          (gDvm.emulatorTraceEnableCount != 0));
}


/*
 * Start instruction counting.
 */
void dvmStartInstructionCounting()
{
    /* in theory we should make this an atomic inc; in practice not important */
    gDvm.instructionCountEnableCount++;
    updateActiveProfilers(kSubModeInstCounting, true);
}

/*
 * Stop instruction counting.
 */
void dvmStopInstructionCounting()
{
    if (gDvm.instructionCountEnableCount == 0) {
        LOGE("ERROR: instruction counting not enabled");
        dvmAbort();
    }
    gDvm.instructionCountEnableCount--;
    updateActiveProfilers(kSubModeInstCounting,
                          (gDvm.instructionCountEnableCount != 0));
}


/*
 * Start alloc counting.  Note this doesn't affect the "active profilers"
 * count, since the interpreter loop is not involved.
 */
void dvmStartAllocCounting()
{
    gDvm.allocProf.enabled = true;
}

/*
 * Stop alloc counting.
 */
void dvmStopAllocCounting()
{
    gDvm.allocProf.enabled = false;
}