1/* Copyright (C) 2006-2007 The Android Open Source Project
2**
3** This software is licensed under the terms of the GNU General Public
4** License version 2, as published by the Free Software Foundation, and
5** may be copied, distributed, and modified under those terms.
6**
7** This program is distributed in the hope that it will be useful,
8** but WITHOUT ANY WARRANTY; without even the implied warranty of
9** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10** GNU General Public License for more details.
11*/
12
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <limits.h>
17#include <inttypes.h>
18#include <sys/stat.h>
19#include <sys/types.h>
20#include <errno.h>
21#include <sys/time.h>
22#include <time.h>
23#include "cpu.h"
24#include "exec-all.h"
25#include "android-trace.h"
26#include "varint.h"
27#include "android/utils/path.h"
28
29// For tracing dynamic execution of basic blocks
30typedef struct TraceBB {
31    char        *filename;
32    FILE        *fstream;
33    BBRec       buffer[kMaxNumBasicBlocks];
34    BBRec       *next;          // points to next record in buffer
35    uint64_t    flush_time;     // time of last buffer flush
36    char        compressed[kCompressedSize];
37    char        *compressed_ptr;
38    char        *high_water_ptr;
39    int64_t     prev_bb_num;
40    uint64_t    prev_bb_time;
41    uint64_t    current_bb_num;
42    uint64_t    current_bb_start_time;
43    uint64_t    recnum;         // counts number of trace records
44    uint32_t    current_bb_addr;
45    int         num_insns;
46} TraceBB;
47
48// For tracing simuation start times of instructions
49typedef struct TraceInsn {
50    char        *filename;
51    FILE        *fstream;
52    InsnRec     dummy;          // this is here so we can use buffer[-1]
53    InsnRec     buffer[kInsnBufferSize];
54    InsnRec     *current;
55    uint64_t    prev_time;      // time of last instruction start
56    char        compressed[kCompressedSize];
57    char        *compressed_ptr;
58    char        *high_water_ptr;
59} TraceInsn;
60
61// For tracing the static information about a basic block
62typedef struct TraceStatic {
63    char        *filename;
64    FILE        *fstream;
65    uint32_t    insns[kMaxInsnPerBB];
66    int         next_insn;
67    uint64_t    bb_num;
68    uint32_t    bb_addr;
69    int         is_thumb;
70} TraceStatic;
71
72// For tracing load and store addresses
73typedef struct TraceAddr {
74    char        *filename;
75    FILE        *fstream;
76    AddrRec     buffer[kMaxNumAddrs];
77    AddrRec     *next;
78    char        compressed[kCompressedSize];
79    char        *compressed_ptr;
80    char        *high_water_ptr;
81    uint32_t    prev_addr;
82    uint64_t    prev_time;
83} TraceAddr;
84
85// For tracing exceptions
86typedef struct TraceExc {
87    char        *filename;
88    FILE        *fstream;
89    char        compressed[kCompressedSize];
90    char        *compressed_ptr;
91    char        *high_water_ptr;
92    uint64_t    prev_time;
93    uint64_t    prev_bb_recnum;
94} TraceExc;
95
96// For tracing process id changes
97typedef struct TracePid {
98    char        *filename;
99    FILE        *fstream;
100    char        compressed[kCompressedSize];
101    char        *compressed_ptr;
102    uint64_t    prev_time;
103} TracePid;
104
105// For tracing Dalvik VM method enter and exit
106typedef struct TraceMethod {
107    char        *filename;
108    FILE        *fstream;
109    char        compressed[kCompressedSize];
110    char        *compressed_ptr;
111    uint64_t    prev_time;
112    uint32_t    prev_addr;
113    int32_t     prev_pid;
114} TraceMethod;
115
116extern TraceBB trace_bb;
117extern TraceInsn trace_insn;
118extern TraceStatic trace_static;
119extern TraceAddr trace_load;
120extern TraceAddr trace_store;
121extern TraceExc trace_exc;
122extern TracePid trace_pid;
123extern TraceMethod trace_method;
124
125TraceBB trace_bb;
126TraceInsn trace_insn;
127TraceStatic trace_static;
128TraceAddr trace_load;
129TraceAddr trace_store;
130TraceExc trace_exc;
131TracePid trace_pid;
132TraceMethod trace_method;
133static TraceHeader header;
134
135const char *trace_filename;
136int tracing;
137int trace_cache_miss;
138int trace_all_addr;
139
140// The simulation time in cpu clock cycles
141uint64_t sim_time = 1;
142
143// The current process id
144int current_pid;
145
146// The start and end (wall-clock) time in microseconds
147uint64_t start_time, end_time;
148uint64_t elapsed_usecs;
149
150// For debugging output
151FILE *ftrace_debug;
152
// Upper bounds on the number of bytes one record can occupy after
// compression.  The bounds are very conservative; they are needed to
// ensure that the compression buffers never overflow.

// Bound for an InsnRec.
#define kMaxInsnCompressed 14

// Bound for a BBRec.
#define kMaxBBCompressed 32

// Bound for an AddrRec.
#define kMaxAddrCompressed 14

// Bound for a MethodRec.
#define kMaxMethodCompressed 18

// Bound for an exception record.
#define kMaxExcCompressed 38

// Bound for a pid record of type kPidSwitch or kPidExit.
#define kMaxPidCompressed 15

// Bound for a pid record of type kPidFork or kPidClone.
#define kMaxPid2Compressed 20

// Bound for a pid record of type kPidExecArgs, not counting the bytes
// for the args.
#define kMaxExecArgsCompressed 15

// Bound for a pid record of type kPidName, not counting the bytes for
// the name.
#define kMaxNameCompressed 20

// Bound for a pid record of type kPidMmap, not counting the bytes for
// the pathname.
#define kMaxMmapCompressed 33

// Bound for a pid record of type kPidMunmap.
#define kMaxMunmapCompressed 28

// Bound for a pid record of type kPidSymbol, not counting the bytes for
// the symbol name.
#define kMaxSymbolCompressed 24

// Bound for a pid record of type kPidKthreadName, not counting the bytes
// for the name.
#define kMaxKthreadNameCompressed 25
204
205void trace_cleanup();
206
207// Return current time in microseconds as a 64-bit integer.
208uint64 Now() {
209    struct timeval        tv;
210
211    gettimeofday(&tv, NULL);
212    uint64 val = tv.tv_sec;
213    val = val * 1000000ull + tv.tv_usec;
214    return val;
215}
216
// Creates the directory that will hold the trace files.  A directory that
// already exists is not an error.  Any other failure is reported and the
// process exits with status 1.
static void create_trace_dir(const char *dirname)
{
    int err = path_mkdir(dirname, 0755);

    if (err != 0 && errno != EEXIST) {
        // printf() is allowed to modify errno, so keep the value set by
        // path_mkdir() and restore it for perror().
        int saved_errno = errno;

        printf("err: %d\n", err);
        errno = saved_errno;
        perror(dirname);
        exit(1);
    }
}
228
// Builds the pathname "<filename>/qtrace<ext>" in newly allocated memory.
//
// filename: the trace directory; one trailing slash, if present, is ignored.
// ext:      the file extension including the leading dot (e.g. ".bb").
//
// Returns NULL if filename is NULL, empty or exactly "/".  Otherwise returns
// a malloc'ed string that the caller owns.  If the allocation fails the
// error is reported and the process exits with status 1, like the other
// fatal errors in this file.
static char *create_trace_path(const char *filename, const char *ext)
{
    static const char kStem[] = "/qtrace";
    const size_t stem_len = sizeof(kStem) - 1;

    // Handle error cases
    if (filename == NULL || *filename == 0 || strcmp(filename, "/") == 0)
        return NULL;

    // Ignore a trailing slash, if any.  The length is at least 1 here.
    size_t len = strlen(filename);
    if (filename[len - 1] == '/')
        len -= 1;

    // The directory part and the basename of "filename" are adjacent, so
    // the result is simply the first "len" bytes of "filename" followed by
    // "/qtrace" and the extension.  The extra byte is for the terminating
    // zero.
    size_t path_len = len + stem_len + strlen(ext) + 1;
    char *fname = malloc(path_len);
    if (fname == NULL) {
        perror("malloc");
        exit(1);
    }

    memcpy(fname, filename, len);
    memcpy(fname + len, kStem, stem_len);
    strcpy(fname + len + stem_len, ext);
    return fname;
}
272
// Packs "value" into binary-coded decimal: the tens go into bits 4 and up,
// the ones into the low four bits.
static uint32_t trace_bcd2(uint32_t value)
{
    return ((value / 10) << 4) | (value % 10);
}

// Converts "secs" (seconds since the epoch) to local time and stores it as
// binary-coded decimal:
//   *pdate = 0xYYYYMMDD  (four year digits, two month digits, two day digits)
//   *ptime = 0x00HHMMSS  (two digits each for hour, minute and second)
// If the local time cannot be determined, both outputs are set to 0.
void convert_secs_to_date_time(time_t secs, uint32_t *pdate, uint32_t *ptime)
{
    struct tm *tm = localtime(&secs);

    // localtime() returns NULL when the value cannot be represented.
    if (tm == NULL) {
        *pdate = 0;
        *ptime = 0;
        return;
    }

    // Encode the year as two pairs of digits: century and year in century.
    uint32_t year = tm->tm_year + 1900;
    uint32_t year_bcd = (trace_bcd2(year / 100) << 8) | trace_bcd2(year % 100);
    uint32_t mon_bcd = trace_bcd2(tm->tm_mon + 1);    // tm_mon is 0-based
    uint32_t day_bcd = trace_bcd2(tm->tm_mday);

    *pdate = (year_bcd << 16) | (mon_bcd << 8) | day_bcd;

    uint32_t hour_bcd = trace_bcd2(tm->tm_hour);
    uint32_t min_bcd = trace_bcd2(tm->tm_min);
    uint32_t sec_bcd = trace_bcd2(tm->tm_sec);

    *ptime = (hour_bcd << 16) | (min_bcd << 8) | sec_bcd;
}
315
316void write_trace_header(TraceHeader *header)
317{
318    TraceHeader swappedHeader;
319
320    memcpy(&swappedHeader, header, sizeof(TraceHeader));
321
322    convert32(swappedHeader.version);
323    convert32(swappedHeader.start_sec);
324    convert32(swappedHeader.start_usec);
325    convert32(swappedHeader.pdate);
326    convert32(swappedHeader.ptime);
327    convert32(swappedHeader.num_used_pids);
328    convert32(swappedHeader.first_unused_pid);
329    convert64(swappedHeader.num_static_bb);
330    convert64(swappedHeader.num_static_insn);
331    convert64(swappedHeader.num_dynamic_bb);
332    convert64(swappedHeader.num_dynamic_insn);
333    convert64(swappedHeader.elapsed_usecs);
334
335    fwrite(&swappedHeader, sizeof(TraceHeader), 1, trace_static.fstream);
336}
337
338void create_trace_bb(const char *filename)
339{
340    char *fname = create_trace_path(filename, ".bb");
341    trace_bb.filename = fname;
342
343    FILE *fstream = fopen(fname, "wb");
344    if (fstream == NULL) {
345        perror(fname);
346        exit(1);
347    }
348    trace_bb.fstream = fstream;
349    trace_bb.next = &trace_bb.buffer[0];
350    trace_bb.flush_time = 0;
351    trace_bb.compressed_ptr = trace_bb.compressed;
352    trace_bb.high_water_ptr = &trace_bb.compressed[kCompressedSize] - kMaxBBCompressed;
353    trace_bb.prev_bb_num = 0;
354    trace_bb.prev_bb_time = 0;
355    trace_bb.num_insns = 0;
356    trace_bb.recnum = 0;
357}
358
359void create_trace_insn(const char *filename)
360{
361    // Create the instruction time trace file
362    char *fname = create_trace_path(filename, ".insn");
363    trace_insn.filename = fname;
364
365    FILE *fstream = fopen(fname, "wb");
366    if (fstream == NULL) {
367        perror(fname);
368        exit(1);
369    }
370    trace_insn.fstream = fstream;
371    trace_insn.current = &trace_insn.dummy;
372    trace_insn.dummy.time_diff = 0;
373    trace_insn.dummy.repeat = 0;
374    trace_insn.prev_time = 0;
375    trace_insn.compressed_ptr = trace_insn.compressed;
376    trace_insn.high_water_ptr = &trace_insn.compressed[kCompressedSize] - kMaxInsnCompressed;
377}
378
379void create_trace_static(const char *filename)
380{
381    // Create the static basic block trace file
382    char *fname = create_trace_path(filename, ".static");
383    trace_static.filename = fname;
384
385    FILE *fstream = fopen(fname, "wb");
386    if (fstream == NULL) {
387        perror(fname);
388        exit(1);
389    }
390    trace_static.fstream = fstream;
391    trace_static.next_insn = 0;
392    trace_static.bb_num = 1;
393    trace_static.bb_addr = 0;
394
395    // Write an empty header to reserve space for it in the file.
396    // The header will be filled in later when post-processing the
397    // trace file.
398    memset(&header, 0, sizeof(TraceHeader));
399
400    // Write out the version number so that tools can detect if the trace
401    // file format is the same as what they expect.
402    header.version = TRACE_VERSION;
403
404    // Record the start time in the header now.
405    struct timeval tv;
406    struct timezone tz;
407    gettimeofday(&tv, &tz);
408    header.start_sec = tv.tv_sec;
409    header.start_usec = tv.tv_usec;
410    convert_secs_to_date_time(header.start_sec, &header.pdate, &header.ptime);
411    write_trace_header(&header);
412
413    // Write out the record for the unused basic block number 0.
414    uint64_t zero = 0;
415    fwrite(&zero, sizeof(uint64_t), 1, trace_static.fstream);	// bb_num
416    fwrite(&zero, sizeof(uint32_t), 1, trace_static.fstream);	// bb_addr
417    fwrite(&zero, sizeof(uint32_t), 1, trace_static.fstream);	// num_insns
418}
419
420void create_trace_addr(const char *filename)
421{
422    // The "qtrace.load" and "qtrace.store" files are optional
423    trace_load.fstream = NULL;
424    trace_store.fstream = NULL;
425    if (trace_all_addr || trace_cache_miss) {
426        // Create the "qtrace.load" file
427        char *fname = create_trace_path(filename, ".load");
428        trace_load.filename = fname;
429
430        FILE *fstream = fopen(fname, "wb");
431        if (fstream == NULL) {
432            perror(fname);
433            exit(1);
434        }
435        trace_load.fstream = fstream;
436        trace_load.next = &trace_load.buffer[0];
437        trace_load.compressed_ptr = trace_load.compressed;
438        trace_load.high_water_ptr = &trace_load.compressed[kCompressedSize] - kMaxAddrCompressed;
439        trace_load.prev_addr = 0;
440        trace_load.prev_time = 0;
441
442        // Create the "qtrace.store" file
443        fname = create_trace_path(filename, ".store");
444        trace_store.filename = fname;
445
446        fstream = fopen(fname, "wb");
447        if (fstream == NULL) {
448            perror(fname);
449            exit(1);
450        }
451        trace_store.fstream = fstream;
452        trace_store.next = &trace_store.buffer[0];
453        trace_store.compressed_ptr = trace_store.compressed;
454        trace_store.high_water_ptr = &trace_store.compressed[kCompressedSize] - kMaxAddrCompressed;
455        trace_store.prev_addr = 0;
456        trace_store.prev_time = 0;
457    }
458}
459
460void create_trace_exc(const char *filename)
461{
462    // Create the exception trace file
463    char *fname = create_trace_path(filename, ".exc");
464    trace_exc.filename = fname;
465
466    FILE *fstream = fopen(fname, "wb");
467    if (fstream == NULL) {
468        perror(fname);
469        exit(1);
470    }
471    trace_exc.fstream = fstream;
472    trace_exc.compressed_ptr = trace_exc.compressed;
473    trace_exc.high_water_ptr = &trace_exc.compressed[kCompressedSize] - kMaxExcCompressed;
474    trace_exc.prev_time = 0;
475    trace_exc.prev_bb_recnum = 0;
476}
477
478void create_trace_pid(const char *filename)
479{
480    // Create the pid trace file
481    char *fname = create_trace_path(filename, ".pid");
482    trace_pid.filename = fname;
483
484    FILE *fstream = fopen(fname, "wb");
485    if (fstream == NULL) {
486        perror(fname);
487        exit(1);
488    }
489    trace_pid.fstream = fstream;
490    trace_pid.compressed_ptr = trace_pid.compressed;
491    trace_pid.prev_time = 0;
492}
493
494void create_trace_method(const char *filename)
495{
496    // Create the method trace file
497    char *fname = create_trace_path(filename, ".method");
498    trace_method.filename = fname;
499
500    FILE *fstream = fopen(fname, "wb");
501    if (fstream == NULL) {
502        perror(fname);
503        exit(1);
504    }
505    trace_method.fstream = fstream;
506    trace_method.compressed_ptr = trace_method.compressed;
507    trace_method.prev_time = 0;
508    trace_method.prev_addr = 0;
509    trace_method.prev_pid = 0;
510}
511
512void trace_init(const char *filename)
513{
514    // Create the trace files
515    create_trace_dir(filename);
516    create_trace_bb(filename);
517    create_trace_insn(filename);
518    create_trace_static(filename);
519    create_trace_addr(filename);
520    create_trace_exc(filename);
521    create_trace_pid(filename);
522    create_trace_method(filename);
523
524#if 0
525    char *fname = create_trace_path(filename, ".debug");
526    ftrace_debug = fopen(fname, "wb");
527    if (ftrace_debug == NULL) {
528        perror(fname);
529        exit(1);
530    }
531#else
532    ftrace_debug = NULL;
533#endif
534    atexit(trace_cleanup);
535
536    // If tracing is on, then start timing the simulator
537    if (tracing)
538        start_time = Now();
539}
540
/* The following state is used to deal with def-use register interlocks,
 * which, very fortunately, can be computed statically (ignoring
 * conditions).
 *
 * interlock_base holds the number of cycles "executed" since the start of
 * the basic block.  It is set to 0 in trace_bb_start and incremented in
 * each call to get_insn_ticks_arm.
 *
 * interlocks[N] is the value of interlock_base after which register N can
 * be used by another operation.  It is set each time an instruction writes
 * to the register in get_insn_ticks().
 */

static int   interlocks[16];
static int   interlock_base;

/* Records that register "reg" becomes usable "delay" cycles after the
 * current interlock_base.  A negative "reg" is ignored.
 */
static void
_interlock_def(int  reg, int  delay)
{
    if (reg < 0)
        return;

    interlocks[reg] = interlock_base + delay;
}

/* Returns the number of cycles that still have to pass before register
 * "reg" can be used, or 0 if it can be used now or if "reg" is negative.
 */
static int
_interlock_use(int  reg)
{
    if (reg < 0)
        return 0;

    int  remaining = interlocks[reg] - interlock_base;

    return (remaining > 0) ? remaining : 0;
}
576
577void trace_bb_start(uint32_t bb_addr)
578{
579    int  nn;
580
581    trace_static.bb_addr = bb_addr;
582    trace_static.is_thumb = 0;
583
584    interlock_base = 0;
585    for (nn = 0; nn < 16; nn++)
586        interlocks[nn] = 0;
587}
588
589void trace_add_insn(uint32_t insn, int is_thumb)
590{
591    trace_static.insns[trace_static.next_insn++] = insn;
592    // This relies on the fact that a basic block does not contain a mix
593    // of ARM and Thumb instructions.  If that is not true, then many
594    // software tools that read the trace will have to change.
595    trace_static.is_thumb = is_thumb;
596}
597
598void trace_bb_end()
599{
600    int		ii, num_insns;
601    uint32_t	insn;
602
603    uint64_t bb_num = hostToLE64(trace_static.bb_num);
604    // If these are Thumb instructions, then encode that fact by setting
605    // the low bit of the basic-block address to 1.
606    uint32_t bb_addr = trace_static.bb_addr | trace_static.is_thumb;
607    bb_addr = hostToLE32(bb_addr);
608    num_insns = hostToLE32(trace_static.next_insn);
609    fwrite(&bb_num, sizeof(bb_num), 1, trace_static.fstream);
610    fwrite(&bb_addr, sizeof(bb_addr), 1, trace_static.fstream);
611    fwrite(&num_insns, sizeof(num_insns), 1, trace_static.fstream);
612    for (ii = 0; ii < trace_static.next_insn; ++ii) {
613        insn = hostToLE32(trace_static.insns[ii]);
614        fwrite(&insn, sizeof(insn), 1, trace_static.fstream);
615    }
616
617    trace_static.bb_num += 1;
618    trace_static.next_insn = 0;
619}
620
621void trace_cleanup()
622{
623    if (tracing) {
624        end_time = Now();
625        elapsed_usecs += end_time - start_time;
626    }
627    header.elapsed_usecs = elapsed_usecs;
628    double elapsed_secs = elapsed_usecs / 1000000.0;
629    double cycles_per_sec = 0;
630    if (elapsed_secs != 0)
631        cycles_per_sec = sim_time / elapsed_secs;
632    char *suffix = "";
633    if (cycles_per_sec >= 1000000) {
634        cycles_per_sec /= 1000000.0;
635        suffix = "M";
636    } else if (cycles_per_sec > 1000) {
637        cycles_per_sec /= 1000.0;
638        suffix = "K";
639    }
640    printf("Elapsed seconds: %.2f, simulated cycles/sec: %.1f%s\n",
641           elapsed_secs, cycles_per_sec, suffix);
642    if (trace_bb.fstream) {
643        BBRec *ptr;
644        BBRec *next = trace_bb.next;
645        char *comp_ptr = trace_bb.compressed_ptr;
646        int64_t prev_bb_num = trace_bb.prev_bb_num;
647        uint64_t prev_bb_time = trace_bb.prev_bb_time;
648        for (ptr = trace_bb.buffer; ptr != next; ++ptr) {
649            if (comp_ptr >= trace_bb.high_water_ptr) {
650                uint32_t size = comp_ptr - trace_bb.compressed;
651                fwrite(trace_bb.compressed, sizeof(char), size,
652                       trace_bb.fstream);
653                comp_ptr = trace_bb.compressed;
654            }
655            int64_t bb_diff = ptr->bb_num - prev_bb_num;
656            prev_bb_num = ptr->bb_num;
657            uint64_t time_diff = ptr->start_time - prev_bb_time;
658            prev_bb_time = ptr->start_time;
659            comp_ptr = varint_encode_signed(bb_diff, comp_ptr);
660            comp_ptr = varint_encode(time_diff, comp_ptr);
661            comp_ptr = varint_encode(ptr->repeat, comp_ptr);
662            if (ptr->repeat)
663                comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
664        }
665
666        // Add an extra record at the end containing the ending simulation
667        // time and a basic block number of 0.
668        uint64_t time_diff = sim_time - prev_bb_time;
669        if (time_diff > 0) {
670            int64_t bb_diff = -prev_bb_num;
671            comp_ptr = varint_encode_signed(bb_diff, comp_ptr);
672            comp_ptr = varint_encode(time_diff, comp_ptr);
673            comp_ptr = varint_encode(0, comp_ptr);
674        }
675
676        uint32_t size = comp_ptr - trace_bb.compressed;
677        if (size)
678            fwrite(trace_bb.compressed, sizeof(char), size, trace_bb.fstream);
679
680        // Terminate the file with three zeros so that we can detect
681        // the end of file quickly.
682        uint32_t zeros = 0;
683        fwrite(&zeros, 3, 1, trace_bb.fstream);
684        fclose(trace_bb.fstream);
685    }
686
687    if (trace_insn.fstream) {
688        InsnRec *ptr;
689        InsnRec *current = trace_insn.current + 1;
690        char *comp_ptr = trace_insn.compressed_ptr;
691        for (ptr = trace_insn.buffer; ptr != current; ++ptr) {
692            if (comp_ptr >= trace_insn.high_water_ptr) {
693                uint32_t size = comp_ptr - trace_insn.compressed;
694                uint32_t rval = fwrite(trace_insn.compressed, sizeof(char),
695                                       size, trace_insn.fstream);
696                if (rval != size) {
697                    fprintf(stderr, "fwrite() failed\n");
698                    perror(trace_insn.filename);
699                    exit(1);
700                }
701                comp_ptr = trace_insn.compressed;
702            }
703            comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
704            comp_ptr = varint_encode(ptr->repeat, comp_ptr);
705        }
706
707        uint32_t size = comp_ptr - trace_insn.compressed;
708        if (size) {
709            uint32_t rval = fwrite(trace_insn.compressed, sizeof(char), size,
710                                   trace_insn.fstream);
711            if (rval != size) {
712                fprintf(stderr, "fwrite() failed\n");
713                perror(trace_insn.filename);
714                exit(1);
715            }
716        }
717        fclose(trace_insn.fstream);
718    }
719
720    if (trace_static.fstream) {
721        fseek(trace_static.fstream, 0, SEEK_SET);
722        write_trace_header(&header);
723        fclose(trace_static.fstream);
724    }
725
726    if (trace_load.fstream) {
727        AddrRec *ptr;
728        char *comp_ptr = trace_load.compressed_ptr;
729        AddrRec *next = trace_load.next;
730        uint32_t prev_addr = trace_load.prev_addr;
731        uint64_t prev_time = trace_load.prev_time;
732        for (ptr = trace_load.buffer; ptr != next; ++ptr) {
733            if (comp_ptr >= trace_load.high_water_ptr) {
734                uint32_t size = comp_ptr - trace_load.compressed;
735                fwrite(trace_load.compressed, sizeof(char), size,
736                       trace_load.fstream);
737                comp_ptr = trace_load.compressed;
738            }
739
740            int addr_diff = ptr->addr - prev_addr;
741            uint64_t time_diff = ptr->time - prev_time;
742            prev_addr = ptr->addr;
743            prev_time = ptr->time;
744
745            comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
746            comp_ptr = varint_encode(time_diff, comp_ptr);
747        }
748
749        uint32_t size = comp_ptr - trace_load.compressed;
750        if (size) {
751            fwrite(trace_load.compressed, sizeof(char), size,
752                   trace_load.fstream);
753        }
754
755        // Terminate the file with two zeros so that we can detect
756        // the end of file quickly.
757        uint32_t zeros = 0;
758        fwrite(&zeros, 2, 1, trace_load.fstream);
759        fclose(trace_load.fstream);
760    }
761
762    if (trace_store.fstream) {
763        AddrRec *ptr;
764        char *comp_ptr = trace_store.compressed_ptr;
765        AddrRec *next = trace_store.next;
766        uint32_t prev_addr = trace_store.prev_addr;
767        uint64_t prev_time = trace_store.prev_time;
768        for (ptr = trace_store.buffer; ptr != next; ++ptr) {
769            if (comp_ptr >= trace_store.high_water_ptr) {
770                uint32_t size = comp_ptr - trace_store.compressed;
771                fwrite(trace_store.compressed, sizeof(char), size,
772                       trace_store.fstream);
773                comp_ptr = trace_store.compressed;
774            }
775
776            int addr_diff = ptr->addr - prev_addr;
777            uint64_t time_diff = ptr->time - prev_time;
778            prev_addr = ptr->addr;
779            prev_time = ptr->time;
780
781            comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
782            comp_ptr = varint_encode(time_diff, comp_ptr);
783        }
784
785        uint32_t size = comp_ptr - trace_store.compressed;
786        if (size) {
787            fwrite(trace_store.compressed, sizeof(char), size,
788                   trace_store.fstream);
789        }
790
791        // Terminate the file with two zeros so that we can detect
792        // the end of file quickly.
793        uint32_t zeros = 0;
794        fwrite(&zeros, 2, 1, trace_store.fstream);
795        fclose(trace_store.fstream);
796    }
797
798    if (trace_exc.fstream) {
799        uint32_t size = trace_exc.compressed_ptr - trace_exc.compressed;
800        if (size) {
801            fwrite(trace_exc.compressed, sizeof(char), size,
802                   trace_exc.fstream);
803        }
804
805        // Terminate the file with 7 zeros so that we can detect
806        // the end of file quickly.
807        uint64_t zeros = 0;
808        fwrite(&zeros, 7, 1, trace_exc.fstream);
809        fclose(trace_exc.fstream);
810    }
811    if (trace_pid.fstream) {
812        uint32_t size = trace_pid.compressed_ptr - trace_pid.compressed;
813        if (size) {
814            fwrite(trace_pid.compressed, sizeof(char), size,
815                   trace_pid.fstream);
816        }
817
818        // Terminate the file with 2 zeros so that we can detect
819        // the end of file quickly.
820        uint64_t zeros = 0;
821        fwrite(&zeros, 2, 1, trace_pid.fstream);
822        fclose(trace_pid.fstream);
823    }
824    if (trace_method.fstream) {
825        uint32_t size = trace_method.compressed_ptr - trace_method.compressed;
826        if (size) {
827            fwrite(trace_method.compressed, sizeof(char), size,
828                   trace_method.fstream);
829        }
830
831        // Terminate the file with 2 zeros so that we can detect
832        // the end of file quickly.
833        uint64_t zeros = 0;
834        fwrite(&zeros, 2, 1, trace_method.fstream);
835        fclose(trace_method.fstream);
836    }
837    if (ftrace_debug)
838        fclose(ftrace_debug);
839}
840
// Number of clock ticks taken by some instructions.  In some cases one
// more tick has to be added if there is an interlock.  We currently do not
// check for interlocks.
#define TICKS_OTHER     1
#define TICKS_SMULxy    1
#define TICKS_SMLAWy    1
#define TICKS_SMLALxy   2
#define TICKS_MUL       2
#define TICKS_MLA       2
#define TICKS_MULS      4       // no interlock penalty
#define TICKS_MLAS      4       // no interlock penalty
#define TICKS_UMULL     3
#define TICKS_UMLAL     3
#define TICKS_SMULL     3
#define TICKS_SMLAL     3
#define TICKS_UMULLS    5       // no interlock penalty
#define TICKS_UMLALS    5       // no interlock penalty
#define TICKS_SMULLS    5       // no interlock penalty
#define TICKS_SMLALS    5       // no interlock penalty
860
861// Compute the number of cycles that this instruction will take,
862// not including any I-cache or D-cache misses.  This function
863// is called for each instruction in a basic block when that
864// block is being translated.
865int get_insn_ticks_arm(uint32_t insn)
866{
867#if 1
868    int   result   =  1;   /* by default, use 1 cycle */
869
870    /* See Chapter 12 of the ARM920T Reference Manual for details about clock cycles */
871
872    /* first check for invalid condition codes */
873    if ((insn >> 28) == 0xf)
874    {
875        if ((insn >> 25) == 0x7d) {  /* BLX */
876            result = 3;
877            goto Exit;
878        }
879        /* XXX: if we get there, we're either in an UNDEFINED instruction     */
880        /*      or in co-processor related ones. For now, only return 1 cycle */
881        goto Exit;
882    }
883
884    /* other cases */
885    switch ((insn >> 25) & 7)
886    {
887        case 0:
888            if ((insn & 0x00000090) == 0x00000090)  /* Multiplies, extra load/store, Table 3-2 */
889            {
890                /* XXX: TODO: Add support for multiplier operand content penalties in the translator */
891
892                if ((insn & 0x0fc000f0) == 0x00000090)   /* 3-2: Multiply (accumulate) */
893                {
894                    int  Rm = (insn & 15);
895                    int  Rs = (insn >> 8) & 15;
896                    int  Rn = (insn >> 12) & 15;
897
898                    if ((insn & 0x00200000) != 0) {  /* MLA */
899                        result += _interlock_use(Rn);
900                    } else {   /* MLU */
901                        if (Rn != 0)      /* UNDEFINED */
902                            goto Exit;
903                    }
904                    /* cycles=2+m, assume m=1, this should be adjusted at interpretation time */
905                    result += 2 + _interlock_use(Rm) + _interlock_use(Rs);
906                }
907                else if ((insn & 0x0f8000f0) == 0x00800090)  /* 3-2: Multiply (accumulate) long */
908                {
909                    int  Rm   = (insn & 15);
910                    int  Rs   = (insn >> 8) & 15;
911                    int  RdLo = (insn >> 12) & 15;
912                    int  RdHi = (insn >> 16) & 15;
913
914                    if ((insn & 0x00200000) != 0) { /* SMLAL & UMLAL */
915                        result += _interlock_use(RdLo) + _interlock_use(RdHi);
916                    }
917                    /* else SMLL and UMLL */
918
919                    /* cucles=3+m, assume m=1, this should be adjusted at interpretation time */
920                    result += 3 + _interlock_use(Rm) + _interlock_use(Rs);
921                }
922                else if ((insn & 0x0fd00ff0) == 0x01000090)  /* 3-2: Swap/swap byte */
923                {
924                    int  Rm = (insn & 15);
925                    int  Rd = (insn >> 8) & 15;
926
927                    result = 2 + _interlock_use(Rm);
928                    _interlock_def(Rd, result+1);
929                }
930                else if ((insn & 0x0e400ff0) == 0x00000090)  /* 3-2: load/store halfword, reg offset */
931                {
932                    int  Rm = (insn & 15);
933                    int  Rd = (insn >> 12) & 15;
934                    int  Rn = (insn >> 16) & 15;
935
936                    result += _interlock_use(Rn) + _interlock_use(Rm);
937                    if ((insn & 0x00100000) != 0)  /* it's a load, there's a 2-cycle interlock */
938                        _interlock_def(Rd, result+2);
939                }
940                else if ((insn & 0x0e400ff0) == 0x00400090)  /* 3-2: load/store halfword, imm offset */
941                {
942                    int  Rd = (insn >> 12) & 15;
943                    int  Rn = (insn >> 16) & 15;
944
945                    result += _interlock_use(Rn);
946                    if ((insn & 0x00100000) != 0)  /* it's a load, there's a 2-cycle interlock */
947                        _interlock_def(Rd, result+2);
948                }
949                else if ((insn & 0x0e500fd0) == 0x000000d0) /* 3-2: load/store two words, reg offset */
950                {
951                    /* XXX: TODO: Enhanced DSP instructions */
952                }
953                else if ((insn & 0x0e500fd0) == 0x001000d0) /* 3-2: load/store half/byte, reg offset */
954                {
955                    int  Rm = (insn & 15);
956                    int  Rd = (insn >> 12) & 15;
957                    int  Rn = (insn >> 16) & 15;
958
959                    result += _interlock_use(Rn) + _interlock_use(Rm);
960                    if ((insn & 0x00100000) != 0)  /* load, 2-cycle interlock */
961                        _interlock_def(Rd, result+2);
962                }
963                else if ((insn & 0x0e5000d0) == 0x004000d0) /* 3-2: load/store two words, imm offset */
964                {
965                    /* XXX: TODO: Enhanced DSP instructions */
966                }
967                else if ((insn & 0x0e5000d0) == 0x005000d0) /* 3-2: load/store half/byte, imm offset */
968                {
969                    int  Rd = (insn >> 12) & 15;
970                    int  Rn = (insn >> 16) & 15;
971
972                    result += _interlock_use(Rn);
973                    if ((insn & 0x00100000) != 0)  /* load, 2-cycle interlock */
974                        _interlock_def(Rd, result+2);
975                }
976                else
977                {
978                    /* UNDEFINED */
979                }
980            }
981            else if ((insn & 0x0f900000) == 0x01000000)  /* Misc. instructions, table 3-3 */
982            {
983                switch ((insn >> 4) & 15)
984                {
985                    case 0:
986                        if ((insn & 0x0fb0fff0) == 0x0120f000) /* move register to status register */
987                        {
988                            int  Rm = (insn & 15);
989                            result += _interlock_use(Rm);
990                        }
991                        break;
992
993                    case 1:
994                        if ( ((insn & 0x0ffffff0) == 0x01200010) ||  /* branch/exchange */
995                             ((insn & 0x0fff0ff0) == 0x01600010) )   /* count leading zeroes */
996                        {
997                            int  Rm = (insn & 15);
998                            result += _interlock_use(Rm);
999                        }
1000                        break;
1001
1002                    case 3:
1003                        if ((insn & 0x0ffffff0) == 0x01200030)   /* link/exchange */
1004                        {
1005                            int  Rm = (insn & 15);
1006                            result += _interlock_use(Rm);
1007                        }
1008                        break;
1009
1010                    default:
1011                        /* TODO: Enhanced DSP instructions */
1012                        ;
1013                }
1014            }
1015            else  /* Data processing */
1016            {
1017                int  Rm = (insn & 15);
1018                int  Rn = (insn >> 16) & 15;
1019
1020                result += _interlock_use(Rn) + _interlock_use(Rm);
1021                if ((insn & 0x10)) {   /* register-controlled shift => 1 cycle penalty */
1022                    int  Rs = (insn >> 8) & 15;
1023                    result += 1 + _interlock_use(Rs);
1024                }
1025            }
1026            break;
1027
1028        case 1:
1029            if ((insn & 0x01900000) == 0x01900000)
1030            {
1031                /* either UNDEFINED or move immediate to CPSR */
1032            }
1033            else  /* Data processing immediate */
1034            {
1035                int  Rn = (insn >> 12) & 15;
1036                result += _interlock_use(Rn);
1037            }
1038            break;
1039
1040        case 2:  /* load/store immediate */
1041            {
1042                int  Rn = (insn >> 16) & 15;
1043
1044                result += _interlock_use(Rn);
1045                if (insn & 0x00100000) {  /* LDR */
1046                    int  Rd = (insn >> 12) & 15;
1047
1048                    if (Rd == 15)  /* loading PC */
1049                        result = 5;
1050                    else
1051                        _interlock_def(Rd,result+1);
1052                }
1053            }
1054            break;
1055
1056        case 3:
1057            if ((insn & 0x10) == 0)  /* load/store register offset */
1058            {
1059                int  Rm = (insn & 15);
1060                int  Rn = (insn >> 16) & 15;
1061
1062                result += _interlock_use(Rm) + _interlock_use(Rn);
1063
1064                if (insn & 0x00100000) {  /* LDR */
1065                    int  Rd = (insn >> 12) & 15;
1066                    if (Rd == 15)
1067                        result = 5;
1068                    else
1069                        _interlock_def(Rd,result+1);
1070                }
1071            }
1072            /* else UNDEFINED */
1073            break;
1074
1075        case 4:  /* load/store multiple */
1076            {
1077                int       Rn   = (insn >> 16) & 15;
1078                uint32_t  mask = (insn & 0xffff);
1079                int       count;
1080
1081                for (count = 0; mask; count++)
1082                    mask &= (mask-1);
1083
1084                result += _interlock_use(Rn);
1085
1086                if (insn & 0x00100000)  /* LDM */
1087                {
1088                    int  nn;
1089
1090                    if (insn & 0x8000) {  /* loading PC */
1091                        result = count+4;
1092                    } else {  /* not loading PC */
1093                        result = (count < 2) ? 2 : count;
1094                    }
1095                    /* create defs, all registers locked until the end of the load */
1096                    for (nn = 0; nn < 15; nn++)
1097                        if ((insn & (1U << nn)) != 0)
1098                            _interlock_def(nn,result);
1099                }
1100                else  /* STM */
1101                    result = (count < 2) ? 2 : count;
1102            }
1103            break;
1104
1105        case 5:  /* branch and branch+link */
1106            break;
1107
1108        case 6:  /* coprocessor load/store */
1109            {
1110                int  Rn = (insn >> 16) & 15;
1111
1112                if (insn & 0x00100000)
1113                    result += _interlock_use(Rn);
1114
1115                /* XXX: other things to do ? */
1116            }
1117            break;
1118
1119        default: /* i.e. 7 */
1120            /* XXX: TODO: co-processor related things */
1121            ;
1122    }
1123Exit:
1124    interlock_base += result;
1125    return result;
1126#else /* old code - this seems to be completely buggy ?? */
1127    if ((insn & 0x0ff0f090) == 0x01600080) {
1128        return TICKS_SMULxy;
1129    } else if ((insn & 0x0ff00090) == 0x01200080) {
1130        return TICKS_SMLAWy;
1131    } else if ((insn & 0x0ff00090) == 0x01400080) {
1132        return TICKS_SMLALxy;
1133    } else if ((insn & 0x0f0000f0) == 0x00000090) {
1134        // multiply
1135        uint8_t bit23 = (insn >> 23) & 0x1;
1136        uint8_t bit22_U = (insn >> 22) & 0x1;
1137        uint8_t bit21_A = (insn >> 21) & 0x1;
1138        uint8_t bit20_S = (insn >> 20) & 0x1;
1139
1140        if (bit23 == 0) {
1141            // 32-bit multiply
1142            if (bit22_U != 0) {
1143                // This is an unexpected bit pattern.
1144                return TICKS_OTHER;
1145            }
1146            if (bit21_A == 0) {
1147                if (bit20_S)
1148                    return TICKS_MULS;
1149                return TICKS_MUL;
1150            }
1151            if (bit20_S)
1152                return TICKS_MLAS;
1153            return TICKS_MLA;
1154        }
1155        // 64-bit multiply
1156        if (bit22_U == 0) {
1157            // Unsigned multiply long
1158            if (bit21_A == 0) {
1159                if (bit20_S)
1160                    return TICKS_UMULLS;
1161                return TICKS_UMULL;
1162            }
1163            if (bit20_S)
1164                return TICKS_UMLALS;
1165            return TICKS_UMLAL;
1166        }
1167        // Signed multiply long
1168        if (bit21_A == 0) {
1169            if (bit20_S)
1170                return TICKS_SMULLS;
1171            return TICKS_SMULL;
1172        }
1173        if (bit20_S)
1174            return TICKS_SMLALS;
1175        return TICKS_SMLAL;
1176    }
1177    return TICKS_OTHER;
1178#endif
1179}
1180
1181int  get_insn_ticks_thumb(uint32_t  insn)
1182{
1183#if 1
1184    int  result = 1;
1185
1186    switch ((insn >> 11) & 31)
1187    {
1188        case 0:
1189        case 1:
1190        case 2:   /* Shift by immediate */
1191            {
1192                int  Rm = (insn >> 3) & 7;
1193                result += _interlock_use(Rm);
1194            }
1195            break;
1196
1197        case 3:  /* Add/Substract */
1198            {
1199                int  Rn = (insn >> 3) & 7;
1200                result += _interlock_use(Rn);
1201
1202                if ((insn & 0x0400) == 0) {  /* register value */
1203                    int  Rm = (insn >> 6) & 7;
1204                    result += _interlock_use(Rm);
1205                }
1206            }
1207            break;
1208
1209        case 4:  /* move immediate */
1210            break;
1211
1212        case 5:
1213        case 6:
1214        case 7:  /* add/substract/compare immediate */
1215            {
1216                int  Rd = (insn >> 8) & 7;
1217                result += _interlock_use(Rd);
1218            }
1219            break;
1220
1221        case 8:
1222            {
1223                if ((insn & 0x0400) == 0)  /* data processing register */
1224                {
1225                    /* the registers can also be Rs and Rn in some cases */
1226                    /* but they're always read anyway and located at the */
1227                    /* same place, so we don't check the opcode          */
1228                    int  Rm = (insn >> 3) & 7;
1229                    int  Rd = (insn >> 3) & 7;
1230
1231                    result += _interlock_use(Rm) + _interlock_use(Rd);
1232                }
1233                else switch ((insn >> 8) & 3)
1234                {
1235                    case 0:
1236                    case 1:
1237                    case 2:  /* special data processing */
1238                        {
1239                            int  Rn = (insn & 7) | ((insn >> 4) & 0x8);
1240                            int  Rm = ((insn >> 3) & 15);
1241
1242                            result += _interlock_use(Rn) + _interlock_use(Rm);
1243                        }
1244                        break;
1245
1246                    case 3:
1247                        if ((insn & 0xff07) == 0x4700)  /* branch/exchange */
1248                        {
1249                            int  Rm = (insn >> 3) & 15;
1250
1251                            result = 3 + _interlock_use(Rm);
1252                        }
1253                        /* else UNDEFINED */
1254                        break;
1255                }
1256            }
1257            break;
1258
1259        case 9:  /* load from literal pool */
1260            {
1261                int  Rd = (insn >> 8) & 7;
1262                _interlock_def(Rd,result+1);
1263            }
1264            break;
1265
1266        case 10:
1267        case 11:  /* load/store register offset */
1268            {
1269                int  Rd = (insn & 7);
1270                int  Rn = (insn >> 3) & 7;
1271                int  Rm = (insn >> 6) & 7;
1272
1273                result += _interlock_use(Rn) + _interlock_use(Rm);
1274
1275                switch ((insn >> 9) & 7)
1276                {
1277                    case 0: /* STR  */
1278                    case 1: /* STRH */
1279                    case 2: /* STRB */
1280                        result += _interlock_use(Rd);
1281                        break;
1282
1283                    case 3: /* LDRSB */
1284                    case 5: /* LDRH */
1285                    case 6: /* LDRB */
1286                    case 7: /* LDRSH */
1287                        _interlock_def(Rd,result+2);
1288                        break;
1289
1290                    case 4: /* LDR */
1291                        _interlock_def(Rd,result+1);
1292                }
1293            }
1294            break;
1295
1296        case 12:  /* store word immediate offset */
1297        case 14:  /* store byte immediate offset */
1298            {
1299                int  Rd = (insn & 7);
1300                int  Rn = (insn >> 3) & 7;
1301
1302                result += _interlock_use(Rd) + _interlock_use(Rn);
1303            }
1304            break;
1305
1306        case 13:  /* load word immediate offset */
1307            {
1308                int  Rd = (insn & 7);
1309                int  Rn = (insn >> 3) & 7;
1310
1311                result += _interlock_use(Rn);
1312                _interlock_def(Rd,result+1);
1313            }
1314            break;
1315
1316        case 15:  /* load byte immediate offset */
1317            {
1318                int  Rd = (insn & 7);
1319                int  Rn = (insn >> 3) & 7;
1320
1321                result += _interlock_use(Rn);
1322                _interlock_def(Rd,result+2);
1323            }
1324            break;
1325
1326        case 16:  /* store halfword immediate offset */
1327            {
1328                int  Rd = (insn & 7);
1329                int  Rn = (insn >> 3) & 7;
1330
1331                result += _interlock_use(Rn) + _interlock_use(Rd);
1332            }
1333            break;
1334
1335        case 17:  /* load halfword immediate offset */
1336            {
1337                int  Rd = (insn & 7);
1338                int  Rn = (insn >> 3) & 7;
1339
1340                result += _interlock_use(Rn);
1341                _interlock_def(Rd,result+2);
1342            }
1343            break;
1344
1345        case 18:  /* store to stack */
1346            {
1347                int  Rd = (insn >> 8) & 3;
1348                result += _interlock_use(Rd);
1349            }
1350            break;
1351
1352        case 19:  /* load from stack */
1353            {
1354                int  Rd = (insn >> 8) & 3;
1355                _interlock_def(Rd,result+1);
1356            }
1357            break;
1358
1359        case 20:  /* add to PC */
1360        case 21:  /* add to SP */
1361            {
1362                int  Rd = (insn >> 8) & 3;
1363                result += _interlock_use(Rd);
1364            }
1365            break;
1366
1367        case 22:
1368        case 23:  /* misc. instructions, table 6-2 */
1369            {
1370                if ((insn & 0xff00) == 0xb000)  /* adjust stack pointer */
1371                {
1372                    result += _interlock_use(14);
1373                }
1374                else if ((insn & 0x0600) == 0x0400)  /* push pop register list */
1375                {
1376                    uint32_t  mask = insn & 0x01ff;
1377                    int       count, nn;
1378
1379                    for (count = 0; mask; count++)
1380                        mask &= (mask-1);
1381
1382                    result = (count < 2) ? 2 : count;
1383
1384                    if (insn & 0x0800)  /* pop register list */
1385                    {
1386                        for (nn = 0; nn < 9; nn++)
1387                            if (insn & (1 << nn))
1388                                _interlock_def(nn, result);
1389                    }
1390                    else  /* push register list */
1391                    {
1392                        for (nn = 0; nn < 9; nn++)
1393                            if (insn & (1 << nn))
1394                                result += _interlock_use(nn);
1395                    }
1396                }
1397                /* else  software breakpoint */
1398            }
1399            break;
1400
1401        case 24:  /* store multiple */
1402            {
1403                int  Rd = (insn >> 8) & 7;
1404                uint32_t  mask = insn & 255;
1405                int       count, nn;
1406
1407                for (count = 0; mask; count++)
1408                    mask &= (mask-1);
1409
1410                result = (count < 2) ? 2 : count;
1411                result += _interlock_use(Rd);
1412
1413                for (nn = 0; nn < 8; nn++)
1414                    if (insn & (1 << nn))
1415                        result += _interlock_use(nn);
1416            }
1417            break;
1418
1419        case 25:  /* load multiple */
1420            {
1421                int  Rd = (insn >> 8) & 7;
1422                uint32_t  mask = insn & 255;
1423                int       count, nn;
1424
1425                for (count = 0; mask; count++)
1426                    mask &= (mask-1);
1427
1428                result  = (count < 2) ? 2 : count;
1429                result += _interlock_use(Rd);
1430
1431                for (nn = 0; nn < 8; nn++)
1432                    if (insn & (1 << nn))
1433                        _interlock_def(nn, result);
1434            }
1435            break;
1436
1437        case 26:
1438        case 27:  /* conditional branch / undefined / software interrupt */
1439            switch ((insn >> 8) & 15)
1440            {
1441                case 14: /* UNDEFINED */
1442                case 15: /* SWI */
1443                    break;
1444
1445                default:  /* conditional branch */
1446                    result = 3;
1447            }
1448            break;
1449
1450        case 28:  /* unconditional branch */
1451            result = 3;
1452            break;
1453
1454        case 29:  /* BLX suffix or undefined */
1455            if ((insn & 1) == 0)
1456                result = 3;
1457            break;
1458
1459        case 30:  /* BLX/BLX prefix */
1460            break;
1461
1462        case 31:  /* BL suffix */
1463            result = 3;
1464            break;
1465    }
1466    interlock_base += result;
1467    return result;
1468#else /* old code */
1469    if ((insn & 0xfc00) == 0x4340) /* MUL */
1470        return TICKS_SMULxy;
1471
1472    return TICKS_OTHER;
1473#endif
1474}
1475
1476// Adds an exception trace record.
1477void trace_exception(uint32 target_pc)
1478{
1479    if (trace_exc.fstream == NULL)
1480        return;
1481
1482    // Sometimes we get an unexpected exception as the first record.  If the
1483    // basic block number is zero, then we know it is bogus.
1484    if (trace_bb.current_bb_num == 0)
1485        return;
1486
1487    uint32_t current_pc = trace_bb.current_bb_addr + 4 * (trace_bb.num_insns - 1);
1488#if 0
1489    if (ftrace_debug) {
1490        fprintf(ftrace_debug, "t%llu exc pc: 0x%x bb_addr: 0x%x num_insns: %d current_pc: 0x%x bb_num %llu bb_start_time %llu\n",
1491                sim_time, target_pc, trace_bb.current_bb_addr,
1492                trace_bb.num_insns, current_pc, trace_bb.current_bb_num,
1493                trace_bb.current_bb_start_time);
1494    }
1495#endif
1496    char *comp_ptr = trace_exc.compressed_ptr;
1497    if (comp_ptr >= trace_exc.high_water_ptr) {
1498        uint32_t size = comp_ptr - trace_exc.compressed;
1499        fwrite(trace_exc.compressed, sizeof(char), size, trace_exc.fstream);
1500        comp_ptr = trace_exc.compressed;
1501    }
1502    uint64_t time_diff = sim_time - trace_exc.prev_time;
1503    trace_exc.prev_time = sim_time;
1504    uint64_t bb_recnum_diff = trace_bb.recnum - trace_exc.prev_bb_recnum;
1505    trace_exc.prev_bb_recnum = trace_bb.recnum;
1506    comp_ptr = varint_encode(time_diff, comp_ptr);
1507    comp_ptr = varint_encode(current_pc, comp_ptr);
1508    comp_ptr = varint_encode(bb_recnum_diff, comp_ptr);
1509    comp_ptr = varint_encode(target_pc, comp_ptr);
1510    comp_ptr = varint_encode(trace_bb.current_bb_num, comp_ptr);
1511    comp_ptr = varint_encode(trace_bb.current_bb_start_time, comp_ptr);
1512    comp_ptr = varint_encode(trace_bb.num_insns, comp_ptr);
1513    trace_exc.compressed_ptr = comp_ptr;
1514}
1515
1516void trace_pid_1arg(int pid, int rec_type)
1517{
1518    if (trace_pid.fstream == NULL)
1519        return;
1520    char *comp_ptr = trace_pid.compressed_ptr;
1521    char *max_end_ptr = comp_ptr + kMaxPidCompressed;
1522    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1523        uint32_t size = comp_ptr - trace_pid.compressed;
1524        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1525        comp_ptr = trace_pid.compressed;
1526    }
1527    uint64_t time_diff = sim_time - trace_pid.prev_time;
1528    trace_pid.prev_time = sim_time;
1529    comp_ptr = varint_encode(time_diff, comp_ptr);
1530    comp_ptr = varint_encode(rec_type, comp_ptr);
1531    comp_ptr = varint_encode(pid, comp_ptr);
1532    trace_pid.compressed_ptr = comp_ptr;
1533}
1534
1535void trace_pid_2arg(int tgid, int pid, int rec_type)
1536{
1537    if (trace_pid.fstream == NULL)
1538        return;
1539    char *comp_ptr = trace_pid.compressed_ptr;
1540    char *max_end_ptr = comp_ptr + kMaxPid2Compressed;
1541    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1542        uint32_t size = comp_ptr - trace_pid.compressed;
1543        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1544        comp_ptr = trace_pid.compressed;
1545    }
1546    uint64_t time_diff = sim_time - trace_pid.prev_time;
1547    trace_pid.prev_time = sim_time;
1548    comp_ptr = varint_encode(time_diff, comp_ptr);
1549    comp_ptr = varint_encode(rec_type, comp_ptr);
1550    comp_ptr = varint_encode(tgid, comp_ptr);
1551    comp_ptr = varint_encode(pid, comp_ptr);
1552    trace_pid.compressed_ptr = comp_ptr;
1553}
1554
1555void trace_switch(int pid)
1556{
1557#if 0
1558    if (ftrace_debug && trace_pid.fstream)
1559        fprintf(ftrace_debug, "t%lld switch %d\n", sim_time, pid);
1560#endif
1561    trace_pid_1arg(pid, kPidSwitch);
1562    current_pid = pid;
1563}
1564
1565void trace_fork(int tgid, int pid)
1566{
1567#if 0
1568    if (ftrace_debug && trace_pid.fstream)
1569        fprintf(ftrace_debug, "t%lld fork %d\n", sim_time, pid);
1570#endif
1571    trace_pid_2arg(tgid, pid, kPidFork);
1572}
1573
1574void trace_clone(int tgid, int pid)
1575{
1576#if 0
1577    if (ftrace_debug && trace_pid.fstream)
1578        fprintf(ftrace_debug, "t%lld clone %d\n", sim_time, pid);
1579#endif
1580    trace_pid_2arg(tgid, pid, kPidClone);
1581}
1582
1583void trace_exit(int exitcode)
1584{
1585#if 0
1586    if (ftrace_debug && trace_pid.fstream)
1587        fprintf(ftrace_debug, "t%lld exit %d\n", sim_time, exitcode);
1588#endif
1589    trace_pid_1arg(exitcode, kPidExit);
1590}
1591
1592void trace_name(char *name)
1593{
1594#if 0
1595    if (ftrace_debug && trace_pid.fstream) {
1596        fprintf(ftrace_debug, "t%lld pid %d name %s\n",
1597                sim_time, current_pid, name);
1598    }
1599#endif
1600    if (trace_pid.fstream == NULL)
1601        return;
1602    int len = strlen(name);
1603    char *comp_ptr = trace_pid.compressed_ptr;
1604    char *max_end_ptr = comp_ptr + len + kMaxNameCompressed;
1605    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1606        uint32_t size = comp_ptr - trace_pid.compressed;
1607        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1608        comp_ptr = trace_pid.compressed;
1609    }
1610    uint64_t time_diff = sim_time - trace_pid.prev_time;
1611    trace_pid.prev_time = sim_time;
1612    comp_ptr = varint_encode(time_diff, comp_ptr);
1613    int rec_type = kPidName;
1614    comp_ptr = varint_encode(rec_type, comp_ptr);
1615    comp_ptr = varint_encode(current_pid, comp_ptr);
1616    comp_ptr = varint_encode(len, comp_ptr);
1617    strncpy(comp_ptr, name, len);
1618    comp_ptr += len;
1619    trace_pid.compressed_ptr = comp_ptr;
1620}
1621
1622void trace_execve(const char *argv, int len)
1623{
1624    int ii;
1625
1626    if (trace_pid.fstream == NULL)
1627        return;
1628    // Count the number of args
1629    int alen = 0;
1630    int sum_len = 0;
1631    int argc = 0;
1632    const char *ptr = argv;
1633    while (sum_len < len) {
1634        argc += 1;
1635        alen = strlen(ptr);
1636        ptr += alen + 1;
1637        sum_len += alen + 1;
1638    }
1639
1640#if 0
1641    if (ftrace_debug) {
1642        fprintf(ftrace_debug, "t%lld argc: %d\n", sim_time, argc);
1643        alen = 0;
1644        ptr = argv;
1645        for (ii = 0; ii < argc; ++ii) {
1646            fprintf(ftrace_debug, "  argv[%d]: %s\n", ii, ptr);
1647            alen = strlen(ptr);
1648            ptr += alen + 1;
1649        }
1650    }
1651#endif
1652
1653    char *comp_ptr = trace_pid.compressed_ptr;
1654    char *max_end_ptr = comp_ptr + len + 5 * argc + kMaxExecArgsCompressed;
1655    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1656        uint32_t size = comp_ptr - trace_pid.compressed;
1657        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1658        comp_ptr = trace_pid.compressed;
1659    }
1660    uint64_t time_diff = sim_time - trace_pid.prev_time;
1661    trace_pid.prev_time = sim_time;
1662    comp_ptr = varint_encode(time_diff, comp_ptr);
1663    int rec_type = kPidExec;
1664    comp_ptr = varint_encode(rec_type, comp_ptr);
1665    comp_ptr = varint_encode(argc, comp_ptr);
1666
1667    ptr = argv;
1668    for (ii = 0; ii < argc; ++ii) {
1669        alen = strlen(ptr);
1670        comp_ptr = varint_encode(alen, comp_ptr);
1671        strncpy(comp_ptr, ptr, alen);
1672        comp_ptr += alen;
1673        ptr += alen + 1;
1674    }
1675    trace_pid.compressed_ptr = comp_ptr;
1676}
1677
1678void trace_mmap(unsigned long vstart, unsigned long vend,
1679                unsigned long offset, const char *path)
1680{
1681    if (trace_pid.fstream == NULL)
1682        return;
1683#if 0
1684    if (ftrace_debug)
1685        fprintf(ftrace_debug, "t%lld mmap %08lx - %08lx, offset %08lx '%s'\n",
1686                sim_time, vstart, vend, offset, path);
1687#endif
1688    int len = strlen(path);
1689    char *comp_ptr = trace_pid.compressed_ptr;
1690    char *max_end_ptr = comp_ptr + len + kMaxMmapCompressed;
1691    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1692        uint32_t size = comp_ptr - trace_pid.compressed;
1693        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1694        comp_ptr = trace_pid.compressed;
1695    }
1696    uint64_t time_diff = sim_time - trace_pid.prev_time;
1697    trace_pid.prev_time = sim_time;
1698    comp_ptr = varint_encode(time_diff, comp_ptr);
1699    int rec_type = kPidMmap;
1700    comp_ptr = varint_encode(rec_type, comp_ptr);
1701    comp_ptr = varint_encode(vstart, comp_ptr);
1702    comp_ptr = varint_encode(vend, comp_ptr);
1703    comp_ptr = varint_encode(offset, comp_ptr);
1704    comp_ptr = varint_encode(len, comp_ptr);
1705    strncpy(comp_ptr, path, len);
1706    trace_pid.compressed_ptr = comp_ptr + len;
1707}
1708
1709void trace_munmap(unsigned long vstart, unsigned long vend)
1710{
1711    if (trace_pid.fstream == NULL)
1712        return;
1713#if 0
1714    if (ftrace_debug)
1715        fprintf(ftrace_debug, "t%lld munmap %08lx - %08lx\n",
1716                sim_time, vstart, vend);
1717#endif
1718    char *comp_ptr = trace_pid.compressed_ptr;
1719    char *max_end_ptr = comp_ptr + kMaxMunmapCompressed;
1720    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1721        uint32_t size = comp_ptr - trace_pid.compressed;
1722        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1723        comp_ptr = trace_pid.compressed;
1724    }
1725    uint64_t time_diff = sim_time - trace_pid.prev_time;
1726    trace_pid.prev_time = sim_time;
1727    comp_ptr = varint_encode(time_diff, comp_ptr);
1728    int rec_type = kPidMunmap;
1729    comp_ptr = varint_encode(rec_type, comp_ptr);
1730    comp_ptr = varint_encode(vstart, comp_ptr);
1731    comp_ptr = varint_encode(vend, comp_ptr);
1732    trace_pid.compressed_ptr = comp_ptr;
1733}
1734
1735void trace_dynamic_symbol_add(unsigned long vaddr, const char *name)
1736{
1737    if (trace_pid.fstream == NULL)
1738        return;
1739#if 0
1740    if (ftrace_debug)
1741        fprintf(ftrace_debug, "t%lld sym %08lx '%s'\n", sim_time, vaddr, name);
1742#endif
1743    int len = strlen(name);
1744    char *comp_ptr = trace_pid.compressed_ptr;
1745    char *max_end_ptr = comp_ptr + len + kMaxSymbolCompressed;
1746    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1747        uint32_t size = comp_ptr - trace_pid.compressed;
1748        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1749        comp_ptr = trace_pid.compressed;
1750    }
1751    uint64_t time_diff = sim_time - trace_pid.prev_time;
1752    trace_pid.prev_time = sim_time;
1753    comp_ptr = varint_encode(time_diff, comp_ptr);
1754    int rec_type = kPidSymbolAdd;
1755    comp_ptr = varint_encode(rec_type, comp_ptr);
1756    comp_ptr = varint_encode(vaddr, comp_ptr);
1757    comp_ptr = varint_encode(len, comp_ptr);
1758    strncpy(comp_ptr, name, len);
1759    trace_pid.compressed_ptr = comp_ptr + len;
1760}
1761
1762void trace_dynamic_symbol_remove(unsigned long vaddr)
1763{
1764    if (trace_pid.fstream == NULL)
1765        return;
1766#if 0
1767    if (ftrace_debug)
1768        fprintf(ftrace_debug, "t%lld remove %08lx\n", sim_time, vaddr);
1769#endif
1770    char *comp_ptr = trace_pid.compressed_ptr;
1771    char *max_end_ptr = comp_ptr + kMaxSymbolCompressed;
1772    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1773        uint32_t size = comp_ptr - trace_pid.compressed;
1774        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1775        comp_ptr = trace_pid.compressed;
1776    }
1777    uint64_t time_diff = sim_time - trace_pid.prev_time;
1778    trace_pid.prev_time = sim_time;
1779    comp_ptr = varint_encode(time_diff, comp_ptr);
1780    int rec_type = kPidSymbolRemove;
1781    comp_ptr = varint_encode(rec_type, comp_ptr);
1782    comp_ptr = varint_encode(vaddr, comp_ptr);
1783    trace_pid.compressed_ptr = comp_ptr;
1784}
1785
1786void trace_init_name(int tgid, int pid, const char *name)
1787{
1788    if (trace_pid.fstream == NULL)
1789        return;
1790#if 0
1791    if (ftrace_debug)
1792        fprintf(ftrace_debug, "t%lld kthread %d %s\n", sim_time, pid, name);
1793#endif
1794    int len = strlen(name);
1795    char *comp_ptr = trace_pid.compressed_ptr;
1796    char *max_end_ptr = comp_ptr + len + kMaxKthreadNameCompressed;
1797    if (max_end_ptr >= &trace_pid.compressed[kCompressedSize]) {
1798        uint32_t size = comp_ptr - trace_pid.compressed;
1799        fwrite(trace_pid.compressed, sizeof(char), size, trace_pid.fstream);
1800        comp_ptr = trace_pid.compressed;
1801    }
1802    uint64_t time_diff = sim_time - trace_pid.prev_time;
1803    trace_pid.prev_time = sim_time;
1804    comp_ptr = varint_encode(time_diff, comp_ptr);
1805    int rec_type = kPidKthreadName;
1806    comp_ptr = varint_encode(rec_type, comp_ptr);
1807    comp_ptr = varint_encode(tgid, comp_ptr);
1808    comp_ptr = varint_encode(pid, comp_ptr);
1809    comp_ptr = varint_encode(len, comp_ptr);
1810    strncpy(comp_ptr, name, len);
1811    trace_pid.compressed_ptr = comp_ptr + len;
1812}
1813
// Currently a no-op: the arguments are accepted and ignored, and nothing is
// written to any trace stream.
void trace_init_exec(unsigned long start, unsigned long end,
                     unsigned long offset, const char *exe)
{
    // Mark every parameter as deliberately unused.
    (void)start;
    (void)end;
    (void)offset;
    (void)exe;
}
1818
1819// This function is called by the generated code to record the basic
1820// block number.
1821void trace_bb_helper(uint64_t bb_num, TranslationBlock *tb)
1822{
1823    BBRec *bb_rec = tb->bb_rec;
1824    uint64_t prev_time = tb->prev_time;
1825    trace_bb.current_bb_addr = tb->pc;
1826    trace_bb.current_bb_num = bb_num;
1827    trace_bb.current_bb_start_time = sim_time;
1828    trace_bb.num_insns = 0;
1829    trace_bb.recnum += 1;
1830
1831#if 0
1832    if (ftrace_debug)
1833        fprintf(ftrace_debug, "t%lld %lld\n", sim_time, bb_num);
1834#endif
1835    if (bb_rec && bb_rec->bb_num == bb_num && prev_time > trace_bb.flush_time) {
1836        uint64_t time_diff = sim_time - prev_time;
1837        if (bb_rec->repeat == 0) {
1838            bb_rec->repeat = 1;
1839            bb_rec->time_diff = time_diff;
1840            tb->prev_time = sim_time;
1841            return;
1842        } else if (time_diff == bb_rec->time_diff) {
1843            bb_rec->repeat += 1;
1844            tb->prev_time = sim_time;
1845            return;
1846        }
1847    }
1848
1849    BBRec *next = trace_bb.next;
1850    if (next == &trace_bb.buffer[kMaxNumBasicBlocks]) {
1851        BBRec *ptr;
1852        char *comp_ptr = trace_bb.compressed_ptr;
1853        int64_t prev_bb_num = trace_bb.prev_bb_num;
1854        uint64_t prev_bb_time = trace_bb.prev_bb_time;
1855        for (ptr = trace_bb.buffer; ptr != next; ++ptr) {
1856            if (comp_ptr >= trace_bb.high_water_ptr) {
1857                uint32_t size = comp_ptr - trace_bb.compressed;
1858                fwrite(trace_bb.compressed, sizeof(char), size, trace_bb.fstream);
1859                comp_ptr = trace_bb.compressed;
1860            }
1861            int64_t bb_diff = ptr->bb_num - prev_bb_num;
1862            prev_bb_num = ptr->bb_num;
1863            uint64_t time_diff = ptr->start_time - prev_bb_time;
1864            prev_bb_time = ptr->start_time;
1865            comp_ptr = varint_encode_signed(bb_diff, comp_ptr);
1866            comp_ptr = varint_encode(time_diff, comp_ptr);
1867            comp_ptr = varint_encode(ptr->repeat, comp_ptr);
1868            if (ptr->repeat)
1869                comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
1870        }
1871        trace_bb.compressed_ptr = comp_ptr;
1872        trace_bb.prev_bb_num = prev_bb_num;
1873        trace_bb.prev_bb_time = prev_bb_time;
1874
1875        next = trace_bb.buffer;
1876        trace_bb.flush_time = sim_time;
1877    }
1878    tb->bb_rec = next;
1879    next->bb_num = bb_num;
1880    next->start_time = sim_time;
1881    next->time_diff = 0;
1882    next->repeat = 0;
1883    tb->prev_time = sim_time;
1884    next += 1;
1885    trace_bb.next = next;
1886}
1887
1888// This function is called by the generated code to record the simulation
1889// time at the start of each instruction.
1890void trace_insn_helper()
1891{
1892    InsnRec *current = trace_insn.current;
1893    uint64_t time_diff = sim_time - trace_insn.prev_time;
1894    trace_insn.prev_time = sim_time;
1895
1896    // Keep track of the number of traced instructions so far in this
1897    // basic block in case we get an exception in the middle of the bb.
1898    trace_bb.num_insns += 1;
1899
1900#if 0
1901    if (ftrace_debug) {
1902        uint32_t current_pc = trace_bb.current_bb_addr + 4 * (trace_bb.num_insns - 1);
1903        fprintf(ftrace_debug, "%llu %x\n", sim_time, current_pc);
1904    }
1905#endif
1906    if (time_diff == current->time_diff) {
1907        current->repeat += 1;
1908        if (current->repeat != 0)
1909            return;
1910
1911        // The repeat count wrapped around, so back up one and create
1912        // a new record.
1913        current->repeat -= 1;
1914    }
1915    current += 1;
1916
1917    if (current == &trace_insn.buffer[kInsnBufferSize]) {
1918        InsnRec *ptr;
1919        char *comp_ptr = trace_insn.compressed_ptr;
1920        for (ptr = trace_insn.buffer; ptr != current; ++ptr) {
1921            if (comp_ptr >= trace_insn.high_water_ptr) {
1922                uint32_t size = comp_ptr - trace_insn.compressed;
1923                uint32_t rval = fwrite(trace_insn.compressed, sizeof(char),
1924                                       size, trace_insn.fstream);
1925                if (rval != size) {
1926                    fprintf(stderr, "fwrite() failed\n");
1927                    perror(trace_insn.filename);
1928                    exit(1);
1929                }
1930                comp_ptr = trace_insn.compressed;
1931            }
1932            comp_ptr = varint_encode(ptr->time_diff, comp_ptr);
1933            comp_ptr = varint_encode(ptr->repeat, comp_ptr);
1934        }
1935        trace_insn.compressed_ptr = comp_ptr;
1936        current = trace_insn.buffer;
1937    }
1938    current->time_diff = time_diff;
1939    current->repeat = 0;
1940    trace_insn.current = current;
1941}
1942
1943// Adds an interpreted method trace record.  Each trace record is a time
1944// stamped entry or exit to a method in a language executed by a "virtual
1945// machine".  This allows profiling tools to show the method names instead
1946// of the core virtual machine interpreter.
1947void trace_interpreted_method(uint32_t addr, int call_type)
1948{
1949    if (trace_method.fstream == NULL)
1950        return;
1951#if 0
1952    fprintf(stderr, "trace_method time: %llu p%d 0x%x %d\n",
1953            sim_time, current_pid, addr, call_type);
1954#endif
1955    char *comp_ptr = trace_method.compressed_ptr;
1956    char *max_end_ptr = comp_ptr + kMaxMethodCompressed;
1957    if (max_end_ptr >= &trace_method.compressed[kCompressedSize]) {
1958        uint32_t size = comp_ptr - trace_method.compressed;
1959        fwrite(trace_method.compressed, sizeof(char), size, trace_method.fstream);
1960        comp_ptr = trace_method.compressed;
1961    }
1962    uint64_t time_diff = sim_time - trace_method.prev_time;
1963    trace_method.prev_time = sim_time;
1964
1965    int32_t addr_diff = addr - trace_method.prev_addr;
1966    trace_method.prev_addr = addr;
1967
1968    int32_t pid_diff = current_pid - trace_method.prev_pid;
1969    trace_method.prev_pid = current_pid;
1970
1971    comp_ptr = varint_encode(time_diff, comp_ptr);
1972    comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
1973    comp_ptr = varint_encode_signed(pid_diff, comp_ptr);
1974    comp_ptr = varint_encode(call_type, comp_ptr);
1975    trace_method.compressed_ptr = comp_ptr;
1976}
1977
1978uint64_t trace_static_bb_num(void)
1979{
1980    return trace_static.bb_num;
1981}
1982