1#!/usr/bin/python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# Use: ../mergetraces.py `ls cyglog.* -Sr` > merged_cyglog
7
8""""Merge multiple logs files from different processes into a single log.
9
10Given two log files of execution traces, merge the traces into a single trace.
11Merging will use timestamps (i.e. the first two columns of logged calls) to
12create a single log that is an ordered trace of calls by both processes.
13"""
14
15import optparse
16import string
17import sys
18
19def ParseLogLines(lines):
20  """Parse log file lines.
21
22  Args:
23    lines: lines from log file produced by profiled run
24
25    Below is an example of a small log file:
26    5086e000-52e92000 r-xp 00000000 b3:02 51276      libchromeview.so
27    secs       usecs      pid:threadid    func
28    START
29    1314897086 795828     3587:1074648168 0x509e105c
30    1314897086 795874     3587:1074648168 0x509e0eb4
31    1314897086 796326     3587:1074648168 0x509e0e3c
32    1314897086 796552     3587:1074648168 0x509e07bc
33    END
34
35  Returns:
36    tuple conisiting of 1) an ordered list of the logged calls, as an array of
37    fields, 2) the virtual start address of the library, used to compute the
38    offset of the symbol in the library and 3) the virtual end address
39  """
40  call_lines = []
41  vm_start = 0
42  vm_end = 0
43  dash_index = lines[0].find ('-')
44  space_index = lines[0].find (' ')
45  vm_start = int (lines[0][:dash_index], 16)
46  vm_end = int (lines[0][dash_index+1:space_index], 16)
47  for line in lines[2:]:
48    line = line.strip()
49    # print hex (vm_start)
50    fields = line.split()
51    call_lines.append (fields)
52
53  return (call_lines, vm_start, vm_end)
54
55def HasDuplicates(calls):
56  """Makes sure that calls are only logged once.
57
58  Args:
59    calls: list of calls logged
60
61  Returns:
62    boolean indicating if calls has duplicate calls
63  """
64  seen = set([])
65  for call in calls:
66    if call[3] in seen:
67      return True
68    seen.add(call[3])
69  return False
70
71def CheckTimestamps(calls):
72  """Prints warning to stderr if the call timestamps are not in order.
73
74  Args:
75    calls: list of calls logged
76  """
77  index = 0
78  last_timestamp_secs = -1
79  last_timestamp_us = -1
80  while (index < len (calls)):
81    timestamp_secs = int (calls[index][0])
82    timestamp_us = int (calls[index][1])
83    timestamp = (timestamp_secs * 1000000) + timestamp_us
84    last_timestamp = (last_timestamp_secs * 1000000) + last_timestamp_us
85    if (timestamp < last_timestamp):
86      raise Exception("last_timestamp: " + str(last_timestamp_secs)
87                       + " " + str(last_timestamp_us) + " timestamp: "
88                       + str(timestamp_secs) + " " + str(timestamp_us) + "\n")
89    last_timestamp_secs = timestamp_secs
90    last_timestamp_us = timestamp_us
91    index = index + 1
92
93def Convert (call_lines, startAddr, endAddr):
94  """Converts the call addresses to static offsets and removes invalid calls.
95
96  Removes profiled calls not in shared library using start and end virtual
97  addresses, converts strings to integer values, coverts virtual addresses to
98  address in shared library.
99
100  Returns:
101     list of calls as tuples (sec, usec, pid:tid, callee)
102  """
103  converted_calls = []
104  call_addresses = []
105  for fields in call_lines:
106    secs = int (fields[0])
107    usecs = int (fields[1])
108    callee = int (fields[3], 16)
109    # print ("callee: " + hex (callee) + " start: " + hex (startAddr) + " end: "
110    #        + hex (endAddr))
111    if (callee >= startAddr and callee < endAddr
112        and (not callee in call_addresses)):
113      converted_calls.append((secs, usecs, fields[2], (callee - startAddr)))
114      call_addresses.append(callee)
115  return converted_calls
116
117def Timestamp(trace_entry):
118  return int (trace_entry[0]) * 1000000 + int(trace_entry[1])
119
120def AddTrace (tracemap, trace):
121  """Adds a trace to the tracemap.
122
123  Adds entries in the trace to the tracemap. All new calls will be added to
124  the tracemap. If the calls already exist in the tracemap then they will be
125  replaced if they happened sooner in the new trace.
126
127  Args:
128    tracemap: the tracemap
129    trace: the trace
130
131  """
132  for trace_entry in trace:
133    call = trace_entry[3]
134    if (not call in tracemap) or (
135        Timestamp(tracemap[call]) > Timestamp(trace_entry)):
136      tracemap[call] = trace_entry
137
138def GroupByProcessAndThreadId(input_trace):
139  """Returns an array of traces grouped by pid and tid.
140
141  This is used to make the order of functions not depend on thread scheduling
142  which can be greatly impacted when profiling is done with cygprofile. As a
143  result each thread has its own contiguous segment of code (ordered by
144  timestamp) and processes also have their code isolated (i.e. not interleaved).
145  """
146  def MakeTimestamp(sec, usec):
147    return sec * 1000000 + usec
148
149  def PidAndTidFromString(pid_and_tid):
150    strings = pid_and_tid.split(':')
151    return (int(strings[0]), int(strings[1]))
152
153  tid_to_pid_map = {}
154  pid_first_seen = {}
155  tid_first_seen = {}
156
157  for (sec, usec, pid_and_tid, _) in input_trace:
158    (pid, tid) = PidAndTidFromString(pid_and_tid)
159
160    # Make sure that thread IDs are unique since this is a property we rely on.
161    if tid_to_pid_map.setdefault(tid, pid) != pid:
162      raise Exception(
163          'Seen PIDs %d and %d for TID=%d. Thread-IDs must be unique' % (
164              tid_to_pid_map[tid], pid, tid))
165
166    if not pid in pid_first_seen:
167      pid_first_seen[pid] = MakeTimestamp(sec, usec)
168    if not tid in tid_first_seen:
169      tid_first_seen[tid] = MakeTimestamp(sec, usec)
170
171  def CompareEvents(event1, event2):
172    (sec1, usec1, pid_and_tid, _) = event1
173    (pid1, tid1) = PidAndTidFromString(pid_and_tid)
174    (sec2, usec2, pid_and_tid, _) = event2
175    (pid2, tid2) = PidAndTidFromString(pid_and_tid)
176
177    pid_cmp = cmp(pid_first_seen[pid1], pid_first_seen[pid2])
178    if pid_cmp != 0:
179      return pid_cmp
180    tid_cmp = cmp(tid_first_seen[tid1], tid_first_seen[tid2])
181    if tid_cmp != 0:
182      return tid_cmp
183    return cmp(MakeTimestamp(sec1, usec1), MakeTimestamp(sec2, usec2))
184
185  return sorted(input_trace, cmp=CompareEvents)
186
187def main():
188  """Merge two traces for code in specified library and write to stdout.
189
190  Merges the two traces and coverts the virtual addresses to the offsets in the
191  library.  First line of merged trace has dummy virtual address of 0-ffffffff
192  so that symbolizing the addresses uses the addresses in the log, since the
193  addresses have already been converted to static offsets.
194  """
195  parser = optparse.OptionParser('usage: %prog trace1 ... traceN')
196  (_, args) = parser.parse_args()
197  if len(args) <= 1:
198    parser.error('expected at least the following args: trace1 trace2')
199
200  step = 0
201
202  # Maps function addresses to their corresponding trace entry.
203  tracemap = dict()
204
205  for trace_file in args:
206    step += 1
207    sys.stderr.write("    " + str(step) + "/" + str(len(args)) +
208                     ": " + trace_file + ":\n")
209
210    trace_lines = map(string.rstrip, open(trace_file).readlines())
211    (trace_calls, trace_start, trace_end) = ParseLogLines(trace_lines)
212    CheckTimestamps(trace_calls)
213    sys.stderr.write("Len: " + str(len(trace_calls)) +
214                     ". Start: " + hex(trace_start) +
215                     ", end: " + hex(trace_end) + '\n')
216
217    trace_calls = Convert(trace_calls, trace_start, trace_end)
218    sys.stderr.write("Converted len: " + str(len(trace_calls)) + "\n")
219
220    AddTrace(tracemap, trace_calls)
221    sys.stderr.write("Merged len: " + str(len(tracemap)) + "\n")
222
223  # Extract the resulting trace from the tracemap
224  merged_trace = []
225  for call in tracemap:
226    merged_trace.append(tracemap[call])
227  merged_trace.sort(key=Timestamp)
228
229  grouped_trace = GroupByProcessAndThreadId(merged_trace)
230
231  print "0-ffffffff r-xp 00000000 xx:00 00000 ./"
232  print "secs\tusecs\tpid:threadid\tfunc"
233  for call in grouped_trace:
234    print (str(call[0]) + "\t" + str(call[1]) + "\t" + call[2] + "\t" +
235           hex(call[3]))
236
237if __name__ == '__main__':
238  main()
239