1# Copyright 2008 the V8 project authors. All rights reserved.
2# Redistribution and use in source and binary forms, with or without
3# modification, are permitted provided that the following conditions are
4# met:
5#
6#     * Redistributions of source code must retain the above copyright
7#       notice, this list of conditions and the following disclaimer.
8#     * Redistributions in binary form must reproduce the above
9#       copyright notice, this list of conditions and the following
10#       disclaimer in the documentation and/or other materials provided
11#       with the distribution.
12#     * Neither the name of Google Inc. nor the names of its
13#       contributors may be used to endorse or promote products derived
14#       from this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28import csv, splaytree, sys, re
29from operator import itemgetter
30import getopt, os, string
31
32class CodeEntry(object):
33
34  def __init__(self, start_addr, name):
35    self.start_addr = start_addr
36    self.tick_count = 0
37    self.name = name
38    self.stacks = {}
39
40  def Tick(self, pc, stack):
41    self.tick_count += 1
42    if len(stack) > 0:
43      stack.insert(0, self.ToString())
44      stack_key = tuple(stack)
45      self.stacks[stack_key] = self.stacks.setdefault(stack_key, 0) + 1
46
47  def RegionTicks(self):
48    return None
49
50  def SetStartAddress(self, start_addr):
51    self.start_addr = start_addr
52
53  def ToString(self):
54    return self.name
55
56  def IsSharedLibraryEntry(self):
57    return False
58
59  def IsICEntry(self):
60    return False
61
62  def IsJSFunction(self):
63    return False
64
65class SharedLibraryEntry(CodeEntry):
66
67  def __init__(self, start_addr, name):
68    CodeEntry.__init__(self, start_addr, name)
69
70  def IsSharedLibraryEntry(self):
71    return True
72
73
74class JSCodeEntry(CodeEntry):
75
76  def __init__(self, start_addr, name, type, size, assembler):
77    CodeEntry.__init__(self, start_addr, name)
78    self.type = type
79    self.size = size
80    self.assembler = assembler
81    self.region_ticks = None
82    self.builtin_ic_re = re.compile('^(Keyed)?(Call|Load|Store)IC_')
83
84  def Tick(self, pc, stack):
85    super(JSCodeEntry, self).Tick(pc, stack)
86    if not pc is None:
87      offset = pc - self.start_addr
88      seen = []
89      narrowest = None
90      narrowest_width = None
91      for region in self.Regions():
92        if region.Contains(offset):
93          if (not region.name in seen):
94            seen.append(region.name)
95          if narrowest is None or region.Width() < narrowest.Width():
96            narrowest = region
97      if len(seen) == 0:
98        return
99      if self.region_ticks is None:
100        self.region_ticks = {}
101      for name in seen:
102        if not name in self.region_ticks:
103          self.region_ticks[name] = [0, 0]
104        self.region_ticks[name][0] += 1
105        if name == narrowest.name:
106          self.region_ticks[name][1] += 1
107
108  def RegionTicks(self):
109    return self.region_ticks
110
111  def Regions(self):
112    if self.assembler:
113      return self.assembler.regions
114    else:
115      return []
116
117  def ToString(self):
118    name = self.name
119    if name == '':
120      name = '<anonymous>'
121    elif name.startswith(' '):
122      name = '<anonymous>' + name
123    return self.type + ': ' + name
124
125  def IsICEntry(self):
126    return self.type in ('CallIC', 'LoadIC', 'StoreIC') or \
127      (self.type == 'Builtin' and self.builtin_ic_re.match(self.name))
128
129  def IsJSFunction(self):
130    return self.type in ('Function', 'LazyCompile', 'Script')
131
132class CodeRegion(object):
133
134  def __init__(self, start_offset, name):
135    self.start_offset = start_offset
136    self.name = name
137    self.end_offset = None
138
139  def Contains(self, pc):
140    return (self.start_offset <= pc) and (pc <= self.end_offset)
141
142  def Width(self):
143    return self.end_offset - self.start_offset
144
145
146class Assembler(object):
147
148  def __init__(self):
149    # Mapping from region ids to open regions
150    self.pending_regions = {}
151    self.regions = []
152
153
154class FunctionEnumerator(object):
155
156  def __init__(self):
157    self.known_funcs = {}
158    self.next_func_id = 0
159
160  def GetFunctionId(self, name):
161    if not self.known_funcs.has_key(name):
162      self.known_funcs[name] = self.next_func_id
163      self.next_func_id += 1
164    return self.known_funcs[name]
165
166  def GetKnownFunctions(self):
167    known_funcs_items = self.known_funcs.items();
168    known_funcs_items.sort(key = itemgetter(1))
169    result = []
170    for func, id_not_used in known_funcs_items:
171      result.append(func)
172    return result
173
174
175VMStates = { 'JS': 0, 'GC': 1, 'COMPILER': 2, 'OTHER': 3, 'EXTERNAL' : 4 }
176
177
178class TickProcessor(object):
179
180  def __init__(self):
181    self.log_file = ''
182    self.deleted_code = []
183    self.vm_extent = {}
184    # Map from assembler ids to the pending assembler objects
185    self.pending_assemblers = {}
186    # Map from code addresses the have been allocated but not yet officially
187    # created to their assemblers.
188    self.assemblers = {}
189    self.js_entries = splaytree.SplayTree()
190    self.cpp_entries = splaytree.SplayTree()
191    self.total_number_of_ticks = 0
192    self.number_of_library_ticks = 0
193    self.unaccounted_number_of_ticks = 0
194    self.excluded_number_of_ticks = 0
195    self.number_of_gc_ticks = 0
196    # Flag indicating whether to ignore unaccounted ticks in the report
197    self.ignore_unknown = False
198    self.func_enum = FunctionEnumerator()
199    self.packed_stacks = []
200
201  def ProcessLogfile(self, filename, included_state = None, ignore_unknown = False, separate_ic = False, call_graph_json = False):
202    self.log_file = filename
203    self.included_state = included_state
204    self.ignore_unknown = ignore_unknown
205    self.separate_ic = separate_ic
206    self.call_graph_json = call_graph_json
207
208    try:
209      logfile = open(filename, 'rb')
210    except IOError:
211      sys.exit("Could not open logfile: " + filename)
212    try:
213      try:
214        logreader = csv.reader(logfile)
215        row_num = 1
216        for row in logreader:
217          row_num += 1
218          if row[0] == 'tick':
219            self.ProcessTick(int(row[1], 16), int(row[2], 16), int(row[3], 16), int(row[4]), self.PreprocessStack(row[5:]))
220          elif row[0] == 'code-creation':
221            self.ProcessCodeCreation(row[1], int(row[2], 16), int(row[3]), row[4])
222          elif row[0] == 'code-move':
223            self.ProcessCodeMove(int(row[1], 16), int(row[2], 16))
224          elif row[0] == 'code-delete':
225            self.ProcessCodeDelete(int(row[1], 16))
226          elif row[0] == 'function-creation':
227            self.ProcessFunctionCreation(int(row[1], 16), int(row[2], 16))
228          elif row[0] == 'function-move':
229            self.ProcessFunctionMove(int(row[1], 16), int(row[2], 16))
230          elif row[0] == 'function-delete':
231            self.ProcessFunctionDelete(int(row[1], 16))
232          elif row[0] == 'shared-library':
233            self.AddSharedLibraryEntry(row[1], int(row[2], 16), int(row[3], 16))
234            self.ParseVMSymbols(row[1], int(row[2], 16), int(row[3], 16))
235          elif row[0] == 'begin-code-region':
236            self.ProcessBeginCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16), row[4])
237          elif row[0] == 'end-code-region':
238            self.ProcessEndCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16))
239          elif row[0] == 'code-allocate':
240            self.ProcessCodeAllocate(int(row[1], 16), int(row[2], 16))
241      except csv.Error:
242        print("parse error in line " + str(row_num))
243        raise
244    finally:
245      logfile.close()
246
247  def AddSharedLibraryEntry(self, filename, start, end):
248    # Mark the pages used by this library.
249    i = start
250    while i < end:
251      page = i >> 12
252      self.vm_extent[page] = 1
253      i += 4096
254    # Add the library to the entries so that ticks for which we do not
255    # have symbol information is reported as belonging to the library.
256    self.cpp_entries.Insert(start, SharedLibraryEntry(start, filename))
257
258  def ParseVMSymbols(self, filename, start, end):
259    return
260
261  def ProcessCodeAllocate(self, addr, assem):
262    if assem in self.pending_assemblers:
263      assembler = self.pending_assemblers.pop(assem)
264      self.assemblers[addr] = assembler
265
266  def ProcessCodeCreation(self, type, addr, size, name):
267    if addr in self.assemblers:
268      assembler = self.assemblers.pop(addr)
269    else:
270      assembler = None
271    self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size, assembler))
272
273  def ProcessCodeMove(self, from_addr, to_addr):
274    try:
275      removed_node = self.js_entries.Remove(from_addr)
276      removed_node.value.SetStartAddress(to_addr);
277      self.js_entries.Insert(to_addr, removed_node.value)
278    except splaytree.KeyNotFoundError:
279      print('Code move event for unknown code: 0x%x' % from_addr)
280
281  def ProcessCodeDelete(self, from_addr):
282    try:
283      removed_node = self.js_entries.Remove(from_addr)
284      self.deleted_code.append(removed_node.value)
285    except splaytree.KeyNotFoundError:
286      print('Code delete event for unknown code: 0x%x' % from_addr)
287
288  def ProcessFunctionCreation(self, func_addr, code_addr):
289    js_entry_node = self.js_entries.Find(code_addr)
290    if js_entry_node:
291      js_entry = js_entry_node.value
292      self.js_entries.Insert(func_addr, JSCodeEntry(func_addr, js_entry.name, js_entry.type, 1, None))
293
294  def ProcessFunctionMove(self, from_addr, to_addr):
295    try:
296      removed_node = self.js_entries.Remove(from_addr)
297      removed_node.value.SetStartAddress(to_addr);
298      self.js_entries.Insert(to_addr, removed_node.value)
299    except splaytree.KeyNotFoundError:
300      return
301
302  def ProcessFunctionDelete(self, from_addr):
303    try:
304      removed_node = self.js_entries.Remove(from_addr)
305      self.deleted_code.append(removed_node.value)
306    except splaytree.KeyNotFoundError:
307      return
308
309  def ProcessBeginCodeRegion(self, id, assm, start, name):
310    if not assm in self.pending_assemblers:
311      self.pending_assemblers[assm] = Assembler()
312    assembler = self.pending_assemblers[assm]
313    assembler.pending_regions[id] = CodeRegion(start, name)
314
315  def ProcessEndCodeRegion(self, id, assm, end):
316    assm = self.pending_assemblers[assm]
317    region = assm.pending_regions.pop(id)
318    region.end_offset = end
319    assm.regions.append(region)
320
321  def IncludeTick(self, pc, sp, state):
322    return (self.included_state is None) or (self.included_state == state)
323
324  def FindEntry(self, pc):
325    page = pc >> 12
326    if page in self.vm_extent:
327      entry = self.cpp_entries.FindGreatestsLessThan(pc)
328      if entry != None:
329        return entry.value
330      else:
331        return entry
332    max = self.js_entries.FindMax()
333    min = self.js_entries.FindMin()
334    if max != None and pc < (max.key + max.value.size) and pc > min.key:
335      return self.js_entries.FindGreatestsLessThan(pc).value
336    return None
337
338  def PreprocessStack(self, stack):
339    # remove all non-addresses (e.g. 'overflow') and convert to int
340    result = []
341    for frame in stack:
342      if frame.startswith('0x'):
343        result.append(int(frame, 16))
344    return result
345
346  def ProcessStack(self, stack):
347    result = []
348    for frame in stack:
349      entry = self.FindEntry(frame)
350      if entry != None:
351        result.append(entry.ToString())
352    return result
353
354  def ProcessTick(self, pc, sp, func, state, stack):
355    if state == VMStates['GC']:
356      self.number_of_gc_ticks += 1
357    if not self.IncludeTick(pc, sp, state):
358      self.excluded_number_of_ticks += 1;
359      return
360    self.total_number_of_ticks += 1
361    entry = self.FindEntry(pc)
362    if entry == None:
363      self.unaccounted_number_of_ticks += 1
364      return
365    if entry.IsSharedLibraryEntry():
366      self.number_of_library_ticks += 1
367    if entry.IsICEntry() and not self.separate_ic:
368      if len(stack) > 0:
369        caller_pc = stack.pop(0)
370        self.total_number_of_ticks -= 1
371        self.ProcessTick(caller_pc, sp, func, state, stack)
372      else:
373        self.unaccounted_number_of_ticks += 1
374    else:
375      processed_stack = self.ProcessStack(stack)
376      if not entry.IsSharedLibraryEntry() and not entry.IsJSFunction():
377        func_entry_node = self.js_entries.Find(func)
378        if func_entry_node and func_entry_node.value.IsJSFunction():
379          processed_stack.insert(0, func_entry_node.value.ToString())
380      entry.Tick(pc, processed_stack)
381      if self.call_graph_json:
382        self.AddToPackedStacks(pc, stack)
383
384  def AddToPackedStacks(self, pc, stack):
385    full_stack = stack
386    full_stack.insert(0, pc)
387    func_names = self.ProcessStack(full_stack)
388    func_ids = []
389    for func in func_names:
390      func_ids.append(self.func_enum.GetFunctionId(func))
391    self.packed_stacks.append(func_ids)
392
393  def PrintResults(self):
394    if not self.call_graph_json:
395      self.PrintStatistics()
396    else:
397      self.PrintCallGraphJSON()
398
399  def PrintStatistics(self):
400    print('Statistical profiling result from %s, (%d ticks, %d unaccounted, %d excluded).' %
401          (self.log_file,
402           self.total_number_of_ticks,
403           self.unaccounted_number_of_ticks,
404           self.excluded_number_of_ticks))
405    if self.total_number_of_ticks > 0:
406      js_entries = self.js_entries.ExportValueList()
407      js_entries.extend(self.deleted_code)
408      cpp_entries = self.cpp_entries.ExportValueList()
409      # Print the unknown ticks percentage if they are not ignored.
410      if not self.ignore_unknown and self.unaccounted_number_of_ticks > 0:
411        self.PrintHeader('Unknown')
412        self.PrintCounter(self.unaccounted_number_of_ticks)
413      # Print the library ticks.
414      self.PrintHeader('Shared libraries')
415      self.PrintEntries(cpp_entries, lambda e:e.IsSharedLibraryEntry())
416      # Print the JavaScript ticks.
417      self.PrintHeader('JavaScript')
418      self.PrintEntries(js_entries, lambda e:not e.IsSharedLibraryEntry())
419      # Print the C++ ticks.
420      self.PrintHeader('C++')
421      self.PrintEntries(cpp_entries, lambda e:not e.IsSharedLibraryEntry())
422      # Print the GC ticks.
423      self.PrintHeader('GC')
424      self.PrintCounter(self.number_of_gc_ticks)
425      # Print call profile.
426      print('\n [Call profile]:')
427      print('   total  call path')
428      js_entries.extend(cpp_entries)
429      self.PrintCallProfile(js_entries)
430
431  def PrintHeader(self, header_title):
432    print('\n [%s]:' % header_title)
433    print('   ticks  total  nonlib   name')
434
435  def PrintCounter(self, ticks_count):
436    percentage = ticks_count * 100.0 / self.total_number_of_ticks
437    print('  %(ticks)5d  %(total)5.1f%%' % {
438      'ticks' : ticks_count,
439      'total' : percentage,
440    })
441
442  def PrintEntries(self, entries, condition):
443    # If ignoring unaccounted ticks don't include these in percentage
444    # calculations
445    number_of_accounted_ticks = self.total_number_of_ticks
446    if self.ignore_unknown:
447      number_of_accounted_ticks -= self.unaccounted_number_of_ticks
448
449    number_of_non_library_ticks = number_of_accounted_ticks - self.number_of_library_ticks
450    entries.sort(key=lambda e: (e.tick_count, e.ToString()), reverse=True)
451    for entry in entries:
452      if entry.tick_count > 0 and condition(entry):
453        total_percentage = entry.tick_count * 100.0 / number_of_accounted_ticks
454        if entry.IsSharedLibraryEntry():
455          non_library_percentage = 0
456        else:
457          non_library_percentage = entry.tick_count * 100.0 / number_of_non_library_ticks
458        print('  %(ticks)5d  %(total)5.1f%% %(nonlib)6.1f%%  %(name)s' % {
459          'ticks' : entry.tick_count,
460          'total' : total_percentage,
461          'nonlib' : non_library_percentage,
462          'name' : entry.ToString()
463        })
464        region_ticks = entry.RegionTicks()
465        if not region_ticks is None:
466          items = region_ticks.items()
467          items.sort(key=lambda e: e[1][1], reverse=True)
468          for (name, ticks) in items:
469            print('                      flat   cum')
470            print('                     %(flat)5.1f%% %(accum)5.1f%% %(name)s' % {
471              'flat' : ticks[1] * 100.0 / entry.tick_count,
472              'accum' : ticks[0] * 100.0 / entry.tick_count,
473              'name': name
474            })
475
476  def PrintCallProfile(self, entries):
477    all_stacks = {}
478    total_stacks = 0
479    for entry in entries:
480      all_stacks.update(entry.stacks)
481      for count in entry.stacks.itervalues():
482        total_stacks += count
483    all_stacks_items = all_stacks.items();
484    all_stacks_items.sort(key = itemgetter(1), reverse=True)
485    missing_percentage = (self.total_number_of_ticks - total_stacks) * 100.0 / self.total_number_of_ticks
486    print('  %(ticks)5d  %(total)5.1f%%  <no call path information>' % {
487      'ticks' : self.total_number_of_ticks - total_stacks,
488      'total' : missing_percentage
489    })
490    for stack, count in all_stacks_items:
491      total_percentage = count * 100.0 / self.total_number_of_ticks
492      print('  %(ticks)5d  %(total)5.1f%%  %(call_path)s' % {
493        'ticks' : count,
494        'total' : total_percentage,
495        'call_path' : stack[0] + '  <-  ' + stack[1]
496      })
497
498  def PrintCallGraphJSON(self):
499    print('\nvar __profile_funcs = ["' +
500          '",\n"'.join(self.func_enum.GetKnownFunctions()) +
501          '"];')
502    print('var __profile_ticks = [')
503    str_packed_stacks = []
504    for stack in self.packed_stacks:
505      str_packed_stacks.append('[' + ','.join(map(str, stack)) + ']')
506    print(',\n'.join(str_packed_stacks))
507    print('];')
508
509class CmdLineProcessor(object):
510
511  def __init__(self):
512    self.options = ["js",
513                    "gc",
514                    "compiler",
515                    "other",
516                    "external",
517                    "ignore-unknown",
518                    "separate-ic",
519                    "call-graph-json"]
520    # default values
521    self.state = None
522    self.ignore_unknown = False
523    self.log_file = None
524    self.separate_ic = False
525    self.call_graph_json = False
526
527  def ProcessArguments(self):
528    try:
529      opts, args = getopt.getopt(sys.argv[1:], "jgcoe", self.options)
530    except getopt.GetoptError:
531      self.PrintUsageAndExit()
532    for key, value in opts:
533      if key in ("-j", "--js"):
534        self.state = VMStates['JS']
535      if key in ("-g", "--gc"):
536        self.state = VMStates['GC']
537      if key in ("-c", "--compiler"):
538        self.state = VMStates['COMPILER']
539      if key in ("-o", "--other"):
540        self.state = VMStates['OTHER']
541      if key in ("-e", "--external"):
542        self.state = VMStates['EXTERNAL']
543      if key in ("--ignore-unknown"):
544        self.ignore_unknown = True
545      if key in ("--separate-ic"):
546        self.separate_ic = True
547      if key in ("--call-graph-json"):
548        self.call_graph_json = True
549    self.ProcessRequiredArgs(args)
550
551  def ProcessRequiredArgs(self, args):
552    return
553
554  def GetRequiredArgsNames(self):
555    return
556
557  def PrintUsageAndExit(self):
558    print('Usage: %(script_name)s --{%(opts)s} %(req_opts)s' % {
559        'script_name': os.path.basename(sys.argv[0]),
560        'opts': string.join(self.options, ','),
561        'req_opts': self.GetRequiredArgsNames()
562    })
563    sys.exit(2)
564
565  def RunLogfileProcessing(self, tick_processor):
566    tick_processor.ProcessLogfile(self.log_file, self.state, self.ignore_unknown,
567                                  self.separate_ic, self.call_graph_json)
568
569
570if __name__ == '__main__':
571  sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')
572