1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    python app_profiler.py
23    python pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27from __future__ import print_function
28import argparse
29import os
30import os.path
31import profile_pb2
32import re
33import shutil
34import sys
35import time
36
37from annotate import Addr2Line
38from simpleperf_report_lib import *
39from utils import *
40
41
def load_pprof_profile(filename):
    """Read filename as bytes and decode it into a profile_pb2.Profile.

    Returns the parsed Profile message.
    """
    with open(filename, "rb") as profile_file:
        serialized = profile_file.read()
    loaded = profile_pb2.Profile()
    loaded.ParseFromString(serialized)
    return loaded
47
48
def store_pprof_profile(filename, profile):
    """Write the serialized form of profile to filename in binary mode.

    The file is opened (and therefore truncated) before the profile is
    serialized.
    """
    with open(filename, 'wb') as out_file:
        serialized = profile.SerializeToString()
        out_file.write(serialized)
52
53
class PprofProfilePrinter(object):
    """Dump a pprof Profile message to stdout in a readable, indented form.

    Integer fields that are string-table indexes are resolved through
    profile.string_table. Location, mapping and function ids are resolved
    with `id - 1` as a list index, i.e. ids are assumed to be 1-based and
    stored in id order.
    """

    def __init__(self, profile):
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, one entry per line group."""
        p = self.profile
        sub_space = '  '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Count header added so functions are reported like the other sections.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, text in enumerate(p.string_table):
            print('string[%d]: %s' % (i, text))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType with both its raw string ids and resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and labels of one sample."""
        sub_space = space + '  '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            # The prefix and the label body share one output line.
            print('%slabel[%d] = ' % (space, i), end='')
            self.show_label(label)

    def show_location_id(self, location_id, space=''):
        """Print the location whose 1-based id is location_id."""
        location = self.profile.location[location_id - 1]
        self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print a location, its mapping, and each of its line entries."""
        sub_space = space + '  '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping whose 1-based id is mapping_id."""
        mapping = self.profile.mapping[mapping_id - 1]
        self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of a mapping; string fields show text and raw id."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print a line entry together with the function it refers to."""
        sub_space = space + '  '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function whose 1-based id is function_id."""
        function = self.profile.function[function_id - 1]
        self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print the id, names, source file and start line of a function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def show_label(self, label, space=''):
        """Print a label as 'Label(key = value)' on a single line.

        The value is the string referenced by label.str when that index is
        nonzero, otherwise the numeric label.num.
        """
        print('%sLabel(%s = ' % (space, self.string(label.key)), end='')
        # A nonzero string-table index marks a string-valued label (index 0 is
        # the empty string).
        # NOTE(review): this replaces HasField('str'), which is presumably
        # unusable on plain scalar fields if profile.proto is proto3 — confirm.
        if label.str:
            print('%s)' % self.string(label.str))
        else:
            print('%d)' % label.num)

    def string(self, id):
        """Return the string-table entry at index id."""
        return self.string_table[id]
172
173
class Sample(object):
    """A list of location ids plus accumulated values keyed by an id."""

    def __init__(self):
        self.values = {}
        self.location_ids = []

    def add_location_id(self, location_id):
        """Append location_id to the end of the location list."""
        self.location_ids.append(location_id)

    def add_value(self, id, value):
        """Add value to the running total stored under id (starting from 0)."""
        previous = self.values.get(id, 0)
        self.values[id] = previous + value

    def add_values(self, values):
        """Accumulate every (id, value) pair of the given dict into this sample."""
        for value_id, amount in values.items():
            self.add_value(value_id, amount)

    @property
    def key(self):
        """Hashable identity of the sample: the tuple of its location ids."""
        return tuple(self.location_ids)
194
195
class Location(object):
    """An address within a mapping, with the line entries attached to it.

    id stays -1 until the owner assigns a real one.
    """

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.id = -1  # not assigned yet
        self.mapping_id, self.address = mapping_id, address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []

    @property
    def key(self):
        """Deduplication key: the (mapping_id, address) pair."""
        identity = (self.mapping_id, self.address)
        return identity
208
209
class Line(object):
    """A (function_id, line) pair; both fields start at 0."""

    def __init__(self):
        self.line = 0
        self.function_id = 0
215
216
class Mapping(object):
    """A memory range backed by a file, identified by string-table ids.

    id stays -1 until the owner assigns a real one.
    """

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.id = -1  # not assigned yet
        self.memory_start, self.memory_limit = start, end
        self.file_offset = pgoff
        self.filename_id, self.build_id_id = filename_id, build_id_id

    @property
    def key(self):
        """Deduplication key built from every field except id."""
        return (self.memory_start, self.memory_limit, self.file_offset,
                self.filename_id, self.build_id_id)
235
236
class Function(object):
    """A function identified by the string ids of its name and its dso.

    id stays -1 until the owner assigns a real one; the source file id and
    start line default to 0.
    """

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.id = -1  # not assigned yet
        self.name_id, self.dso_name_id = name_id, dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        self.start_line = 0
        self.source_filename_id = 0

    @property
    def key(self):
        """Deduplication key: the (name_id, dso_name_id) pair."""
        identity = (self.name_id, self.dso_name_id)
        return identity
250
251
class PprofProfileGenerator(object):
    """Build a pprof Profile message from the samples that ReportLib returns.

    config is a dict. Keys read here: 'record_file' / 'perf_data_path',
    'comm_filters', 'pid_filters', 'tid_filters', 'dso_filters' and
    'addr2line_path'. 'binary_cache_dir' is overwritten by __init__.
    """

    def __init__(self, config):
        """Create the ReportLib instance and set up the sample filters.

        config['binary_cache_dir'] is set to 'binary_cache' when that directory
        exists relative to the current working directory, otherwise to None.
        """
        self.config = config
        self.lib = ReportLib()

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        else:
            self.lib.SetSymfs(config['binary_cache_dir'])
        # main() stores the -i/--perf_data_path value under 'perf_data_path';
        # 'record_file' is still honoured (and wins) for existing callers.
        record_file = config.get('record_file') or config.get('perf_data_path')
        if record_file:
            self.lib.SetRecordFile(record_file)
        kallsyms = 'binary_cache/kallsyms'
        if os.path.isfile(kallsyms):
            self.lib.SetKallsymsFile(kallsyms)
        # A filter of None means "accept everything".
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

    def gen(self):
        """Read all samples, aggregate them and return the filled Profile."""
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string; get_string_id()
        # returns 0 for empty input to match.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Each event owns two consecutive value slots:
            # [sample count, accumulated period].
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(symbol.vaddr_in_file, symbol)
                sample.add_location_id(location_id)
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                # Filter each frame on its own symbol, not on the leaf symbol.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            # Samples whose frames were all filtered out are dropped.
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used.

        The comm, pid and tid filters are independent: each one that is set
        must accept the sample.
        """
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return True if no dso filter is set or the symbol's dso is selected."""
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False

    def get_string_id(self, str):
        """Return the string-table index of str, adding it on first use.

        Empty or missing (None) strings map to index 0.
        """
        if not str:
            return 0
        id = self.string_table.get(str)
        if id is not None:
            return id
        # self.string_table does not hold the '' entry at index 0, hence + 1.
        id = len(self.string_table) + 1
        self.string_table[str] = id
        self.profile.string_table.append(str)
        return id

    def get_string(self, string_id):
        """Return the string stored at string_id in the profile's table."""
        return self.profile.string_table[string_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types for event name.

        On first use two sample types are appended to the profile:
        'event_<name>_samples' at the returned index and 'event_<name>_count'
        at the following one.
        """
        id = self.sample_types.get(name)
        if id is not None:
            return id
        id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = id
        return id

    def get_location_id(self, ip, symbol):
        """Return the 1-based id of the location for (symbol's mapping, ip).

        The mapping and function of the symbol are registered as a side
        effect, even when the location already exists.
        """
        mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
                                           symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename):
        """Return the 1-based id of the mapping, creating it on first use."""
        filename_id = self.get_string_id(filename)
        build_id = self.lib.GetBuildIdForPath(filename)
        # Store the build id without a leading "0x".
        if build_id and build_id[0:2] == "0x":
            build_id = build_id[2:]
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_mapping(self, mapping_id):
        """Return the Mapping with the given 1-based id, or None for id <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the 1-based id of the function, or 0 when name is 'unknown'."""
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with the given 1-based id, or None for id <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge sample into the one with the same call stack, or register it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Annotate locations and functions with file/line info via addr2line.

        Returns without doing anything when binary_cache is missing or no
        addr2line can be found.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        addr2line_path = self.config.get('addr2line_path')
        if not addr2line_path or not is_executable_available(addr2line_path):
            if not find_tool_path('addr2line'):
                log_info("Can't generate line information because can't find addr2line.")
                return

        # NOTE(review): when addr2line_path is unusable, Addr2Line presumably
        # locates the tool itself — confirm against annotate.Addr2Line.
        addr2line = Addr2Line(addr2line_path, self.config['binary_cache_dir'])

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            addr2line.add_addr(dso_name, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            sources = addr2line.get_sources(dso_name, location.vaddr_in_dso)
            source_id = 0
            for source in sources:
                if source.file and source.function and source.line:
                    # May register a new function (with vaddr 0) for this source.
                    function_id = self.get_function_id(source.function, dso_name, 0)
                    if function_id == 0:
                        continue
                    if source_id == 0:
                        # Clear default line info
                        location.lines = []
                    location.lines.append(self.add_line(source, dso_name, function_id))
                    source_id += 1

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            # Functions registered with vaddr 0 have no address to look up.
            if function.vaddr_in_dso:
                sources = addr2line.get_sources(dso_name, function.vaddr_in_dso)
                source = sources[0] if sources else None
                if source and source.file:
                    function.source_filename_id = self.get_string_id(source.file)
                    if source.line:
                        function.start_line = source.line

    def add_line(self, source, dso_name, function_id):
        """Return a Line for source and record source.file on the function.

        dso_name is accepted for interface compatibility and is not used.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source.file)
        line.function_id = function_id
        line.line = source.line
        return line

    def gen_profile_sample(self, sample):
        """Append sample to the profile with one value slot per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Two sample types are registered per event name.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for type_id, value in sample.values.items():
            values[type_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append mapping to the profile.

        Line numbers and inline frames are advertised only when a binary
        cache directory is configured.
        """
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        has_line_info = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_line_info
        profile_mapping.has_inline_frames = has_line_info

    def gen_profile_location(self, location):
        """Append location and its line entries to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append function to the profile; system_name reuses the name id."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
550
551
def main():
    """Command-line entry point.

    With --show, print an existing pprof profile and return. Otherwise build
    a config dict from the arguments, generate a profile and store it in the
    output file.
    """
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    parser.add_argument('-i', '--perf_data_path', default='perf.data',
                        help="""The path of profiling data.""")
    parser.add_argument('-o', '--output_file', default='pprof.profile',
                        help="""The path of generated pprof profile data.""")
    # The four filter options share the same argument shape.
    filter_options = (
        ('--comm', """Use samples only in threads with selected names."""),
        ('--pid', """Use samples only in processes with selected process ids."""),
        ('--tid', """Use samples only in threads with selected thread ids."""),
        ('--dso', """Use samples only in selected binaries."""),
    )
    for flag, help_text in filter_options:
        parser.add_argument(flag, nargs='+', action='append', help=help_text)
    parser.add_argument('--addr2line', help="""Set the path of addr2line.""")

    args = parser.parse_args()
    if args.show:
        # A bare --show stores None, which falls back to the default file.
        show_file = args.show[0] or 'pprof.profile'
        PprofProfilePrinter(load_pprof_profile(show_file)).show()
        return

    config = {
        'perf_data_path': args.perf_data_path,
        'output_file': args.output_file,
        'comm_filters': flatten_arg_list(args.comm),
        'pid_filters': flatten_arg_list(args.pid),
        'tid_filters': flatten_arg_list(args.tid),
        'dso_filters': flatten_arg_list(args.dso),
        'addr2line_path': args.addr2line,
    }
    profile = PprofProfileGenerator(config).gen()
    store_pprof_profile(config['output_file'], profile)
589
590
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
593