1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    python app_profiler.py
23    python pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27from __future__ import print_function
28import argparse
29import os
30import os.path
31import profile_pb2
32import re
33import shutil
34import subprocess
35import sys
36import time
37
38from annotate import Addr2Line
39from simpleperf_report_lib import *
40from utils import *
41
42
def load_pprof_profile(filename):
    """Read a serialized pprof profile from `filename`.

    The file is opened in binary mode and its whole content is parsed into a
    new profile_pb2.Profile message, which is returned.
    """
    parsed = profile_pb2.Profile()
    with open(filename, "rb") as profile_file:
        serialized = profile_file.read()
        parsed.ParseFromString(serialized)
    return parsed
48
49
def store_pprof_profile(filename, profile):
    """Write the serialized form of `profile` to `filename`.

    The file is opened in binary write mode, so existing content is replaced.
    `profile` only needs to provide SerializeToString().
    """
    with open(filename, 'wb') as output:
        serialized = profile.SerializeToString()
        output.write(serialized)
53
54
class PprofProfilePrinter(object):
    """Print a pprof Profile message to stdout in a human readable form.

    Location, mapping and function ids are resolved by position: id N is read
    from index N - 1 of the corresponding list in the profile. An id of 0 is
    treated as "not set" and nothing is printed for the referenced object.
    Integer fields that hold string ids are resolved through the profile's
    string table.
    """

    def __init__(self, profile):
        """Remember `profile` and its string table for later lookups."""
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, one field per line."""
        p = self.profile
        sub_space = '  '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Print a count header like the other sections do.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, text in enumerate(p.string_table):
            print('string[%d]: %s' % (i, text))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print the string ids of a ValueType together with the resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and labels of one sample."""
        sub_space = space + '  '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            # The label itself is printed on the same line by show_label().
            print('%slabel[%d] = ' % (space, i), end='')
            self.show_label(label)

    def show_location_id(self, location_id, space=''):
        """Print the location referenced by `location_id`; id 0 prints nothing."""
        if not location_id:
            # Index -1 would silently show the last location instead.
            return
        location = self.profile.location[location_id - 1]
        self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print a location, its mapping and all of its line entries."""
        sub_space = space + '  '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping referenced by `mapping_id`; id 0 prints nothing."""
        if not mapping_id:
            return
        mapping = self.profile.mapping[mapping_id - 1]
        self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of a mapping, resolving filename and build id strings."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print a line entry and the function it refers to."""
        sub_space = space + '  '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function referenced by `function_id`; id 0 prints nothing."""
        if not function_id:
            # Index -1 would show an unrelated function, or raise IndexError
            # when the profile has no functions at all.
            return
        function = self.profile.function[function_id - 1]
        self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print all fields of a function, resolving its string ids."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def show_label(self, label, space=''):
        """Print a label as its key followed by its string or numeric value.

        A non-zero `label.str` is taken as a string id and resolved through the
        string table; otherwise `label.num` is printed.
        """
        print('%sLabel(%s =' % (space, self.string(label.key)), end='')
        # NOTE(review): a non-zero check is used instead of HasField('str'),
        # which presumably is unavailable for plain scalar fields in proto3
        # builds of profile_pb2 - confirm against the generated module.
        if label.str:
            print('%s)' % self.string(label.str))
        else:
            print('%d)' % label.num)

    def string(self, id):
        """Return the string stored at index `id` of the profile's string table."""
        return self.string_table[id]
173
174
class Sample(object):
    """A call stack (list of location ids) with values accumulated per value id."""

    def __init__(self):
        self.values = {}
        self.location_ids = []

    def add_location_id(self, location_id):
        """Append `location_id` to the end of the call stack."""
        self.location_ids += [location_id]

    def add_value(self, id, value):
        """Add `value` to the amount stored under `id` (starting from 0)."""
        current = self.values.get(id, 0)
        self.values[id] = current + value

    def add_values(self, values):
        """Accumulate every (id, value) pair of the dict `values` into this sample."""
        for value_id, amount in values.items():
            self.add_value(value_id, amount)

    @property
    def key(self):
        """The call stack as a tuple, usable as a dictionary key."""
        return tuple(self.location_ids)
195
196
class Location(object):
    """An address inside a mapping, together with its line entries."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.lines = []
        self.vaddr_in_dso = vaddr_in_dso
        self.address = address
        self.mapping_id = mapping_id
        # -1 marks a location that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Identity of the location: the pair (mapping_id, address)."""
        identity = self.mapping_id, self.address
        return identity
209
210
class Line(object):
    """A (function id, line number) pair; both fields start at 0."""

    def __init__(self):
        self.line = 0
        self.function_id = 0
216
217
class Mapping(object):
    """A memory range backed by a file, described by string ids for name and build id."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.memory_start, self.memory_limit = start, end
        self.file_offset = pgoff
        self.filename_id, self.build_id_id = filename_id, build_id_id
        # -1 marks a mapping that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Identity of the mapping: every field except the id, as a tuple."""
        memory_range = (self.memory_start, self.memory_limit)
        backing_file = (self.file_offset, self.filename_id, self.build_id_id)
        return memory_range + backing_file
236
237
class Function(object):
    """A function identified by the string ids of its name and of its dso name."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        # Source information is unknown at construction time.
        self.start_line = 0
        self.source_filename_id = 0
        self.vaddr_in_dso = vaddr_in_dso
        self.dso_name_id = dso_name_id
        self.name_id = name_id
        # -1 marks a function that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Identity of the function: the pair (name_id, dso_name_id)."""
        identity = self.name_id, self.dso_name_id
        return identity
251
252
class PprofProfileGenerator(object):
    """Build a pprof Profile message from the samples returned by ReportLib.

    gen() reads every sample, merges samples that share the same call stack,
    asks Addr2Line for source information and finally fills a
    profile_pb2.Profile. Location, mapping and function ids start from 1 and
    equal the 1-based position of the object in the corresponding list.
    """

    def __init__(self, config):
        """Create the ReportLib reader and the sample filters described by `config`.

        `config` is a dict-like object. The keys read here are
        'binary_cache_dir', 'record_file', 'kallsyms', 'comm_filters',
        'pid_filters', 'tid_filters' and 'dso_filters'; gen_source_lines()
        additionally reads 'addr2line_path'. A missing or empty filter entry
        disables that filter.
        """
        self.config = config
        self.lib = ReportLib()

        if config.get('binary_cache_dir'):
            self.lib.SetSymfs(config['binary_cache_dir'])
        if config.get('record_file'):
            self.lib.SetRecordFile(config['record_file'])
        if config.get('kallsyms'):
            self.lib.SetKallsymsFile(config['kallsyms'])
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

    def gen(self):
        """Read all samples and return the resulting profile_pb2.Profile."""
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string; get_string_id()
        # returns 0 for empty input and numbers real strings from 1.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # 1. Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Each event owns two consecutive value slots: sample count and
            # event count (see get_sample_type_id()).
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(symbol.vaddr_in_file, symbol)
                sample.add_location_id(location_id)
            for i in range(callchain.nr):
                entry = callchain.entries[i]
                # Filter each frame on its own symbol, not on the leaf symbol.
                if self._filter_symbol(entry.symbol):
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            # Samples whose frames were all filtered out are dropped.
            if sample.location_ids:
                self.add_sample(sample)

        # 2. Generate line info for locations and functions.
        self.gen_source_lines()

        # 3. Produce samples/locations/functions in profile
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used.

        The comm, pid and tid filters are independent of each other; a sample
        must pass every filter that is set.
        """
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if no dso filter is set or the symbol's dso is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str):
        """Return the string table index of `str`, adding it when it is new.

        Empty input (including None) maps to index 0, the empty string.
        """
        if not str:
            return 0
        id = self.string_table.get(str)
        if id is not None:
            return id
        # self.string_table does not hold the empty string stored at index 0
        # of the profile's table, hence the + 1.
        id = len(self.string_table) + 1
        self.string_table[str] = id
        self.profile.string_table.append(str)
        return id

    def get_string(self, string_id):
        """Return the string stored at `string_id` in the profile's string table."""
        return self.profile.string_table[string_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types of event `name`.

        On first use two sample types are appended to the profile:
        'event_<name>_samples' at the returned index and 'event_<name>_count'
        at the following index, both with unit 'count'.
        """
        id = self.sample_types.get(name)
        if id is not None:
            return id
        id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = id
        return id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for address `ip` in `symbol`'s mapping.

        The mapping and function of the symbol are registered as a side
        effect. Locations are unique per (mapping id, address).
        """
        mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        # Default line info only contains the function name
        line = Line()
        line.function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
                                                symbol.symbol_addr)
        location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename):
        """Return the id of the mapping described by `report_mapping` and `filename`.

        The build id is obtained from ReportLib for `filename`; a leading
        "0x" is removed before it is stored in the string table.
        """
        filename_id = self.get_string_id(filename)
        build_id = self.lib.GetBuildIdForPath(filename)
        if build_id and build_id[0:2] == "0x":
            build_id = build_id[2:]
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_mapping(self, mapping_id):
        """Return the mapping with id `mapping_id`, or None for ids <= 0."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the function `name` in `dso_name`.

        The name 'unknown' is never registered and yields id 0. Functions are
        unique per (name, dso name); `vaddr_in_file` is stored only when the
        function is first created.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the function with id `function_id`, or None for ids <= 0."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing sample with the same call stack, or store it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Replace default line info with file/line data from Addr2Line where available."""
        # 1. Create Addr2line instance
        addr2line = Addr2Line(self.config['addr2line_path'], self.config['binary_cache_dir'])

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            addr2line.add_addr(dso_name, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            sources = addr2line.get_sources(dso_name, location.vaddr_in_dso)
            replaced_default = False
            for source in sources:
                if source.file and source.function and source.line:
                    if not replaced_default:
                        # Clear default line info
                        location.lines = []
                        replaced_default = True
                    location.lines.append(self.add_line(source, dso_name))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                sources = addr2line.get_sources(dso_name, function.vaddr_in_dso)
                source = sources[0] if sources else None
                if source and source.file:
                    function.source_filename_id = self.get_string_id(source.file)
                    if source.line:
                        function.start_line = source.line

    def add_line(self, source, dso_name):
        """Return a Line for `source`, registering its function and source file."""
        line = Line()
        function_id = self.get_function_id(source.function, dso_name, 0)
        function = self.get_function(function_id)
        # get_function_id() yields 0 (no function object) for the name 'unknown'.
        if function is not None:
            function.source_filename_id = self.get_string_id(source.file)
        line.function_id = function_id
        line.line = source.line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile with one value slot per sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event contributes two sample types; unused slots stay 0.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for value_id, value in sample.values.items():
            values[value_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile; all has_* flags are set to True."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        profile_mapping.has_line_numbers = True
        profile_mapping.has_inline_frames = True

    def gen_profile_location(self, location):
        """Append `location` and its line entries to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; name and system_name use the same string."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
529
530
def main():
    """Command line entry point.

    With --show FILE, load the given pprof profile and print it. Otherwise
    load the config file, generate a profile and store it in the path given
    by the config's 'output_file' entry.
    """
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs=1, help='print existing profile.pprof')
    # No nargs here: with nargs=1 a value given on the command line would be a
    # one-element list while the default is a plain string.
    parser.add_argument('--config', default='pprof_proto_generator.config',
                        help='Set config file, default is pprof_proto_generator.config.')
    args = parser.parse_args(sys.argv[1:])
    if args.show:
        profile = load_pprof_profile(args.show[0])
        printer = PprofProfilePrinter(profile)
        printer.show()
        return
    config = load_config(args.config)
    generator = PprofProfileGenerator(config)
    profile = generator.gen()
    store_pprof_profile(config['output_file'], profile)
547
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
550