diff options
Diffstat (limited to 'simpleperf/scripts/pprof_proto_generator.py')
-rwxr-xr-x | simpleperf/scripts/pprof_proto_generator.py | 176 |
1 files changed, 83 insertions, 93 deletions
diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py index 57c988b9..11806852 100755 --- a/simpleperf/scripts/pprof_proto_generator.py +++ b/simpleperf/scripts/pprof_proto_generator.py @@ -19,35 +19,24 @@ used by pprof. Example: - ./app_profiler.py - ./pprof_proto_generator.py + python app_profiler.py + python pprof_proto_generator.py pprof -text pprof.profile """ -import logging +import argparse import os import os.path -import re -import sys from simpleperf_report_lib import ReportLib -from simpleperf_utils import (Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir, - flatten_arg_list, log_exit, ReadElf, ToolFinder) +from simpleperf_utils import (Addr2Nearestline, BinaryFinder, extant_dir, + flatten_arg_list, log_info, log_exit, ReadElf, ToolFinder) try: import profile_pb2 except ImportError: log_exit('google.protobuf module is missing. Please install it first.') -# Some units of common event names -EVENT_UNITS = { - 'cpu-clock': 'nanoseconds', - 'cpu-cycles': 'cpu-cycles', - 'instructions': 'instructions', - 'task-clock': 'nanoseconds', -} - - def load_pprof_profile(filename): profile = profile_pb2.Profile() with open(filename, "rb") as f: @@ -118,8 +107,7 @@ class PprofProfilePrinter(object): for i in range(len(sample.value)): print('%svalue[%d] = %d' % (space, i, sample.value[i])) for i in range(len(sample.label)): - print('%slabel[%d] = %s:%s' % (space, i, self.string(sample.label[i].key), - self.string(sample.label[i].str))) + print('%slabel[%d] = ', (space, i)) def show_location_id(self, location_id, space=''): location = self.profile.location[location_id - 1] @@ -174,20 +162,11 @@ class PprofProfilePrinter(object): return self.string_table[string_id] -class Label(object): - def __init__(self, key_id: int, str_id: int): - # See profile.Label.key - self.key_id = key_id - # See profile.Label.str - self.str_id = str_id - - class Sample(object): def __init__(self): self.location_ids = [] self.values = {} - self.labels = [] def add_location_id(self, location_id): self.location_ids.append(location_id) @@ -270,6 +249,15 @@ class PprofProfileGenerator(object): config['binary_cache_dir'] = 'binary_cache' if not os.path.isdir(config['binary_cache_dir']): config['binary_cache_dir'] = None + self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None + if config.get('pid_filters'): + self.pid_filter = {int(x) for x in config['pid_filters']} + else: + self.pid_filter = None + if config.get('tid_filters'): + self.tid_filter = {int(x) for x in config['tid_filters']} + else: + self.tid_filter = None self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None self.max_chain_length = config['max_chain_length'] self.profile = profile_pb2.Profile() @@ -302,17 +290,8 @@ class PprofProfileGenerator(object): if self.config.get('show_art_frames'): self.lib.ShowArtFrames() - self.lib.SetReportOptions(self.config['report_lib_options']) - - comments = [ - "Simpleperf Record Command:\n" + self.lib.GetRecordCmd(), - "Converted to pprof with:\n" + " ".join(sys.argv), - "Architecture:\n" + self.lib.GetArch(), - ] - for comment in comments: - self.profile.comment.append(self.get_string_id(comment)) - - numbers_re = re.compile(r"\d+") + for file_path in self.config['proguard_mapping_file'] or []: + self.lib.AddProguardMappingFile(file_path) # Process all samples in perf.data, aggregate samples. while True: @@ -325,26 +304,13 @@ class PprofProfileGenerator(object): symbol = self.lib.GetSymbolOfCurrentSample() callchain = self.lib.GetCallChainOfCurrentSample() + if not self._filter_report_sample(report_sample): + continue + sample_type_id = self.get_sample_type_id(event.name) sample = Sample() sample.add_value(sample_type_id, 1) sample.add_value(sample_type_id + 1, report_sample.period) - sample.labels.append(Label( - self.get_string_id("thread"), - self.get_string_id(report_sample.thread_comm))) - # Heuristic: threadpools doing similar work are often named as - # name-1, name-2, name-3. Combine threadpools into one label - # "name-%d" if they only differ by a number. - sample.labels.append(Label( - self.get_string_id("threadpool"), - self.get_string_id( - numbers_re.sub("%d", report_sample.thread_comm)))) - sample.labels.append(Label( - self.get_string_id("pid"), - self.get_string_id(str(report_sample.pid)))) - sample.labels.append(Label( - self.get_string_id("tid"), - self.get_string_id(str(report_sample.tid)))) if self._filter_symbol(symbol): location_id = self.get_location_id(report_sample.ip, symbol) sample.add_location_id(location_id) @@ -356,9 +322,9 @@ class PprofProfileGenerator(object): if sample.location_ids: self.add_sample(sample) - def gen(self, jobs: int): + def gen(self): # 1. Generate line info for locations and functions. - self.gen_source_lines(jobs) + self.gen_source_lines() # 2. Produce samples/locations/functions in profile. for sample in self.sample_list: @@ -372,6 +338,19 @@ class PprofProfileGenerator(object): return self.profile + def _filter_report_sample(self, sample): + """Return true if the sample can be used.""" + if self.comm_filter: + if sample.thread_comm not in self.comm_filter: + return False + if self.pid_filter: + if sample.pid not in self.pid_filter: + return False + if self.tid_filter: + if sample.tid not in self.tid_filter: + return False + return True + def _filter_symbol(self, symbol): if not self.dso_filter or symbol.dso_name in self.dso_filter: return True @@ -397,12 +376,11 @@ class PprofProfileGenerator(object): return sample_type_id sample_type_id = len(self.profile.sample_type) sample_type = self.profile.sample_type.add() - sample_type.type = self.get_string_id(name + '_samples') - sample_type.unit = self.get_string_id('samples') + sample_type.type = self.get_string_id('event_' + name + '_samples') + sample_type.unit = self.get_string_id('count') sample_type = self.profile.sample_type.add() - sample_type.type = self.get_string_id(name) - units = EVENT_UNITS.get(name, 'count') - sample_type.unit = self.get_string_id(units) + sample_type.type = self.get_string_id('event_' + name + '_count') + sample_type.unit = self.get_string_id('count') self.sample_types[name] = sample_type_id return sample_type_id @@ -448,14 +426,26 @@ class PprofProfileGenerator(object): return value binary_path = dso_name - build_id = self.lib.GetBuildIdForPath(dso_name) + build_id = '' + + # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding. + # So read build id from the binary in binary_cache, and check it with build id in + # perf.data. + build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name) # Try elf_path in binary cache. - elf_path = self.binary_finder.find_binary(dso_name, build_id) + elf_path = self.binary_finder.find_binary(dso_name, build_id_in_perf_data) if elf_path: + build_id = build_id_in_perf_data binary_path = str(elf_path) - # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding. - build_id = ReadElf.unpad_build_id(build_id) + # When there is no matching elf_path, try converting build_id in perf.data. + if not build_id and build_id_in_perf_data.startswith('0x'): + # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper. + build_id = build_id_in_perf_data[2:] # remove '0x' + padding = '0' * 8 + while build_id.endswith(padding): + build_id = build_id[:-len(padding)] + self.binary_map[dso_name] = (binary_path, build_id) return (binary_path, build_id) @@ -486,13 +476,13 @@ class PprofProfileGenerator(object): self.sample_list.append(sample) self.sample_map[sample.key] = sample - def gen_source_lines(self, jobs: int): + def gen_source_lines(self): # 1. Create Addr2line instance if not self.config.get('binary_cache_dir'): - logging.info("Can't generate line information because binary_cache is missing.") + log_info("Can't generate line information because binary_cache is missing.") return if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']): - logging.info("Can't generate line information because can't find llvm-symbolizer.") + log_info("Can't generate line information because can't find llvm-symbolizer.") return # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to # pass binary_cache_dir to BinaryFinder. @@ -511,7 +501,7 @@ class PprofProfileGenerator(object): addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso) # 3. Generate source lines. - addr2line.convert_addrs_to_lines(jobs) + addr2line.convert_addrs_to_lines() # 4. Annotate locations and functions. for location in self.location_list: @@ -525,18 +515,14 @@ class PprofProfileGenerator(object): sources = addr2line.get_addr_source(dso, location.vaddr_in_dso) if not sources: continue - for i, source in enumerate(sources): + for (source_id, source) in enumerate(sources): source_file, source_line, function_name = source - if i == 0: - # Don't override original function name from report library, which is more - # accurate when proguard mapping file is given. - function_id = location.lines[0].function_id - # Clear default line info. - location.lines.clear() - else: - function_id = self.get_function_id(function_name, dso_name, 0) + function_id = self.get_function_id(function_name, dso_name, 0) if function_id == 0: continue + if source_id == 0: + # Clear default line info + location.lines = [] location.lines.append(self.add_line(source_file, source_line, function_id)) for function in self.function_list: @@ -568,11 +554,6 @@ class PprofProfileGenerator(object): values[sample_type_id] = sample.values[sample_type_id] profile_sample.value.extend(values) - for l in sample.labels: - label = profile_sample.label.add() - label.key = l.key_id - label.str = l.str_id - def gen_profile_mapping(self, mapping): profile_mapping = self.profile.mapping.add() profile_mapping.id = mapping.id @@ -610,22 +591,28 @@ class PprofProfileGenerator(object): def main(): - parser = BaseArgumentParser(description='Generate pprof profile data in pprof.profile.') + parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.') parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.') parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help=""" Set profiling data file to report. Default is perf.data""") parser.add_argument('-o', '--output_file', default='pprof.profile', help=""" The path of generated pprof profile data.""") + parser.add_argument('--comm', nargs='+', action='append', help=""" + Use samples only in threads with selected names.""") + parser.add_argument('--pid', nargs='+', action='append', help=""" + Use samples only in processes with selected process ids.""") + parser.add_argument('--tid', nargs='+', action='append', help=""" + Use samples only in threads with selected thread ids.""") + parser.add_argument('--dso', nargs='+', action='append', help=""" + Use samples only in selected binaries.""") parser.add_argument('--max_chain_length', type=int, default=1000000000, help=""" Maximum depth of samples to be converted.""") # Large value as infinity standin. parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.') + parser.add_argument('--show_art_frames', action='store_true', + help='Show frames of internal methods in the ART Java interpreter.') parser.add_argument( - '-j', '--jobs', type=int, default=os.cpu_count(), - help='Use multithreading to speed up source code annotation.') - sample_filter_group = parser.add_argument_group('Sample filter options') - sample_filter_group.add_argument('--dso', nargs='+', action='append', help=""" - Use samples only in selected binaries.""") - parser.add_report_lib_options(sample_filter_group=sample_filter_group) + '--proguard-mapping-file', nargs='+', + help='Add proguard mapping file to de-obfuscate symbols') args = parser.parse_args() if args.show: @@ -637,16 +624,19 @@ def main(): config = {} config['output_file'] = args.output_file + config['comm_filters'] = flatten_arg_list(args.comm) + config['pid_filters'] = flatten_arg_list(args.pid) + config['tid_filters'] = flatten_arg_list(args.tid) config['dso_filters'] = flatten_arg_list(args.dso) config['ndk_path'] = args.ndk_path + config['show_art_frames'] = args.show_art_frames config['max_chain_length'] = args.max_chain_length - config['report_lib_options'] = args.report_lib_options + config['proguard_mapping_file'] = args.proguard_mapping_file generator = PprofProfileGenerator(config) for record_file in args.record_file: generator.load_record_file(record_file) - profile = generator.gen(args.jobs) + profile = generator.gen() store_pprof_profile(config['output_file'], profile) - logging.info("Report is generated at '%s' successfully." % config['output_file']) if __name__ == '__main__': |