summaryrefslogtreecommitdiff
path: root/simpleperf/scripts/pprof_proto_generator.py
diff options
context:
space:
mode:
Diffstat (limited to 'simpleperf/scripts/pprof_proto_generator.py')
-rwxr-xr-xsimpleperf/scripts/pprof_proto_generator.py176
1 files changed, 83 insertions, 93 deletions
diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py
index 57c988b9..11806852 100755
--- a/simpleperf/scripts/pprof_proto_generator.py
+++ b/simpleperf/scripts/pprof_proto_generator.py
@@ -19,35 +19,24 @@
used by pprof.
Example:
- ./app_profiler.py
- ./pprof_proto_generator.py
+ python app_profiler.py
+ python pprof_proto_generator.py
pprof -text pprof.profile
"""
-import logging
+import argparse
import os
import os.path
-import re
-import sys
from simpleperf_report_lib import ReportLib
-from simpleperf_utils import (Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir,
- flatten_arg_list, log_exit, ReadElf, ToolFinder)
+from simpleperf_utils import (Addr2Nearestline, BinaryFinder, extant_dir,
+ flatten_arg_list, log_info, log_exit, ReadElf, ToolFinder)
try:
import profile_pb2
except ImportError:
log_exit('google.protobuf module is missing. Please install it first.')
-# Some units of common event names
-EVENT_UNITS = {
- 'cpu-clock': 'nanoseconds',
- 'cpu-cycles': 'cpu-cycles',
- 'instructions': 'instructions',
- 'task-clock': 'nanoseconds',
-}
-
-
def load_pprof_profile(filename):
profile = profile_pb2.Profile()
with open(filename, "rb") as f:
@@ -118,8 +107,7 @@ class PprofProfilePrinter(object):
for i in range(len(sample.value)):
print('%svalue[%d] = %d' % (space, i, sample.value[i]))
for i in range(len(sample.label)):
- print('%slabel[%d] = %s:%s' % (space, i, self.string(sample.label[i].key),
- self.string(sample.label[i].str)))
+ print('%slabel[%d] = ', (space, i))
def show_location_id(self, location_id, space=''):
location = self.profile.location[location_id - 1]
@@ -174,20 +162,11 @@ class PprofProfilePrinter(object):
return self.string_table[string_id]
-class Label(object):
- def __init__(self, key_id: int, str_id: int):
- # See profile.Label.key
- self.key_id = key_id
- # See profile.Label.str
- self.str_id = str_id
-
-
class Sample(object):
def __init__(self):
self.location_ids = []
self.values = {}
- self.labels = []
def add_location_id(self, location_id):
self.location_ids.append(location_id)
@@ -270,6 +249,15 @@ class PprofProfileGenerator(object):
config['binary_cache_dir'] = 'binary_cache'
if not os.path.isdir(config['binary_cache_dir']):
config['binary_cache_dir'] = None
+ self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
+ if config.get('pid_filters'):
+ self.pid_filter = {int(x) for x in config['pid_filters']}
+ else:
+ self.pid_filter = None
+ if config.get('tid_filters'):
+ self.tid_filter = {int(x) for x in config['tid_filters']}
+ else:
+ self.tid_filter = None
self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
self.max_chain_length = config['max_chain_length']
self.profile = profile_pb2.Profile()
@@ -302,17 +290,8 @@ class PprofProfileGenerator(object):
if self.config.get('show_art_frames'):
self.lib.ShowArtFrames()
- self.lib.SetReportOptions(self.config['report_lib_options'])
-
- comments = [
- "Simpleperf Record Command:\n" + self.lib.GetRecordCmd(),
- "Converted to pprof with:\n" + " ".join(sys.argv),
- "Architecture:\n" + self.lib.GetArch(),
- ]
- for comment in comments:
- self.profile.comment.append(self.get_string_id(comment))
-
- numbers_re = re.compile(r"\d+")
+ for file_path in self.config['proguard_mapping_file'] or []:
+ self.lib.AddProguardMappingFile(file_path)
# Process all samples in perf.data, aggregate samples.
while True:
@@ -325,26 +304,13 @@ class PprofProfileGenerator(object):
symbol = self.lib.GetSymbolOfCurrentSample()
callchain = self.lib.GetCallChainOfCurrentSample()
+ if not self._filter_report_sample(report_sample):
+ continue
+
sample_type_id = self.get_sample_type_id(event.name)
sample = Sample()
sample.add_value(sample_type_id, 1)
sample.add_value(sample_type_id + 1, report_sample.period)
- sample.labels.append(Label(
- self.get_string_id("thread"),
- self.get_string_id(report_sample.thread_comm)))
- # Heuristic: threadpools doing similar work are often named as
- # name-1, name-2, name-3. Combine threadpools into one label
- # "name-%d" if they only differ by a number.
- sample.labels.append(Label(
- self.get_string_id("threadpool"),
- self.get_string_id(
- numbers_re.sub("%d", report_sample.thread_comm))))
- sample.labels.append(Label(
- self.get_string_id("pid"),
- self.get_string_id(str(report_sample.pid))))
- sample.labels.append(Label(
- self.get_string_id("tid"),
- self.get_string_id(str(report_sample.tid))))
if self._filter_symbol(symbol):
location_id = self.get_location_id(report_sample.ip, symbol)
sample.add_location_id(location_id)
@@ -356,9 +322,9 @@ class PprofProfileGenerator(object):
if sample.location_ids:
self.add_sample(sample)
- def gen(self, jobs: int):
+ def gen(self):
# 1. Generate line info for locations and functions.
- self.gen_source_lines(jobs)
+ self.gen_source_lines()
# 2. Produce samples/locations/functions in profile.
for sample in self.sample_list:
@@ -372,6 +338,19 @@ class PprofProfileGenerator(object):
return self.profile
+ def _filter_report_sample(self, sample):
+ """Return true if the sample can be used."""
+ if self.comm_filter:
+ if sample.thread_comm not in self.comm_filter:
+ return False
+ if self.pid_filter:
+ if sample.pid not in self.pid_filter:
+ return False
+ if self.tid_filter:
+ if sample.tid not in self.tid_filter:
+ return False
+ return True
+
def _filter_symbol(self, symbol):
if not self.dso_filter or symbol.dso_name in self.dso_filter:
return True
@@ -397,12 +376,11 @@ class PprofProfileGenerator(object):
return sample_type_id
sample_type_id = len(self.profile.sample_type)
sample_type = self.profile.sample_type.add()
- sample_type.type = self.get_string_id(name + '_samples')
- sample_type.unit = self.get_string_id('samples')
+ sample_type.type = self.get_string_id('event_' + name + '_samples')
+ sample_type.unit = self.get_string_id('count')
sample_type = self.profile.sample_type.add()
- sample_type.type = self.get_string_id(name)
- units = EVENT_UNITS.get(name, 'count')
- sample_type.unit = self.get_string_id(units)
+ sample_type.type = self.get_string_id('event_' + name + '_count')
+ sample_type.unit = self.get_string_id('count')
self.sample_types[name] = sample_type_id
return sample_type_id
@@ -448,14 +426,26 @@ class PprofProfileGenerator(object):
return value
binary_path = dso_name
- build_id = self.lib.GetBuildIdForPath(dso_name)
+ build_id = ''
+
+ # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
+ # So read build id from the binary in binary_cache, and check it with build id in
+ # perf.data.
+ build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name)
# Try elf_path in binary cache.
- elf_path = self.binary_finder.find_binary(dso_name, build_id)
+ elf_path = self.binary_finder.find_binary(dso_name, build_id_in_perf_data)
if elf_path:
+ build_id = build_id_in_perf_data
binary_path = str(elf_path)
- # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
- build_id = ReadElf.unpad_build_id(build_id)
+ # When there is no matching elf_path, try converting build_id in perf.data.
+ if not build_id and build_id_in_perf_data.startswith('0x'):
+ # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
+ build_id = build_id_in_perf_data[2:] # remove '0x'
+ padding = '0' * 8
+ while build_id.endswith(padding):
+ build_id = build_id[:-len(padding)]
+
self.binary_map[dso_name] = (binary_path, build_id)
return (binary_path, build_id)
@@ -486,13 +476,13 @@ class PprofProfileGenerator(object):
self.sample_list.append(sample)
self.sample_map[sample.key] = sample
- def gen_source_lines(self, jobs: int):
+ def gen_source_lines(self):
# 1. Create Addr2line instance
if not self.config.get('binary_cache_dir'):
- logging.info("Can't generate line information because binary_cache is missing.")
+ log_info("Can't generate line information because binary_cache is missing.")
return
if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
- logging.info("Can't generate line information because can't find llvm-symbolizer.")
+ log_info("Can't generate line information because can't find llvm-symbolizer.")
return
# We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
# pass binary_cache_dir to BinaryFinder.
@@ -511,7 +501,7 @@ class PprofProfileGenerator(object):
addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)
# 3. Generate source lines.
- addr2line.convert_addrs_to_lines(jobs)
+ addr2line.convert_addrs_to_lines()
# 4. Annotate locations and functions.
for location in self.location_list:
@@ -525,18 +515,14 @@ class PprofProfileGenerator(object):
sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
if not sources:
continue
- for i, source in enumerate(sources):
+ for (source_id, source) in enumerate(sources):
source_file, source_line, function_name = source
- if i == 0:
- # Don't override original function name from report library, which is more
- # accurate when proguard mapping file is given.
- function_id = location.lines[0].function_id
- # Clear default line info.
- location.lines.clear()
- else:
- function_id = self.get_function_id(function_name, dso_name, 0)
+ function_id = self.get_function_id(function_name, dso_name, 0)
if function_id == 0:
continue
+ if source_id == 0:
+ # Clear default line info
+ location.lines = []
location.lines.append(self.add_line(source_file, source_line, function_id))
for function in self.function_list:
@@ -568,11 +554,6 @@ class PprofProfileGenerator(object):
values[sample_type_id] = sample.values[sample_type_id]
profile_sample.value.extend(values)
- for l in sample.labels:
- label = profile_sample.label.add()
- label.key = l.key_id
- label.str = l.str_id
-
def gen_profile_mapping(self, mapping):
profile_mapping = self.profile.mapping.add()
profile_mapping.id = mapping.id
@@ -610,22 +591,28 @@ class PprofProfileGenerator(object):
def main():
- parser = BaseArgumentParser(description='Generate pprof profile data in pprof.profile.')
+ parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
Set profiling data file to report. Default is perf.data""")
parser.add_argument('-o', '--output_file', default='pprof.profile', help="""
The path of generated pprof profile data.""")
+ parser.add_argument('--comm', nargs='+', action='append', help="""
+ Use samples only in threads with selected names.""")
+ parser.add_argument('--pid', nargs='+', action='append', help="""
+ Use samples only in processes with selected process ids.""")
+ parser.add_argument('--tid', nargs='+', action='append', help="""
+ Use samples only in threads with selected thread ids.""")
+ parser.add_argument('--dso', nargs='+', action='append', help="""
+ Use samples only in selected binaries.""")
parser.add_argument('--max_chain_length', type=int, default=1000000000, help="""
Maximum depth of samples to be converted.""") # Large value as infinity standin.
parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
+ parser.add_argument('--show_art_frames', action='store_true',
+ help='Show frames of internal methods in the ART Java interpreter.')
parser.add_argument(
- '-j', '--jobs', type=int, default=os.cpu_count(),
- help='Use multithreading to speed up source code annotation.')
- sample_filter_group = parser.add_argument_group('Sample filter options')
- sample_filter_group.add_argument('--dso', nargs='+', action='append', help="""
- Use samples only in selected binaries.""")
- parser.add_report_lib_options(sample_filter_group=sample_filter_group)
+ '--proguard-mapping-file', nargs='+',
+ help='Add proguard mapping file to de-obfuscate symbols')
args = parser.parse_args()
if args.show:
@@ -637,16 +624,19 @@ def main():
config = {}
config['output_file'] = args.output_file
+ config['comm_filters'] = flatten_arg_list(args.comm)
+ config['pid_filters'] = flatten_arg_list(args.pid)
+ config['tid_filters'] = flatten_arg_list(args.tid)
config['dso_filters'] = flatten_arg_list(args.dso)
config['ndk_path'] = args.ndk_path
+ config['show_art_frames'] = args.show_art_frames
config['max_chain_length'] = args.max_chain_length
- config['report_lib_options'] = args.report_lib_options
+ config['proguard_mapping_file'] = args.proguard_mapping_file
generator = PprofProfileGenerator(config)
for record_file in args.record_file:
generator.load_record_file(record_file)
- profile = generator.gen(args.jobs)
+ profile = generator.gen()
store_pprof_profile(config['output_file'], profile)
- logging.info("Report is generated at '%s' successfully." % config['output_file'])
if __name__ == '__main__':