diff options
Diffstat (limited to 'simpleperf/scripts/simpleperf_utils.py')
-rw-r--r-- | simpleperf/scripts/simpleperf_utils.py | 422 |
1 files changed, 133 insertions, 289 deletions
diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py index 2a7dfd35..cf50fcb2 100644 --- a/simpleperf/scripts/simpleperf_utils.py +++ b/simpleperf/scripts/simpleperf_utils.py @@ -20,8 +20,6 @@ from __future__ import annotations import argparse -from concurrent.futures import Future, ThreadPoolExecutor -from dataclasses import dataclass import logging import os import os.path @@ -31,10 +29,7 @@ import shutil import subprocess import sys import time -from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union - - -NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option." +from typing import Dict, Iterator, List, Optional, Set, Union def get_script_dir() -> str: @@ -57,7 +52,49 @@ def get_platform() -> str: return 'linux' +def is_python3() -> str: + return sys.version_info >= (3, 0) + + +def log_debug(msg: str): + logging.debug(msg) + + +def log_info(msg: str): + logging.info(msg) + + +def log_warning(msg: str): + logging.warning(msg) + + +def log_fatal(msg: str): + raise Exception(msg) + + +def log_exit(msg: str): + sys.exit(msg) + + +def disable_debug_log(): + logging.getLogger().setLevel(logging.WARN) + + +def set_log_level(level_name: str): + if level_name == 'debug': + level = logging.DEBUG + elif level_name == 'info': + level = logging.INFO + elif level_name == 'warning': + level = logging.WARNING + else: + log_fatal('unknown log level: %s' % level_name) + logging.getLogger().setLevel(level) + + def str_to_bytes(str_value: str) -> bytes: + if not is_python3(): + return str_value # In python 3, str are wide strings whereas the C api expects 8 bit strings, # hence we have to convert. For now using utf-8 as the encoding. return str_value.encode('utf-8') @@ -66,6 +103,8 @@ def str_to_bytes(str_value: str) -> bytes: def bytes_to_str(bytes_value: Optional[bytes]) -> str: if not bytes_value: return '' + if not is_python3(): + return bytes_value return bytes_value.decode('utf-8') @@ -142,10 +181,11 @@ class ToolFinder: 'path_in_ndk': lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform, }, - 'llvm-strip': { - 'is_binutils': False, - 'path_in_ndk': - lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform, + 'objdump': { + 'is_binutils': True, + }, + 'strip': { + 'is_binutils': True, }, } @@ -285,7 +325,7 @@ class AdbHelper(object): def run_and_return_output(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> Tuple[bool, str]: adb_args = [self.adb_path] + adb_args - logging.debug('run adb cmd: %s' % adb_args) + log_debug('run adb cmd: %s' % adb_args) env = None if self.serial_number: env = os.environ.copy() @@ -298,10 +338,10 @@ class AdbHelper(object): returncode = subproc.returncode result = (returncode == 0) if log_output and stdout_data: - logging.debug(stdout_data) + log_debug(stdout_data) if log_stderr and stderr_data: - logging.warning(stderr_data) - logging.debug('run adb cmd: %s [result %s]' % (adb_args, result)) + log_warning(stderr_data) + log_debug('run adb cmd: %s [result %s]' % (adb_args, result)) return (result, stdout_data) def check_run(self, adb_args: List[str], log_output: bool = False): @@ -320,10 +360,10 @@ class AdbHelper(object): return if 'root' not in stdoutdata: return - logging.info('unroot adb') + log_info('unroot adb') self.run(['unroot']) - time.sleep(1) self.run(['wait-for-device']) + time.sleep(1) def switch_to_root(self) -> bool: if not self.enable_switch_to_root: @@ -345,7 +385,7 @@ class AdbHelper(object): def get_property(self, name: str) -> Optional[str]: result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name]) - return stdoutdata.strip() if result else None + return stdoutdata if result else None def set_property(self, name: str, value: str) -> bool: return self.run(['shell', 'setprop', name, value]) @@ -365,20 +405,17 @@ class AdbHelper(object): def get_android_version(self) -> int: """ Get Android version on device, like 7 is for Android N, 8 is for Android O.""" - build_version = self.get_property('ro.build.version.codename') - if not build_version or build_version == 'REL': - build_version = self.get_property('ro.build.version.release') + build_version = self.get_property('ro.build.version.release') android_version = 0 if build_version: - if build_version[0].isdigit(): - i = 1 - while i < len(build_version) and build_version[i].isdigit(): - i += 1 - android_version = int(build_version[:i]) - else: + if not build_version[0].isdigit(): c = build_version[0].upper() if c.isupper() and c >= 'L': android_version = ord(c) - ord('L') + 5 + else: + strs = build_version.split('.') + if strs: + android_version = int(strs[0]) return android_version @@ -499,26 +536,6 @@ class Addr2Nearestline(object): def __init__(self, build_id: Optional[str]): self.build_id = build_id self.addrs: Dict[int, Addr2Nearestline.Addr] = {} - # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, - # and provide data structures connecting file id and file name here. - self.file_name_to_id: Dict[str, int] = {} - self.file_id_to_name: List[str] = [] - self.func_name_to_id: Dict[str, int] = {} - self.func_id_to_name: List[str] = [] - - def get_file_id(self, file_path: str) -> int: - file_id = self.file_name_to_id.get(file_path) - if file_id is None: - file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name) - self.file_id_to_name.append(file_path) - return file_id - - def get_func_id(self, func_name: str) -> int: - func_id = self.func_name_to_id.get(func_name) - if func_id is None: - func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name) - self.func_id_to_name.append(func_name) - return func_id class Addr(object): """ Info of an addr request. @@ -536,11 +553,17 @@ class Addr2Nearestline(object): binary_finder: BinaryFinder, with_function_name: bool): self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path) if not self.symbolizer_path: - log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE) + log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.") self.readelf = ReadElf(ndk_path) self.dso_map: Dict[str, Addr2Nearestline.Dso] = {} # map from dso_path to Dso. self.binary_finder = binary_finder self.with_function_name = with_function_name + # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, + # and provide data structures connecting file id and file name here. + self.file_name_to_id: Dict[str, int] = {} + self.file_id_to_name: List[str] = [] + self.func_name_to_id: Dict[str, int] = {} + self.func_id_to_name: List[str] = [] def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int): dso = self.dso_map.get(dso_path) @@ -549,24 +572,19 @@ class Addr2Nearestline(object): if addr not in dso.addrs: dso.addrs[addr] = self.Addr(func_addr) - def convert_addrs_to_lines(self, jobs: int): - with ThreadPoolExecutor(jobs) as executor: - futures: List[Future] = [] - for dso_path, dso in self.dso_map.items(): - futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso)) - for future in futures: - # Call future.result() to report exceptions raised in the executor. - future.result() + def convert_addrs_to_lines(self): + for dso_path, dso in self.dso_map.items(): + self._convert_addrs_in_one_dso(dso_path, dso) def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso): real_path = self.binary_finder.find_binary(dso_path, dso.build_id) if not real_path: if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']: - logging.debug("Can't find dso %s" % dso_path) + log_debug("Can't find dso %s" % dso_path) return if not self._check_debug_line_section(real_path): - logging.debug("file %s doesn't contain .debug_line section." % real_path) + log_debug("file %s doesn't contain .debug_line section." % real_path) return addr_step = self._get_addr_step(real_path) @@ -613,7 +631,38 @@ class Addr2Nearestline(object): stdoutdata = bytes_to_str(stdoutdata) except OSError: return - addr_map = self.parse_line_output(stdoutdata, dso) + addr_map: Dict[int, List[Tuple[int]]] = {} + cur_line_list: Optional[List[Tuple[int]]] = None + need_function_name = self.with_function_name + cur_function_name: Optional[str] = None + for line in stdoutdata.strip().split('\n'): + line = line.strip() + if not line: + continue + if line[:2] == '0x': + # a new address + cur_line_list = addr_map[int(line, 16)] = [] + elif need_function_name: + cur_function_name = line.strip() + need_function_name = False + else: + need_function_name = self.with_function_name + if cur_line_list is None: + continue + file_path, line_number = self._parse_source_location(line) + if not file_path or not line_number: + # An addr can have a list of (file, line), when the addr belongs to an inlined + # function. Sometimes only part of the list has ? mark. In this case, we think + # the line info is valid if the first line doesn't have ? mark. + if not cur_line_list: + cur_line_list = None + continue + file_id = self._get_file_id(file_path) + if self.with_function_name: + func_id = self._get_func_id(cur_function_name) + cur_line_list.append((file_id, line_number, func_id)) + else: + cur_line_list.append((file_id, line_number)) # 3. Fill line info in dso.addrs. for addr in dso.addrs: @@ -637,66 +686,7 @@ class Addr2Nearestline(object): args.append('--functions=none') return args - def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso) -> Dict[int, - List[Tuple[int]]]: - """ - The output is a list of lines. - address1 - function_name1 (the function name can be empty) - source_location1 - function_name2 - source_location2 - ... - (end with empty line) - """ - - addr_map: Dict[int, List[Tuple[int]]] = {} - lines = output.strip().splitlines() - i = 0 - while i < len(lines): - address = self._parse_line_output_address(lines[i]) - i += 1 - if address is None: - continue - info = [] - while i < len(lines): - if self.with_function_name: - if i + 1 == len(lines): - break - function_name = lines[i].strip() - if not function_name and (':' not in lines[i+1]): - # no more frames - break - i += 1 - elif not lines[i]: - i += 1 - break - - file_path, line_number = self._parse_line_output_source_location(lines[i]) - i += 1 - if not file_path or not line_number: - # An addr can have a list of (file, line), when the addr belongs to an inlined - # function. Sometimes only part of the list has ? mark. In this case, we think - # the line info is valid if the first line doesn't have ? mark. - if not info: - break - continue - file_id = dso.get_file_id(file_path) - if self.with_function_name: - func_id = dso.get_func_id(function_name) - info.append((file_id, line_number, func_id)) - else: - info.append((file_id, line_number)) - if info: - addr_map[address] = info - return addr_map - - def _parse_line_output_address(self, output: str) -> Optional[int]: - if output.startswith('0x'): - return int(output, 16) - return None - - def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]: + def _parse_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]: file_path, line_number = None, None # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25". # Filename may contain ':' like "C:\Users\...\file". @@ -711,6 +701,20 @@ class Addr2Nearestline(object): return None, None return file_path, line_number + def _get_file_id(self, file_path: str) -> int: + file_id = self.file_name_to_id.get(file_path) + if file_id is None: + file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name) + self.file_id_to_name.append(file_path) + return file_id + + def _get_func_id(self, func_name: str) -> int: + func_id = self.func_name_to_id.get(func_name) + if func_id is None: + func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name) + self.func_id_to_name.append(func_name) + return func_id + def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso: return self.dso_map.get(dso_path) @@ -719,9 +723,9 @@ class Addr2Nearestline(object): if source is None: return None if self.with_function_name: - return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id]) + return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id]) for (file_id, line, func_id) in source] - return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source] + return [(self.file_id_to_name[file_id], line) for (file_id, line) in source] class SourceFileSearcher(object): @@ -813,9 +817,14 @@ class Objdump(object): real_path, arch = dso_info objdump_path = self.objdump_paths.get(arch) if not objdump_path: - objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) + if arch == 'arm': + # llvm-objdump for arm is not good at showing branch targets. + # So still prefer objdump. + objdump_path = ToolFinder.find_tool_path('objdump', self.ndk_path, arch) if not objdump_path: - log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE) + objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch) + if not objdump_path: + log_exit("Can't find llvm-objdump. Please set ndk path with --ndk_path option.") self.objdump_paths[arch] = objdump_path # 3. Run objdump. @@ -852,7 +861,7 @@ class ReadElf(object): def __init__(self, ndk_path: Optional[str]): self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path) if not self.readelf_path: - log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE) + log_exit("Can't find llvm-readelf. Please set ndk path with --ndk_path option.") @staticmethod def is_elf_file(path: Union[Path, str]) -> bool: @@ -904,16 +913,6 @@ class ReadElf(object): build_id = build_id[:40] return '0x' + build_id - @staticmethod - def unpad_build_id(build_id: str) -> str: - if build_id.startswith('0x'): - build_id = build_id[2:] - # Unpad build id as TrimZeroesFromBuildIDString() in quipper. - padding = '0' * 8 - while build_id.endswith(padding): - build_id = build_id[:-len(padding)] - return build_id - def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]: """ Get sections of an elf file. """ section_names: List[str] = [] @@ -963,164 +962,9 @@ def extant_file(arg: str) -> str: return path -def log_fatal(msg: str): - raise Exception(msg) - - -def log_exit(msg: str): - sys.exit(msg) - - -class LogFormatter(logging.Formatter): - """ Use custom logging format. """ - - def __init__(self): - super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s') - - def formatTime(self, record, datefmt): - return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs - - -class Log: - initialized = False - - @classmethod - def init(cls, log_level: str = 'info'): - assert not cls.initialized - cls.initialized = True - cls.logger = logging.root - cls.logger.setLevel(log_level.upper()) - handler = logging.StreamHandler() - handler.setFormatter(LogFormatter()) - cls.logger.addHandler(handler) - - class ArgParseFormatter( argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter): pass -@dataclass -class ReportLibOptions: - show_art_frames: bool - trace_offcpu: str - proguard_mapping_files: List[str] - sample_filters: List[str] - - -class BaseArgumentParser(argparse.ArgumentParser): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, formatter_class=ArgParseFormatter) - self.has_sample_filter_options = False - self.sample_filter_with_pid_shortcut = False - self.has_report_lib_options = False - - def add_report_lib_options(self, group: Optional[Any] = None, - default_show_art_frames: bool = False, - sample_filter_group: Optional[Any] = None, - sample_filter_with_pid_shortcut: bool = True): - self.has_report_lib_options = True - parser = group if group else self - parser.add_argument( - '--proguard-mapping-file', nargs='+', - help='Add proguard mapping file to de-obfuscate symbols') - parser.add_argument('--show-art-frames', '--show_art_frames', - action=argparse.BooleanOptionalAction, default=default_show_art_frames, - help='Show frames of internal methods in the ART Java interpreter.') - parser.add_argument( - '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'], - help="""Set report mode for profiles recorded with --trace-offcpu option. All possible - modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples), - on-off-cpu (both on-cpu and off-cpu samples, can be split by event name), - mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name). - If not set, mixed-on-off-cpu mode is used. - """) - self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut) - - def _add_sample_filter_options( - self, group: Optional[Any] = None, with_pid_shortcut: bool = True): - if not group: - group = self.add_argument_group('Sample filter options') - group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int, - help='exclude samples for selected processes') - group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int, - help='exclude samples for selected threads') - group.add_argument( - '--exclude-process-name', metavar='process_name_regex', nargs='+', - help='exclude samples for processes with name containing the regular expression') - group.add_argument( - '--exclude-thread-name', metavar='thread_name_regex', nargs='+', - help='exclude samples for threads with name containing the regular expression') - - if with_pid_shortcut: - group.add_argument('--pid', metavar='pid', nargs='+', type=int, - help='only include samples for selected processes') - group.add_argument('--tid', metavar='tid', nargs='+', type=int, - help='only include samples for selected threads') - group.add_argument('--include-pid', metavar='pid', nargs='+', type=int, - help='only include samples for selected processes') - group.add_argument('--include-tid', metavar='tid', nargs='+', type=int, - help='only include samples for selected threads') - group.add_argument( - '--include-process-name', metavar='process_name_regex', nargs='+', - help='only include samples for processes with name containing the regular expression') - group.add_argument( - '--comm', '--include-thread-name', metavar='thread_name_regex', - dest='include_thread_name', nargs='+', - help='only include samples for threads with name containing the regular expression') - group.add_argument( - '--filter-file', metavar='file', - help='use filter file to filter samples based on timestamps. ' + - 'The file format is in doc/sampler_filter.md.') - self.has_sample_filter_options = True - self.sample_filter_with_pid_shortcut = with_pid_shortcut - - def _build_sample_filter(self, args: argparse.Namespace) -> List[str]: - """ Build sample filters, which can be passed to ReportLib.SetSampleFilter(). """ - filters = [] - if args.exclude_pid: - filters.extend(['--exclude-pid', ','.join(str(pid) for pid in args.exclude_pid)]) - if args.exclude_tid: - filters.extend(['--exclude-tid', ','.join(str(tid) for tid in args.exclude_tid)]) - if args.exclude_process_name: - for name in args.exclude_process_name: - filters.extend(['--exclude-process-name', name]) - if args.exclude_thread_name: - for name in args.exclude_thread_name: - filters.extend(['--exclude-thread-name', name]) - - if args.include_pid: - filters.extend(['--include-pid', ','.join(str(pid) for pid in args.include_pid)]) - if args.include_tid: - filters.extend(['--include-tid', ','.join(str(tid) for tid in args.include_tid)]) - if self.sample_filter_with_pid_shortcut: - if args.pid: - filters.extend(['--include-pid', ','.join(str(pid) for pid in args.pid)]) - if args.tid: - filters.extend(['--include-tid', ','.join(str(pid) for pid in args.tid)]) - if args.include_process_name: - for name in args.include_process_name: - filters.extend(['--include-process-name', name]) - if args.include_thread_name: - for name in args.include_thread_name: - filters.extend(['--include-thread-name', name]) - if args.filter_file: - filters.extend(['--filter-file', args.filter_file]) - return filters - - def parse_known_args(self, *args, **kwargs): - self.add_argument( - '--log', choices=['debug', 'info', 'warning'], - default='info', help='set log level') - namespace, left_args = super().parse_known_args(*args, **kwargs) - - if self.has_report_lib_options: - sample_filters = self._build_sample_filter(namespace) - report_lib_options = ReportLibOptions( - namespace.show_art_frames, namespace.trace_offcpu, namespace.proguard_mapping_file, - sample_filters) - setattr(namespace, 'report_lib_options', report_lib_options) - - if not Log.initialized: - Log.init(namespace.log) - return namespace, left_args +logging.getLogger().setLevel(logging.DEBUG) |