diff options
Diffstat (limited to 'simpleperf/scripts/binary_cache_builder.py')
-rwxr-xr-x | simpleperf/scripts/binary_cache_builder.py | 389 |
1 files changed, 246 insertions, 143 deletions
diff --git a/simpleperf/scripts/binary_cache_builder.py b/simpleperf/scripts/binary_cache_builder.py index 4322e2c0..46c8532a 100755 --- a/simpleperf/scripts/binary_cache_builder.py +++ b/simpleperf/scripts/binary_cache_builder.py @@ -19,13 +19,14 @@ it, and put them in binary_cache. """ -from dataclasses import dataclass +from collections import defaultdict import logging import os import os.path from pathlib import Path import shutil -from typing import List, Optional, Union +import sys +from typing import Dict, List, Optional, Tuple, Union from simpleperf_report_lib import ReportLib from simpleperf_utils import ( @@ -37,27 +38,233 @@ def is_jit_symfile(dso_name): return dso_name.split('/')[-1].startswith('TemporaryFile') -class BinaryCacheBuilder(object): +class BinaryCache: + def __init__(self, binary_dir: Path): + self.binary_dir = binary_dir + + def get_path_in_cache(self, device_path: str, build_id: str) -> Path: + """ Given a binary path in perf.data, return its corresponding path in the cache. + """ + if build_id: + filename = device_path.split('/')[-1] + # Add build id to make the filename unique. + unique_filename = build_id[2:] + '-' + filename + return self.binary_dir / unique_filename + + # For elf file without build id, we can only follow its path on device. Otherwise, + # simpleperf can't find it. However, we don't prefer this way. Because: + # 1) It doesn't work for native libs loaded directly from apk + # (android:extractNativeLibs=”false”). + # 2) It may exceed path limit on windows. + if device_path.startswith('/'): + device_path = device_path[1:] + device_path = device_path.replace('/', os.sep) + return Path(os.path.join(self.binary_dir, device_path)) + + +class BinarySource: + """ Source to find debug binaries. """ + + def __init__(self, readelf: ReadElf): + self.readelf = readelf + + def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache): + """ pull binaries needed in perf.data to binary_cache. + binaries: maps from binary path to its build_id in perf.data. + """ + raise Exception('not implemented') + + def read_build_id(self, path: Path): + return self.readelf.get_build_id(path) + + +class BinarySourceFromDevice(BinarySource): + """ Pull binaries from device. """ + + def __init__(self, readelf: ReadElf, disable_adb_root: bool): + super().__init__(readelf) + self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root) + + def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache): + if not self.adb.is_device_available(): + return + for path, build_id in binaries.items(): + self.collect_binary(path, build_id, binary_cache) + self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms') + + def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache): + if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"): + # [kernel.kallsyms] or unknown, or something we can't find binary. + return + binary_cache_file = binary_cache.get_path_in_cache(path, build_id) + self.check_and_pull_binary(path, build_id, binary_cache_file) + + def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path): + """If the binary_cache_file exists and has the expected_build_id, there + is no need to pull the binary from device. Otherwise, pull it. + """ + if binary_cache_file.is_file() and ( + not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file) + ): + logging.info('use current file in binary_cache: %s', binary_cache_file) + else: + logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file) + target_dir = binary_cache_file.parent + if not target_dir.is_dir(): + os.makedirs(target_dir) + if binary_cache_file.is_file(): + binary_cache_file.unlink() + self.pull_file_from_device(path, binary_cache_file) + + def pull_file_from_device(self, device_path: str, host_path: Path): + if self.adb.run(['pull', device_path, str(host_path)]): + return True + # On non-root devices, we can't pull /data/app/XXX/base.odex directly. + # Instead, we can first copy the file to /data/local/tmp, then pull it. + filename = device_path[device_path.rfind('/')+1:] + if (self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']) and + self.adb.run(['pull', '/data/local/tmp/' + filename, host_path])): + self.adb.run(['shell', 'rm', '/data/local/tmp/' + filename]) + return True + logging.warning('failed to pull %s from device', device_path) + return False + + def pull_kernel_symbols(self, file_path: Path): + if file_path.is_file(): + file_path.unlink() + if self.adb.switch_to_root(): + self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict']) + self.adb.run(['pull', '/proc/kallsyms', file_path]) + + +class BinarySourceFromLibDirs(BinarySource): + """ Collect binaries from lib dirs. """ + + def __init__(self, readelf: ReadElf, lib_dirs: List[Path]): + super().__init__(readelf) + self.lib_dirs = lib_dirs + self.filename_map = None + self.build_id_map = None + self.binary_cache = None + + def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache): + self.create_filename_map(binaries) + self.create_build_id_map(binaries) + self.binary_cache = binary_cache + + # Search all files in lib_dirs, and copy matching files to build_cache. + for lib_dir in self.lib_dirs: + if self.is_platform_symbols_dir(lib_dir): + self.search_platform_symbols_dir(lib_dir) + else: + self.search_dir(lib_dir) + + def create_filename_map(self, binaries: Dict[str, str]): + """ Create a map mapping from filename to binaries having the name. """ + self.filename_map = defaultdict(list) + for path, build_id in binaries.items(): + index = path.rfind('/') + filename = path[index + 1:] + self.filename_map[filename].append((path, build_id)) + + def create_build_id_map(self, binaries: Dict[str, str]): + """ Create a map mapping from build id to binary path. """ + self.build_id_map = {} + for path, build_id in binaries.items(): + if build_id: + self.build_id_map[build_id] = path + + def is_platform_symbols_dir(self, lib_dir: Path): + """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """ + subdir_names = [p.name for p in lib_dir.iterdir()] + return lib_dir.name == 'symbols' and 'system' in subdir_names + + def search_platform_symbols_dir(self, lib_dir: Path): + """ Platform symbols dir contains too many binaries. Reading build ids for + all of them takes a long time. So we only read build ids for binaries + having names exist in filename_map. + """ + for root, _, files in os.walk(lib_dir): + for filename in files: + binaries = self.filename_map.get(filename) + if not binaries: + continue + file_path = Path(os.path.join(root, filename)) + build_id = self.read_build_id(file_path) + for path, expected_build_id in binaries: + if expected_build_id == build_id: + self.copy_to_binary_cache(file_path, build_id, path) + + def search_dir(self, lib_dir: Path): + """ For a normal lib dir, it's unlikely to contain many binaries. So we can read + build ids for all binaries in it. But users may give debug binaries with a name + different from the one recorded in perf.data. So we should only rely on build id + if it is available. + """ + for root, _, files in os.walk(lib_dir): + for filename in files: + file_path = Path(os.path.join(root, filename)) + build_id = self.read_build_id(file_path) + if build_id: + # For elf file with build id, use build id to match. + device_path = self.build_id_map.get(build_id) + if device_path: + self.copy_to_binary_cache(file_path, build_id, device_path) + elif self.readelf.is_elf_file(file_path): + # For elf file without build id, use filename to match. + for path, expected_build_id in self.filename_map.get(filename, []): + if not expected_build_id: + self.copy_to_binary_cache(file_path, '', path) + break + + def copy_to_binary_cache( + self, from_path: Path, expected_build_id: str, device_path: str): + to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id) + if not self.need_to_copy(from_path, to_path, expected_build_id): + # The existing file in binary_cache can provide more information, so no need to copy. + return + to_dir = to_path.parent + if not to_dir.is_dir(): + os.makedirs(to_dir) + logging.info('copy to binary_cache: %s to %s', from_path, to_path) + shutil.copy(from_path, to_path) + + def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str): + if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id: + return True + return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path) + + def get_file_stripped_level(self, path: Path) -> int: + """Return stripped level of an ELF file. Larger value means more stripped.""" + sections = self.readelf.get_sections(path) + if '.debug_line' in sections: + return 0 + if '.symtab' in sections: + return 1 + return 2 + + +class BinaryCacheBuilder: """Collect all binaries needed by perf.data in binary_cache.""" def __init__(self, ndk_path: Optional[str], disable_adb_root: bool): - self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root) self.readelf = ReadElf(ndk_path) - self.binary_cache_dir = 'binary_cache' - if not os.path.isdir(self.binary_cache_dir): - os.makedirs(self.binary_cache_dir) + self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root) + self.binary_cache_dir = Path('binary_cache') + self.binary_cache = BinaryCache(self.binary_cache_dir) self.binaries = {} - def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]): + def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool: + self.binary_cache_dir.mkdir(exist_ok=True) self.collect_used_binaries(perf_data_path) - self.copy_binaries_from_symfs_dirs(symfs_dirs) - if self.adb.is_device_available(): - self.pull_binaries_from_device() - self._pull_kernel_symbols() + if not self.copy_binaries_from_symfs_dirs(symfs_dirs): + return False + self.pull_binaries_from_device() self.create_build_id_list() + return True def collect_used_binaries(self, perf_data_path): - """read perf.data, collect all used binaries and their build id (if available).""" + """read perf.data, collect all used binaries and their build id(if available).""" # A dict mapping from binary name to build_id binaries = {} lib = ReportLib() @@ -82,149 +289,45 @@ class BinaryCacheBuilder(object): binaries[name] = lib.GetBuildIdForPath(dso_name) self.binaries = binaries - def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[Path, str]]): - """collect all files in symfs_dirs.""" - if not symfs_dirs: - return - - # It is possible that the path of the binary in symfs_dirs doesn't match - # the one recorded in perf.data. For example, a file in symfs_dirs might - # be "debug/arm/obj/armeabi-v7a/libsudo-game-jni.so", but the path in - # perf.data is "/data/app/xxxx/lib/arm/libsudo-game-jni.so". So we match - # binaries if they have the same filename (like libsudo-game-jni.so) - # and same build_id. - - # Map from filename to binary paths. - filename_dict = {} - for binary in self.binaries: - index = binary.rfind('/') - filename = binary[index+1:] - paths = filename_dict.get(filename) - if paths is None: - filename_dict[filename] = paths = [] - paths.append(binary) - - # Walk through all files in symfs_dirs, and copy matching files to build_cache. - for symfs_dir in symfs_dirs: - for root, _, files in os.walk(symfs_dir): - for filename in files: - paths = filename_dict.get(filename) - if not paths: - continue - build_id = self._read_build_id(os.path.join(root, filename)) - for binary in paths: - expected_build_id = self.binaries.get(binary) - if expected_build_id == build_id: - self._copy_to_binary_cache(os.path.join(root, filename), - expected_build_id, binary) - break - - def _copy_to_binary_cache(self, from_path, expected_build_id, target_file): - if target_file[0] == '/': - target_file = target_file[1:] - target_file = target_file.replace('/', os.sep) - target_file = os.path.join(self.binary_cache_dir, target_file) - if not self._need_to_copy(from_path, target_file, expected_build_id): - # The existing file in binary_cache can provide more information, so no need to copy. - return - target_dir = os.path.dirname(target_file) - if not os.path.isdir(target_dir): - os.makedirs(target_dir) - logging.info('copy to binary_cache: %s to %s' % (from_path, target_file)) - shutil.copy(from_path, target_file) - - def _need_to_copy(self, source_file, target_file, expected_build_id): - if not os.path.isfile(target_file): - return True - if self._read_build_id(target_file) != expected_build_id: - return True - return self._get_file_stripped_level(source_file) < self._get_file_stripped_level( - target_file) - - def _get_file_stripped_level(self, file_path): - """Return stripped level of an ELF file. Larger value means more stripped.""" - sections = self.readelf.get_sections(file_path) - if '.debug_line' in sections: - return 0 - if '.symtab' in sections: - return 1 - return 2 + def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool: + if symfs_dirs: + lib_dirs: List[Path] = [] + for symfs_dir in symfs_dirs: + if isinstance(symfs_dir, str): + symfs_dir = Path(symfs_dir) + if not symfs_dir.is_dir(): + logging.error("can't find dir %s", symfs_dir) + return False + lib_dirs.append(symfs_dir) + lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs) + lib_dir_source.collect_binaries(self.binaries, self.binary_cache) + return True def pull_binaries_from_device(self): - """pull binaries needed in perf.data to binary_cache.""" - for binary in self.binaries: - build_id = self.binaries[binary] - if not binary.startswith('/') or binary == "//anon" or binary.startswith("/dev/"): - # [kernel.kallsyms] or unknown, or something we can't find binary. - continue - binary_cache_file = binary[1:].replace('/', os.sep) - binary_cache_file = os.path.join(self.binary_cache_dir, binary_cache_file) - self._check_and_pull_binary(binary, build_id, binary_cache_file) - - def _check_and_pull_binary(self, binary, expected_build_id, binary_cache_file): - """If the binary_cache_file exists and has the expected_build_id, there - is no need to pull the binary from device. Otherwise, pull it. - """ - need_pull = True - if os.path.isfile(binary_cache_file): - need_pull = False - if expected_build_id: - build_id = self._read_build_id(binary_cache_file) - if expected_build_id != build_id: - need_pull = True - if need_pull: - target_dir = os.path.dirname(binary_cache_file) - if not os.path.isdir(target_dir): - os.makedirs(target_dir) - if os.path.isfile(binary_cache_file): - os.remove(binary_cache_file) - logging.info('pull file to binary_cache: %s to %s' % (binary, binary_cache_file)) - self._pull_file_from_device(binary, binary_cache_file) - else: - logging.info('use current file in binary_cache: %s' % binary_cache_file) - - def _read_build_id(self, file_path): - """read build id of a binary on host.""" - return self.readelf.get_build_id(file_path) - - def _pull_file_from_device(self, device_path, host_path): - if self.adb.run(['pull', device_path, host_path]): - return True - # In non-root device, we can't pull /data/app/XXX/base.odex directly. - # Instead, we can first copy the file to /data/local/tmp, then pull it. - filename = device_path[device_path.rfind('/')+1:] - if (self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']) and - self.adb.run(['pull', '/data/local/tmp/' + filename, host_path])): - self.adb.run(['shell', 'rm', '/data/local/tmp/' + filename]) - return True - logging.warning('failed to pull %s from device' % device_path) - return False - - def _pull_kernel_symbols(self): - file_path = os.path.join(self.binary_cache_dir, 'kallsyms') - if os.path.isfile(file_path): - os.remove(file_path) - if self.adb.switch_to_root(): - self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict']) - self.adb.run(['pull', '/proc/kallsyms', file_path]) + self.device_source.collect_binaries(self.binaries, self.binary_cache) def create_build_id_list(self): """ Create build_id_list. So report scripts can find a binary by its build_id instead of path. """ - build_id_list_path = os.path.join(self.binary_cache_dir, 'build_id_list') + build_id_list_path = self.binary_cache_dir / 'build_id_list' + # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf. with open(build_id_list_path, 'wb') as fh: for root, _, files in os.walk(self.binary_cache_dir): for filename in files: - path = os.path.join(root, filename) - relative_path = path[len(self.binary_cache_dir) + 1:] - build_id = self._read_build_id(path) + path = Path(os.path.join(root, filename)) + build_id = self.readelf.get_build_id(path) if build_id: + relative_path = path.relative_to(self.binary_cache_dir) line = f'{build_id}={relative_path}\n' fh.write(str_to_bytes(line)) + def find_path_in_cache(self, device_path: str) -> Optional[Path]: + build_id = self.binaries.get(device_path) + return self.binary_cache.get_path_in_cache(device_path, build_id) + -def main(): +def main() -> bool: parser = BaseArgumentParser(description=""" Pull binaries needed by perf.data from device to binary_cache directory.""") parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help=""" @@ -238,8 +341,8 @@ def main(): ndk_path = None if not args.ndk_path else args.ndk_path[0] builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root) symfs_dirs = flatten_arg_list(args.native_lib_dir) - builder.build_binary_cache(args.perf_data_path, symfs_dirs) + return builder.build_binary_cache(args.perf_data_path, symfs_dirs) if __name__ == '__main__': - main() + sys.exit(0 if main() else 1) |