summaryrefslogtreecommitdiff
path: root/simpleperf/scripts/binary_cache_builder.py
diff options
context:
space:
mode:
Diffstat (limited to 'simpleperf/scripts/binary_cache_builder.py')
-rwxr-xr-xsimpleperf/scripts/binary_cache_builder.py389
1 files changed, 246 insertions, 143 deletions
diff --git a/simpleperf/scripts/binary_cache_builder.py b/simpleperf/scripts/binary_cache_builder.py
index 4322e2c0..46c8532a 100755
--- a/simpleperf/scripts/binary_cache_builder.py
+++ b/simpleperf/scripts/binary_cache_builder.py
@@ -19,13 +19,14 @@
it, and put them in binary_cache.
"""
-from dataclasses import dataclass
+from collections import defaultdict
import logging
import os
import os.path
from pathlib import Path
import shutil
-from typing import List, Optional, Union
+import sys
+from typing import Dict, List, Optional, Tuple, Union
from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
@@ -37,27 +38,233 @@ def is_jit_symfile(dso_name):
return dso_name.split('/')[-1].startswith('TemporaryFile')
-class BinaryCacheBuilder(object):
+class BinaryCache:
+ def __init__(self, binary_dir: Path):
+ self.binary_dir = binary_dir
+
+ def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
+ """ Given a binary path in perf.data, return its corresponding path in the cache.
+ """
+ if build_id:
+ filename = device_path.split('/')[-1]
+ # Add build id to make the filename unique.
+ unique_filename = build_id[2:] + '-' + filename
+ return self.binary_dir / unique_filename
+
+ # For elf file without build id, we can only follow its path on device. Otherwise,
+ # simpleperf can't find it. However, we don't prefer this way. Because:
+ # 1) It doesn't work for native libs loaded directly from apk
+ # (android:extractNativeLibs=”false”).
+ # 2) It may exceed path limit on windows.
+ if device_path.startswith('/'):
+ device_path = device_path[1:]
+ device_path = device_path.replace('/', os.sep)
+ return Path(os.path.join(self.binary_dir, device_path))
+
+
+class BinarySource:
+ """ Source to find debug binaries. """
+
+ def __init__(self, readelf: ReadElf):
+ self.readelf = readelf
+
+ def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
+ """ pull binaries needed in perf.data to binary_cache.
+ binaries: maps from binary path to its build_id in perf.data.
+ """
+ raise Exception('not implemented')
+
+ def read_build_id(self, path: Path):
+ return self.readelf.get_build_id(path)
+
+
+class BinarySourceFromDevice(BinarySource):
+ """ Pull binaries from device. """
+
+ def __init__(self, readelf: ReadElf, disable_adb_root: bool):
+ super().__init__(readelf)
+ self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)
+
+ def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
+ if not self.adb.is_device_available():
+ return
+ for path, build_id in binaries.items():
+ self.collect_binary(path, build_id, binary_cache)
+ self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')
+
+ def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
+ if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
+ # [kernel.kallsyms] or unknown, or something we can't find binary.
+ return
+ binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
+ self.check_and_pull_binary(path, build_id, binary_cache_file)
+
+ def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
+ """If the binary_cache_file exists and has the expected_build_id, there
+ is no need to pull the binary from device. Otherwise, pull it.
+ """
+ if binary_cache_file.is_file() and (
+ not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
+ ):
+ logging.info('use current file in binary_cache: %s', binary_cache_file)
+ else:
+ logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
+ target_dir = binary_cache_file.parent
+ if not target_dir.is_dir():
+ os.makedirs(target_dir)
+ if binary_cache_file.is_file():
+ binary_cache_file.unlink()
+ self.pull_file_from_device(path, binary_cache_file)
+
+ def pull_file_from_device(self, device_path: str, host_path: Path):
+ if self.adb.run(['pull', device_path, str(host_path)]):
+ return True
+ # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
+ # Instead, we can first copy the file to /data/local/tmp, then pull it.
+ filename = device_path[device_path.rfind('/')+1:]
+ if (self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']) and
+ self.adb.run(['pull', '/data/local/tmp/' + filename, host_path])):
+ self.adb.run(['shell', 'rm', '/data/local/tmp/' + filename])
+ return True
+ logging.warning('failed to pull %s from device', device_path)
+ return False
+
+ def pull_kernel_symbols(self, file_path: Path):
+ if file_path.is_file():
+ file_path.unlink()
+ if self.adb.switch_to_root():
+ self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
+ self.adb.run(['pull', '/proc/kallsyms', file_path])
+
+
+class BinarySourceFromLibDirs(BinarySource):
+ """ Collect binaries from lib dirs. """
+
+ def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
+ super().__init__(readelf)
+ self.lib_dirs = lib_dirs
+ self.filename_map = None
+ self.build_id_map = None
+ self.binary_cache = None
+
+ def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
+ self.create_filename_map(binaries)
+ self.create_build_id_map(binaries)
+ self.binary_cache = binary_cache
+
+ # Search all files in lib_dirs, and copy matching files to build_cache.
+ for lib_dir in self.lib_dirs:
+ if self.is_platform_symbols_dir(lib_dir):
+ self.search_platform_symbols_dir(lib_dir)
+ else:
+ self.search_dir(lib_dir)
+
+ def create_filename_map(self, binaries: Dict[str, str]):
+ """ Create a map mapping from filename to binaries having the name. """
+ self.filename_map = defaultdict(list)
+ for path, build_id in binaries.items():
+ index = path.rfind('/')
+ filename = path[index + 1:]
+ self.filename_map[filename].append((path, build_id))
+
+ def create_build_id_map(self, binaries: Dict[str, str]):
+ """ Create a map mapping from build id to binary path. """
+ self.build_id_map = {}
+ for path, build_id in binaries.items():
+ if build_id:
+ self.build_id_map[build_id] = path
+
+ def is_platform_symbols_dir(self, lib_dir: Path):
+ """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """
+ subdir_names = [p.name for p in lib_dir.iterdir()]
+ return lib_dir.name == 'symbols' and 'system' in subdir_names
+
+ def search_platform_symbols_dir(self, lib_dir: Path):
+ """ Platform symbols dir contains too many binaries. Reading build ids for
+ all of them takes a long time. So we only read build ids for binaries
+ having names exist in filename_map.
+ """
+ for root, _, files in os.walk(lib_dir):
+ for filename in files:
+ binaries = self.filename_map.get(filename)
+ if not binaries:
+ continue
+ file_path = Path(os.path.join(root, filename))
+ build_id = self.read_build_id(file_path)
+ for path, expected_build_id in binaries:
+ if expected_build_id == build_id:
+ self.copy_to_binary_cache(file_path, build_id, path)
+
+ def search_dir(self, lib_dir: Path):
+ """ For a normal lib dir, it's unlikely to contain many binaries. So we can read
+ build ids for all binaries in it. But users may give debug binaries with a name
+ different from the one recorded in perf.data. So we should only rely on build id
+ if it is available.
+ """
+ for root, _, files in os.walk(lib_dir):
+ for filename in files:
+ file_path = Path(os.path.join(root, filename))
+ build_id = self.read_build_id(file_path)
+ if build_id:
+ # For elf file with build id, use build id to match.
+ device_path = self.build_id_map.get(build_id)
+ if device_path:
+ self.copy_to_binary_cache(file_path, build_id, device_path)
+ elif self.readelf.is_elf_file(file_path):
+ # For elf file without build id, use filename to match.
+ for path, expected_build_id in self.filename_map.get(filename, []):
+ if not expected_build_id:
+ self.copy_to_binary_cache(file_path, '', path)
+ break
+
+ def copy_to_binary_cache(
+ self, from_path: Path, expected_build_id: str, device_path: str):
+ to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
+ if not self.need_to_copy(from_path, to_path, expected_build_id):
+ # The existing file in binary_cache can provide more information, so no need to copy.
+ return
+ to_dir = to_path.parent
+ if not to_dir.is_dir():
+ os.makedirs(to_dir)
+ logging.info('copy to binary_cache: %s to %s', from_path, to_path)
+ shutil.copy(from_path, to_path)
+
+ def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str):
+ if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id:
+ return True
+ return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path)
+
+ def get_file_stripped_level(self, path: Path) -> int:
+ """Return stripped level of an ELF file. Larger value means more stripped."""
+ sections = self.readelf.get_sections(path)
+ if '.debug_line' in sections:
+ return 0
+ if '.symtab' in sections:
+ return 1
+ return 2
+
+
+class BinaryCacheBuilder:
"""Collect all binaries needed by perf.data in binary_cache."""
def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
- self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)
self.readelf = ReadElf(ndk_path)
- self.binary_cache_dir = 'binary_cache'
- if not os.path.isdir(self.binary_cache_dir):
- os.makedirs(self.binary_cache_dir)
+ self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
+ self.binary_cache_dir = Path('binary_cache')
+ self.binary_cache = BinaryCache(self.binary_cache_dir)
self.binaries = {}
- def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]):
+ def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
+ self.binary_cache_dir.mkdir(exist_ok=True)
self.collect_used_binaries(perf_data_path)
- self.copy_binaries_from_symfs_dirs(symfs_dirs)
- if self.adb.is_device_available():
- self.pull_binaries_from_device()
- self._pull_kernel_symbols()
+ if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
+ return False
+ self.pull_binaries_from_device()
self.create_build_id_list()
+ return True
def collect_used_binaries(self, perf_data_path):
- """read perf.data, collect all used binaries and their build id (if available)."""
+ """read perf.data, collect all used binaries and their build id(if available)."""
# A dict mapping from binary name to build_id
binaries = {}
lib = ReportLib()
@@ -82,149 +289,45 @@ class BinaryCacheBuilder(object):
binaries[name] = lib.GetBuildIdForPath(dso_name)
self.binaries = binaries
- def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[Path, str]]):
- """collect all files in symfs_dirs."""
- if not symfs_dirs:
- return
-
- # It is possible that the path of the binary in symfs_dirs doesn't match
- # the one recorded in perf.data. For example, a file in symfs_dirs might
- # be "debug/arm/obj/armeabi-v7a/libsudo-game-jni.so", but the path in
- # perf.data is "/data/app/xxxx/lib/arm/libsudo-game-jni.so". So we match
- # binaries if they have the same filename (like libsudo-game-jni.so)
- # and same build_id.
-
- # Map from filename to binary paths.
- filename_dict = {}
- for binary in self.binaries:
- index = binary.rfind('/')
- filename = binary[index+1:]
- paths = filename_dict.get(filename)
- if paths is None:
- filename_dict[filename] = paths = []
- paths.append(binary)
-
- # Walk through all files in symfs_dirs, and copy matching files to build_cache.
- for symfs_dir in symfs_dirs:
- for root, _, files in os.walk(symfs_dir):
- for filename in files:
- paths = filename_dict.get(filename)
- if not paths:
- continue
- build_id = self._read_build_id(os.path.join(root, filename))
- for binary in paths:
- expected_build_id = self.binaries.get(binary)
- if expected_build_id == build_id:
- self._copy_to_binary_cache(os.path.join(root, filename),
- expected_build_id, binary)
- break
-
- def _copy_to_binary_cache(self, from_path, expected_build_id, target_file):
- if target_file[0] == '/':
- target_file = target_file[1:]
- target_file = target_file.replace('/', os.sep)
- target_file = os.path.join(self.binary_cache_dir, target_file)
- if not self._need_to_copy(from_path, target_file, expected_build_id):
- # The existing file in binary_cache can provide more information, so no need to copy.
- return
- target_dir = os.path.dirname(target_file)
- if not os.path.isdir(target_dir):
- os.makedirs(target_dir)
- logging.info('copy to binary_cache: %s to %s' % (from_path, target_file))
- shutil.copy(from_path, target_file)
-
- def _need_to_copy(self, source_file, target_file, expected_build_id):
- if not os.path.isfile(target_file):
- return True
- if self._read_build_id(target_file) != expected_build_id:
- return True
- return self._get_file_stripped_level(source_file) < self._get_file_stripped_level(
- target_file)
-
- def _get_file_stripped_level(self, file_path):
- """Return stripped level of an ELF file. Larger value means more stripped."""
- sections = self.readelf.get_sections(file_path)
- if '.debug_line' in sections:
- return 0
- if '.symtab' in sections:
- return 1
- return 2
+ def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
+ if symfs_dirs:
+ lib_dirs: List[Path] = []
+ for symfs_dir in symfs_dirs:
+ if isinstance(symfs_dir, str):
+ symfs_dir = Path(symfs_dir)
+ if not symfs_dir.is_dir():
+ logging.error("can't find dir %s", symfs_dir)
+ return False
+ lib_dirs.append(symfs_dir)
+ lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
+ lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
+ return True
def pull_binaries_from_device(self):
- """pull binaries needed in perf.data to binary_cache."""
- for binary in self.binaries:
- build_id = self.binaries[binary]
- if not binary.startswith('/') or binary == "//anon" or binary.startswith("/dev/"):
- # [kernel.kallsyms] or unknown, or something we can't find binary.
- continue
- binary_cache_file = binary[1:].replace('/', os.sep)
- binary_cache_file = os.path.join(self.binary_cache_dir, binary_cache_file)
- self._check_and_pull_binary(binary, build_id, binary_cache_file)
-
- def _check_and_pull_binary(self, binary, expected_build_id, binary_cache_file):
- """If the binary_cache_file exists and has the expected_build_id, there
- is no need to pull the binary from device. Otherwise, pull it.
- """
- need_pull = True
- if os.path.isfile(binary_cache_file):
- need_pull = False
- if expected_build_id:
- build_id = self._read_build_id(binary_cache_file)
- if expected_build_id != build_id:
- need_pull = True
- if need_pull:
- target_dir = os.path.dirname(binary_cache_file)
- if not os.path.isdir(target_dir):
- os.makedirs(target_dir)
- if os.path.isfile(binary_cache_file):
- os.remove(binary_cache_file)
- logging.info('pull file to binary_cache: %s to %s' % (binary, binary_cache_file))
- self._pull_file_from_device(binary, binary_cache_file)
- else:
- logging.info('use current file in binary_cache: %s' % binary_cache_file)
-
- def _read_build_id(self, file_path):
- """read build id of a binary on host."""
- return self.readelf.get_build_id(file_path)
-
- def _pull_file_from_device(self, device_path, host_path):
- if self.adb.run(['pull', device_path, host_path]):
- return True
- # In non-root device, we can't pull /data/app/XXX/base.odex directly.
- # Instead, we can first copy the file to /data/local/tmp, then pull it.
- filename = device_path[device_path.rfind('/')+1:]
- if (self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']) and
- self.adb.run(['pull', '/data/local/tmp/' + filename, host_path])):
- self.adb.run(['shell', 'rm', '/data/local/tmp/' + filename])
- return True
- logging.warning('failed to pull %s from device' % device_path)
- return False
-
- def _pull_kernel_symbols(self):
- file_path = os.path.join(self.binary_cache_dir, 'kallsyms')
- if os.path.isfile(file_path):
- os.remove(file_path)
- if self.adb.switch_to_root():
- self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
- self.adb.run(['pull', '/proc/kallsyms', file_path])
+ self.device_source.collect_binaries(self.binaries, self.binary_cache)
def create_build_id_list(self):
""" Create build_id_list. So report scripts can find a binary by its build_id instead of
path.
"""
- build_id_list_path = os.path.join(self.binary_cache_dir, 'build_id_list')
+ build_id_list_path = self.binary_cache_dir / 'build_id_list'
+ # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
with open(build_id_list_path, 'wb') as fh:
for root, _, files in os.walk(self.binary_cache_dir):
for filename in files:
- path = os.path.join(root, filename)
- relative_path = path[len(self.binary_cache_dir) + 1:]
- build_id = self._read_build_id(path)
+ path = Path(os.path.join(root, filename))
+ build_id = self.readelf.get_build_id(path)
if build_id:
+ relative_path = path.relative_to(self.binary_cache_dir)
line = f'{build_id}={relative_path}\n'
fh.write(str_to_bytes(line))
+ def find_path_in_cache(self, device_path: str) -> Optional[Path]:
+ build_id = self.binaries.get(device_path)
+ return self.binary_cache.get_path_in_cache(device_path, build_id)
+
-def main():
+def main() -> bool:
parser = BaseArgumentParser(description="""
Pull binaries needed by perf.data from device to binary_cache directory.""")
parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
@@ -238,8 +341,8 @@ def main():
ndk_path = None if not args.ndk_path else args.ndk_path[0]
builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root)
symfs_dirs = flatten_arg_list(args.native_lib_dir)
- builder.build_binary_cache(args.perf_data_path, symfs_dirs)
+ return builder.build_binary_cache(args.perf_data_path, symfs_dirs)
if __name__ == '__main__':
- main()
+ sys.exit(0 if main() else 1)