1 files changed, 133 insertions, 289 deletions
diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py
index 2a7dfd35..cf50fcb2 100644
--- a/simpleperf/scripts/simpleperf_utils.py
+++ b/simpleperf/scripts/simpleperf_utils.py
@@ -20,8 +20,6 @@
 
 from __future__ import annotations
 import argparse
-from concurrent.futures import Future, ThreadPoolExecutor
-from dataclasses import dataclass
 import logging
 import os
 import os.path
@@ -31,10 +29,7 @@ import shutil
 import subprocess
 import sys
 import time
-from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union
-
-
-NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option."
+from typing import Dict, Iterator, List, Optional, Set, Union
 
 
 def get_script_dir() -> str:
@@ -57,7 +52,49 @@ def get_platform() -> str:
     return 'linux'
 
 
+def is_python3() -> str:
+    return sys.version_info >= (3, 0)
+
+
+def log_debug(msg: str):
+    logging.debug(msg)
+
+
+def log_info(msg: str):
+    logging.info(msg)
+
+
+def log_warning(msg: str):
+    logging.warning(msg)
+
+
+def log_fatal(msg: str):
+    raise Exception(msg)
+
+
+def log_exit(msg: str):
+    sys.exit(msg)
+
+
+def disable_debug_log():
+    logging.getLogger().setLevel(logging.WARN)
+
+
+def set_log_level(level_name: str):
+    if level_name == 'debug':
+        level = logging.DEBUG
+    elif level_name == 'info':
+        level = logging.INFO
+    elif level_name == 'warning':
+        level = logging.WARNING
+    else:
+        log_fatal('unknown log level: %s' % level_name)
+    logging.getLogger().setLevel(level)
+
+
 def str_to_bytes(str_value: str) -> bytes:
+    if not is_python3():
+        return str_value
     # In python 3, str are wide strings whereas the C api expects 8 bit strings,
     # hence we have to convert. For now using utf-8 as the encoding.
     return str_value.encode('utf-8')
@@ -66,6 +103,8 @@ def str_to_bytes(str_value: str) -> bytes:
 def bytes_to_str(bytes_value: Optional[bytes]) -> str:
     if not bytes_value:
         return ''
+    if not is_python3():
+        return bytes_value
     return bytes_value.decode('utf-8')
 
 
@@ -142,10 +181,11 @@ class ToolFinder:
             'path_in_ndk':
                 lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
         },
-        'llvm-strip': {
-            'is_binutils': False,
-            'path_in_ndk':
-                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-strip' % platform,
+        'objdump': {
+            'is_binutils': True,
+        },
+        'strip': {
+            'is_binutils': True,
         },
     }
 
@@ -285,7 +325,7 @@ class AdbHelper(object):
     def run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                               log_stderr: bool = False) -> Tuple[bool, str]:
         adb_args = [self.adb_path] + adb_args
-        logging.debug('run adb cmd: %s' % adb_args)
+        log_debug('run adb cmd: %s' % adb_args)
         env = None
         if self.serial_number:
             env = os.environ.copy()
@@ -298,10 +338,10 @@ class AdbHelper(object):
         returncode = subproc.returncode
         result = (returncode == 0)
         if log_output and stdout_data:
-            logging.debug(stdout_data)
+            log_debug(stdout_data)
         if log_stderr and stderr_data:
-            logging.warning(stderr_data)
-        logging.debug('run adb cmd: %s  [result %s]' % (adb_args, result))
+            log_warning(stderr_data)
+        log_debug('run adb cmd: %s  [result %s]' % (adb_args, result))
         return (result, stdout_data)
 
     def check_run(self, adb_args: List[str], log_output: bool = False):
@@ -320,10 +360,10 @@ class AdbHelper(object):
             return
         if 'root' not in stdoutdata:
             return
-        logging.info('unroot adb')
+        log_info('unroot adb')
         self.run(['unroot'])
-        time.sleep(1)
         self.run(['wait-for-device'])
+        time.sleep(1)
 
     def switch_to_root(self) -> bool:
         if not self.enable_switch_to_root:
@@ -345,7 +385,7 @@ class AdbHelper(object):
 
     def get_property(self, name: str) -> Optional[str]:
         result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name])
-        return stdoutdata.strip() if result else None
+        return stdoutdata if result else None
 
     def set_property(self, name: str, value: str) -> bool:
         return self.run(['shell', 'setprop', name, value])
@@ -365,20 +405,17 @@ class AdbHelper(object):
 
     def get_android_version(self) -> int:
         """ Get Android version on device, like 7 is for Android N, 8 is for Android O."""
-        build_version = self.get_property('ro.build.version.codename')
-        if not build_version or build_version == 'REL':
-            build_version = self.get_property('ro.build.version.release')
+        build_version = self.get_property('ro.build.version.release')
         android_version = 0
         if build_version:
-            if build_version[0].isdigit():
-                i = 1
-                while i < len(build_version) and build_version[i].isdigit():
-                    i += 1
-                android_version = int(build_version[:i])
-            else:
+            if not build_version[0].isdigit():
                 c = build_version[0].upper()
                 if c.isupper() and c >= 'L':
                     android_version = ord(c) - ord('L') + 5
+            else:
+                strs = build_version.split('.')
+                if strs:
+                    android_version = int(strs[0])
         return android_version
 
 
@@ -499,26 +536,6 @@ class Addr2Nearestline(object):
         def __init__(self, build_id: Optional[str]):
             self.build_id = build_id
             self.addrs: Dict[int, Addr2Nearestline.Addr] = {}
-            # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
-            # and provide data structures connecting file id and file name here.
-            self.file_name_to_id: Dict[str, int] = {}
-            self.file_id_to_name: List[str] = []
-            self.func_name_to_id: Dict[str, int] = {}
-            self.func_id_to_name: List[str] = []
-
-        def get_file_id(self, file_path: str) -> int:
-            file_id = self.file_name_to_id.get(file_path)
-            if file_id is None:
-                file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
-                self.file_id_to_name.append(file_path)
-            return file_id
-
-        def get_func_id(self, func_name: str) -> int:
-            func_id = self.func_name_to_id.get(func_name)
-            if func_id is None:
-                func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
-                self.func_id_to_name.append(func_name)
-            return func_id
 
     class Addr(object):
         """ Info of an addr request.
@@ -536,11 +553,17 @@ class Addr2Nearestline(object):
             binary_finder: BinaryFinder, with_function_name: bool):
         self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path)
         if not self.symbolizer_path:
-            log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE)
+            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
         self.readelf = ReadElf(ndk_path)
         self.dso_map: Dict[str, Addr2Nearestline.Dso] = {}  # map from dso_path to Dso.
         self.binary_finder = binary_finder
         self.with_function_name = with_function_name
+        # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
+        # and provide data structures connecting file id and file name here.
+        self.file_name_to_id: Dict[str, int] = {}
+        self.file_id_to_name: List[str] = []
+        self.func_name_to_id: Dict[str, int] = {}
+        self.func_id_to_name: List[str] = []
 
     def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int):
         dso = self.dso_map.get(dso_path)
@@ -549,24 +572,19 @@ class Addr2Nearestline(object):
         if addr not in dso.addrs:
             dso.addrs[addr] = self.Addr(func_addr)
 
-    def convert_addrs_to_lines(self, jobs: int):
-        with ThreadPoolExecutor(jobs) as executor:
-            futures: List[Future] = []
-            for dso_path, dso in self.dso_map.items():
-                futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso))
-            for future in futures:
-                # Call future.result() to report exceptions raised in the executor.
-                future.result()
+    def convert_addrs_to_lines(self):
+        for dso_path, dso in self.dso_map.items():
+            self._convert_addrs_in_one_dso(dso_path, dso)
 
     def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso):
         real_path = self.binary_finder.find_binary(dso_path, dso.build_id)
         if not real_path:
             if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
-                logging.debug("Can't find dso %s" % dso_path)
+                log_debug("Can't find dso %s" % dso_path)
             return
 
         if not self._check_debug_line_section(real_path):
-            logging.debug("file %s doesn't contain .debug_line section." % real_path)
+            log_debug("file %s doesn't contain .debug_line section." % real_path)
             return
 
         addr_step = self._get_addr_step(real_path)
@@ -613,7 +631,38 @@ class Addr2Nearestline(object):
             stdoutdata = bytes_to_str(stdoutdata)
         except OSError:
             return
-        addr_map = self.parse_line_output(stdoutdata, dso)
+        addr_map: Dict[int, List[Tuple[int]]] = {}
+        cur_line_list: Optional[List[Tuple[int]]] = None
+        need_function_name = self.with_function_name
+        cur_function_name: Optional[str] = None
+        for line in stdoutdata.strip().split('\n'):
+            line = line.strip()
+            if not line:
+                continue
+            if line[:2] == '0x':
+                # a new address
+                cur_line_list = addr_map[int(line, 16)] = []
+            elif need_function_name:
+                cur_function_name = line.strip()
+                need_function_name = False
+            else:
+                need_function_name = self.with_function_name
+                if cur_line_list is None:
+                    continue
+                file_path, line_number = self._parse_source_location(line)
+                if not file_path or not line_number:
+                    # An addr can have a list of (file, line), when the addr belongs to an inlined
+                    # function. Sometimes only part of the list has ? mark. In this case, we think
+                    # the line info is valid if the first line doesn't have ? mark.
+                    if not cur_line_list:
+                        cur_line_list = None
+                    continue
+                file_id = self._get_file_id(file_path)
+                if self.with_function_name:
+                    func_id = self._get_func_id(cur_function_name)
+                    cur_line_list.append((file_id, line_number, func_id))
+                else:
+                    cur_line_list.append((file_id, line_number))
 
         # 3. Fill line info in dso.addrs.
         for addr in dso.addrs:
@@ -637,66 +686,7 @@ class Addr2Nearestline(object):
             args.append('--functions=none')
         return args
 
-    def parse_line_output(self, output: str, dso: Addr2Nearestline.Dso) -> Dict[int,
-                                                                                List[Tuple[int]]]:
-        """
-        The output is a list of lines.
-            address1
-            function_name1 (the function name can be empty)
-            source_location1
-            function_name2
-            source_location2
-            ...
-            (end with empty line)
-        """
-
-        addr_map: Dict[int, List[Tuple[int]]] = {}
-        lines = output.strip().splitlines()
-        i = 0
-        while i < len(lines):
-            address = self._parse_line_output_address(lines[i])
-            i += 1
-            if address is None:
-                continue
-            info = []
-            while i < len(lines):
-                if self.with_function_name:
-                    if i + 1 == len(lines):
-                        break
-                    function_name = lines[i].strip()
-                    if not function_name and (':' not in lines[i+1]):
-                        # no more frames
-                        break
-                    i += 1
-                elif not lines[i]:
-                    i += 1
-                    break
-
-                file_path, line_number = self._parse_line_output_source_location(lines[i])
-                i += 1
-                if not file_path or not line_number:
-                    # An addr can have a list of (file, line), when the addr belongs to an inlined
-                    # function. Sometimes only part of the list has ? mark. In this case, we think
-                    # the line info is valid if the first line doesn't have ? mark.
-                    if not info:
-                        break
-                    continue
-                file_id = dso.get_file_id(file_path)
-                if self.with_function_name:
-                    func_id = dso.get_func_id(function_name)
-                    info.append((file_id, line_number, func_id))
-                else:
-                    info.append((file_id, line_number))
-            if info:
-                addr_map[address] = info
-        return addr_map
-
-    def _parse_line_output_address(self, output: str) -> Optional[int]:
-        if output.startswith('0x'):
-            return int(output, 16)
-        return None
-
-    def _parse_line_output_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]:
+    def _parse_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]:
         file_path, line_number = None, None
         # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
         # Filename may contain ':' like "C:\Users\...\file".
@@ -711,6 +701,20 @@ class Addr2Nearestline(object):
             return None, None
         return file_path, line_number
 
+    def _get_file_id(self, file_path: str) -> int:
+        file_id = self.file_name_to_id.get(file_path)
+        if file_id is None:
+            file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
+            self.file_id_to_name.append(file_path)
+        return file_id
+
+    def _get_func_id(self, func_name: str) -> int:
+        func_id = self.func_name_to_id.get(func_name)
+        if func_id is None:
+            func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
+            self.func_id_to_name.append(func_name)
+        return func_id
+
     def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso:
         return self.dso_map.get(dso_path)
 
@@ -719,9 +723,9 @@ class Addr2Nearestline(object):
         if source is None:
             return None
         if self.with_function_name:
-            return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id])
+            return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
                     for (file_id, line, func_id) in source]
-        return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source]
+        return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]
 
 
 class SourceFileSearcher(object):
@@ -813,9 +817,14 @@ class Objdump(object):
         real_path, arch = dso_info
         objdump_path = self.objdump_paths.get(arch)
         if not objdump_path:
-            objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch)
+            if arch == 'arm':
+                # llvm-objdump for arm is not good at showing branch targets.
+                # So still prefer objdump.
+                objdump_path = ToolFinder.find_tool_path('objdump', self.ndk_path, arch)
             if not objdump_path:
-                log_exit("Can't find llvm-objdump." + NDK_ERROR_MESSAGE)
+                objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch)
+            if not objdump_path:
+                log_exit("Can't find llvm-objdump. Please set ndk path with --ndk_path option.")
             self.objdump_paths[arch] = objdump_path
 
         # 3. Run objdump.
@@ -852,7 +861,7 @@ class ReadElf(object):
     def __init__(self, ndk_path: Optional[str]):
         self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
         if not self.readelf_path:
-            log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE)
+            log_exit("Can't find llvm-readelf. Please set ndk path with --ndk_path option.")
 
     @staticmethod
     def is_elf_file(path: Union[Path, str]) -> bool:
@@ -904,16 +913,6 @@ class ReadElf(object):
             build_id = build_id[:40]
         return '0x' + build_id
 
-    @staticmethod
-    def unpad_build_id(build_id: str) -> str:
-        if build_id.startswith('0x'):
-            build_id = build_id[2:]
-            # Unpad build id as TrimZeroesFromBuildIDString() in quipper.
-            padding = '0' * 8
-            while build_id.endswith(padding):
-                build_id = build_id[:-len(padding)]
-        return build_id
-
     def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]:
         """ Get sections of an elf file. """
         section_names: List[str] = []
@@ -963,164 +962,9 @@ def extant_file(arg: str) -> str:
     return path
 
 
-def log_fatal(msg: str):
-    raise Exception(msg)
-
-
-def log_exit(msg: str):
-    sys.exit(msg)
-
-
-class LogFormatter(logging.Formatter):
-    """ Use custom logging format. """
-
-    def __init__(self):
-        super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s')
-
-    def formatTime(self, record, datefmt):
-        return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs
-
-
-class Log:
-    initialized = False
-
-    @classmethod
-    def init(cls, log_level: str = 'info'):
-        assert not cls.initialized
-        cls.initialized = True
-        cls.logger = logging.root
-        cls.logger.setLevel(log_level.upper())
-        handler = logging.StreamHandler()
-        handler.setFormatter(LogFormatter())
-        cls.logger.addHandler(handler)
-
-
 class ArgParseFormatter(
         argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
     pass
 
 
-@dataclass
-class ReportLibOptions:
-    show_art_frames: bool
-    trace_offcpu: str
-    proguard_mapping_files: List[str]
-    sample_filters: List[str]
-
-
-class BaseArgumentParser(argparse.ArgumentParser):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs, formatter_class=ArgParseFormatter)
-        self.has_sample_filter_options = False
-        self.sample_filter_with_pid_shortcut = False
-        self.has_report_lib_options = False
-
-    def add_report_lib_options(self, group: Optional[Any] = None,
-                               default_show_art_frames: bool = False,
-                               sample_filter_group: Optional[Any] = None,
-                               sample_filter_with_pid_shortcut: bool = True):
-        self.has_report_lib_options = True
-        parser = group if group else self
-        parser.add_argument(
-            '--proguard-mapping-file', nargs='+',
-            help='Add proguard mapping file to de-obfuscate symbols')
-        parser.add_argument('--show-art-frames', '--show_art_frames',
-                            action=argparse.BooleanOptionalAction, default=default_show_art_frames,
-                            help='Show frames of internal methods in the ART Java interpreter.')
-        parser.add_argument(
-            '--trace-offcpu', choices=['on-cpu', 'off-cpu', 'on-off-cpu', 'mixed-on-off-cpu'],
-            help="""Set report mode for profiles recorded with --trace-offcpu option. All possible
-                    modes are: on-cpu (only on-cpu samples), off-cpu (only off-cpu samples),
-                    on-off-cpu (both on-cpu and off-cpu samples, can be split by event name),
-                    mixed-on-off-cpu (on-cpu and off-cpu samples using the same event name).
-                    If not set, mixed-on-off-cpu mode is used.
-                """)
-        self._add_sample_filter_options(sample_filter_group, sample_filter_with_pid_shortcut)
-
-    def _add_sample_filter_options(
-            self, group: Optional[Any] = None, with_pid_shortcut: bool = True):
-        if not group:
-            group = self.add_argument_group('Sample filter options')
-        group.add_argument('--exclude-pid', metavar='pid', nargs='+', type=int,
-                           help='exclude samples for selected processes')
-        group.add_argument('--exclude-tid', metavar='tid', nargs='+', type=int,
-                           help='exclude samples for selected threads')
-        group.add_argument(
-            '--exclude-process-name', metavar='process_name_regex', nargs='+',
-            help='exclude samples for processes with name containing the regular expression')
-        group.add_argument(
-            '--exclude-thread-name', metavar='thread_name_regex', nargs='+',
-            help='exclude samples for threads with name containing the regular expression')
-
-        if with_pid_shortcut:
-            group.add_argument('--pid', metavar='pid', nargs='+', type=int,
-                               help='only include samples for selected processes')
-            group.add_argument('--tid', metavar='tid', nargs='+', type=int,
-                               help='only include samples for selected threads')
-        group.add_argument('--include-pid', metavar='pid', nargs='+', type=int,
-                           help='only include samples for selected processes')
-        group.add_argument('--include-tid', metavar='tid', nargs='+', type=int,
-                           help='only include samples for selected threads')
-        group.add_argument(
-            '--include-process-name', metavar='process_name_regex', nargs='+',
-            help='only include samples for processes with name containing the regular expression')
-        group.add_argument(
-            '--comm', '--include-thread-name', metavar='thread_name_regex',
-            dest='include_thread_name', nargs='+',
-            help='only include samples for threads with name containing the regular expression')
-        group.add_argument(
-            '--filter-file', metavar='file',
-            help='use filter file to filter samples based on timestamps. ' +
-            'The file format is in doc/sampler_filter.md.')
-        self.has_sample_filter_options = True
-        self.sample_filter_with_pid_shortcut = with_pid_shortcut
-
-    def _build_sample_filter(self, args: argparse.Namespace) -> List[str]:
-        """ Build sample filters, which can be passed to ReportLib.SetSampleFilter(). """
-        filters = []
-        if args.exclude_pid:
-            filters.extend(['--exclude-pid', ','.join(str(pid) for pid in args.exclude_pid)])
-        if args.exclude_tid:
-            filters.extend(['--exclude-tid', ','.join(str(tid) for tid in args.exclude_tid)])
-        if args.exclude_process_name:
-            for name in args.exclude_process_name:
-                filters.extend(['--exclude-process-name', name])
-        if args.exclude_thread_name:
-            for name in args.exclude_thread_name:
-                filters.extend(['--exclude-thread-name', name])
-
-        if args.include_pid:
-            filters.extend(['--include-pid', ','.join(str(pid) for pid in args.include_pid)])
-        if args.include_tid:
-            filters.extend(['--include-tid', ','.join(str(tid) for tid in args.include_tid)])
-        if self.sample_filter_with_pid_shortcut:
-            if args.pid:
-                filters.extend(['--include-pid', ','.join(str(pid) for pid in args.pid)])
-            if args.tid:
-                filters.extend(['--include-tid', ','.join(str(pid) for pid in args.tid)])
-        if args.include_process_name:
-            for name in args.include_process_name:
-                filters.extend(['--include-process-name', name])
-        if args.include_thread_name:
-            for name in args.include_thread_name:
-                filters.extend(['--include-thread-name', name])
-        if args.filter_file:
-            filters.extend(['--filter-file', args.filter_file])
-        return filters
-
-    def parse_known_args(self, *args, **kwargs):
-        self.add_argument(
-            '--log', choices=['debug', 'info', 'warning'],
-            default='info', help='set log level')
-        namespace, left_args = super().parse_known_args(*args, **kwargs)
-
-        if self.has_report_lib_options:
-            sample_filters = self._build_sample_filter(namespace)
-            report_lib_options = ReportLibOptions(
-                namespace.show_art_frames, namespace.trace_offcpu, namespace.proguard_mapping_file,
-                sample_filters)
-            setattr(namespace, 'report_lib_options', report_lib_options)
-
-        if not Log.initialized:
-            Log.init(namespace.log)
-        return namespace, left_args
+logging.getLogger().setLevel(logging.DEBUG)