summary | refs | log | tree | commit | diff
path: root/simpleperf
diff options
context:
space:
mode:
author: Yabin Cui <yabinc@google.com> 2024-02-02 17:18:57 -0800
committer: Yabin Cui <yabinc@google.com> 2024-02-05 16:45:20 -0800
commit: 40a51f76d7d6e315f2c66668e9234fc2faee85c4 (patch)
tree: c7096441917169273dfa653c9623d464f383430a /simpleperf
parent: 55117049fff502fa3c668fcd82f75eb301046800 (diff)
download: extras-40a51f76d7d6e315f2c66668e9234fc2faee85c4.tar.gz
simpleperf: report_html.py: split disassemble work evenly
Currently, report_html.py decides how to disassemble functions by checking how many functions need to be disassembled in a binary:
1) If there are not many, it disassembles the functions one by one.
2) Otherwise, it disassembles the whole binary.

However, the samples may hit many functions in a large binary (like libmonochrome_64.so) while those functions cover only a small percentage of the binary. In that case, the time used to disassemble the whole binary can be longer than the time used to disassemble all other files.

To speed this up, split the large binary into multiple parts, each covering 1M of code size, and disassemble only the needed parts on different CPUs. In an experiment disassembling a Chrome profile, the disassembling time was reduced from 3 minutes to 2 minutes. The time is printed when `--log debug` is added.

Also add `--disassemble-job-size` to adjust the code split size.

Bug: 323271419
Test: run report_html.py manually
Test: run test.py --only-host-test
Change-Id: I6af546616cbca7e294956b610182757b679fcb04
Diffstat (limited to 'simpleperf')
-rwxr-xr-x simpleperf/scripts/report_html.py 64
-rw-r--r-- simpleperf/scripts/simpleperf_utils.py 9
2 files changed, 48 insertions, 25 deletions
diff --git a/simpleperf/scripts/report_html.py b/simpleperf/scripts/report_html.py
index 368bd509..ba143fd0 100755
--- a/simpleperf/scripts/report_html.py
+++ b/simpleperf/scripts/report_html.py
@@ -801,7 +801,8 @@ class RecordData(object):
# Collect needed source code in SourceFileSet.
self.source_files.load_source_code(source_dirs)
- def add_disassembly(self, filter_lib: Callable[[str], bool], jobs: int):
+ def add_disassembly(self, filter_lib: Callable[[str], bool],
+ jobs: int, disassemble_job_size: int):
""" Collect disassembly information:
1. Use objdump to collect disassembly for each function in FunctionSet.
2. Set flag to dump addr_hit_map when generating record info.
@@ -816,6 +817,7 @@ class RecordData(object):
with ThreadPoolExecutor(jobs) as executor:
futures: List[Future] = []
+ all_tasks = []
for lib_id, functions in lib_functions.items():
lib = self.libs.get_lib(lib_id)
if not filter_lib(lib.name):
@@ -823,33 +825,45 @@ class RecordData(object):
dso_info = objdump.get_dso_info(lib.name, lib.build_id)
if not dso_info:
continue
- # If there are not many functions, it's faster to disassemble them one by one.
- # Otherwise it's faster to disassemble the whole binary.
- if len(functions) < jobs:
- for function in functions:
- futures.append(executor.submit(self._disassemble_function, objdump,
- dso_info, function))
- else:
- futures.append(executor.submit(self._disassemble_binary, objdump, dso_info,
- functions))
- for future in futures:
- future.result()
+ tasks = self.split_disassembly_jobs(functions, disassemble_job_size)
+ logging.debug('create %d jobs to disassemble %d functions in %s',
+ len(tasks), len(functions), lib.name)
+ for task in tasks:
+ futures.append(executor.submit(
+ self._disassemble_functions, objdump, dso_info, task))
+ all_tasks.append(task)
+
+ for task, future in zip(all_tasks, futures):
+ result = future.result()
+ if result and len(result) == len(task):
+ for function, disassembly in zip(task, result):
+ function.disassembly = disassembly.lines
+
+ logging.debug('finished all disassemble jobs')
self.gen_addr_hit_map_in_record_info = True
- def _disassemble_function(self, objdump: Objdump, dso_info, function: Function):
- result = objdump.disassemble_function(dso_info, AddrRange(function.start_addr,
- function.addr_len))
- if result:
- function.disassembly = result.lines
-
- def _disassemble_binary(self, objdump: Objdump, dso_info, functions: List[Function]):
+ def split_disassembly_jobs(self, functions: List[Function],
+ disassemble_job_size: int) -> List[List[Function]]:
+ """ Decide how to split the task of disassembling functions in one library. """
+ if not functions:
+ return []
functions.sort(key=lambda f: f.start_addr)
+ result = []
+ job_start_addr = None
+ for function in functions:
+ if (job_start_addr is None or
+ function.start_addr - job_start_addr > disassemble_job_size):
+ job_start_addr = function.start_addr
+ result.append([function])
+ else:
+ result[-1].append(function)
+ return result
+
+ def _disassemble_functions(self, objdump: Objdump, dso_info,
+ functions: List[Function]) -> Optional[List[Disassembly]]:
addr_ranges = [AddrRange(f.start_addr, f.addr_len) for f in functions]
- result = objdump.disassemble_functions(dso_info, addr_ranges)
- if result:
- for i in range(len(functions)):
- functions[i].disassembly = result[i].lines
+ return objdump.disassemble_functions(dso_info, addr_ranges)
def gen_record_info(self) -> Dict[str, Any]:
""" Return json data which will be used by report_html.js. """
@@ -1010,6 +1024,8 @@ def get_args() -> argparse.Namespace:
parser.add_argument('--add_source_code', action='store_true', help='Add source code.')
parser.add_argument('--source_dirs', nargs='+', help='Source code directories.')
parser.add_argument('--add_disassembly', action='store_true', help='Add disassembled code.')
+ parser.add_argument('--disassemble-job-size', type=int, default=1024*1024,
+ help='address range for one disassemble job')
parser.add_argument('--binary_filter', nargs='+', help="""Annotate source code and disassembly
only for selected binaries.""")
parser.add_argument(
@@ -1064,7 +1080,7 @@ def main():
if args.add_source_code:
record_data.add_source_code(args.source_dirs, filter_lib, args.jobs)
if args.add_disassembly:
- record_data.add_disassembly(filter_lib, args.jobs)
+ record_data.add_disassembly(filter_lib, args.jobs, args.disassemble_job_size)
# 3. Generate report html.
report_generator = ReportGenerator(args.report_path)
diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py
index 04939a8d..af4fca1b 100644
--- a/simpleperf/scripts/simpleperf_utils.py
+++ b/simpleperf/scripts/simpleperf_utils.py
@@ -869,6 +869,8 @@ class Objdump(object):
""" Disassemble code for multiple addr ranges in a binary. sorted_addr_ranges should be
sorted by addr_range.start.
"""
+ if not sorted_addr_ranges:
+ return []
real_path, arch = dso_info
objdump_path = self.objdump_paths.get(arch)
if not objdump_path:
@@ -878,7 +880,12 @@ class Objdump(object):
self.objdump_paths[arch] = objdump_path
# Run objdump.
- args = [objdump_path, '-dlC', '--no-show-raw-insn', real_path]
+ start_addr = sorted_addr_ranges[0].start
+ stop_addr = max(addr_range.end for addr_range in sorted_addr_ranges)
+ args = [objdump_path, '-dlC', '--no-show-raw-insn',
+ '--start-address=0x%x' % start_addr,
+ '--stop-address=0x%x' % stop_addr,
+ real_path]
if arch == 'arm' and 'llvm-objdump' in objdump_path:
args += ['--print-imm-hex']
try: