diff options
author | Yabin Cui <yabinc@google.com> | 2024-02-02 17:18:57 -0800 |
---|---|---|
committer | Yabin Cui <yabinc@google.com> | 2024-02-05 16:45:20 -0800 |
commit | 40a51f76d7d6e315f2c66668e9234fc2faee85c4 (patch) | |
tree | c7096441917169273dfa653c9623d464f383430a /simpleperf | |
parent | 55117049fff502fa3c668fcd82f75eb301046800 (diff) | |
download | extras-40a51f76d7d6e315f2c66668e9234fc2faee85c4.tar.gz |
simpleperf: report_html.py: split disassemble work evenly
Currently, report_html.py decides how to disassemble functions by
checking how many functions need to be disassembled in a binary:
1) If not many, disassemble the functions one by one.
2) Otherwise, disassemble the whole binary.
However, the samples may hit many functions in a large binary (like
libmonochrome_64.so), while those functions make up only a small
percentage of the binary. In that case, the time used to disassemble the
whole binary can be longer than disassembling all other files.
To speed it up, we can split the large binary into multiple parts,
each covering 1 MiB of code address range, and disassemble only the
needed parts on different CPUs.
In an experiment disassembling a Chrome profile, the disassembling
time was reduced from 3 minutes to 2 minutes. The time is printed
when `--log debug` is passed.
Also add `--disassemble-job-size` to adjust code split size.
Bug: 323271419
Test: run report_html.py manually
Test: run test.py --only-host-test
Change-Id: I6af546616cbca7e294956b610182757b679fcb04
Diffstat (limited to 'simpleperf')
-rwxr-xr-x | simpleperf/scripts/report_html.py | 64 | ||||
-rw-r--r-- | simpleperf/scripts/simpleperf_utils.py | 9 |
2 files changed, 48 insertions, 25 deletions
diff --git a/simpleperf/scripts/report_html.py b/simpleperf/scripts/report_html.py index 368bd509..ba143fd0 100755 --- a/simpleperf/scripts/report_html.py +++ b/simpleperf/scripts/report_html.py @@ -801,7 +801,8 @@ class RecordData(object): # Collect needed source code in SourceFileSet. self.source_files.load_source_code(source_dirs) - def add_disassembly(self, filter_lib: Callable[[str], bool], jobs: int): + def add_disassembly(self, filter_lib: Callable[[str], bool], + jobs: int, disassemble_job_size: int): """ Collect disassembly information: 1. Use objdump to collect disassembly for each function in FunctionSet. 2. Set flag to dump addr_hit_map when generating record info. @@ -816,6 +817,7 @@ class RecordData(object): with ThreadPoolExecutor(jobs) as executor: futures: List[Future] = [] + all_tasks = [] for lib_id, functions in lib_functions.items(): lib = self.libs.get_lib(lib_id) if not filter_lib(lib.name): @@ -823,33 +825,45 @@ class RecordData(object): dso_info = objdump.get_dso_info(lib.name, lib.build_id) if not dso_info: continue - # If there are not many functions, it's faster to disassemble them one by one. - # Otherwise it's faster to disassemble the whole binary. 
- if len(functions) < jobs: - for function in functions: - futures.append(executor.submit(self._disassemble_function, objdump, - dso_info, function)) - else: - futures.append(executor.submit(self._disassemble_binary, objdump, dso_info, - functions)) - for future in futures: - future.result() + tasks = self.split_disassembly_jobs(functions, disassemble_job_size) + logging.debug('create %d jobs to disassemble %d functions in %s', + len(tasks), len(functions), lib.name) + for task in tasks: + futures.append(executor.submit( + self._disassemble_functions, objdump, dso_info, task)) + all_tasks.append(task) + + for task, future in zip(all_tasks, futures): + result = future.result() + if result and len(result) == len(task): + for function, disassembly in zip(task, result): + function.disassembly = disassembly.lines + + logging.debug('finished all disassemble jobs') self.gen_addr_hit_map_in_record_info = True - def _disassemble_function(self, objdump: Objdump, dso_info, function: Function): - result = objdump.disassemble_function(dso_info, AddrRange(function.start_addr, - function.addr_len)) - if result: - function.disassembly = result.lines - - def _disassemble_binary(self, objdump: Objdump, dso_info, functions: List[Function]): + def split_disassembly_jobs(self, functions: List[Function], + disassemble_job_size: int) -> List[List[Function]]: + """ Decide how to split the task of dissassembly functions in one library. 
""" + if not functions: + return [] functions.sort(key=lambda f: f.start_addr) + result = [] + job_start_addr = None + for function in functions: + if (job_start_addr is None or + function.start_addr - job_start_addr > disassemble_job_size): + job_start_addr = function.start_addr + result.append([function]) + else: + result[-1].append(function) + return result + + def _disassemble_functions(self, objdump: Objdump, dso_info, + functions: List[Function]) -> Optional[List[Disassembly]]: addr_ranges = [AddrRange(f.start_addr, f.addr_len) for f in functions] - result = objdump.disassemble_functions(dso_info, addr_ranges) - if result: - for i in range(len(functions)): - functions[i].disassembly = result[i].lines + return objdump.disassemble_functions(dso_info, addr_ranges) def gen_record_info(self) -> Dict[str, Any]: """ Return json data which will be used by report_html.js. """ @@ -1010,6 +1024,8 @@ def get_args() -> argparse.Namespace: parser.add_argument('--add_source_code', action='store_true', help='Add source code.') parser.add_argument('--source_dirs', nargs='+', help='Source code directories.') parser.add_argument('--add_disassembly', action='store_true', help='Add disassembled code.') + parser.add_argument('--disassemble-job-size', type=int, default=1024*1024, + help='address range for one disassemble job') parser.add_argument('--binary_filter', nargs='+', help="""Annotate source code and disassembly only for selected binaries.""") parser.add_argument( @@ -1064,7 +1080,7 @@ def main(): if args.add_source_code: record_data.add_source_code(args.source_dirs, filter_lib, args.jobs) if args.add_disassembly: - record_data.add_disassembly(filter_lib, args.jobs) + record_data.add_disassembly(filter_lib, args.jobs, args.disassemble_job_size) # 3. Generate report html. 
report_generator = ReportGenerator(args.report_path) diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py index 04939a8d..af4fca1b 100644 --- a/simpleperf/scripts/simpleperf_utils.py +++ b/simpleperf/scripts/simpleperf_utils.py @@ -869,6 +869,8 @@ class Objdump(object): """ Disassemble code for multiple addr ranges in a binary. sorted_addr_ranges should be sorted by addr_range.start. """ + if not sorted_addr_ranges: + return [] real_path, arch = dso_info objdump_path = self.objdump_paths.get(arch) if not objdump_path: @@ -878,7 +880,12 @@ class Objdump(object): self.objdump_paths[arch] = objdump_path # Run objdump. - args = [objdump_path, '-dlC', '--no-show-raw-insn', real_path] + start_addr = sorted_addr_ranges[0].start + stop_addr = max(addr_range.end for addr_range in sorted_addr_ranges) + args = [objdump_path, '-dlC', '--no-show-raw-insn', + '--start-address=0x%x' % start_addr, + '--stop-address=0x%x' % stop_addr, + real_path] if arch == 'arm' and 'llvm-objdump' in objdump_path: args += ['--print-imm-hex'] try: |