summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yabin Cui <yabinc@google.com> 2021-06-03 17:17:35 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2021-06-03 17:17:35 +0000
commit: 16910b75cb6ff7b81b526f99e8ccaf83c3d68204 (patch)
tree: 9d56d6d6950aaa18d6e27e7ee13c9060eac80be4
parent: 1e0933d3674cb4bdddc90ee912071e1852dab50f (diff)
parent: 3051e4d830eb8b53ceb710ce70fa34d443e558a9 (diff)
download: extras-16910b75cb6ff7b81b526f99e8ccaf83c3d68204.tar.gz
Merge "simpleperf: use multithreading to speed up line annotation." am: 3051e4d830
Original change: https://android-review.googlesource.com/c/platform/system/extras/+/1725854
Change-Id: I79cd73b91ad7eebf2768efd00fff272dd6a4bdf2
-rwxr-xr-x simpleperf/scripts/annotate.py 2
-rwxr-xr-x simpleperf/scripts/pprof_proto_generator.py 13
-rwxr-xr-x simpleperf/scripts/report_html.py 40
-rw-r--r-- simpleperf/scripts/simpleperf_utils.py 60
-rwxr-xr-x simpleperf/scripts/test/do_test.py 5
-rw-r--r-- simpleperf/scripts/test/pprof_proto_generator_test.py 48
-rw-r--r-- simpleperf/scripts/test/tools_test.py 2
7 files changed, 115 insertions, 55 deletions
diff --git a/simpleperf/scripts/annotate.py b/simpleperf/scripts/annotate.py
index c517f181..7f2392ea 100755
--- a/simpleperf/scripts/annotate.py
+++ b/simpleperf/scripts/annotate.py
@@ -62,7 +62,7 @@ class Addr2Line(object):
self.addr2line.add_addr(dso_path, build_id, func_addr, addr)
def convert_addrs_to_lines(self):
- self.addr2line.convert_addrs_to_lines()
+ self.addr2line.convert_addrs_to_lines(jobs=os.cpu_count())
def get_sources(self, dso_path, addr):
dso = self.addr2line.get_dso(dso_path)
diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py
index 11806852..103e38c0 100755
--- a/simpleperf/scripts/pprof_proto_generator.py
+++ b/simpleperf/scripts/pprof_proto_generator.py
@@ -322,9 +322,9 @@ class PprofProfileGenerator(object):
if sample.location_ids:
self.add_sample(sample)
- def gen(self):
+ def gen(self, jobs: int):
# 1. Generate line info for locations and functions.
- self.gen_source_lines()
+ self.gen_source_lines(jobs)
# 2. Produce samples/locations/functions in profile.
for sample in self.sample_list:
@@ -476,7 +476,7 @@ class PprofProfileGenerator(object):
self.sample_list.append(sample)
self.sample_map[sample.key] = sample
- def gen_source_lines(self):
+ def gen_source_lines(self, jobs: int):
# 1. Create Addr2line instance
if not self.config.get('binary_cache_dir'):
log_info("Can't generate line information because binary_cache is missing.")
@@ -501,7 +501,7 @@ class PprofProfileGenerator(object):
addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)
# 3. Generate source lines.
- addr2line.convert_addrs_to_lines()
+ addr2line.convert_addrs_to_lines(jobs)
# 4. Annotate locations and functions.
for location in self.location_list:
@@ -613,6 +613,9 @@ def main():
parser.add_argument(
'--proguard-mapping-file', nargs='+',
help='Add proguard mapping file to de-obfuscate symbols')
+ parser.add_argument(
+ '-j', '--jobs', type=int, default=os.cpu_count(),
+ help='Use multithreading to speed up source code annotation.')
args = parser.parse_args()
if args.show:
@@ -635,7 +638,7 @@ def main():
generator = PprofProfileGenerator(config)
for record_file in args.record_file:
generator.load_record_file(record_file)
- profile = generator.gen()
+ profile = generator.gen(args.jobs)
store_pprof_profile(config['output_file'], profile)
diff --git a/simpleperf/scripts/report_html.py b/simpleperf/scripts/report_html.py
index 6e6a90e1..65a25cba 100755
--- a/simpleperf/scripts/report_html.py
+++ b/simpleperf/scripts/report_html.py
@@ -18,7 +18,7 @@
from __future__ import annotations
import argparse
import collections
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import Future, ThreadPoolExecutor
from dataclasses import dataclass
import datetime
import json
@@ -711,7 +711,7 @@ class RecordData(object):
self.events[event_name] = EventScope(event_name)
return self.events[event_name]
- def add_source_code(self, source_dirs: List[str], filter_lib: Callable[[str], bool]):
+ def add_source_code(self, source_dirs: List[str], filter_lib: Callable[[str], bool], jobs: int):
""" Collect source code information:
1. Find line ranges for each function in FunctionSet.
2. Find line for each addr in FunctionScope.addr_hit_map.
@@ -737,7 +737,7 @@ class RecordData(object):
func_addr = self.functions.id_to_func[function.func_id].start_addr
for addr in function.addr_hit_map:
addr2line.add_addr(lib_info.name, lib_info.build_id, func_addr, addr)
- addr2line.convert_addrs_to_lines()
+ addr2line.convert_addrs_to_lines(jobs)
# Set line range for each function.
for function in self.functions.id_to_func.values():
@@ -786,7 +786,6 @@ class RecordData(object):
2. Set flag to dump addr_hit_map when generating record info.
"""
objdump = Objdump(self.ndk_path, self.binary_finder)
- executor = ThreadPoolExecutor(jobs)
lib_functions: Dict[int, List[Function]] = collections.defaultdict(list)
for function in self.functions.id_to_func.values():
@@ -794,20 +793,23 @@ class RecordData(object):
continue
lib_functions[function.lib_id].append(function)
- for lib_id, functions in lib_functions.items():
- lib = self.libs.get_lib(lib_id)
- if not filter_lib(lib.name):
- continue
- dso_info = objdump.get_dso_info(lib.name, lib.build_id)
- if not dso_info:
- continue
- log_info('Disassemble %s' % dso_info[0])
- for function in functions:
- def task(function, dso_info):
- function.disassembly = objdump.disassemble_code(
- dso_info, function.start_addr, function.addr_len)
- executor.submit(task, function, dso_info)
- executor.shutdown(wait=True)
+ with ThreadPoolExecutor(jobs) as executor:
+ for lib_id, functions in lib_functions.items():
+ lib = self.libs.get_lib(lib_id)
+ if not filter_lib(lib.name):
+ continue
+ dso_info = objdump.get_dso_info(lib.name, lib.build_id)
+ if not dso_info:
+ continue
+ log_info('Disassemble %s' % dso_info[0])
+ futures: List[Future] = []
+ for function in functions:
+ futures.append(
+ executor.submit(objdump.disassemble_code, dso_info,
+ function.start_addr, function.addr_len))
+ for i in range(len(functions)):
+ # Call future.result() to report exceptions raised in the executor.
+ functions[i].disassembly = futures[i].result()
self.gen_addr_hit_map_in_record_info = True
def gen_record_info(self) -> Dict[str, Any]:
@@ -1024,7 +1026,7 @@ def main():
return True
return False
if args.add_source_code:
- record_data.add_source_code(args.source_dirs, filter_lib)
+ record_data.add_source_code(args.source_dirs, filter_lib, args.jobs)
if args.add_disassembly:
record_data.add_disassembly(filter_lib, args.jobs)
diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py
index cf50fcb2..466c4486 100644
--- a/simpleperf/scripts/simpleperf_utils.py
+++ b/simpleperf/scripts/simpleperf_utils.py
@@ -20,6 +20,7 @@
from __future__ import annotations
import argparse
+from concurrent.futures import Future, ThreadPoolExecutor
import logging
import os
import os.path
@@ -536,6 +537,26 @@ class Addr2Nearestline(object):
def __init__(self, build_id: Optional[str]):
self.build_id = build_id
self.addrs: Dict[int, Addr2Nearestline.Addr] = {}
+ # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
+ # and provide data structures connecting file id and file name here.
+ self.file_name_to_id: Dict[str, int] = {}
+ self.file_id_to_name: List[str] = []
+ self.func_name_to_id: Dict[str, int] = {}
+ self.func_id_to_name: List[str] = []
+
+ def get_file_id(self, file_path: str) -> int:
+ file_id = self.file_name_to_id.get(file_path)
+ if file_id is None:
+ file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
+ self.file_id_to_name.append(file_path)
+ return file_id
+
+ def get_func_id(self, func_name: str) -> int:
+ func_id = self.func_name_to_id.get(func_name)
+ if func_id is None:
+ func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
+ self.func_id_to_name.append(func_name)
+ return func_id
class Addr(object):
""" Info of an addr request.
@@ -558,12 +579,6 @@ class Addr2Nearestline(object):
self.dso_map: Dict[str, Addr2Nearestline.Dso] = {} # map from dso_path to Dso.
self.binary_finder = binary_finder
self.with_function_name = with_function_name
- # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
- # and provide data structures connecting file id and file name here.
- self.file_name_to_id: Dict[str, int] = {}
- self.file_id_to_name: List[str] = []
- self.func_name_to_id: Dict[str, int] = {}
- self.func_id_to_name: List[str] = []
def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int):
dso = self.dso_map.get(dso_path)
@@ -572,9 +587,14 @@ class Addr2Nearestline(object):
if addr not in dso.addrs:
dso.addrs[addr] = self.Addr(func_addr)
- def convert_addrs_to_lines(self):
- for dso_path, dso in self.dso_map.items():
- self._convert_addrs_in_one_dso(dso_path, dso)
+ def convert_addrs_to_lines(self, jobs: int):
+ with ThreadPoolExecutor(jobs) as executor:
+ futures: List[Future] = []
+ for dso_path, dso in self.dso_map.items():
+ futures.append(executor.submit(self._convert_addrs_in_one_dso, dso_path, dso))
+ for future in futures:
+ # Call future.result() to report exceptions raised in the executor.
+ future.result()
def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso):
real_path = self.binary_finder.find_binary(dso_path, dso.build_id)
@@ -657,9 +677,9 @@ class Addr2Nearestline(object):
if not cur_line_list:
cur_line_list = None
continue
- file_id = self._get_file_id(file_path)
+ file_id = dso.get_file_id(file_path)
if self.with_function_name:
- func_id = self._get_func_id(cur_function_name)
+ func_id = dso.get_func_id(cur_function_name)
cur_line_list.append((file_id, line_number, func_id))
else:
cur_line_list.append((file_id, line_number))
@@ -701,20 +721,6 @@ class Addr2Nearestline(object):
return None, None
return file_path, line_number
- def _get_file_id(self, file_path: str) -> int:
- file_id = self.file_name_to_id.get(file_path)
- if file_id is None:
- file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
- self.file_id_to_name.append(file_path)
- return file_id
-
- def _get_func_id(self, func_name: str) -> int:
- func_id = self.func_name_to_id.get(func_name)
- if func_id is None:
- func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
- self.func_id_to_name.append(func_name)
- return func_id
-
def get_dso(self, dso_path: str) -> Addr2Nearestline.Dso:
return self.dso_map.get(dso_path)
@@ -723,9 +729,9 @@ class Addr2Nearestline(object):
if source is None:
return None
if self.with_function_name:
- return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
+ return [(dso.file_id_to_name[file_id], line, dso.func_id_to_name[func_id])
for (file_id, line, func_id) in source]
- return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]
+ return [(dso.file_id_to_name[file_id], line) for (file_id, line) in source]
class SourceFileSearcher(object):
diff --git a/simpleperf/scripts/test/do_test.py b/simpleperf/scripts/test/do_test.py
index 55ee6b22..f27c0cbc 100755
--- a/simpleperf/scripts/test/do_test.py
+++ b/simpleperf/scripts/test/do_test.py
@@ -282,7 +282,8 @@ class TestProcess:
""" Exceed max try time. So mark left tests as failed. """
for test in self.tests:
if test not in self.test_results:
- self.test_results[test] = TestResult(self.try_time, False)
+ test_duration = '%.3fs' % (time.time() - self.last_update_time)
+ self.test_results[test] = TestResult(self.try_time, False, test_duration)
return False
self.try_time += 1
@@ -331,7 +332,7 @@ class TestSummary:
@property
def failed_test_count(self) -> int:
- return self.test_count - sum(1 for result in self.results.values() if result)
+ return self.test_count - sum(1 for result in self.results.values() if result.ok)
def update(self, test_proc: TestProcess):
for test, result in test_proc.test_results.items():
diff --git a/simpleperf/scripts/test/pprof_proto_generator_test.py b/simpleperf/scripts/test/pprof_proto_generator_test.py
index dcdf2ca9..51d8d8b6 100644
--- a/simpleperf/scripts/test/pprof_proto_generator_test.py
+++ b/simpleperf/scripts/test/pprof_proto_generator_test.py
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from collections import namedtuple
import google.protobuf
from typing import List, Optional
@@ -124,3 +125,50 @@ class TestPprofProtoGenerator(TestBase):
output = self.run_generator(testdata_file=testdata_file)
self.assertIn('simpleperf_runtest_two_functions_arm64', output)
self.assertIn('two_functions.cpp', output)
+
+ def test_line_info(self):
+ """ Check line numbers generated in profile. """
+ testdata_file = TestHelper.testdata_path('runtest_two_functions_arm64_perf.data')
+
+ # Build binary_cache.
+ binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False)
+ binary_cache_builder.build_binary_cache(testdata_file, [TestHelper.testdata_dir])
+
+ # Generate profile.
+ profile = self.generate_profile(None, [testdata_file])
+
+ CheckItem = namedtuple(
+ 'CheckItem', ['addr', 'source_file', 'source_line', 'func_name', 'func_start_line'])
+
+ check_items = [
+ CheckItem(0x113c, 'two_functions.cpp', 22, 'main', 20),
+ CheckItem(0x1140, 'two_functions.cpp', 23, 'main', 20),
+ CheckItem(0x1094, 'two_functions.cpp', 9, 'Function1', 6),
+ CheckItem(0x1104, 'two_functions.cpp', 16, 'Function2', 13),
+ ]
+ mapping = None
+ for mapping in profile.mapping:
+ binary_path = profile.string_table[mapping.filename]
+ if 'runtest_two_functions_arm64' in binary_path:
+ self.assertTrue(mapping.has_line_numbers)
+ mapping = mapping
+ break
+ self.assertIsNotNone(mapping)
+
+ for check_item in check_items:
+ found = False
+ for location in profile.location:
+ if location.mapping_id != mapping.id:
+ continue
+ addr = location.address - mapping.memory_start + mapping.file_offset
+ if addr == check_item.addr:
+ found = True
+ self.assertEqual(len(location.line), 1)
+ line = location.line[0]
+ function = profile.function[line.function_id - 1]
+ self.assertIn(check_item.source_file, profile.string_table[function.filename])
+ self.assertEqual(line.line, check_item.source_line)
+ self.assertIn(check_item.func_name, profile.string_table[function.name])
+ self.assertEqual(function.start_line, check_item.func_start_line)
+ break
+ self.assertTrue(found, check_item)
diff --git a/simpleperf/scripts/test/tools_test.py b/simpleperf/scripts/test/tools_test.py
index e8878d41..4ea12478 100644
--- a/simpleperf/scripts/test/tools_test.py
+++ b/simpleperf/scripts/test/tools_test.py
@@ -101,7 +101,7 @@ class TestTools(TestBase):
test_addrs = test_map[dso_path]
for test_addr in test_addrs:
addr2line.add_addr(dso_path, None, test_addr['func_addr'], test_addr['addr'])
- addr2line.convert_addrs_to_lines()
+ addr2line.convert_addrs_to_lines(4)
for dso_path in test_map:
dso = addr2line.get_dso(dso_path)
self.assertIsNotNone(dso, dso_path)