diff options
| field | value | date |
|---|---|---|
| author | Yabin Cui <yabinc@google.com> | 2022-01-13 10:45:56 -0800 |
| committer | Yabin Cui <yabinc@google.com> | 2022-01-13 10:46:58 -0800 |
| commit | 5b90b6a5e3fc9703abc7ce55b8790d772ec7a82c (patch) | |
| tree | b88610f8f98d9b79be1f5e7d50be653f58a0c13c | |
| parent | 740b3cd08c4c04ee1d7d0091b5d59ad8682a9820 (diff) | |
| download | extras-5b90b6a5e3fc9703abc7ce55b8790d772ec7a82c.tar.gz | |
simpleperf: adjust format of gecko_profile_generator.py.
Use 4-space indent as other scripts.
Bug: 211814099
Test: run test.py --only-host-test
Change-Id: I65f755a53b226841ec74d52c1cc8b9c0b732d7e4

| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | simpleperf/scripts/gecko_profile_generator.py | 598 |
| -rw-r--r-- | simpleperf/scripts/test/gecko_profile_generator_test.py | 27 |

2 files changed, 313 insertions, 312 deletions
diff --git a/simpleperf/scripts/gecko_profile_generator.py b/simpleperf/scripts/gecko_profile_generator.py index 886d89c5..afeecac8 100755 --- a/simpleperf/scripts/gecko_profile_generator.py +++ b/simpleperf/scripts/gecko_profile_generator.py @@ -44,29 +44,29 @@ GeckoProfile = Dict # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 class Frame(NamedTuple): - string_id: StringID - relevantForJS: bool - innerWindowID: int - implementation: None - optimizations: None - line: None - column: None - category: CategoryID - subcategory: int + string_id: StringID + relevantForJS: bool + innerWindowID: int + implementation: None + optimizations: None + line: None + column: None + category: CategoryID + subcategory: int # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 class Stack(NamedTuple): - prefix_id: Optional[StackID] - frame_id: FrameID - category_id: CategoryID + prefix_id: Optional[StackID] + frame_id: FrameID + category_id: CategoryID # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 class Sample(NamedTuple): - stack_id: Optional[StackID] - time_ms: Milliseconds - responsiveness: int + stack_id: Optional[StackID] + time_ms: Milliseconds + responsiveness: int # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425 @@ -121,302 +121,302 @@ CATEGORIES = [ @dataclass class Thread: - """A builder for a profile of a single thread. - - Attributes: - comm: Thread command-line (name). - pid: process ID of containing process. - tid: thread ID. - samples: Timeline of profile samples. - frameTable: interned stack frame ID -> stack frame. - stringTable: interned string ID -> string. - stringMap: interned string -> string ID. - stackTable: interned stack ID -> stack. 
- stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID. - frameMap: Stack Frame string -> interned Frame ID. - """ - comm: str - pid: int - tid: int - samples: List[Sample] = field(default_factory=list) - frameTable: List[Frame] = field(default_factory=list) - stringTable: List[str] = field(default_factory=list) - # TODO: this is redundant with frameTable, could we remove this? - stringMap: Dict[str, int] = field(default_factory=dict) - stackTable: List[Stack] = field(default_factory=list) - stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict) - frameMap: Dict[str, int] = field(default_factory=dict) - - def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int: - """Gets a matching stack, or saves the new stack. Returns a Stack ID.""" - key = (prefix_id, frame_id) - stack_id = self.stackMap.get(key) - if stack_id is not None: - return stack_id - stack_id = len(self.stackTable) - self.stackTable.append(Stack(prefix_id=prefix_id, - frame_id=frame_id, - category_id=0)) - self.stackMap[key] = stack_id - return stack_id - - def _intern_string(self, string: str) -> int: - """Gets a matching string, or saves the new string. Returns a String ID.""" - string_id = self.stringMap.get(string) - if string_id is not None: - return string_id - string_id = len(self.stringTable) - self.stringTable.append(string) - self.stringMap[string] = string_id - return string_id - - def _intern_frame(self, frame_str: str) -> int: - """Gets a matching stack frame, or saves the new frame. Returns a Frame ID.""" - frame_id = self.frameMap.get(frame_str) - if frame_id is not None: - return frame_id - frame_id = len(self.frameTable) - self.frameMap[frame_str] = frame_id - string_id = self._intern_string(frame_str) - - category = 0 - # Heuristic: kernel code contains "kallsyms" as the library name. 
- if "kallsyms" in frame_str or ".ko" in frame_str: - category = 1 - elif ".so" in frame_str: - category = 2 - elif ".vdex" in frame_str: - category = 3 - elif ".oat" in frame_str: - category = 4 - - self.frameTable.append(Frame( - string_id=string_id, - relevantForJS=False, - innerWindowID=0, - implementation=None, - optimizations=None, - line=None, - column=None, - category=category, - subcategory=0, - )) - return frame_id - - def _add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None: - """Add a timestamped stack trace sample to the thread builder. - - Args: - comm: command-line (name) of the thread at this sample - stack: sampled stack frames. Root first, leaf last. - time_ms: timestamp of sample in milliseconds + """A builder for a profile of a single thread. + + Attributes: + comm: Thread command-line (name). + pid: process ID of containing process. + tid: thread ID. + samples: Timeline of profile samples. + frameTable: interned stack frame ID -> stack frame. + stringTable: interned string ID -> string. + stringMap: interned string -> string ID. + stackTable: interned stack ID -> stack. + stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID. + frameMap: Stack Frame string -> interned Frame ID. """ - # Unix threads often don't set their name immediately upon creation. - # Use the last name - if self.comm != comm: - self.comm = comm - - prefix_stack_id = None - for frame in stack: - frame_id = self._intern_frame(frame) - prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id) - - self.samples.append(Sample(stack_id=prefix_stack_id, - time_ms=time_ms, - responsiveness=0)) - - def _to_json_dict(self) -> Dict: - """Converts this Thread to GeckoThread JSON format.""" - # The samples aren't guaranteed to be in order. Sort them by time. - self.samples.sort(key=lambda s: s.time_ms) - - # Gecko profile format is row-oriented data as List[List], - # And a schema for interpreting each index. 
- # Schema: - # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md - # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230 - return { - "tid": self.tid, - "pid": self.pid, - "name": self.comm, - # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51 - "markers": { - "schema": { - "name": 0, - "startTime": 1, - "endTime": 2, - "phase": 3, - "category": 4, - "data": 5, + comm: str + pid: int + tid: int + samples: List[Sample] = field(default_factory=list) + frameTable: List[Frame] = field(default_factory=list) + stringTable: List[str] = field(default_factory=list) + # TODO: this is redundant with frameTable, could we remove this? + stringMap: Dict[str, int] = field(default_factory=dict) + stackTable: List[Stack] = field(default_factory=list) + stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict) + frameMap: Dict[str, int] = field(default_factory=dict) + + def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int: + """Gets a matching stack, or saves the new stack. Returns a Stack ID.""" + key = (prefix_id, frame_id) + stack_id = self.stackMap.get(key) + if stack_id is not None: + return stack_id + stack_id = len(self.stackTable) + self.stackTable.append(Stack(prefix_id=prefix_id, + frame_id=frame_id, + category_id=0)) + self.stackMap[key] = stack_id + return stack_id + + def _intern_string(self, string: str) -> int: + """Gets a matching string, or saves the new string. Returns a String ID.""" + string_id = self.stringMap.get(string) + if string_id is not None: + return string_id + string_id = len(self.stringTable) + self.stringTable.append(string) + self.stringMap[string] = string_id + return string_id + + def _intern_frame(self, frame_str: str) -> int: + """Gets a matching stack frame, or saves the new frame. 
Returns a Frame ID.""" + frame_id = self.frameMap.get(frame_str) + if frame_id is not None: + return frame_id + frame_id = len(self.frameTable) + self.frameMap[frame_str] = frame_id + string_id = self._intern_string(frame_str) + + category = 0 + # Heuristic: kernel code contains "kallsyms" as the library name. + if "kallsyms" in frame_str or ".ko" in frame_str: + category = 1 + elif ".so" in frame_str: + category = 2 + elif ".vdex" in frame_str: + category = 3 + elif ".oat" in frame_str: + category = 4 + + self.frameTable.append(Frame( + string_id=string_id, + relevantForJS=False, + innerWindowID=0, + implementation=None, + optimizations=None, + line=None, + column=None, + category=category, + subcategory=0, + )) + return frame_id + + def _add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None: + """Add a timestamped stack trace sample to the thread builder. + + Args: + comm: command-line (name) of the thread at this sample + stack: sampled stack frames. Root first, leaf last. + time_ms: timestamp of sample in milliseconds + """ + # Unix threads often don't set their name immediately upon creation. + # Use the last name + if self.comm != comm: + self.comm = comm + + prefix_stack_id = None + for frame in stack: + frame_id = self._intern_frame(frame) + prefix_stack_id = self._intern_stack(frame_id, prefix_stack_id) + + self.samples.append(Sample(stack_id=prefix_stack_id, + time_ms=time_ms, + responsiveness=0)) + + def _to_json_dict(self) -> Dict: + """Converts this Thread to GeckoThread JSON format.""" + # The samples aren't guaranteed to be in order. Sort them by time. + self.samples.sort(key=lambda s: s.time_ms) + + # Gecko profile format is row-oriented data as List[List], + # And a schema for interpreting each index. 
+ # Schema: + # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230 + return { + "tid": self.tid, + "pid": self.pid, + "name": self.comm, + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51 + "markers": { + "schema": { + "name": 0, + "startTime": 1, + "endTime": 2, + "phase": 3, + "category": 4, + "data": 5, + }, + "data": [], }, - "data": [], - }, - # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 - "samples": { - "schema": { - "stack": 0, - "time": 1, - "responsiveness": 2, + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90 + "samples": { + "schema": { + "stack": 0, + "time": 1, + "responsiveness": 2, + }, + "data": self.samples }, - "data": self.samples - }, - # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 - "frameTable": { - "schema": { - "location": 0, - "relevantForJS": 1, - "innerWindowID": 2, - "implementation": 3, - "optimizations": 4, - "line": 5, - "column": 6, - "category": 7, - "subcategory": 8, + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156 + "frameTable": { + "schema": { + "location": 0, + "relevantForJS": 1, + "innerWindowID": 2, + "implementation": 3, + "optimizations": 4, + "line": 5, + "column": 6, + "category": 7, + "subcategory": 8, + }, + "data": self.frameTable, }, - "data": self.frameTable, - }, - # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 - "stackTable": { - "schema": { - "prefix": 0, - "frame": 1, - "category": 2, + # 
https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216 + "stackTable": { + "schema": { + "prefix": 0, + "frame": 1, + "category": 2, + }, + "data": self.stackTable, }, - "data": self.stackTable, - }, - "stringTable": self.stringTable, - "registerTime": 0, - "unregisterTime": None, - "processType": "default", - } + "stringTable": self.stringTable, + "registerTime": 0, + "unregisterTime": None, + "processType": "default", + } def _gecko_profile( - record_file: str, - symfs_dir: Optional[str], - kallsyms_file: Optional[str], - proguard_mapping_file: List[str], - comm_filter: Set[str]) -> GeckoProfile: - """convert a simpleperf profile to gecko format""" - lib = ReportLib() - - lib.ShowIpForUnknownSymbol() - for file_path in proguard_mapping_file: - lib.AddProguardMappingFile(file_path) - if symfs_dir is not None: - lib.SetSymfs(symfs_dir) - lib.SetRecordFile(record_file) - if kallsyms_file is not None: - lib.SetKallsymsFile(kallsyms_file) - - arch = lib.GetArch() - meta_info = lib.MetaInfo() - record_cmd = lib.GetRecordCmd() - - # Map from tid to Thread - threadMap: Dict[int, Thread] = {} - - while True: - sample = lib.GetNextSample() - if sample is None: - lib.Close() - break - if comm_filter: - if sample.thread_comm not in comm_filter: - continue - event = lib.GetEventOfCurrentSample() - symbol = lib.GetSymbolOfCurrentSample() - callchain = lib.GetCallChainOfCurrentSample() - sample_time_ms = sample.time / 1000000 - - stack = ['%s (in %s)' % (symbol.symbol_name, symbol.dso_name)] - for i in range(callchain.nr): - entry = callchain.entries[i] - stack.append('%s (in %s)' % (entry.symbol.symbol_name, entry.symbol.dso_name)) - # We want root first, leaf last. 
- stack.reverse() - - # add thread sample - thread = threadMap.get(sample.tid) - if thread is None: - thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid) - threadMap[sample.tid] = thread - thread._add_sample( - comm=sample.thread_comm, - stack=stack, - # We are being a bit fast and loose here with time here. simpleperf - # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix - # epoch, but rather some arbitrary time. In practice, this doesn't - # matter, the Firefox Profiler normalises all the timestamps to begin at - # the minimum time. Consider fixing this in future, if needed, by - # setting `simpleperf record --clockid realtime`. - time_ms=sample_time_ms) - - threads = [thread._to_json_dict() for thread in threadMap.values()] - - profile_timestamp = meta_info.get('timestamp') - end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0 - - # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305 - gecko_profile_meta = { - "interval": 1, - "processType": 0, - "product": record_cmd, - "device": meta_info.get("product_props"), - "platform": meta_info.get("android_build_fingerprint"), - "stackwalk": 1, - "debug": 0, - "gcpoison": 0, - "asyncstack": 1, - # The profile timestamp is actually the end time, not the start time. - # This is close enough for our purposes; I mostly just want to know which - # day the profile was taken! Consider fixing this in future, if needed, - # by setting `simpleperf record --clockid realtime` and taking the minimum - # sample time. 
- "startTime": end_time_ms, - "shutdownTime": None, - "version": 24, - "presymbolicated": True, - "categories": CATEGORIES, - "markerSchema": [], - "abi": arch, - "oscpu": meta_info.get("android_build_fingerprint"), - } - - # Schema: - # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377 - # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md - return { - "meta": gecko_profile_meta, - "libs": [], - "threads": threads, - "processes": [], - "pausedRanges": [], - } + record_file: str, + symfs_dir: Optional[str], + kallsyms_file: Optional[str], + proguard_mapping_file: List[str], + comm_filter: Set[str]) -> GeckoProfile: + """convert a simpleperf profile to gecko format""" + lib = ReportLib() + + lib.ShowIpForUnknownSymbol() + for file_path in proguard_mapping_file: + lib.AddProguardMappingFile(file_path) + if symfs_dir is not None: + lib.SetSymfs(symfs_dir) + lib.SetRecordFile(record_file) + if kallsyms_file is not None: + lib.SetKallsymsFile(kallsyms_file) + + arch = lib.GetArch() + meta_info = lib.MetaInfo() + record_cmd = lib.GetRecordCmd() + + # Map from tid to Thread + threadMap: Dict[int, Thread] = {} + + while True: + sample = lib.GetNextSample() + if sample is None: + lib.Close() + break + if comm_filter: + if sample.thread_comm not in comm_filter: + continue + event = lib.GetEventOfCurrentSample() + symbol = lib.GetSymbolOfCurrentSample() + callchain = lib.GetCallChainOfCurrentSample() + sample_time_ms = sample.time / 1000000 + + stack = ['%s (in %s)' % (symbol.symbol_name, symbol.dso_name)] + for i in range(callchain.nr): + entry = callchain.entries[i] + stack.append('%s (in %s)' % (entry.symbol.symbol_name, entry.symbol.dso_name)) + # We want root first, leaf last. 
+ stack.reverse() + + # add thread sample + thread = threadMap.get(sample.tid) + if thread is None: + thread = Thread(comm=sample.thread_comm, pid=sample.pid, tid=sample.tid) + threadMap[sample.tid] = thread + thread._add_sample( + comm=sample.thread_comm, + stack=stack, + # We are being a bit fast and loose here with time here. simpleperf + # uses CLOCK_MONOTONIC by default, which doesn't use the normal unix + # epoch, but rather some arbitrary time. In practice, this doesn't + # matter, the Firefox Profiler normalises all the timestamps to begin at + # the minimum time. Consider fixing this in future, if needed, by + # setting `simpleperf record --clockid realtime`. + time_ms=sample_time_ms) + + threads = [thread._to_json_dict() for thread in threadMap.values()] + + profile_timestamp = meta_info.get('timestamp') + end_time_ms = (int(profile_timestamp) * 1000) if profile_timestamp else 0 + + # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305 + gecko_profile_meta = { + "interval": 1, + "processType": 0, + "product": record_cmd, + "device": meta_info.get("product_props"), + "platform": meta_info.get("android_build_fingerprint"), + "stackwalk": 1, + "debug": 0, + "gcpoison": 0, + "asyncstack": 1, + # The profile timestamp is actually the end time, not the start time. + # This is close enough for our purposes; I mostly just want to know which + # day the profile was taken! Consider fixing this in future, if needed, + # by setting `simpleperf record --clockid realtime` and taking the minimum + # sample time. 
+ "startTime": end_time_ms, + "shutdownTime": None, + "version": 24, + "presymbolicated": True, + "categories": CATEGORIES, + "markerSchema": [], + "abi": arch, + "oscpu": meta_info.get("android_build_fingerprint"), + } + + # Schema: + # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L377 + # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md + return { + "meta": gecko_profile_meta, + "libs": [], + "threads": threads, + "processes": [], + "pausedRanges": [], + } def main() -> None: - parser = BaseArgumentParser(description=__doc__) - parser.add_argument('--symfs', - help='Set the path to find binaries with symbols and debug info.') - parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.') - parser.add_argument('-i', '--record_file', nargs='?', default='perf.data', - help='Default is perf.data.') - parser.add_argument( - '--proguard-mapping-file', nargs='+', - help='Add proguard mapping file to de-obfuscate symbols', - default = []) - parser.add_argument('--comm', nargs='+', action='append', help=""" + parser = BaseArgumentParser(description=__doc__) + parser.add_argument('--symfs', + help='Set the path to find binaries with symbols and debug info.') + parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.') + parser.add_argument('-i', '--record_file', nargs='?', default='perf.data', + help='Default is perf.data.') + parser.add_argument( + '--proguard-mapping-file', nargs='+', + help='Add proguard mapping file to de-obfuscate symbols', + default=[]) + parser.add_argument('--comm', nargs='+', action='append', help=""" Use samples only in threads with selected names.""") - args = parser.parse_args() - profile = _gecko_profile( - record_file=args.record_file, - symfs_dir=args.symfs, - kallsyms_file=args.kallsyms, - proguard_mapping_file=args.proguard_mapping_file, - comm_filter=set(flatten_arg_list(args.comm))) - 
- json.dump(profile, sys.stdout, sort_keys=True) + args = parser.parse_args() + profile = _gecko_profile( + record_file=args.record_file, + symfs_dir=args.symfs, + kallsyms_file=args.kallsyms, + proguard_mapping_file=args.proguard_mapping_file, + comm_filter=set(flatten_arg_list(args.comm))) + + json.dump(profile, sys.stdout, sort_keys=True) if __name__ == '__main__': diff --git a/simpleperf/scripts/test/gecko_profile_generator_test.py b/simpleperf/scripts/test/gecko_profile_generator_test.py index 8f300767..d66cd526 100644 --- a/simpleperf/scripts/test/gecko_profile_generator_test.py +++ b/simpleperf/scripts/test/gecko_profile_generator_test.py @@ -18,18 +18,19 @@ import json from . test_utils import TestBase, TestHelper + class TestGeckoProfileGenerator(TestBase): - def run_generator(self, testdata_file): - testdata_path = TestHelper.testdata_path(testdata_file) - gecko_profile_json = self.run_cmd( - ['gecko_profile_generator.py', '-i', testdata_path], return_output=True) - return json.loads(gecko_profile_json) + def run_generator(self, testdata_file): + testdata_path = TestHelper.testdata_path(testdata_file) + gecko_profile_json = self.run_cmd( + ['gecko_profile_generator.py', '-i', testdata_path], return_output=True) + return json.loads(gecko_profile_json) - def test_golden(self): - got = self.run_generator('perf_with_interpreter_frames.data') - golden_path = TestHelper.testdata_path('perf_with_interpreter_frames.gecko.json') - with open(golden_path) as f: - want = json.load(f) - self.assertEqual( - json.dumps(got, sort_keys=True, indent=2), - json.dumps(want, sort_keys=True, indent=2)) + def test_golden(self): + got = self.run_generator('perf_with_interpreter_frames.data') + golden_path = TestHelper.testdata_path('perf_with_interpreter_frames.gecko.json') + with open(golden_path) as f: + want = json.load(f) + self.assertEqual( + json.dumps(got, sort_keys=True, indent=2), + json.dumps(want, sort_keys=True, indent=2)) |