summaryrefslogtreecommitdiff
path: root/boottime_tools
diff options
context:
space:
mode:
authorKeun young Park <keunyoung@google.com>2021-11-02 12:16:05 -0700
committerKeun young Park <keunyoung@google.com>2021-11-03 11:23:22 -0700
commitda9bdee8b9334928f5d5f5a886940a2eae2fe254 (patch)
tree1a1e574996fed9712ce21111842d3c80a3f80bdc /boottime_tools
parent36c233be062c877f57495916b435be7ea8faeec4 (diff)
downloadextras-da9bdee8b9334928f5d5f5a886940a2eae2fe254.tar.gz
make bootanalyze more robust
- handles adb hangup during boot up : can handle problematic devices better - run kernel dmesg under separate thread and reduces chances for losing kernel log Bug: 204450693 Test: ./system/extras/boottime_tools/bootanalyze/bootanalyze.py -c ./system/extras/boottime_tools/bootanalyze/config.yaml -r -n 10 -t -v Change-Id: I7a467e9a85464e1caf82f718b21d4ca99adee745
Diffstat (limited to 'boottime_tools')
-rwxr-xr-xboottime_tools/bootanalyze/bootanalyze.py121
1 files changed, 82 insertions, 39 deletions
diff --git a/boottime_tools/bootanalyze/bootanalyze.py b/boottime_tools/bootanalyze/bootanalyze.py
index 7eea05b9..c33d97cc 100755
--- a/boottime_tools/bootanalyze/bootanalyze.py
+++ b/boottime_tools/bootanalyze/bootanalyze.py
@@ -35,7 +35,6 @@ import yaml
from datetime import datetime, date
-
TIME_DMESG = r"\[\s*(\d+\.\d+)\]"
TIME_LOGCAT = r"[0-9]+\.?[0-9]*"
KERNEL_TIME_KEY = "kernel"
@@ -46,6 +45,7 @@ LAUNCHER_START = "LauncherStart"
BOOT_TIME_TOO_BIG = 200.0
MAX_RETRIES = 5
DEBUG = False
+DEBUG_PATTERN = False
ADB_CMD = "adb"
TIMING_THRESHOLD = 5.0
BOOT_PROP = r"\[ro\.boottime\.([^\]]+)\]:\s+\[(\d+)\]"
@@ -99,6 +99,8 @@ def main():
for key, pattern in cfg['timings'].items()}
shutdown_events_pattern = {key: re.compile(pattern)
for key, pattern in cfg['shutdown_events'].items()}
+ if DEBUG_PATTERN:
+ print("search event:{} timing event:{}".format(search_events_pattern, timing_events_pattern))
data_points = {}
kernel_timing_points = collections.OrderedDict()
@@ -301,6 +303,13 @@ def handle_reboot_log(capture_log_on_error, shutdown_events_pattern, components_
print('{0:30}: {1:<7.5}'.format(item[0], item[1]))
return shutdown_events, shutdown_timing_events
+def collect_dmesg_events(search_events_pattern, timings_pattern, results):
+ dmesg_events, kernel_timing_events = collect_events(search_events_pattern, ADB_CMD +\
+ ' shell su root dmesg -w', timings_pattern,\
+ [KERNEL_BOOT_COMPLETE], True)
+ results.append(dmesg_events)
+ results.append(kernel_timing_events)
+
def iterate(args, search_events_pattern, timings_pattern, shutdown_events_pattern, cfg, error_time,\
components_to_monitor):
shutdown_events = None
@@ -315,16 +324,22 @@ def iterate(args, search_events_pattern, timings_pattern, shutdown_events_patter
components_to_monitor)
t.join()
- dmesg_events, kernel_timing_events = collect_events(search_events_pattern, ADB_CMD +\
- ' shell su root dmesg -w', timings_pattern,\
- [KERNEL_BOOT_COMPLETE], True)
+ results = []
+ t = threading.Thread(target=collect_dmesg_events, args=(search_events_pattern,\
+ timings_pattern, results))
+ t.start()
- logcat_stop_events = [LOGCAT_BOOT_COMPLETE, KERNEL_BOOT_COMPLETE, LAUNCHER_START]
+ logcat_stop_events = [LOGCAT_BOOT_COMPLETE, LAUNCHER_START]
if args.fs_check:
logcat_stop_events.append("FsStat")
logcat_events, logcat_timing_events = collect_events(
search_events_pattern, ADB_CMD + ' logcat -b all -v epoch', timings_pattern,\
logcat_stop_events, False)
+
+ t.join()
+ dmesg_events = results[0]
+ kernel_timing_events = results[1]
+
logcat_event_time = extract_time(logcat_events, TIME_LOGCAT, float)
logcat_original_time = extract_time(logcat_events, TIME_LOGCAT, str);
dmesg_event_time = extract_time(dmesg_events, TIME_DMESG, float);
@@ -364,6 +379,7 @@ def iterate(args, search_events_pattern, timings_pattern, shutdown_events_patter
if not dmesg_event_time.get(KERNEL_BOOT_COMPLETE):
print("BootAnimEnd time or BootComplete-kernel not captured in both log" +\
", cannot get time diff")
+ print("dmesg {} logcat {}".format(dmesg_event_time, logcat_event_time))
return None, None, None, None, None, None
diffs.append((logcat_event_time[LOGCAT_BOOT_COMPLETE],\
logcat_event_time[LOGCAT_BOOT_COMPLETE] - dmesg_event_time[KERNEL_BOOT_COMPLETE]))
@@ -631,56 +647,79 @@ def collect_logcat_for_shutdown(capture_log_on_error, shutdown_events_pattern,\
f.write('\n'.join(lines))
return events, timing_events
+def log_timeout(time_left, stop_events, events, timing_events):
+ print("timeout waiting for event, continue", time_left)
+ print(" remaininig events {}, event {} timing events {}".\
+ format(stop_events, events, timing_events))
def collect_events(search_events, command, timings, stop_events, disable_timing_after_zygote):
events = collections.OrderedDict()
timing_events = {}
- process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
+
data_available = stop_events is None
zygote_pids = []
start_time = time.time()
zygote_found = False
-
line = None
- read_poll = select.poll()
- read_poll.register(process.stdout, select.POLLIN)
+ print("remaining stop_events:", stop_events)
+ init = True
while True:
+ if init:
+ process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
+ read_poll = select.poll()
+ read_poll.register(process.stdout, select.POLLIN)
+ init = False
+ if len(stop_events) == 0:
+ break
time_left = start_time + max_wait_time - time.time()
if time_left <= 0:
- print("timeout waiting for event, continue", time_left)
+ log_timeout(time_left, stop_events, events, timing_events)
break
- read_r = read_poll.poll(time_left * 1000.0)
- if len(read_r) > 0 and read_r[0][1] == select.POLLIN:
- line = process.stdout.readline().decode('utf-8', 'ignore')
- else:
- print("poll timeout waiting for event, continue", time_left)
+ polled_events = read_poll.poll(time_left * 1000.0)
+ if len(polled_events) == 0:
+ log_timeout(time_left, stop_events, events, timing_events)
break
- if not data_available:
- print("Collecting data samples from '%s'. Please wait...\n" % command)
- data_available = True
- event = get_boot_event(line, search_events)
- if event:
- debug("event[{0}] captured: {1}".format(event, line))
- if event == "starting_zygote":
- events[event] = line
- zygote_found = True
- elif event.startswith("zygote"):
- handle_zygote_event(zygote_pids, events, event, line)
+ for polled_event in polled_events:
+ if polled_event[1] == select.POLLIN:
+ line = process.stdout.readline().decode('utf-8', 'ignore')
else:
- new_event = update_name_if_already_exist(events, event)
- events[new_event] = line
- if event in stop_events:
- stop_events.remove(event)
- print("remaining stop_events:", stop_events)
- if len(stop_events) == 0:
+ if polled_event[1] == select.POLLHUP:
+ if len(stop_events) == 0:
+ break;
+ # adb connection lost
+ print("poll error waiting for event, adb lost?")
+ if time_left > 0:
+ print("retry adb")
+ run_adb_cmd('wait-for-device')
+ print(" reconnected")
+ init = True
+ continue
+ else:
break
-
- timing_event = get_boot_event(line, timings)
- if timing_event and (not disable_timing_after_zygote or not zygote_found):
- if timing_event not in timing_events:
- timing_events[timing_event] = []
- timing_events[timing_event].append(line)
- debug("timing_event[{0}] captured: {1}".format(timing_event, line))
+ if not data_available:
+ print("Collecting data samples from '%s'. Please wait...\n" % command)
+ data_available = True
+ event = get_boot_event(line, search_events)
+ if event:
+ debug("event[{0}] captured: {1}".format(event, line))
+ if event == "starting_zygote":
+ events[event] = line
+ zygote_found = True
+ elif event.startswith("zygote"):
+ handle_zygote_event(zygote_pids, events, event, line)
+ else:
+ new_event = update_name_if_already_exist(events, event)
+ events[new_event] = line
+ if event in stop_events:
+ stop_events.remove(event)
+ print("remaining stop_events:", stop_events)
+
+ timing_event = get_boot_event(line, timings)
+ if timing_event and (not disable_timing_after_zygote or not zygote_found):
+ if timing_event not in timing_events:
+ timing_events[timing_event] = []
+ timing_events[timing_event].append(line)
+ debug("timing_event[{0}] captured: {1}".format(timing_event, line))
process.terminate()
return events, timing_events
@@ -743,6 +782,9 @@ def extract_time(events, pattern, date_transform_function):
def do_reboot(serial, use_adb_reboot):
+ # do not update time
+ run_adb_cmd('shell settings put global auto_time 0')
+ run_adb_cmd('shell settings put global auto_time_zone 0')
original_devices = subprocess.check_output("adb devices", shell=True)
if use_adb_reboot:
print('Rebooting the device using adb reboot')
@@ -780,6 +822,7 @@ def reboot(serial, use_stressfs, permissive, use_adb_reboot, adb_buffersize=None
print('Waiting the device')
run_adb_cmd('wait-for-device')
+ print(' found a device')
if adb_buffersize is not None:
# increase the buffer size