diff options
Diffstat (limited to 'services/core/java/com/android/server/PackageWatchdog.java')
-rw-r--r-- | services/core/java/com/android/server/PackageWatchdog.java | 1971 |
1 files changed, 1971 insertions, 0 deletions
diff --git a/services/core/java/com/android/server/PackageWatchdog.java b/services/core/java/com/android/server/PackageWatchdog.java new file mode 100644 index 000000000000..6f20adf74ee2 --- /dev/null +++ b/services/core/java/com/android/server/PackageWatchdog.java @@ -0,0 +1,1971 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.server; + +import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig; + +import static java.lang.annotation.RetentionPolicy.SOURCE; + +import android.annotation.IntDef; +import android.annotation.Nullable; +import android.content.Context; +import android.content.pm.PackageInfo; +import android.content.pm.PackageManager; +import android.content.pm.VersionedPackage; +import android.crashrecovery.flags.Flags; +import android.net.ConnectivityModuleConnector; +import android.os.Environment; +import android.os.Handler; +import android.os.Looper; +import android.os.Process; +import android.os.SystemProperties; +import android.provider.DeviceConfig; +import android.sysprop.CrashRecoveryProperties; +import android.text.TextUtils; +import android.util.ArrayMap; +import android.util.ArraySet; +import android.util.AtomicFile; +import android.util.LongArrayQueue; +import android.util.Slog; +import android.util.Xml; + +import com.android.internal.annotations.GuardedBy; +import com.android.internal.annotations.VisibleForTesting; +import com.android.internal.os.BackgroundThread; +import com.android.internal.util.IndentingPrintWriter; +import com.android.internal.util.XmlUtils; +import com.android.modules.utils.TypedXmlPullParser; +import com.android.modules.utils.TypedXmlSerializer; + +import libcore.io.IoUtils; + +import org.xmlpull.v1.XmlPullParserException; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + * Monitors the health of packages on the system and notifies interested observers when packages + * fail. On failure, the registered observer with the least user impacting mitigation will + * be notified. + */ +public class PackageWatchdog { + private static final String TAG = "PackageWatchdog"; + + static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS = + "watchdog_trigger_failure_duration_millis"; + static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT = + "watchdog_trigger_failure_count"; + static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED = + "watchdog_explicit_health_check_enabled"; + + // TODO: make the following values configurable via DeviceConfig + private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS = + TimeUnit.SECONDS.toMillis(30); + private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10; + + + public static final int FAILURE_REASON_UNKNOWN = 0; + public static final int FAILURE_REASON_NATIVE_CRASH = 1; + public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2; + public static final int FAILURE_REASON_APP_CRASH = 3; + public static final int FAILURE_REASON_APP_NOT_RESPONDING = 4; + public static final int FAILURE_REASON_BOOT_LOOP = 5; + + @IntDef(prefix = { "FAILURE_REASON_" }, value = { + FAILURE_REASON_UNKNOWN, + FAILURE_REASON_NATIVE_CRASH, + FAILURE_REASON_EXPLICIT_HEALTH_CHECK, + FAILURE_REASON_APP_CRASH, + FAILURE_REASON_APP_NOT_RESPONDING, + FAILURE_REASON_BOOT_LOOP + }) + @Retention(RetentionPolicy.SOURCE) + public @interface FailureReasons {} + + // Duration to count package failures before it resets to 0 + @VisibleForTesting + static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS = + (int) TimeUnit.MINUTES.toMillis(1); + // Number of package failures within the duration above before we notify observers + @VisibleForTesting + static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5; + @VisibleForTesting + static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2); + // Sliding window for tracking how many mitigation calls were made for a package. + @VisibleForTesting + static final long DEFAULT_DEESCALATION_WINDOW_MS = TimeUnit.HOURS.toMillis(1); + // Whether explicit health checks are enabled or not + private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true; + + @VisibleForTesting + static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5; + + static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10); + + // Threshold level at which or above user might experience significant disruption. + private static final String MAJOR_USER_IMPACT_LEVEL_THRESHOLD = + "persist.device_config.configuration.major_user_impact_level_threshold"; + private static final int DEFAULT_MAJOR_USER_IMPACT_LEVEL_THRESHOLD = + PackageHealthObserverImpact.USER_IMPACT_LEVEL_71; + + private long mNumberOfNativeCrashPollsRemaining; + + private static final int DB_VERSION = 1; + private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog"; + private static final String TAG_PACKAGE = "package"; + private static final String TAG_OBSERVER = "observer"; + private static final String ATTR_VERSION = "version"; + private static final String ATTR_NAME = "name"; + private static final String ATTR_DURATION = "duration"; + private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration"; + private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check"; + private static final String ATTR_MITIGATION_CALLS = "mitigation-calls"; + private static final String ATTR_MITIGATION_COUNT = "mitigation-count"; + + // A file containing information about the current mitigation count in the case of a boot loop. + // This allows boot loop information to persist in the case of an fs-checkpoint being + // aborted. + private static final String METADATA_FILE = "/metadata/watchdog/mitigation_count.txt"; + + @GuardedBy("PackageWatchdog.class") + private static PackageWatchdog sPackageWatchdog; + + private final Object mLock = new Object(); + // System server context + private final Context mContext; + // Handler to run short running tasks + private final Handler mShortTaskHandler; + // Handler for processing IO and long running tasks + private final Handler mLongTaskHandler; + // Contains (observer-name -> observer-handle) that have ever been registered from + // previous boots. Observers with all packages expired are periodically pruned. + // It is saved to disk on system shutdown and repouplated on startup so it survives reboots. + @GuardedBy("mLock") + private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>(); + // File containing the XML data of monitored packages /data/system/package-watchdog.xml + private final AtomicFile mPolicyFile; + private final ExplicitHealthCheckController mHealthCheckController; + private final ConnectivityModuleConnector mConnectivityModuleConnector; + private final Runnable mSyncRequests = this::syncRequests; + private final Runnable mSyncStateWithScheduledReason = this::syncStateWithScheduledReason; + private final Runnable mSaveToFile = this::saveToFile; + private final SystemClock mSystemClock; + private final BootThreshold mBootThreshold; + private final DeviceConfig.OnPropertiesChangedListener + mOnPropertyChangedListener = this::onPropertyChanged; + + // The set of packages that have been synced with the ExplicitHealthCheckController + @GuardedBy("mLock") + private Set<String> mRequestedHealthCheckPackages = new ArraySet<>(); + @GuardedBy("mLock") + private boolean mIsPackagesReady; + // Flag to control whether explicit health checks are supported or not + @GuardedBy("mLock") + private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED; + @GuardedBy("mLock") + private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; + @GuardedBy("mLock") + private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; + // SystemClock#uptimeMillis when we last executed #syncState + // 0 if no prune is scheduled. + @GuardedBy("mLock") + private long mUptimeAtLastStateSync; + // If true, sync explicit health check packages with the ExplicitHealthCheckController. + @GuardedBy("mLock") + private boolean mSyncRequired = false; + + @FunctionalInterface + @VisibleForTesting + interface SystemClock { + long uptimeMillis(); + } + + private PackageWatchdog(Context context) { + // Needs to be constructed inline + this(context, new AtomicFile( + new File(new File(Environment.getDataDirectory(), "system"), + "package-watchdog.xml")), + new Handler(Looper.myLooper()), BackgroundThread.getHandler(), + new ExplicitHealthCheckController(context), + ConnectivityModuleConnector.getInstance(), + android.os.SystemClock::uptimeMillis); + } + + /** + * Creates a PackageWatchdog that allows injecting dependencies. + */ + @VisibleForTesting + PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, + Handler longTaskHandler, ExplicitHealthCheckController controller, + ConnectivityModuleConnector connectivityModuleConnector, SystemClock clock) { + mContext = context; + mPolicyFile = policyFile; + mShortTaskHandler = shortTaskHandler; + mLongTaskHandler = longTaskHandler; + mHealthCheckController = controller; + mConnectivityModuleConnector = connectivityModuleConnector; + mSystemClock = clock; + mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS; + mBootThreshold = new BootThreshold(DEFAULT_BOOT_LOOP_TRIGGER_COUNT, + DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS); + + loadFromFile(); + sPackageWatchdog = this; + } + + /** Creates or gets singleton instance of PackageWatchdog. */ + public static PackageWatchdog getInstance(Context context) { + synchronized (PackageWatchdog.class) { + if (sPackageWatchdog == null) { + new PackageWatchdog(context); + } + return sPackageWatchdog; + } + } + + /** + * Called during boot to notify when packages are ready on the device so we can start + * binding. + */ + public void onPackagesReady() { + synchronized (mLock) { + mIsPackagesReady = true; + mHealthCheckController.setCallbacks(packageName -> onHealthCheckPassed(packageName), + packages -> onSupportedPackages(packages), + this::onSyncRequestNotified); + setPropertyChangedListenerLocked(); + updateConfigs(); + registerConnectivityModuleHealthListener(); + } + } + + /** + * Registers {@code observer} to listen for package failures. Add a new ObserverInternal for + * this observer if it does not already exist. + * + * <p>Observers are expected to call this on boot. It does not specify any packages but + * it will resume observing any packages requested from a previous boot. + */ + public void registerHealthObserver(PackageHealthObserver observer) { + synchronized (mLock) { + ObserverInternal internalObserver = mAllObservers.get(observer.getName()); + if (internalObserver != null) { + internalObserver.registeredObserver = observer; + } else { + internalObserver = new ObserverInternal(observer.getName(), new ArrayList<>()); + internalObserver.registeredObserver = observer; + mAllObservers.put(observer.getName(), internalObserver); + syncState("added new observer"); + } + } + } + + /** + * Starts observing the health of the {@code packages} for {@code observer} and notifies + * {@code observer} of any package failures within the monitoring duration. + * + * <p>If monitoring a package supporting explicit health check, at the end of the monitoring + * duration if {@link #onHealthCheckPassed} was never called, + * {@link PackageHealthObserver#execute} will be called as if the package failed. + * + * <p>If {@code observer} is already monitoring a package in {@code packageNames}, + * the monitoring window of that package will be reset to {@code durationMs} and the health + * check state will be reset to a default depending on if the package is contained in + * {@link mPackagesWithExplicitHealthCheckEnabled}. + * + * <p>If {@code packageNames} is empty, this will be a no-op. + * + * <p>If {@code durationMs} is less than 1, a default monitoring duration + * {@link #DEFAULT_OBSERVING_DURATION_MS} will be used. + */ + public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames, + long durationMs) { + if (packageNames.isEmpty()) { + Slog.wtf(TAG, "No packages to observe, " + observer.getName()); + return; + } + if (durationMs < 1) { + Slog.wtf(TAG, "Invalid duration " + durationMs + "ms for observer " + + observer.getName() + ". Not observing packages " + packageNames); + durationMs = DEFAULT_OBSERVING_DURATION_MS; + } + + List<MonitoredPackage> packages = new ArrayList<>(); + for (int i = 0; i < packageNames.size(); i++) { + // Health checks not available yet so health check state will start INACTIVE + MonitoredPackage pkg = newMonitoredPackage(packageNames.get(i), durationMs, false); + if (pkg != null) { + packages.add(pkg); + } else { + Slog.w(TAG, "Failed to create MonitoredPackage for pkg=" + packageNames.get(i)); + } + } + + if (packages.isEmpty()) { + return; + } + + // Sync before we add the new packages to the observers. This will #pruneObservers, + // causing any elapsed time to be deducted from all existing packages before we add new + // packages. This maintains the invariant that the elapsed time for ALL (new and existing) + // packages is the same. + mLongTaskHandler.post(() -> { + syncState("observing new packages"); + + synchronized (mLock) { + ObserverInternal oldObserver = mAllObservers.get(observer.getName()); + if (oldObserver == null) { + Slog.d(TAG, observer.getName() + " started monitoring health " + + "of packages " + packageNames); + mAllObservers.put(observer.getName(), + new ObserverInternal(observer.getName(), packages)); + } else { + Slog.d(TAG, observer.getName() + " added the following " + + "packages to monitor " + packageNames); + oldObserver.updatePackagesLocked(packages); + } + } + + // Register observer in case not already registered + registerHealthObserver(observer); + + // Sync after we add the new packages to the observers. We may have received packges + // requiring an earlier schedule than we are currently scheduled for. + syncState("updated observers"); + }); + + } + + /** + * Unregisters {@code observer} from listening to package failure. + * Additionally, this stops observing any packages that may have previously been observed + * even from a previous boot. + */ + public void unregisterHealthObserver(PackageHealthObserver observer) { + mLongTaskHandler.post(() -> { + synchronized (mLock) { + mAllObservers.remove(observer.getName()); + } + syncState("unregistering observer: " + observer.getName()); + }); + } + + /** + * Called when a process fails due to a crash, ANR or explicit health check. + * + * <p>For each package contained in the process, one registered observer with the least user + * impact will be notified for mitigation. + * + * <p>This method could be called frequently if there is a severe problem on the device. + */ + public void onPackageFailure(List<VersionedPackage> packages, + @FailureReasons int failureReason) { + if (packages == null) { + Slog.w(TAG, "Could not resolve a list of failing packages"); + return; + } + mLongTaskHandler.post(() -> { + synchronized (mLock) { + if (mAllObservers.isEmpty()) { + return; + } + boolean requiresImmediateAction = (failureReason == FAILURE_REASON_NATIVE_CRASH + || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK); + if (requiresImmediateAction) { + handleFailureImmediately(packages, failureReason); + } else { + for (int pIndex = 0; pIndex < packages.size(); pIndex++) { + VersionedPackage versionedPackage = packages.get(pIndex); + // Observer that will receive failure for versionedPackage + PackageHealthObserver currentObserverToNotify = null; + int currentObserverImpact = Integer.MAX_VALUE; + MonitoredPackage currentMonitoredPackage = null; + + // Find observer with least user impact + for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { + ObserverInternal observer = mAllObservers.valueAt(oIndex); + PackageHealthObserver registeredObserver = observer.registeredObserver; + if (registeredObserver != null + && observer.onPackageFailureLocked( + versionedPackage.getPackageName())) { + MonitoredPackage p = observer.getMonitoredPackage( + versionedPackage.getPackageName()); + int mitigationCount = 1; + if (p != null) { + mitigationCount = p.getMitigationCountLocked() + 1; + } + int impact = registeredObserver.onHealthCheckFailed( + versionedPackage, failureReason, mitigationCount); + if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 + && impact < currentObserverImpact) { + currentObserverToNotify = registeredObserver; + currentObserverImpact = impact; + currentMonitoredPackage = p; + } + } + } + + // Execute action with least user impact + if (currentObserverToNotify != null) { + int mitigationCount = 1; + if (currentMonitoredPackage != null) { + currentMonitoredPackage.noteMitigationCallLocked(); + mitigationCount = + currentMonitoredPackage.getMitigationCountLocked(); + } + if (Flags.recoverabilityDetection()) { + maybeExecute(currentObserverToNotify, versionedPackage, + failureReason, currentObserverImpact, mitigationCount); + } else { + currentObserverToNotify.execute(versionedPackage, + failureReason, mitigationCount); + } + } + } + } + } + }); + } + + /** + * For native crashes or explicit health check failures, call directly into each observer to + * mitigate the error without going through failure threshold logic. + */ + private void handleFailureImmediately(List<VersionedPackage> packages, + @FailureReasons int failureReason) { + VersionedPackage failingPackage = packages.size() > 0 ? packages.get(0) : null; + PackageHealthObserver currentObserverToNotify = null; + int currentObserverImpact = Integer.MAX_VALUE; + for (ObserverInternal observer: mAllObservers.values()) { + PackageHealthObserver registeredObserver = observer.registeredObserver; + if (registeredObserver != null) { + int impact = registeredObserver.onHealthCheckFailed( + failingPackage, failureReason, 1); + if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 + && impact < currentObserverImpact) { + currentObserverToNotify = registeredObserver; + currentObserverImpact = impact; + } + } + } + if (currentObserverToNotify != null) { + if (Flags.recoverabilityDetection()) { + maybeExecute(currentObserverToNotify, failingPackage, failureReason, + currentObserverImpact, /*mitigationCount=*/ 1); + } else { + currentObserverToNotify.execute(failingPackage, failureReason, 1); + } + } + } + + private void maybeExecute(PackageHealthObserver currentObserverToNotify, + VersionedPackage versionedPackage, + @FailureReasons int failureReason, + int currentObserverImpact, + int mitigationCount) { + if (currentObserverImpact < getUserImpactLevelLimit()) { + currentObserverToNotify.execute(versionedPackage, failureReason, mitigationCount); + } + } + + + /** + * Called when the system server boots. If the system server is detected to be in a boot loop, + * query each observer and perform the mitigation action with the lowest user impact. + * + * Note: PackageWatchdog considers system_server restart loop as bootloop. Full reboots + * are not counted in bootloop. + */ + @SuppressWarnings("GuardedBy") + public void noteBoot() { + synchronized (mLock) { + // if boot count has reached threshold, start mitigation. + // We wait until threshold number of restarts only for the first time. Perform + // mitigations for every restart after that. + boolean mitigate = mBootThreshold.incrementAndTest(); + if (mitigate) { + if (!Flags.recoverabilityDetection()) { + mBootThreshold.reset(); + } + int mitigationCount = mBootThreshold.getMitigationCount() + 1; + PackageHealthObserver currentObserverToNotify = null; + ObserverInternal currentObserverInternal = null; + int currentObserverImpact = Integer.MAX_VALUE; + for (int i = 0; i < mAllObservers.size(); i++) { + final ObserverInternal observer = mAllObservers.valueAt(i); + PackageHealthObserver registeredObserver = observer.registeredObserver; + if (registeredObserver != null) { + int impact = Flags.recoverabilityDetection() + ? registeredObserver.onBootLoop( + observer.getBootMitigationCount() + 1) + : registeredObserver.onBootLoop(mitigationCount); + if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 + && impact < currentObserverImpact) { + currentObserverToNotify = registeredObserver; + currentObserverInternal = observer; + currentObserverImpact = impact; + } + } + } + if (currentObserverToNotify != null) { + if (Flags.recoverabilityDetection()) { + int currentObserverMitigationCount = + currentObserverInternal.getBootMitigationCount() + 1; + currentObserverInternal.setBootMitigationCount( + currentObserverMitigationCount); + saveAllObserversBootMitigationCountToMetadata(METADATA_FILE); + currentObserverToNotify.executeBootLoopMitigation( + currentObserverMitigationCount); + } else { + mBootThreshold.setMitigationCount(mitigationCount); + mBootThreshold.saveMitigationCountToMetadata(); + currentObserverToNotify.executeBootLoopMitigation(mitigationCount); + } + } + } + } + } + + // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also + // avoid holding lock? + // This currently adds about 7ms extra to shutdown thread + /** Writes the package information to file during shutdown. */ + public void writeNow() { + synchronized (mLock) { + // Must only run synchronous tasks as this runs on the ShutdownThread and no other + // thread is guaranteed to run during shutdown. + if (!mAllObservers.isEmpty()) { + mLongTaskHandler.removeCallbacks(mSaveToFile); + pruneObserversLocked(); + saveToFile(); + Slog.i(TAG, "Last write to update package durations"); + } + } + } + + /** + * Enables or disables explicit health checks. + * <p> If explicit health checks are enabled, the health check service is started. + * <p> If explicit health checks are disabled, pending explicit health check requests are + * passed and the health check service is stopped. + */ + private void setExplicitHealthCheckEnabled(boolean enabled) { + synchronized (mLock) { + mIsHealthCheckEnabled = enabled; + mHealthCheckController.setEnabled(enabled); + mSyncRequired = true; + // Prune to update internal state whenever health check is enabled/disabled + syncState("health check state " + (enabled ? "enabled" : "disabled")); + } + } + + /** + * This method should be only called on mShortTaskHandler, since it modifies + * {@link #mNumberOfNativeCrashPollsRemaining}. + */ + private void checkAndMitigateNativeCrashes() { + mNumberOfNativeCrashPollsRemaining--; + // Check if native watchdog reported a crash + if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) { + // We rollback all available low impact rollbacks when crash is unattributable + onPackageFailure(Collections.EMPTY_LIST, FAILURE_REASON_NATIVE_CRASH); + // we stop polling after an attempt to execute rollback, regardless of whether the + // attempt succeeds or not + } else { + if (mNumberOfNativeCrashPollsRemaining > 0) { + mShortTaskHandler.postDelayed(() -> checkAndMitigateNativeCrashes(), + NATIVE_CRASH_POLLING_INTERVAL_MILLIS); + } + } + } + + /** + * Since this method can eventually trigger a rollback, it should be called + * only once boot has completed {@code onBootCompleted} and not earlier, because the install + * session must be entirely completed before we try to rollback. + */ + public void scheduleCheckAndMitigateNativeCrashes() { + Slog.i(TAG, "Scheduling " + mNumberOfNativeCrashPollsRemaining + " polls to check " + + "and mitigate native crashes"); + mShortTaskHandler.post(()->checkAndMitigateNativeCrashes()); + } + + private int getUserImpactLevelLimit() { + return SystemProperties.getInt(MAJOR_USER_IMPACT_LEVEL_THRESHOLD, + DEFAULT_MAJOR_USER_IMPACT_LEVEL_THRESHOLD); + } + + /** Possible severity values of the user impact of a {@link PackageHealthObserver#execute}. */ + @Retention(SOURCE) + @IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_LEVEL_0, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_10, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_20, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_30, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_50, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_70, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_71, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_75, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_80, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_90, + PackageHealthObserverImpact.USER_IMPACT_LEVEL_100}) + public @interface PackageHealthObserverImpact { + /** No action to take. */ + int USER_IMPACT_LEVEL_0 = 0; + /* Action has low user impact, user of a device will barely notice. */ + int USER_IMPACT_LEVEL_10 = 10; + /* Actions having medium user impact, user of a device will likely notice. */ + int USER_IMPACT_LEVEL_20 = 20; + int USER_IMPACT_LEVEL_30 = 30; + int USER_IMPACT_LEVEL_50 = 50; + int USER_IMPACT_LEVEL_70 = 70; + /* Action has high user impact, a last resort, user of a device will be very frustrated. */ + int USER_IMPACT_LEVEL_71 = 71; + int USER_IMPACT_LEVEL_75 = 75; + int USER_IMPACT_LEVEL_80 = 80; + int USER_IMPACT_LEVEL_90 = 90; + int USER_IMPACT_LEVEL_100 = 100; + } + + /** Register instances of this interface to receive notifications on package failure. */ + public interface PackageHealthObserver { + /** + * Called when health check fails for the {@code versionedPackage}. + * + * @param versionedPackage the package that is failing. This may be null if a native + * service is crashing. + * @param failureReason the type of failure that is occurring. + * @param mitigationCount the number of times mitigation has been called for this package + * (including this time). + * + * + * @return any one of {@link PackageHealthObserverImpact} to express the impact + * to the user on {@link #execute} + */ + @PackageHealthObserverImpact int onHealthCheckFailed( + @Nullable VersionedPackage versionedPackage, + @FailureReasons int failureReason, + int mitigationCount); + + /** + * Executes mitigation for {@link #onHealthCheckFailed}. + * + * @param versionedPackage the package that is failing. This may be null if a native + * service is crashing. + * @param failureReason the type of failure that is occurring. + * @param mitigationCount the number of times mitigation has been called for this package + * (including this time). + * @return {@code true} if action was executed successfully, {@code false} otherwise + */ + boolean execute(@Nullable VersionedPackage versionedPackage, + @FailureReasons int failureReason, int mitigationCount); + + + /** + * Called when the system server has booted several times within a window of time, defined + * by {@link #mBootThreshold} + * + * @param mitigationCount the number of times mitigation has been attempted for this + * boot loop (including this time). + */ + default @PackageHealthObserverImpact int onBootLoop(int mitigationCount) { + return PackageHealthObserverImpact.USER_IMPACT_LEVEL_0; + } + + /** + * Executes mitigation for {@link #onBootLoop} + * @param mitigationCount the number of times mitigation has been attempted for this + * boot loop (including this time). + */ + default boolean executeBootLoopMitigation(int mitigationCount) { + return false; + } + + // TODO(b/120598832): Ensure uniqueness? + /** + * Identifier for the observer, should not change across device updates otherwise the + * watchdog may drop observing packages with the old name. + */ + String getName(); + + /** + * An observer will not be pruned if this is set, even if the observer is not explicitly + * monitoring any packages. + */ + default boolean isPersistent() { + return false; + } + + /** + * Returns {@code true} if this observer wishes to observe the given package, {@code false} + * otherwise + * + * <p> A persistent observer may choose to start observing certain failing packages, even if + * it has not explicitly asked to watch the package with {@link #startObservingHealth}. + */ + default boolean mayObservePackage(String packageName) { + return false; + } + } + + @VisibleForTesting + long getTriggerFailureCount() { + synchronized (mLock) { + return mTriggerFailureCount; + } + } + + @VisibleForTesting + long getTriggerFailureDurationMs() { + synchronized (mLock) { + return mTriggerFailureDurationMs; + } + } + + /** + * Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}. + */ + private void syncRequestsAsync() { + mShortTaskHandler.removeCallbacks(mSyncRequests); + mShortTaskHandler.post(mSyncRequests); + } + + /** + * Syncs health check requests with the {@link ExplicitHealthCheckController}. + * Calls to this must be serialized. + * + * @see #syncRequestsAsync + */ + private void syncRequests() { + boolean syncRequired = false; + synchronized (mLock) { + if (mIsPackagesReady) { + Set<String> packages = getPackagesPendingHealthChecksLocked(); + if (mSyncRequired || !packages.equals(mRequestedHealthCheckPackages) + || packages.isEmpty()) { + syncRequired = true; + mRequestedHealthCheckPackages = packages; + } + } // else, we will sync requests when packages become ready + } + + // Call outside lock to avoid holding lock when calling into the controller. + if (syncRequired) { + Slog.i(TAG, "Syncing health check requests for packages: " + + mRequestedHealthCheckPackages); + mHealthCheckController.syncRequests(mRequestedHealthCheckPackages); + mSyncRequired = false; + } + } + + /** + * Updates the observers monitoring {@code packageName} that explicit health check has passed. + * + * <p> This update is strictly for registered observers at the time of the call + * Observers that register after this signal will have no knowledge of prior signals and will + * effectively behave as if the explicit health check hasn't passed for {@code packageName}. + * + * <p> {@code packageName} can still be considered failed if reported by + * {@link #onPackageFailureLocked} before the package expires. + * + * <p> Triggered by components outside the system server when they are fully functional after an + * update. + */ + private void onHealthCheckPassed(String packageName) { + Slog.i(TAG, "Health check passed for package: " + packageName); + boolean isStateChanged = false; + + synchronized (mLock) { + for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) { + ObserverInternal observer = mAllObservers.valueAt(observerIdx); + MonitoredPackage monitoredPackage = observer.getMonitoredPackage(packageName); + + if (monitoredPackage != null) { + int oldState = monitoredPackage.getHealthCheckStateLocked(); + int newState = monitoredPackage.tryPassHealthCheckLocked(); + isStateChanged |= oldState != newState; + } + } + } + + if (isStateChanged) { + syncState("health check passed for " + packageName); + } + } + + private void onSupportedPackages(List<PackageConfig> supportedPackages) { + boolean isStateChanged = false; + + Map<String, Long> supportedPackageTimeouts = new ArrayMap<>(); + Iterator<PackageConfig> it = supportedPackages.iterator(); + while (it.hasNext()) { + PackageConfig info = it.next(); + supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis()); + } + + synchronized (mLock) { + Slog.d(TAG, "Received supported packages " + supportedPackages); + Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); + while (oit.hasNext()) { + Iterator<MonitoredPackage> pit = oit.next().getMonitoredPackages() + .values().iterator(); + while (pit.hasNext()) { + MonitoredPackage monitoredPackage = pit.next(); + String packageName = monitoredPackage.getName(); + int oldState = monitoredPackage.getHealthCheckStateLocked(); + int newState; + + if (supportedPackageTimeouts.containsKey(packageName)) { + // Supported packages become ACTIVE if currently INACTIVE + newState = monitoredPackage.setHealthCheckActiveLocked( + supportedPackageTimeouts.get(packageName)); + } else { + // Unsupported packages are marked as PASSED unless already FAILED + newState = monitoredPackage.tryPassHealthCheckLocked(); + } + isStateChanged |= oldState != newState; + } + } + } + + if (isStateChanged) { + syncState("updated health check supported packages " + supportedPackages); + } + } + + private void onSyncRequestNotified() { + synchronized (mLock) { + mSyncRequired = true; + syncRequestsAsync(); + } + } + + @GuardedBy("mLock") + private Set<String> getPackagesPendingHealthChecksLocked() { + Set<String> packages = new ArraySet<>(); + Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); + while (oit.hasNext()) { + ObserverInternal observer = oit.next(); + Iterator<MonitoredPackage> pit = + observer.getMonitoredPackages().values().iterator(); + while (pit.hasNext()) { + MonitoredPackage monitoredPackage = pit.next(); + String packageName = monitoredPackage.getName(); + if (monitoredPackage.isPendingHealthChecksLocked()) { + packages.add(packageName); + } + } + } + return packages; + } + + /** + * Syncs the state of the observers. + * + * <p> Prunes all observers, saves new state to disk, syncs health check requests with the + * health check service and schedules the next state sync. + */ + private void syncState(String reason) { + synchronized (mLock) { + Slog.i(TAG, "Syncing state, reason: " + reason); + pruneObserversLocked(); + + saveToFileAsync(); + syncRequestsAsync(); + + // Done syncing state, schedule the next state sync + scheduleNextSyncStateLocked(); + } + } + + private void syncStateWithScheduledReason() { + syncState("scheduled"); + } + + @GuardedBy("mLock") + private void scheduleNextSyncStateLocked() { + long durationMs = getNextStateSyncMillisLocked(); + mShortTaskHandler.removeCallbacks(mSyncStateWithScheduledReason); + if (durationMs == Long.MAX_VALUE) { + Slog.i(TAG, "Cancelling state sync, nothing to sync"); + mUptimeAtLastStateSync = 0; + } else { + mUptimeAtLastStateSync = mSystemClock.uptimeMillis(); + mShortTaskHandler.postDelayed(mSyncStateWithScheduledReason, durationMs); + } + } + + /** + * Returns the next duration in millis to sync the watchdog state. + * + * @returns Long#MAX_VALUE if there are no observed packages. + */ + @GuardedBy("mLock") + private long getNextStateSyncMillisLocked() { + long shortestDurationMs = Long.MAX_VALUE; + for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { + ArrayMap<String, MonitoredPackage> packages = mAllObservers.valueAt(oIndex) + .getMonitoredPackages(); + for (int pIndex = 0; pIndex < packages.size(); pIndex++) { + MonitoredPackage mp = packages.valueAt(pIndex); + long duration = mp.getShortestScheduleDurationMsLocked(); + if (duration < shortestDurationMs) { + shortestDurationMs = duration; + } + } + } + return shortestDurationMs; + } + + /** + * Removes {@code elapsedMs} milliseconds from all durations on monitored packages + * and updates other internal state. + */ + @GuardedBy("mLock") + private void pruneObserversLocked() { + long elapsedMs = mUptimeAtLastStateSync == 0 + ? 0 : mSystemClock.uptimeMillis() - mUptimeAtLastStateSync; + if (elapsedMs <= 0) { + Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms"); + return; + } + + Iterator<ObserverInternal> it = mAllObservers.values().iterator(); + while (it.hasNext()) { + ObserverInternal observer = it.next(); + Set<MonitoredPackage> failedPackages = + observer.prunePackagesLocked(elapsedMs); + if (!failedPackages.isEmpty()) { + onHealthCheckFailed(observer, failedPackages); + } + if (observer.getMonitoredPackages().isEmpty() && (observer.registeredObserver == null + || !observer.registeredObserver.isPersistent())) { + Slog.i(TAG, "Discarding observer " + observer.name + ". All packages expired"); + it.remove(); + } + } + } + + private void onHealthCheckFailed(ObserverInternal observer, + Set<MonitoredPackage> failedPackages) { + mLongTaskHandler.post(() -> { + synchronized (mLock) { + PackageHealthObserver registeredObserver = observer.registeredObserver; + if (registeredObserver != null) { + Iterator<MonitoredPackage> it = failedPackages.iterator(); + while (it.hasNext()) { + VersionedPackage versionedPkg = getVersionedPackage(it.next().getName()); + if (versionedPkg != null) { + Slog.i(TAG, + "Explicit health check failed for package " + versionedPkg); + registeredObserver.execute(versionedPkg, + PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK, 1); + } + } + } + } + }); + } + + /** + * Gets PackageInfo for the given package. Matches any user and apex. + * + * @throws PackageManager.NameNotFoundException if no such package is installed. + */ + private PackageInfo getPackageInfo(String packageName) + throws PackageManager.NameNotFoundException { + PackageManager pm = mContext.getPackageManager(); + try { + // The MATCH_ANY_USER flag doesn't mix well with the MATCH_APEX + // flag, so make two separate attempts to get the package info. + // We don't need both flags at the same time because we assume + // apex files are always installed for all users. + return pm.getPackageInfo(packageName, PackageManager.MATCH_ANY_USER); + } catch (PackageManager.NameNotFoundException e) { + return pm.getPackageInfo(packageName, PackageManager.MATCH_APEX); + } + } + + @Nullable + private VersionedPackage getVersionedPackage(String packageName) { + final PackageManager pm = mContext.getPackageManager(); + if (pm == null || TextUtils.isEmpty(packageName)) { + return null; + } + try { + final long versionCode = getPackageInfo(packageName).getLongVersionCode(); + return new VersionedPackage(packageName, versionCode); + } catch (PackageManager.NameNotFoundException e) { + return null; + } + } + + /** + * Loads mAllObservers from file. + * + * <p>Note that this is <b>not</b> thread safe and should only called be called + * from the constructor. + */ + private void loadFromFile() { + InputStream infile = null; + mAllObservers.clear(); + try { + infile = mPolicyFile.openRead(); + final TypedXmlPullParser parser = Xml.resolvePullParser(infile); + XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG); + int outerDepth = parser.getDepth(); + while (XmlUtils.nextElementWithin(parser, outerDepth)) { + ObserverInternal observer = ObserverInternal.read(parser, this); + if (observer != null) { + mAllObservers.put(observer.name, observer); + } + } + } catch (FileNotFoundException e) { + // Nothing to monitor + } catch (IOException | NumberFormatException | XmlPullParserException e) { + Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e); + mPolicyFile.delete(); + } finally { + IoUtils.closeQuietly(infile); + } + } + + private void onPropertyChanged(DeviceConfig.Properties properties) { + try { + updateConfigs(); + } catch (Exception ignore) { + Slog.w(TAG, "Failed to reload device config changes"); + } + } + + /** Adds a {@link DeviceConfig#OnPropertiesChangedListener}. */ + private void setPropertyChangedListenerLocked() { + DeviceConfig.addOnPropertiesChangedListener( + DeviceConfig.NAMESPACE_ROLLBACK, + mContext.getMainExecutor(), + mOnPropertyChangedListener); + } + + @VisibleForTesting + void removePropertyChangedListener() { + DeviceConfig.removeOnPropertiesChangedListener(mOnPropertyChangedListener); + } + + /** + * Health check is enabled or disabled after reading the flags + * from DeviceConfig. + */ + @VisibleForTesting + void updateConfigs() { + synchronized (mLock) { + mTriggerFailureCount = DeviceConfig.getInt( + DeviceConfig.NAMESPACE_ROLLBACK, + PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT, + DEFAULT_TRIGGER_FAILURE_COUNT); + if (mTriggerFailureCount <= 0) { + mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; + } + + mTriggerFailureDurationMs = DeviceConfig.getInt( + DeviceConfig.NAMESPACE_ROLLBACK, + PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS, + DEFAULT_TRIGGER_FAILURE_DURATION_MS); + if (mTriggerFailureDurationMs <= 0) { + mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; + } + + setExplicitHealthCheckEnabled(DeviceConfig.getBoolean( + DeviceConfig.NAMESPACE_ROLLBACK, + PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED, + DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED)); + } + } + + private void registerConnectivityModuleHealthListener() { + // TODO: have an internal method to trigger a rollback by reporting high severity errors, + // and rely on ActivityManager to inform the watchdog of severe network stack crashes + // instead of having this listener in parallel. + mConnectivityModuleConnector.registerHealthListener( + packageName -> { + final VersionedPackage pkg = getVersionedPackage(packageName); + if (pkg == null) { + Slog.wtf(TAG, "NetworkStack failed but could not find its package"); + return; + } + final List<VersionedPackage> pkgList = Collections.singletonList(pkg); + onPackageFailure(pkgList, FAILURE_REASON_EXPLICIT_HEALTH_CHECK); + }); + } + + /** + * Persists mAllObservers to file. Threshold information is ignored. + */ + private boolean saveToFile() { + Slog.i(TAG, "Saving observer state to file"); + synchronized (mLock) { + FileOutputStream stream; + try { + stream = mPolicyFile.startWrite(); + } catch (IOException e) { + Slog.w(TAG, "Cannot update monitored packages", e); + return false; + } + + try { + TypedXmlSerializer out = Xml.resolveSerializer(stream); + out.startDocument(null, true); + out.startTag(null, TAG_PACKAGE_WATCHDOG); + out.attributeInt(null, ATTR_VERSION, DB_VERSION); + for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { + mAllObservers.valueAt(oIndex).writeLocked(out); + } + out.endTag(null, TAG_PACKAGE_WATCHDOG); + out.endDocument(); + mPolicyFile.finishWrite(stream); + return true; + } catch (IOException e) { + Slog.w(TAG, "Failed to save monitored packages, restoring backup", e); + mPolicyFile.failWrite(stream); + return false; + } finally { + IoUtils.closeQuietly(stream); + } + } + } + + private void saveToFileAsync() { + if (!mLongTaskHandler.hasCallbacks(mSaveToFile)) { + mLongTaskHandler.post(mSaveToFile); + } + } + + /** Convert a {@code LongArrayQueue} to a String of comma-separated values. */ + public static String longArrayQueueToString(LongArrayQueue queue) { + if (queue.size() > 0) { + StringBuilder sb = new StringBuilder(); + sb.append(queue.get(0)); + for (int i = 1; i < queue.size(); i++) { + sb.append(","); + sb.append(queue.get(i)); + } + return sb.toString(); + } + return ""; + } + + /** Parse a comma-separated String of longs into a LongArrayQueue. */ + public static LongArrayQueue parseLongArrayQueue(String commaSeparatedValues) { + LongArrayQueue result = new LongArrayQueue(); + if (!TextUtils.isEmpty(commaSeparatedValues)) { + String[] values = commaSeparatedValues.split(","); + for (String value : values) { + result.addLast(Long.parseLong(value)); + } + } + return result; + } + + + /** Dump status of every observer in mAllObservers. */ + public void dump(IndentingPrintWriter pw) { + pw.println("Package Watchdog status"); + pw.increaseIndent(); + synchronized (mLock) { + for (String observerName : mAllObservers.keySet()) { + pw.println("Observer name: " + observerName); + pw.increaseIndent(); + ObserverInternal observerInternal = mAllObservers.get(observerName); + observerInternal.dump(pw); + pw.decreaseIndent(); + } + } + } + + @VisibleForTesting + @GuardedBy("mLock") + void registerObserverInternal(ObserverInternal observerInternal) { + mAllObservers.put(observerInternal.name, observerInternal); + } + + /** + * Represents an observer monitoring a set of packages along with the failure thresholds for + * each package. + * + * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing + * instances of this class. + */ + static class ObserverInternal { + public final String name; + @GuardedBy("mLock") + private final ArrayMap<String, MonitoredPackage> mPackages = new ArrayMap<>(); + @Nullable + @GuardedBy("mLock") + public PackageHealthObserver registeredObserver; + private int mMitigationCount; + + ObserverInternal(String name, List<MonitoredPackage> packages) { + this(name, packages, /*mitigationCount=*/ 0); + } + + ObserverInternal(String name, List<MonitoredPackage> packages, int mitigationCount) { + this.name = name; + updatePackagesLocked(packages); + this.mMitigationCount = mitigationCount; + } + + /** + * Writes important {@link MonitoredPackage} details for this observer to file. + * Does not persist any package failure thresholds. + */ + @GuardedBy("mLock") + public boolean writeLocked(TypedXmlSerializer out) { + try { + out.startTag(null, TAG_OBSERVER); + out.attribute(null, ATTR_NAME, name); + if (Flags.recoverabilityDetection()) { + out.attributeInt(null, ATTR_MITIGATION_COUNT, mMitigationCount); + } + for (int i = 0; i < mPackages.size(); i++) { + MonitoredPackage p = mPackages.valueAt(i); + p.writeLocked(out); + } + out.endTag(null, TAG_OBSERVER); + return true; + } catch (IOException e) { + Slog.w(TAG, "Cannot save observer", e); + return false; + } + } + + public int getBootMitigationCount() { + return mMitigationCount; + } + + public void setBootMitigationCount(int mitigationCount) { + mMitigationCount = mitigationCount; + } + + @GuardedBy("mLock") + public void updatePackagesLocked(List<MonitoredPackage> packages) { + for (int pIndex = 0; pIndex < packages.size(); pIndex++) { + MonitoredPackage p = packages.get(pIndex); + MonitoredPackage existingPackage = getMonitoredPackage(p.getName()); + if (existingPackage != null) { + existingPackage.updateHealthCheckDuration(p.mDurationMs); + } else { + putMonitoredPackage(p); + } + } + } + + /** + * Reduces the monitoring durations of all packages observed by this observer by + * {@code elapsedMs}. If any duration is less than 0, the package is removed from + * observation. If any health check duration is less than 0, the health check result + * is evaluated. + * + * @return a {@link Set} of packages that were removed from the observer without explicit + * health check passing, or an empty list if no package expired for which an explicit health + * check was still pending + */ + @GuardedBy("mLock") + private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) { + Set<MonitoredPackage> failedPackages = new ArraySet<>(); + Iterator<MonitoredPackage> it = mPackages.values().iterator(); + while (it.hasNext()) { + MonitoredPackage p = it.next(); + int oldState = p.getHealthCheckStateLocked(); + int newState = p.handleElapsedTimeLocked(elapsedMs); + if (oldState != HealthCheckState.FAILED + && newState == HealthCheckState.FAILED) { + Slog.i(TAG, "Package " + p.getName() + " failed health check"); + failedPackages.add(p); + } + if (p.isExpiredLocked()) { + it.remove(); + } + } + return failedPackages; + } + + /** + * Increments failure counts of {@code packageName}. + * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise + */ + @GuardedBy("mLock") + public boolean onPackageFailureLocked(String packageName) { + if (getMonitoredPackage(packageName) == null && registeredObserver.isPersistent() + && registeredObserver.mayObservePackage(packageName)) { + putMonitoredPackage(sPackageWatchdog.newMonitoredPackage( + packageName, DEFAULT_OBSERVING_DURATION_MS, false)); + } + MonitoredPackage p = getMonitoredPackage(packageName); + if (p != null) { + return p.onFailureLocked(); + } + return false; + } + + /** + * Returns the map of packages monitored by this observer. + * + * @return a mapping of package names to {@link MonitoredPackage} objects. + */ + @GuardedBy("mLock") + public ArrayMap<String, MonitoredPackage> getMonitoredPackages() { + return mPackages; + } + + /** + * Returns the {@link MonitoredPackage} associated with a given package name if the + * package is being monitored by this observer. + * + * @param packageName: the name of the package. + * @return the {@link MonitoredPackage} object associated with the package name if one + * exists, {@code null} otherwise. + */ + @GuardedBy("mLock") + @Nullable + public MonitoredPackage getMonitoredPackage(String packageName) { + return mPackages.get(packageName); + } + + /** + * Associates a {@link MonitoredPackage} with the observer. + * + * @param p: the {@link MonitoredPackage} to store. + */ + @GuardedBy("mLock") + public void putMonitoredPackage(MonitoredPackage p) { + mPackages.put(p.getName(), p); + } + + /** + * Returns one ObserverInternal from the {@code parser} and advances its state. + * + * <p>Note that this method is <b>not</b> thread safe. It should only be called from + * #loadFromFile which in turn is only called on construction of the + * singleton PackageWatchdog. + **/ + public static ObserverInternal read(TypedXmlPullParser parser, PackageWatchdog watchdog) { + String observerName = null; + int observerMitigationCount = 0; + if (TAG_OBSERVER.equals(parser.getName())) { + observerName = parser.getAttributeValue(null, ATTR_NAME); + if (TextUtils.isEmpty(observerName)) { + Slog.wtf(TAG, "Unable to read observer name"); + return null; + } + } + List<MonitoredPackage> packages = new ArrayList<>(); + int innerDepth = parser.getDepth(); + try { + if (Flags.recoverabilityDetection()) { + observerMitigationCount = parser.getAttributeInt(null, ATTR_MITIGATION_COUNT); + } + while (XmlUtils.nextElementWithin(parser, innerDepth)) { + if (TAG_PACKAGE.equals(parser.getName())) { + try { + MonitoredPackage pkg = watchdog.parseMonitoredPackage(parser); + if (pkg != null) { + packages.add(pkg); + } + } catch (NumberFormatException e) { + Slog.wtf(TAG, "Skipping package for observer " + observerName, e); + continue; + } + } + } + } catch (XmlPullParserException | IOException e) { + Slog.wtf(TAG, "Unable to read observer " + observerName, e); + return null; + } + if (packages.isEmpty()) { + return null; + } + return new ObserverInternal(observerName, packages, observerMitigationCount); + } + + /** Dumps information about this observer and the packages it watches. */ + public void dump(IndentingPrintWriter pw) { + boolean isPersistent = registeredObserver != null && registeredObserver.isPersistent(); + pw.println("Persistent: " + isPersistent); + for (String packageName : mPackages.keySet()) { + MonitoredPackage p = getMonitoredPackage(packageName); + pw.println(packageName + ": "); + pw.increaseIndent(); + pw.println("# Failures: " + p.mFailureHistory.size()); + pw.println("Monitoring duration remaining: " + p.mDurationMs + "ms"); + pw.println("Explicit health check duration: " + p.mHealthCheckDurationMs + "ms"); + pw.println("Health check state: " + p.toString(p.mHealthCheckState)); + pw.decreaseIndent(); + } + } + } + + @Retention(SOURCE) + @IntDef(value = { + HealthCheckState.ACTIVE, + HealthCheckState.INACTIVE, + HealthCheckState.PASSED, + HealthCheckState.FAILED}) + public @interface HealthCheckState { + // The package has not passed health check but has requested a health check + int ACTIVE = 0; + // The package has not passed health check and has not requested a health check + int INACTIVE = 1; + // The package has passed health check + int PASSED = 2; + // The package has failed health check + int FAILED = 3; + } + + MonitoredPackage newMonitoredPackage( + String name, long durationMs, boolean hasPassedHealthCheck) { + return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck, + new LongArrayQueue()); + } + + MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs, + boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls) { + return new MonitoredPackage(name, durationMs, healthCheckDurationMs, + hasPassedHealthCheck, mitigationCalls); + } + + MonitoredPackage parseMonitoredPackage(TypedXmlPullParser parser) + throws XmlPullParserException { + String packageName = parser.getAttributeValue(null, ATTR_NAME); + long duration = parser.getAttributeLong(null, ATTR_DURATION); + long healthCheckDuration = parser.getAttributeLong(null, + ATTR_EXPLICIT_HEALTH_CHECK_DURATION); + boolean hasPassedHealthCheck = parser.getAttributeBoolean(null, ATTR_PASSED_HEALTH_CHECK); + LongArrayQueue mitigationCalls = parseLongArrayQueue( + parser.getAttributeValue(null, ATTR_MITIGATION_CALLS)); + return newMonitoredPackage(packageName, + duration, healthCheckDuration, hasPassedHealthCheck, mitigationCalls); + } + + /** + * Represents a package and its health check state along with the time + * it should be monitored for. + * + * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing + * instances of this class. + */ + class MonitoredPackage { + private final String mPackageName; + // Times when package failures happen sorted in ascending order + @GuardedBy("mLock") + private final LongArrayQueue mFailureHistory = new LongArrayQueue(); + // Times when an observer was called to mitigate this package's failure. Sorted in + // ascending order. + @GuardedBy("mLock") + private final LongArrayQueue mMitigationCalls; + // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after + // methods that could change the health check state: handleElapsedTimeLocked and + // tryPassHealthCheckLocked + private int mHealthCheckState = HealthCheckState.INACTIVE; + // Whether an explicit health check has passed. + // This value in addition with mHealthCheckDurationMs determines the health check state + // of the package, see #getHealthCheckStateLocked + @GuardedBy("mLock") + private boolean mHasPassedHealthCheck; + // System uptime duration to monitor package. + @GuardedBy("mLock") + private long mDurationMs; + // System uptime duration to check the result of an explicit health check + // Initially, MAX_VALUE until we get a value from the health check service + // and request health checks. + // This value in addition with mHasPassedHealthCheck determines the health check state + // of the package, see #getHealthCheckStateLocked + @GuardedBy("mLock") + private long mHealthCheckDurationMs = Long.MAX_VALUE; + + MonitoredPackage(String packageName, long durationMs, + long healthCheckDurationMs, boolean hasPassedHealthCheck, + LongArrayQueue mitigationCalls) { + mPackageName = packageName; + mDurationMs = durationMs; + mHealthCheckDurationMs = healthCheckDurationMs; + mHasPassedHealthCheck = hasPassedHealthCheck; + mMitigationCalls = mitigationCalls; + updateHealthCheckStateLocked(); + } + + /** Writes the salient fields to disk using {@code out}. */ + @GuardedBy("mLock") + public void writeLocked(TypedXmlSerializer out) throws IOException { + out.startTag(null, TAG_PACKAGE); + out.attribute(null, ATTR_NAME, getName()); + out.attributeLong(null, ATTR_DURATION, mDurationMs); + out.attributeLong(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION, mHealthCheckDurationMs); + out.attributeBoolean(null, ATTR_PASSED_HEALTH_CHECK, mHasPassedHealthCheck); + LongArrayQueue normalizedCalls = normalizeMitigationCalls(); + out.attribute(null, ATTR_MITIGATION_CALLS, longArrayQueueToString(normalizedCalls)); + out.endTag(null, TAG_PACKAGE); + } + + /** + * Increment package failures or resets failure count depending on the last package failure. + * + * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise + */ + @GuardedBy("mLock") + public boolean onFailureLocked() { + // Sliding window algorithm: find out if there exists a window containing failures >= + // mTriggerFailureCount. + final long now = mSystemClock.uptimeMillis(); + mFailureHistory.addLast(now); + while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) { + // Prune values falling out of the window + mFailureHistory.removeFirst(); + } + boolean failed = mFailureHistory.size() >= mTriggerFailureCount; + if (failed) { + mFailureHistory.clear(); + } + return failed; + } + + /** + * Notes the timestamp of a mitigation call into the observer. + */ + @GuardedBy("mLock") + public void noteMitigationCallLocked() { + mMitigationCalls.addLast(mSystemClock.uptimeMillis()); + } + + /** + * Prunes any mitigation calls outside of the de-escalation window, and returns the + * number of calls that are in the window afterwards. + * + * @return the number of mitigation calls made in the de-escalation window. + */ + @GuardedBy("mLock") + public int getMitigationCountLocked() { + try { + final long now = mSystemClock.uptimeMillis(); + while (now - mMitigationCalls.peekFirst() > DEFAULT_DEESCALATION_WINDOW_MS) { + mMitigationCalls.removeFirst(); + } + } catch (NoSuchElementException ignore) { + } + + return mMitigationCalls.size(); + } + + /** + * Before writing to disk, make the mitigation call timestamps relative to the current + * system uptime. This is because they need to be relative to the uptime which will reset + * at the next boot. + * + * @return a LongArrayQueue of the mitigation calls relative to the current system uptime. + */ + @GuardedBy("mLock") + public LongArrayQueue normalizeMitigationCalls() { + LongArrayQueue normalized = new LongArrayQueue(); + final long now = mSystemClock.uptimeMillis(); + for (int i = 0; i < mMitigationCalls.size(); i++) { + normalized.addLast(mMitigationCalls.get(i) - now); + } + return normalized; + } + + /** + * Sets the initial health check duration. + * + * @return the new health check state + */ + @GuardedBy("mLock") + public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) { + if (initialHealthCheckDurationMs <= 0) { + Slog.wtf(TAG, "Cannot set non-positive health check duration " + + initialHealthCheckDurationMs + "ms for package " + getName() + + ". Using total duration " + mDurationMs + "ms instead"); + initialHealthCheckDurationMs = mDurationMs; + } + if (mHealthCheckState == HealthCheckState.INACTIVE) { + // Transitions to ACTIVE + mHealthCheckDurationMs = initialHealthCheckDurationMs; + } + return updateHealthCheckStateLocked(); + } + + /** + * Updates the monitoring durations of the package. + * + * @return the new health check state + */ + @GuardedBy("mLock") + public int handleElapsedTimeLocked(long elapsedMs) { + if (elapsedMs <= 0) { + Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + getName()); + return mHealthCheckState; + } + // Transitions to FAILED if now <= 0 and health check not passed + mDurationMs -= elapsedMs; + if (mHealthCheckState == HealthCheckState.ACTIVE) { + // We only update health check durations if we have #setHealthCheckActiveLocked + // This ensures we don't leave the INACTIVE state for an unexpected elapsed time + // Transitions to FAILED if now <= 0 and health check not passed + mHealthCheckDurationMs -= elapsedMs; + } + return updateHealthCheckStateLocked(); + } + + /** Explicitly update the monitoring duration of the package. */ + @GuardedBy("mLock") + public void updateHealthCheckDuration(long newDurationMs) { + mDurationMs = newDurationMs; + } + + /** + * Marks the health check as passed and transitions to {@link HealthCheckState.PASSED} + * if not yet {@link HealthCheckState.FAILED}. + * + * @return the new {@link HealthCheckState health check state} + */ + @GuardedBy("mLock") + @HealthCheckState + public int tryPassHealthCheckLocked() { + if (mHealthCheckState != HealthCheckState.FAILED) { + // FAILED is a final state so only pass if we haven't failed + // Transition to PASSED + mHasPassedHealthCheck = true; + } + return updateHealthCheckStateLocked(); + } + + /** Returns the monitored package name. */ + private String getName() { + return mPackageName; + } + + /** + * Returns the current {@link HealthCheckState health check state}. + */ + @GuardedBy("mLock") + @HealthCheckState + public int getHealthCheckStateLocked() { + return mHealthCheckState; + } + + /** + * Returns the shortest duration before the package should be scheduled for a prune. + * + * @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled + */ + @GuardedBy("mLock") + public long getShortestScheduleDurationMsLocked() { + // Consider health check duration only if #isPendingHealthChecksLocked is true + return Math.min(toPositive(mDurationMs), + isPendingHealthChecksLocked() + ? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE); + } + + /** + * Returns {@code true} if the total duration left to monitor the package is less than or + * equal to 0 {@code false} otherwise. + */ + @GuardedBy("mLock") + public boolean isExpiredLocked() { + return mDurationMs <= 0; + } + + /** + * Returns {@code true} if the package, {@link #getName} is expecting health check results + * {@code false} otherwise. + */ + @GuardedBy("mLock") + public boolean isPendingHealthChecksLocked() { + return mHealthCheckState == HealthCheckState.ACTIVE + || mHealthCheckState == HealthCheckState.INACTIVE; + } + + /** + * Updates the health check state based on {@link #mHasPassedHealthCheck} + * and {@link #mHealthCheckDurationMs}. + * + * @return the new {@link HealthCheckState health check state} + */ + @GuardedBy("mLock") + @HealthCheckState + private int updateHealthCheckStateLocked() { + int oldState = mHealthCheckState; + if (mHasPassedHealthCheck) { + // Set final state first to avoid ambiguity + mHealthCheckState = HealthCheckState.PASSED; + } else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) { + // Set final state first to avoid ambiguity + mHealthCheckState = HealthCheckState.FAILED; + } else if (mHealthCheckDurationMs == Long.MAX_VALUE) { + mHealthCheckState = HealthCheckState.INACTIVE; + } else { + mHealthCheckState = HealthCheckState.ACTIVE; + } + + if (oldState != mHealthCheckState) { + Slog.i(TAG, "Updated health check state for package " + getName() + ": " + + toString(oldState) + " -> " + toString(mHealthCheckState)); + } + return mHealthCheckState; + } + + /** Returns a {@link String} representation of the current health check state. */ + private String toString(@HealthCheckState int state) { + switch (state) { + case HealthCheckState.ACTIVE: + return "ACTIVE"; + case HealthCheckState.INACTIVE: + return "INACTIVE"; + case HealthCheckState.PASSED: + return "PASSED"; + case HealthCheckState.FAILED: + return "FAILED"; + default: + return "UNKNOWN"; + } + } + + /** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. */ + private long toPositive(long value) { + return value > 0 ? value : Long.MAX_VALUE; + } + + /** Compares the equality of this object with another {@link MonitoredPackage}. */ + @VisibleForTesting + boolean isEqualTo(MonitoredPackage pkg) { + return (getName().equals(pkg.getName())) + && mDurationMs == pkg.mDurationMs + && mHasPassedHealthCheck == pkg.mHasPassedHealthCheck + && mHealthCheckDurationMs == pkg.mHealthCheckDurationMs + && (mMitigationCalls.toString()).equals(pkg.mMitigationCalls.toString()); + } + } + + @GuardedBy("mLock") + @SuppressWarnings("GuardedBy") + void saveAllObserversBootMitigationCountToMetadata(String filePath) { + HashMap<String, Integer> bootMitigationCounts = new HashMap<>(); + for (int i = 0; i < mAllObservers.size(); i++) { + final ObserverInternal observer = mAllObservers.valueAt(i); + bootMitigationCounts.put(observer.name, observer.getBootMitigationCount()); + } + + try { + FileOutputStream fileStream = new FileOutputStream(new File(filePath)); + ObjectOutputStream objectStream = new ObjectOutputStream(fileStream); + objectStream.writeObject(bootMitigationCounts); + objectStream.flush(); + objectStream.close(); + fileStream.close(); + } catch (Exception e) { + Slog.i(TAG, "Could not save observers metadata to file: " + e); + } + } + + /** + * Handles the thresholding logic for system server boots. + */ + class BootThreshold { + + private final int mBootTriggerCount; + private final long mTriggerWindow; + + BootThreshold(int bootTriggerCount, long triggerWindow) { + this.mBootTriggerCount = bootTriggerCount; + this.mTriggerWindow = triggerWindow; + } + + public void reset() { + setStart(0); + setCount(0); + } + + protected int getCount() { + return CrashRecoveryProperties.rescueBootCount().orElse(0); + } + + protected void setCount(int count) { + CrashRecoveryProperties.rescueBootCount(count); + } + + public long getStart() { + return CrashRecoveryProperties.rescueBootStart().orElse(0L); + } + + public int getMitigationCount() { + return CrashRecoveryProperties.bootMitigationCount().orElse(0); + } + + public void setStart(long start) { + CrashRecoveryProperties.rescueBootStart(getStartTime(start)); + } + + public void setMitigationStart(long start) { + CrashRecoveryProperties.bootMitigationStart(getStartTime(start)); + } + + public long getMitigationStart() { + return CrashRecoveryProperties.bootMitigationStart().orElse(0L); + } + + public void setMitigationCount(int count) { + CrashRecoveryProperties.bootMitigationCount(count); + } + + private static long constrain(long amount, long low, long high) { + return amount < low ? low : (amount > high ? high : amount); + } + + public long getStartTime(long start) { + final long now = mSystemClock.uptimeMillis(); + return constrain(start, 0, now); + } + + public void saveMitigationCountToMetadata() { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(METADATA_FILE))) { + writer.write(String.valueOf(getMitigationCount())); + } catch (Exception e) { + Slog.e(TAG, "Could not save metadata to file: " + e); + } + } + + public void readMitigationCountFromMetadataIfNecessary() { + File bootPropsFile = new File(METADATA_FILE); + if (bootPropsFile.exists()) { + try (BufferedReader reader = new BufferedReader(new FileReader(METADATA_FILE))) { + String mitigationCount = reader.readLine(); + setMitigationCount(Integer.parseInt(mitigationCount)); + bootPropsFile.delete(); + } catch (Exception e) { + Slog.i(TAG, "Could not read metadata file: " + e); + } + } + } + + + /** Increments the boot counter, and returns whether the device is bootlooping. */ + @GuardedBy("mLock") + public boolean incrementAndTest() { + if (Flags.recoverabilityDetection()) { + readAllObserversBootMitigationCountIfNecessary(METADATA_FILE); + } else { + readMitigationCountFromMetadataIfNecessary(); + } + + final long now = mSystemClock.uptimeMillis(); + if (now - getStart() < 0) { + Slog.e(TAG, "Window was less than zero. Resetting start to current time."); + setStart(now); + setMitigationStart(now); + } + if (now - getMitigationStart() > DEFAULT_DEESCALATION_WINDOW_MS) { + setMitigationStart(now); + if (Flags.recoverabilityDetection()) { + resetAllObserversBootMitigationCount(); + } else { + setMitigationCount(0); + } + } + final long window = now - getStart(); + if (window >= mTriggerWindow) { + setCount(1); + setStart(now); + return false; + } else { + int count = getCount() + 1; + setCount(count); + EventLogTags.writeRescueNote(Process.ROOT_UID, count, window); + if (Flags.recoverabilityDetection()) { + // After a reboot (e.g. by WARM_REBOOT or mainline rollback) we apply + // mitigations without waiting for DEFAULT_BOOT_LOOP_TRIGGER_COUNT. + return (count >= mBootTriggerCount) + || (performedMitigationsDuringWindow() && count > 1); + } + return count >= mBootTriggerCount; + } + } + + @GuardedBy("mLock") + private boolean performedMitigationsDuringWindow() { + for (ObserverInternal observerInternal: mAllObservers.values()) { + if (observerInternal.getBootMitigationCount() > 0) { + return true; + } + } + return false; + } + + @GuardedBy("mLock") + private void resetAllObserversBootMitigationCount() { + for (int i = 0; i < mAllObservers.size(); i++) { + final ObserverInternal observer = mAllObservers.valueAt(i); + observer.setBootMitigationCount(0); + } + saveAllObserversBootMitigationCountToMetadata(METADATA_FILE); + } + + @GuardedBy("mLock") + @SuppressWarnings("GuardedBy") + void readAllObserversBootMitigationCountIfNecessary(String filePath) { + File metadataFile = new File(filePath); + if (metadataFile.exists()) { + try { + FileInputStream fileStream = new FileInputStream(metadataFile); + ObjectInputStream objectStream = new ObjectInputStream(fileStream); + HashMap<String, Integer> bootMitigationCounts = + (HashMap<String, Integer>) objectStream.readObject(); + objectStream.close(); + fileStream.close(); + + for (int i = 0; i < mAllObservers.size(); i++) { + final ObserverInternal observer = mAllObservers.valueAt(i); + if (bootMitigationCounts.containsKey(observer.name)) { + observer.setBootMitigationCount( + bootMitigationCounts.get(observer.name)); + } + } + } catch (Exception e) { + Slog.i(TAG, "Could not read observer metadata file: " + e); + } + } + } + + } +} |