summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTreehugger Robot <treehugger-gerrit@google.com>2022-02-25 23:39:32 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2022-02-25 23:39:32 +0000
commitbd236aa3d42264df72532daa3ef9b80692a35d69 (patch)
tree8f7df7d5319551e86104a6b1e7278f4eff2d94ab
parent630bfc5473f231142505436f40ebd870ae8bca04 (diff)
parent9fc78a40b9bae9201e43c2ffda5ebfc85b0dc0d3 (diff)
downloadnative-bd236aa3d42264df72532daa3ef9b80692a35d69.tar.gz
Merge "Add a timeout for all installd operations." am: 9fc78a40b9
Original change: https://android-review.googlesource.com/c/platform/frameworks/native/+/1993670 Change-Id: I101828c3e984a1b3818457604183914233544f9d
-rw-r--r--cmds/installd/dexopt.cpp112
-rw-r--r--cmds/installd/tests/installd_dexopt_test.cpp6
-rw-r--r--cmds/installd/tests/installd_utils_test.cpp42
-rw-r--r--cmds/installd/utils.cpp78
-rw-r--r--cmds/installd/utils.h7
-rw-r--r--cmds/installd/view_compiler.cpp20
6 files changed, 198 insertions, 67 deletions
diff --git a/cmds/installd/dexopt.cpp b/cmds/installd/dexopt.cpp
index c796da6c46..9647865e4f 100644
--- a/cmds/installd/dexopt.cpp
+++ b/cmds/installd/dexopt.cpp
@@ -74,10 +74,19 @@ using android::base::ReadFdToString;
using android::base::ReadFully;
using android::base::StringPrintf;
using android::base::WriteFully;
+using android::base::borrowed_fd;
using android::base::unique_fd;
namespace {
+// Timeout for short operations, such as merging profiles.
+constexpr int kShortTimeoutMs = 60000; // 1 minute.
+
+// Timeout for long operations, such as compilation. This should be smaller than the Package Manager
+// watchdog (PackageManagerService.WATCHDOG_TIMEOUT, 10 minutes), so that the operation will be
+// aborted before that watchdog would take down the system server.
+constexpr int kLongTimeoutMs = 570000; // 9.5 minutes.
+
class DexOptStatus {
public:
// Check if dexopt is cancelled and fork if it is not cancelled.
@@ -486,6 +495,25 @@ static void open_profile_files(uid_t uid, const std::string& package_name,
}
}
+// Cleans up an output file specified by a file descriptor. This function should be called whenever
+// a subprocess that modifies a system-managed file crashes.
+// If the subprocess crashes while it's writing to the file, the file is likely corrupted, so we
+// should remove it.
+// If the subprocess times out and is killed while it's acquiring a flock on the file, there is
+// probably a deadlock, so it's also good to remove the file so that later operations won't
+// encounter the same problem. It's safe to do so because the process that is holding the flock will
+// still have access to the file until the file descriptor is closed.
+// Note that we can't do `clear_reference_profile` here even if the fd points to a reference profile
+// because that also requires a flock and is therefore likely to be stuck in the second case.
+static bool cleanup_output_fd(int fd) {
+ std::string path;
+ bool ret = remove_file_at_fd(fd, &path);
+ if (ret) {
+ LOG(INFO) << "Removed file at path " << path;
+ }
+ return ret;
+}
+
static constexpr int PROFMAN_BIN_RETURN_CODE_SUCCESS = 0;
static constexpr int PROFMAN_BIN_RETURN_CODE_COMPILE = 1;
static constexpr int PROFMAN_BIN_RETURN_CODE_SKIP_COMPILATION_NOT_ENOUGH_DELTA = 2;
@@ -497,13 +525,14 @@ static constexpr int PROFMAN_BIN_RETURN_CODE_SKIP_COMPILATION_EMPTY_PROFILES = 7
class RunProfman : public ExecVHelper {
public:
- void SetupArgs(const std::vector<unique_fd>& profile_fds,
- const unique_fd& reference_profile_fd,
- const std::vector<unique_fd>& apk_fds,
- const std::vector<std::string>& dex_locations,
- bool copy_and_update,
- bool for_snapshot,
- bool for_boot_image) {
+ template <typename T, typename U>
+ void SetupArgs(const std::vector<T>& profile_fds,
+ const unique_fd& reference_profile_fd,
+ const std::vector<U>& apk_fds,
+ const std::vector<std::string>& dex_locations,
+ bool copy_and_update,
+ bool for_snapshot,
+ bool for_boot_image) {
// TODO(calin): Assume for now we run in the bg compile job (which is in
// most of the invocation). With the current data flow, is not very easy or
@@ -519,11 +548,11 @@ class RunProfman : public ExecVHelper {
AddArg("--reference-profile-file-fd=" + std::to_string(reference_profile_fd.get()));
}
- for (const unique_fd& fd : profile_fds) {
+ for (const T& fd : profile_fds) {
AddArg("--profile-file-fd=" + std::to_string(fd.get()));
}
- for (const unique_fd& fd : apk_fds) {
+ for (const U& fd : apk_fds) {
AddArg("--apk-fd=" + std::to_string(fd.get()));
}
@@ -582,20 +611,14 @@ class RunProfman : public ExecVHelper {
for_boot_image);
}
- void SetupCopyAndUpdate(unique_fd&& profile_fd,
- unique_fd&& reference_profile_fd,
- unique_fd&& apk_fd,
+ void SetupCopyAndUpdate(const unique_fd& profile_fd,
+ const unique_fd& reference_profile_fd,
+ const unique_fd& apk_fd,
const std::string& dex_location) {
- // The fds need to stay open longer than the scope of the function, so put them into a local
- // variable vector.
- profiles_fd_.push_back(std::move(profile_fd));
- apk_fds_.push_back(std::move(apk_fd));
- reference_profile_fd_ = std::move(reference_profile_fd);
- std::vector<std::string> dex_locations = {dex_location};
- SetupArgs(profiles_fd_,
- reference_profile_fd_,
- apk_fds_,
- dex_locations,
+ SetupArgs(std::vector<borrowed_fd>{profile_fd},
+ reference_profile_fd,
+ std::vector<borrowed_fd>{apk_fd},
+ {dex_location},
/*copy_and_update=*/true,
/*for_snapshot*/false,
/*for_boot_image*/false);
@@ -621,11 +644,6 @@ class RunProfman : public ExecVHelper {
void Exec() {
ExecVHelper::Exec(DexoptReturnCodes::kProfmanExec);
}
-
- private:
- unique_fd reference_profile_fd_;
- std::vector<unique_fd> profiles_fd_;
- std::vector<unique_fd> apk_fds_;
};
static int analyze_profiles(uid_t uid, const std::string& package_name,
@@ -657,13 +675,14 @@ static int analyze_profiles(uid_t uid, const std::string& package_name,
profman_merge.Exec();
}
/* parent */
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
bool need_to_compile = false;
bool empty_profiles = false;
bool should_clear_current_profiles = false;
bool should_clear_reference_profile = false;
if (!WIFEXITED(return_code)) {
LOG(WARNING) << "profman failed for location " << location << ": " << return_code;
+ cleanup_output_fd(reference_profile_fd.get());
} else {
return_code = WEXITSTATUS(return_code);
switch (return_code) {
@@ -797,10 +816,10 @@ bool dump_profiles(int32_t uid, const std::string& pkgname, const std::string& p
profman_dump.Exec();
}
/* parent */
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
if (!WIFEXITED(return_code)) {
- LOG(WARNING) << "profman failed for package " << pkgname << ": "
- << return_code;
+ LOG(WARNING) << "profman failed for package " << pkgname << ": " << return_code;
+ cleanup_output_fd(output_fd.get());
return false;
}
return true;
@@ -871,7 +890,11 @@ bool copy_system_profile(const std::string& system_profile,
_exit(0);
}
/* parent */
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
+ if (!WIFEXITED(return_code)) {
+ cleanup_output_fd(out_fd.get());
+ return false;
+ }
return return_code == 0;
}
@@ -1521,7 +1544,7 @@ static bool get_class_loader_context_dex_paths(const char* class_loader_context,
}
pipe_read.reset();
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
if (!WIFEXITED(return_code)) {
PLOG(ERROR) << "Error waiting for child dexoptanalyzer process";
return false;
@@ -1695,7 +1718,7 @@ static SecondaryDexOptProcessResult process_secondary_dex_dexopt(const std::stri
}
/* parent */
- int result = wait_child(pid);
+ int result = wait_child_with_timeout(pid, kShortTimeoutMs);
cancelled = dexopt_status_->check_if_killed_and_remove_dexopt_pid(pid);
if (!WIFEXITED(result)) {
if ((WTERMSIG(result) == SIGKILL) && cancelled) {
@@ -1954,7 +1977,7 @@ int dexopt(const char* dex_path, uid_t uid, const char* pkgname, const char* ins
runner.Exec(DexoptReturnCodes::kDex2oatExec);
} else {
- int res = wait_child(pid);
+ int res = wait_child_with_timeout(pid, kLongTimeoutMs);
bool cancelled = dexopt_status_->check_if_killed_and_remove_dexopt_pid(pid);
if (res == 0) {
LOG(VERBOSE) << "DexInv: --- END '" << dex_path << "' (success) ---";
@@ -2143,7 +2166,7 @@ bool reconcile_secondary_dex_file(const std::string& dex_path,
_exit(result ? kReconcileSecondaryDexCleanedUp : kReconcileSecondaryDexAccessIOError);
}
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
if (!WIFEXITED(return_code)) {
LOG(WARNING) << "reconcile dex failed for location " << dex_path << ": " << return_code;
} else {
@@ -2261,7 +2284,7 @@ bool hash_secondary_dex_file(const std::string& dex_path, const std::string& pkg
if (!ReadFully(pipe_read, out_secondary_dex_hash->data(), out_secondary_dex_hash->size())) {
out_secondary_dex_hash->clear();
}
- return wait_child(pid) == 0;
+ return wait_child_with_timeout(pid, kShortTimeoutMs) == 0;
}
// Helper for move_ab, so that we can have common failure-case cleanup.
@@ -2591,9 +2614,10 @@ static bool create_app_profile_snapshot(int32_t app_id,
}
/* parent */
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
if (!WIFEXITED(return_code)) {
LOG(WARNING) << "profman failed for " << package_name << ":" << profile_name;
+ cleanup_output_fd(snapshot_fd.get());
return false;
}
@@ -2700,10 +2724,11 @@ static bool create_boot_image_profile_snapshot(const std::string& package_name,
}
/* parent */
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
if (!WIFEXITED(return_code)) {
PLOG(WARNING) << "profman failed for " << package_name << ":" << profile_name;
+ cleanup_output_fd(snapshot_fd.get());
return false;
}
@@ -2774,9 +2799,9 @@ bool prepare_app_profile(const std::string& package_name,
}
RunProfman args;
- args.SetupCopyAndUpdate(std::move(dex_metadata_fd),
- std::move(ref_profile_fd),
- std::move(apk_fd),
+ args.SetupCopyAndUpdate(dex_metadata_fd,
+ ref_profile_fd,
+ apk_fd,
code_path);
pid_t pid = fork();
if (pid == 0) {
@@ -2789,9 +2814,10 @@ bool prepare_app_profile(const std::string& package_name,
}
/* parent */
- int return_code = wait_child(pid);
+ int return_code = wait_child_with_timeout(pid, kShortTimeoutMs);
if (!WIFEXITED(return_code)) {
PLOG(WARNING) << "profman failed for " << package_name << ":" << profile_name;
+ cleanup_output_fd(ref_profile_fd.get());
return false;
}
return true;
diff --git a/cmds/installd/tests/installd_dexopt_test.cpp b/cmds/installd/tests/installd_dexopt_test.cpp
index a937436f9c..aeba451a48 100644
--- a/cmds/installd/tests/installd_dexopt_test.cpp
+++ b/cmds/installd/tests/installd_dexopt_test.cpp
@@ -50,6 +50,8 @@ using android::base::unique_fd;
namespace android {
namespace installd {
+constexpr int kTimeoutMs = 60000;
+
// TODO(calin): try to dedup this code.
#if defined(__arm__)
static const std::string kRuntimeIsa = "arm";
@@ -1072,7 +1074,7 @@ class ProfileTest : public DexoptTest {
_exit(0);
}
/* parent */
- ASSERT_TRUE(WIFEXITED(wait_child(pid)));
+ ASSERT_TRUE(WIFEXITED(wait_child_with_timeout(pid, kTimeoutMs)));
}
void mergePackageProfiles(const std::string& package_name,
@@ -1376,7 +1378,7 @@ class BootProfileTest : public ProfileTest {
_exit(0);
}
/* parent */
- ASSERT_TRUE(WIFEXITED(wait_child(pid)));
+ ASSERT_TRUE(WIFEXITED(wait_child_with_timeout(pid, kTimeoutMs)));
}
protected:
std::string intial_android_profiles_dir;
diff --git a/cmds/installd/tests/installd_utils_test.cpp b/cmds/installd/tests/installd_utils_test.cpp
index 5e8c79e9fa..514b88135a 100644
--- a/cmds/installd/tests/installd_utils_test.cpp
+++ b/cmds/installd/tests/installd_utils_test.cpp
@@ -14,8 +14,10 @@
* limitations under the License.
*/
+#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
#include <android-base/logging.h>
#include <android-base/scopeguard.h>
@@ -656,5 +658,45 @@ TEST_F(UtilsTest, TestCreateDirIfNeeded) {
ASSERT_NE(0, create_dir_if_needed("/data/local/tmp/user/0/bar/baz", 0700));
}
+TEST_F(UtilsTest, WaitChild) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ /* child */
+ // Do nothing.
+ _exit(0);
+ }
+ /* parent */
+ int return_code = wait_child_with_timeout(pid, /*timeout_ms=*/100);
+ EXPECT_TRUE(WIFEXITED(return_code));
+ EXPECT_EQ(WEXITSTATUS(return_code), 0);
+}
+
+TEST_F(UtilsTest, WaitChildTimeout) {
+ pid_t pid = fork();
+ if (pid == 0) {
+ /* child */
+ sleep(1);
+ _exit(0);
+ }
+ /* parent */
+ int return_code = wait_child_with_timeout(pid, /*timeout_ms=*/1);
+ EXPECT_FALSE(WIFEXITED(return_code));
+ EXPECT_EQ(WTERMSIG(return_code), SIGKILL);
+}
+
+TEST_F(UtilsTest, RemoveFileAtFd) {
+ std::string filename = "/data/local/tmp/tempfile-XXXXXX";
+ int fd = mkstemp(filename.data());
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(access(filename.c_str(), F_OK), 0);
+
+ std::string actual_filename;
+ remove_file_at_fd(fd, &actual_filename);
+ EXPECT_NE(access(filename.c_str(), F_OK), 0);
+ EXPECT_EQ(filename, actual_filename);
+
+ close(fd);
+}
+
} // namespace installd
} // namespace android
diff --git a/cmds/installd/utils.cpp b/cmds/installd/utils.cpp
index faccded34b..f04ee337d9 100644
--- a/cmds/installd/utils.cpp
+++ b/cmds/installd/utils.cpp
@@ -19,18 +19,21 @@
#include <errno.h>
#include <fcntl.h>
#include <fts.h>
+#include <poll.h>
#include <stdlib.h>
#include <sys/capability.h>
+#include <sys/pidfd.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/wait.h>
#include <sys/xattr.h>
+#include <unistd.h>
#include <uuid/uuid.h>
#include <android-base/file.h>
#include <android-base/logging.h>
-#include <android-base/strings.h>
#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <cutils/fs.h>
#include <cutils/properties.h>
@@ -1059,30 +1062,45 @@ int ensure_config_user_dirs(userid_t userid) {
return fs_prepare_dir(path.c_str(), 0750, uid, gid);
}
-int wait_child(pid_t pid)
-{
+static int wait_child(pid_t pid) {
int status;
- pid_t got_pid;
+ pid_t got_pid = TEMP_FAILURE_RETRY(waitpid(pid, &status, /*options=*/0));
- while (1) {
- got_pid = waitpid(pid, &status, 0);
- if (got_pid == -1 && errno == EINTR) {
- printf("waitpid interrupted, retrying\n");
- } else {
- break;
- }
- }
if (got_pid != pid) {
- ALOGW("waitpid failed: wanted %d, got %d: %s\n",
- (int) pid, (int) got_pid, strerror(errno));
- return 1;
+ PLOG(ERROR) << "waitpid failed: wanted " << pid << ", got " << got_pid;
+ return W_EXITCODE(/*exit_code=*/255, /*signal_number=*/0);
}
- if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
- return 0;
- } else {
- return status; /* always nonzero */
+ return status;
+}
+
+int wait_child_with_timeout(pid_t pid, int timeout_ms) {
+ int pidfd = pidfd_open(pid, /*flags=*/0);
+ if (pidfd < 0) {
+ PLOG(ERROR) << "pidfd_open failed for pid " << pid;
+ kill(pid, SIGKILL);
+ return wait_child(pid);
+ }
+
+ struct pollfd pfd;
+ pfd.fd = pidfd;
+ pfd.events = POLLIN;
+ int poll_ret = TEMP_FAILURE_RETRY(poll(&pfd, /*nfds=*/1, timeout_ms));
+
+ close(pidfd);
+
+ if (poll_ret < 0) {
+ PLOG(ERROR) << "poll failed for pid " << pid;
+ kill(pid, SIGKILL);
+ return wait_child(pid);
}
+ if (poll_ret == 0) {
+ LOG(WARNING) << "Child process " << pid << " timed out after " << timeout_ms
+ << "ms. Killing it";
+ kill(pid, SIGKILL);
+ return wait_child(pid);
+ }
+ return wait_child(pid);
}
/**
@@ -1295,5 +1313,27 @@ void drop_capabilities(uid_t uid) {
}
}
+bool remove_file_at_fd(int fd, /*out*/ std::string* path) {
+ char path_buffer[PATH_MAX + 1];
+ std::string proc_path = android::base::StringPrintf("/proc/self/fd/%d", fd);
+ ssize_t len = readlink(proc_path.c_str(), path_buffer, PATH_MAX);
+ if (len < 0) {
+ PLOG(WARNING) << "Could not remove file at fd " << fd << ": Failed to get file path";
+ return false;
+ }
+ path_buffer[len] = '\0';
+ if (path != nullptr) {
+ *path = path_buffer;
+ }
+ if (unlink(path_buffer) != 0) {
+ if (errno == ENOENT) {
+ return true;
+ }
+ PLOG(WARNING) << "Could not remove file at path " << path_buffer;
+ return false;
+ }
+ return true;
+}
+
} // namespace installd
} // namespace android
diff --git a/cmds/installd/utils.h b/cmds/installd/utils.h
index 0b1c324b02..39738945e4 100644
--- a/cmds/installd/utils.h
+++ b/cmds/installd/utils.h
@@ -153,7 +153,8 @@ int validate_apk_path_subdirs(const char *path);
int ensure_config_user_dirs(userid_t userid);
-int wait_child(pid_t pid);
+// Waits for a child process, or kills it if it times out. Returns the exit code.
+int wait_child_with_timeout(pid_t pid, int timeout_ms);
int prepare_app_cache_dir(const std::string& parent, const char* name, mode_t target_mode,
uid_t uid, gid_t gid);
@@ -170,6 +171,10 @@ bool collect_profiles(std::vector<std::string>* profiles_paths);
void drop_capabilities(uid_t uid);
+// Removes a file specified by a file descriptor. Returns true on success. Reports the file path to
+// `path` if present.
+bool remove_file_at_fd(int fd, /*out*/ std::string* path = nullptr);
+
} // namespace installd
} // namespace android
diff --git a/cmds/installd/view_compiler.cpp b/cmds/installd/view_compiler.cpp
index 60d6492e70..8c000a11c9 100644
--- a/cmds/installd/view_compiler.cpp
+++ b/cmds/installd/view_compiler.cpp
@@ -33,7 +33,13 @@
namespace android {
namespace installd {
-using base::unique_fd;
+namespace {
+
+using ::android::base::unique_fd;
+
+constexpr int kTimeoutMs = 300000;
+
+} // namespace
bool view_compiler(const char* apk_path, const char* package_name, const char* out_dex_file,
int uid) {
@@ -88,7 +94,17 @@ bool view_compiler(const char* apk_path, const char* package_name, const char* o
_exit(1);
}
- return wait_child(pid) == 0;
+ int return_code = wait_child_with_timeout(pid, kTimeoutMs);
+ if (!WIFEXITED(return_code)) {
+ LOG(WARNING) << "viewcompiler failed for " << package_name << ":" << apk_path;
+ if (WTERMSIG(return_code) == SIGKILL) {
+ // If the subprocess is killed while it's writing to the file, the file is likely
+ // corrupted, so we should remove it.
+ remove_file_at_fd(outfd.get());
+ }
+ return false;
+ }
+ return WEXITSTATUS(return_code) == 0;
}
} // namespace installd