summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkilesh Kailash <akailash@google.com>2022-06-29 05:41:19 +0000
committerAkilesh Kailash <akailash@google.com>2022-08-01 16:31:20 +0000
commit2c0161be6a3f09c63ec88dd0b367daed49cefec7 (patch)
tree19004dc52c1ec9d883ee4ef836ae824adb1bd2bd
parentc5ad522fef9c02c918387c44e2b86c227630eee0 (diff)
downloadcore-2c0161be6a3f09c63ec88dd0b367daed49cefec7.tar.gz
Tune snapshot-merge performance
Currently, there is one thread per partition for snapshot merge. When all these threads are run in parallel, this may stress the system as the merge threads are both CPU and I/O bound. Allow only two merge threads to be in-flight at any point in time. This will ensure that there is forward progress done with respect to snapshot-merge and only two cores are used as against using 5-6 cores. Additionally, system and prodcut partitions are merged first. This is primarily because /root is mounted of system partition and faster the merge completes on /system partition, we can switch the dm tables immediately. There is no change in the merge phase from libsnapshot perspective. This prioritization is based on each merge phase. If the system partition merge is in second phase, then it takes priority in that phase. As a side benefit, this should also reduce the memory usage when merge is in-flight given that we now limit the threads. There is slight delay in overall merge time as we now throttle the merge. No boot time regressions observed. Full OTA: Merge time (Without this patch): 42 seconds Merge time (With this patch): 46 seconds Incremental OTA: Merge time (Without this patch): 52 seconds Merge time (With this patch): 57 seconds system partition merge completes in the first ~12-16 seconds. App-launch (COLD) on Pixel: Baseline (After snapshot-merge is completed when there is no daemon): ========================== Chrome: 250 youtube: 631 camera: 230 ========================== Without this patch when snapshot-merge is in-progress (in ms): Full - OTA Chrome: 1729 youtube: 3126 camera: 1525 ========================== With this patch when snapshot-merge is in-progress (in ms): Full - OTA Chrome: 1061 youtube: 820 camera: 1378 Incremental - OTA (350M) Chrome: 495 youtube: 1442 camera: 641 ===================== Bug: 237490659 Ignore-AOSP-First: cherry-pick from aosp Test: Full and incremental OTA Signed-off-by: Akilesh Kailash <akailash@google.com> Change-Id: I887d5073dba88e9a8a85ac10c771e4ccee7c84ff
-rw-r--r--fs_mgr/libsnapshot/snapshot.cpp11
-rw-r--r--fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp5
-rw-r--r--fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h6
-rw-r--r--fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.cpp73
-rw-r--r--fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.h18
-rw-r--r--fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp19
6 files changed, 121 insertions, 11 deletions
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 019b64a44..870801ef1 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -2151,8 +2151,17 @@ bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>*
if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
continue;
}
- snapshots->emplace_back(std::move(name));
+
+ // Insert system and product partition at the beginning so that
+ // during snapshot-merge, these partitions are merged first.
+ if (name == "system_a" || name == "system_b" || name == "product_a" ||
+ name == "product_b") {
+ snapshots->insert(snapshots->begin(), std::move(name));
+ } else {
+ snapshots->emplace_back(std::move(name));
+ }
}
+
return true;
}
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
index 692cb740c..718c13ce0 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
@@ -143,17 +143,18 @@ void SnapshotHandler::PrepareReadAhead() {
NotifyRAForMergeReady();
}
-void SnapshotHandler::CheckMergeCompletionStatus() {
+bool SnapshotHandler::CheckMergeCompletionStatus() {
if (!merge_initiated_) {
SNAP_LOG(INFO) << "Merge was not initiated. Total-data-ops: "
<< reader_->get_num_total_data_ops();
- return;
+ return false;
}
struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops
<< " Total-data-ops: " << reader_->get_num_total_data_ops();
+ return true;
}
bool SnapshotHandler::ReadMetadata() {
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
index 83d40f635..dd2627eee 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
@@ -274,7 +274,7 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
const bool& IsAttached() const { return attached_; }
void AttachControlDevice() { attached_ = true; }
- void CheckMergeCompletionStatus();
+ bool CheckMergeCompletionStatus();
bool CommitMerge(int num_merge_ops);
void CloseFds() { cow_fd_ = {}; }
@@ -305,6 +305,8 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
// State transitions for merge
void InitiateMerge();
+ void MonitorMerge();
+ void WakeupMonitorMergeThread();
void WaitForMergeComplete();
bool WaitForMergeBegin();
void NotifyRAForMergeReady();
@@ -333,6 +335,7 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
void SetSocketPresent(bool socket) { is_socket_present_ = socket; }
void SetIouringEnabled(bool io_uring_enabled) { is_io_uring_enabled_ = io_uring_enabled; }
bool MergeInitiated() { return merge_initiated_; }
+ bool MergeMonitored() { return merge_monitored_; }
double GetMergePercentage() { return merge_completion_percentage_; }
// Merge Block State Transitions
@@ -407,6 +410,7 @@ class SnapshotHandler : public std::enable_shared_from_this<SnapshotHandler> {
double merge_completion_percentage_;
bool merge_initiated_ = false;
+ bool merge_monitored_ = false;
bool attached_ = false;
bool is_socket_present_;
bool is_io_uring_enabled_ = false;
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.cpp
index 82b2b25dd..5d93f01a7 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.cpp
@@ -59,6 +59,14 @@ DaemonOps UserSnapshotServer::Resolveop(std::string& input) {
return DaemonOps::INVALID;
}
+UserSnapshotServer::UserSnapshotServer() {
+ monitor_merge_event_fd_.reset(eventfd(0, EFD_CLOEXEC));
+ if (monitor_merge_event_fd_ == -1) {
+ PLOG(FATAL) << "monitor_merge_event_fd_: failed to create eventfd";
+ }
+ terminating_ = false;
+}
+
UserSnapshotServer::~UserSnapshotServer() {
// Close any client sockets that were added via AcceptClient().
for (size_t i = 1; i < watched_fds_.size(); i++) {
@@ -249,7 +257,7 @@ bool UserSnapshotServer::Receivemsg(android::base::borrowed_fd fd, const std::st
return Sendmsg(fd, "fail");
}
- if (!StartMerge(*iter)) {
+ if (!StartMerge(&lock, *iter)) {
return Sendmsg(fd, "fail");
}
@@ -298,7 +306,7 @@ void UserSnapshotServer::RunThread(std::shared_ptr<UserSnapshotDmUserHandler> ha
}
handler->snapuserd()->CloseFds();
- handler->snapuserd()->CheckMergeCompletionStatus();
+ bool merge_completed = handler->snapuserd()->CheckMergeCompletionStatus();
handler->snapuserd()->UnmapBufferRegion();
auto misc_name = handler->misc_name();
@@ -306,7 +314,11 @@ void UserSnapshotServer::RunThread(std::shared_ptr<UserSnapshotDmUserHandler> ha
{
std::lock_guard<std::mutex> lock(lock_);
- num_partitions_merge_complete_ += 1;
+ if (merge_completed) {
+ num_partitions_merge_complete_ += 1;
+ active_merge_threads_ -= 1;
+ WakeupMonitorMergeThread();
+ }
handler->SetThreadTerminated();
auto iter = FindHandler(&lock, handler->misc_name());
if (iter == dm_users_.end()) {
@@ -418,6 +430,9 @@ void UserSnapshotServer::JoinAllThreads() {
if (th.joinable()) th.join();
}
+
+ stop_monitor_merge_thread_ = true;
+ WakeupMonitorMergeThread();
}
void UserSnapshotServer::AddWatchedFd(android::base::borrowed_fd fd, int events) {
@@ -502,13 +517,24 @@ bool UserSnapshotServer::StartHandler(const std::shared_ptr<UserSnapshotDmUserHa
return true;
}
-bool UserSnapshotServer::StartMerge(const std::shared_ptr<UserSnapshotDmUserHandler>& handler) {
+bool UserSnapshotServer::StartMerge(std::lock_guard<std::mutex>* proof_of_lock,
+ const std::shared_ptr<UserSnapshotDmUserHandler>& handler) {
+ CHECK(proof_of_lock);
+
if (!handler->snapuserd()->IsAttached()) {
LOG(ERROR) << "Handler not attached to dm-user - Merge thread cannot be started";
return false;
}
- handler->snapuserd()->InitiateMerge();
+ handler->snapuserd()->MonitorMerge();
+
+ if (!is_merge_monitor_started_.has_value()) {
+ std::thread(&UserSnapshotServer::MonitorMerge, this).detach();
+ is_merge_monitor_started_ = true;
+ }
+
+ merge_handlers_.push(handler);
+ WakeupMonitorMergeThread();
return true;
}
@@ -590,6 +616,42 @@ bool UserSnapshotServer::RemoveAndJoinHandler(const std::string& misc_name) {
return true;
}
+void UserSnapshotServer::WakeupMonitorMergeThread() {
+ uint64_t notify = 1;
+ ssize_t rc = TEMP_FAILURE_RETRY(write(monitor_merge_event_fd_.get(), &notify, sizeof(notify)));
+ if (rc < 0) {
+ PLOG(FATAL) << "failed to notify monitor merge thread";
+ }
+}
+
+void UserSnapshotServer::MonitorMerge() {
+ while (!stop_monitor_merge_thread_) {
+ uint64_t testVal;
+ ssize_t ret =
+ TEMP_FAILURE_RETRY(read(monitor_merge_event_fd_.get(), &testVal, sizeof(testVal)));
+ if (ret == -1) {
+ PLOG(FATAL) << "Failed to read from eventfd";
+ } else if (ret == 0) {
+ LOG(FATAL) << "Hit EOF on eventfd";
+ }
+
+ LOG(INFO) << "MonitorMerge: active-merge-threads: " << active_merge_threads_;
+ {
+ std::lock_guard<std::mutex> lock(lock_);
+ while (active_merge_threads_ < kMaxMergeThreads && merge_handlers_.size() > 0) {
+ auto handler = merge_handlers_.front();
+ merge_handlers_.pop();
+ LOG(INFO) << "Starting merge for partition: "
+ << handler->snapuserd()->GetMiscName();
+ handler->snapuserd()->InitiateMerge();
+ active_merge_threads_ += 1;
+ }
+ }
+ }
+
+ LOG(INFO) << "Exiting MonitorMerge: size: " << merge_handlers_.size();
+}
+
bool UserSnapshotServer::WaitForSocket() {
auto scope_guard = android::base::make_scope_guard([this]() -> void { JoinAllThreads(); });
@@ -646,6 +708,7 @@ bool UserSnapshotServer::WaitForSocket() {
if (!StartWithSocket(true)) {
return false;
}
+
return Run();
}
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.h
index 34e7941bc..e0844aeb1 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_server.h
@@ -15,6 +15,7 @@
#pragma once
#include <poll.h>
+#include <sys/eventfd.h>
#include <cstdio>
#include <cstring>
@@ -22,6 +23,8 @@
#include <future>
#include <iostream>
#include <mutex>
+#include <optional>
+#include <queue>
#include <sstream>
#include <string>
#include <thread>
@@ -34,6 +37,7 @@ namespace android {
namespace snapshot {
static constexpr uint32_t kMaxPacketSize = 512;
+static constexpr uint8_t kMaxMergeThreads = 2;
enum class DaemonOps {
INIT,
@@ -84,13 +88,19 @@ class UserSnapshotServer {
std::vector<struct pollfd> watched_fds_;
bool is_socket_present_ = false;
int num_partitions_merge_complete_ = 0;
+ int active_merge_threads_ = 0;
+ bool stop_monitor_merge_thread_ = false;
bool is_server_running_ = false;
bool io_uring_enabled_ = false;
+ std::optional<bool> is_merge_monitor_started_;
+
+ android::base::unique_fd monitor_merge_event_fd_;
std::mutex lock_;
using HandlerList = std::vector<std::shared_ptr<UserSnapshotDmUserHandler>>;
HandlerList dm_users_;
+ std::queue<std::shared_ptr<UserSnapshotDmUserHandler>> merge_handlers_;
void AddWatchedFd(android::base::borrowed_fd fd, int events);
void AcceptClient();
@@ -108,6 +118,8 @@ class UserSnapshotServer {
bool IsTerminating() { return terminating_; }
void RunThread(std::shared_ptr<UserSnapshotDmUserHandler> handler);
+ void MonitorMerge();
+
void JoinAllThreads();
bool StartWithSocket(bool start_listening);
@@ -119,7 +131,7 @@ class UserSnapshotServer {
void TerminateMergeThreads(std::lock_guard<std::mutex>* proof_of_lock);
public:
- UserSnapshotServer() { terminating_ = false; }
+ UserSnapshotServer();
~UserSnapshotServer();
bool Start(const std::string& socketname);
@@ -133,9 +145,11 @@ class UserSnapshotServer {
const std::string& backing_device,
const std::string& base_path_merge);
bool StartHandler(const std::shared_ptr<UserSnapshotDmUserHandler>& handler);
- bool StartMerge(const std::shared_ptr<UserSnapshotDmUserHandler>& handler);
+ bool StartMerge(std::lock_guard<std::mutex>* proof_of_lock,
+ const std::shared_ptr<UserSnapshotDmUserHandler>& handler);
std::string GetMergeStatus(const std::shared_ptr<UserSnapshotDmUserHandler>& handler);
+ void WakeupMonitorMergeThread();
void SetTerminating() { terminating_ = true; }
void ReceivedSocketSignal() { received_socket_signal_ = true; }
void SetServerRunning() { is_server_running_ = true; }
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
index d4e1d7c7e..28c9f688a 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
@@ -165,6 +165,13 @@ using namespace android;
using namespace android::dm;
using android::base::unique_fd;
+void SnapshotHandler::MonitorMerge() {
+ {
+ std::lock_guard<std::mutex> lock(lock_);
+ merge_monitored_ = true;
+ }
+}
+
// This is invoked once primarily by update-engine to initiate
// the merge
void SnapshotHandler::InitiateMerge() {
@@ -361,10 +368,16 @@ void SnapshotHandler::WaitForMergeComplete() {
std::string SnapshotHandler::GetMergeStatus() {
bool merge_not_initiated = false;
+ bool merge_monitored = false;
bool merge_failed = false;
{
std::lock_guard<std::mutex> lock(lock_);
+
+ if (MergeMonitored()) {
+ merge_monitored = true;
+ }
+
if (!MergeInitiated()) {
merge_not_initiated = true;
}
@@ -387,6 +400,12 @@ std::string SnapshotHandler::GetMergeStatus() {
return "snapshot-merge-complete";
}
+ // Merge monitor thread is tracking the merge but the merge thread
+ // is not started yet.
+ if (merge_monitored) {
+ return "snapshot-merge";
+ }
+
// Return the state as "snapshot". If the device was rebooted during
// merge, we will return the status as "snapshot". This is ok, as
// libsnapshot will explicitly resume the merge. This is slightly