summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin <yabinc@google.com>2022-08-23 11:43:01 -0700
committerYabin Cui <yabinc@google.com>2022-08-29 22:38:15 +0000
commit7d415cbce750ae73f531944a3ad40afcdbf5ac74 (patch)
tree4140150cd7bd0e519f49a28acfa6719a97bc1ba5
parent035545f4caa78e23deae5367487298e21316ec16 (diff)
downloadextras-7d415cbce750ae73f531944a3ad40afcdbf5ac74.tar.gz
simpleperf: stat: don't open event files across cpus.
When monitoring a hardware event for a thread running on all cpus, the stat cmd opens one event file to monitor the thread across all cpus. This works because CPU PMUs on all cpus are managed by one pmu object in the perf event driver in the kernel, and the event file is bound to a pmu object. So it can move with the thread between cpus. But now big/little ARM cores may have different numbers of CPU PMU hardware counters. To fully use them, CPU PMUs on big/little cores need to be managed by different pmu objects. As a result, event files can't be transferred between cpus belonging to different pmu objects. To work with this, this patch changes the stat cmd to open one event file for each cpu. But after that, we can't tell if hardware counter multiplexing happens by checking if runtime equals enabled time. So we avoid showing the runtime / enabled_time percentage. Instead, we check if the events used are more than the hardware counters available on each cpu. Bug: 243479304 Bug: 243065368 Test: simpleperf_unit_test Change-Id: I0eb6acbdb2c23bd726be90232a8d97285dac5482 (cherry picked from commit 4ed8e11cc2adf37eb95b922f51a029617c875874)
-rw-r--r--simpleperf/cmd_stat.cpp146
-rw-r--r--simpleperf/event_type.h5
2 files changed, 82 insertions, 69 deletions
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index 56249907..5efae951 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -157,7 +157,7 @@ void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) {
}
titles.emplace_back("count");
titles.emplace_back("event_name");
- titles.emplace_back(" # count / runtime, runtime / enabled_time");
+ titles.emplace_back(" # count / runtime");
std::vector<size_t> width(titles.size(), 0);
@@ -206,10 +206,9 @@ void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) {
if (show_cpu) {
fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.cpu);
}
- fprintf(fp, " %*s %-*s # %-*s (%.0f%%)%s\n", static_cast<int>(width[i]),
- s.readable_count.c_str(), static_cast<int>(width[i + 1]), s.Name().c_str(),
- static_cast<int>(width[i + 2]), s.comment.c_str(), 1.0 / s.scale * 100,
- (s.auto_generated ? " (generated)" : ""));
+ fprintf(fp, " %*s %-*s # %-*s%s\n", static_cast<int>(width[i]), s.readable_count.c_str(),
+ static_cast<int>(width[i + 1]), s.Name().c_str(), static_cast<int>(width[i + 2]),
+ s.comment.c_str(), (s.auto_generated ? " (generated)" : ""));
}
}
@@ -440,6 +439,7 @@ class StatCommand : public Command {
void MonitorEachThread();
void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters);
bool ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec, FILE* fp);
+ void CheckHardwareCounterMultiplexing();
bool verbose_mode_;
bool system_wide_collection_;
@@ -536,9 +536,6 @@ bool StatCommand::Run(const std::vector<std::string>& args) {
}
// 3. Open perf_event_files and output file if defined.
- if (cpus_.empty() && !report_per_core_ && (report_per_thread_ || !system_wide_collection_)) {
- cpus_.push_back(-1); // Get event count for each thread on all cpus.
- }
if (!event_selection_set_.OpenEventFiles(cpus_)) {
return false;
}
@@ -617,8 +614,15 @@ bool StatCommand::Run(const std::vector<std::string>& args) {
// 6. Read and print counters.
if (interval_in_ms_ == 0) {
- return print_counters();
+ if (!print_counters()) {
+ return false;
+ }
}
+
+ // 7. Print hardware counter multiplexing warning when needed.
+ event_selection_set_.CloseEventFiles();
+ CheckHardwareCounterMultiplexing();
+
return true;
}
@@ -739,46 +743,49 @@ bool StatCommand::ParseOptions(const std::vector<std::string>& args,
return true;
}
-std::optional<size_t> GetHardwareCountersOnCpu(int cpu) {
- size_t available_counters = 0;
+std::optional<bool> CheckHardwareCountersOnCpu(int cpu, size_t counters) {
const EventType* event = FindEventTypeByName("cpu-cycles", true);
if (event == nullptr) {
return std::nullopt;
}
perf_event_attr attr = CreateDefaultPerfEventAttr(*event);
- while (true) {
- auto workload = Workload::CreateWorkload({"sleep", "0.1"});
- if (!workload || !workload->SetCpuAffinity(cpu)) {
- return std::nullopt;
- }
- std::vector<std::unique_ptr<EventFd>> event_fds;
- for (size_t i = 0; i <= available_counters; i++) {
- EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get();
- auto event_fd = EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd,
- "cpu-cycles", false);
- if (!event_fd) {
- break;
- }
- event_fds.emplace_back(std::move(event_fd));
+ auto workload = Workload::CreateWorkload({"sleep", "0.1"});
+ if (!workload || !workload->SetCpuAffinity(cpu)) {
+ return std::nullopt;
+ }
+ std::vector<std::unique_ptr<EventFd>> event_fds;
+ for (size_t i = 0; i < counters; i++) {
+ EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get();
+ auto event_fd =
+ EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, "cpu-cycles", false);
+ if (!event_fd) {
+ return false;
}
- if (event_fds.size() != available_counters + 1) {
- break;
+ event_fds.emplace_back(std::move(event_fd));
+ }
+ if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) {
+ return std::nullopt;
+ }
+ for (auto& event_fd : event_fds) {
+ PerfCounter counter;
+ if (!event_fd->ReadCounter(&counter)) {
+ return std::nullopt;
}
- if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) {
+ if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) {
return false;
}
- bool always_running = true;
- for (auto& event_fd : event_fds) {
- PerfCounter counter;
- if (!event_fd->ReadCounter(&counter)) {
- return std::nullopt;
- }
- if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) {
- always_running = false;
- break;
- }
+ }
+ return true;
+}
+
+std::optional<size_t> GetHardwareCountersOnCpu(int cpu) {
+ size_t available_counters = 0;
+ while (true) {
+ std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, available_counters + 1);
+ if (!result.has_value()) {
+ return std::nullopt;
}
- if (!always_running) {
+ if (!result.value()) {
break;
}
available_counters++;
@@ -907,42 +914,43 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, double
summaries.GenerateComments(duration_in_sec);
summaries.Show(fp);
- if (csv_)
+ if (csv_) {
fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
- else
+ } else {
fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
+ }
+ return true;
+}
- const char* COUNTER_MULTIPLEX_INFO =
- "probably caused by hardware counter multiplexing (less counters than events).\n"
- "Try --use-devfreq-counters if on a rooted device.";
-
- if (cpus_ == std::vector<int>(1, -1) ||
- event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
- // We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
- // if percentages < 100%, probably it is caused by hardware counter multiplexing.
- bool counters_always_available = true;
- for (const auto& summary : summaries.Summaries()) {
- if (!summary.IsMonitoredAllTheTime()) {
- counters_always_available = false;
- break;
- }
+void StatCommand::CheckHardwareCounterMultiplexing() {
+ size_t hardware_events = 0;
+ for (const EventType* event : event_selection_set_.GetEvents()) {
+ if (event->IsHardwareEvent()) {
+ hardware_events++;
}
- if (!counters_always_available) {
- LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
- << COUNTER_MULTIPLEX_INFO;
+ }
+ if (hardware_events == 0) {
+ return;
+ }
+ std::vector<int> cpus = cpus_;
+ if (cpus.empty()) {
+ cpus = GetOnlineCpus();
+ }
+ for (int cpu : cpus) {
+ std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, hardware_events);
+ if (result.has_value() && !result.value()) {
+ LOG(WARNING) << "It seems the number of hardware events are more than the number of\n"
+ << "available CPU PMU hardware counters. That will trigger hardware counter\n"
+ << "multiplexing. As a result, events are not counted all the time processes\n"
+ << "running, and event counts are smaller than what really happen.\n"
+ << "Use --print-hw-counter to show available hardware counters.\n"
+#if defined(__ANDROID__)
+ << "If on a rooted device, try --use-devfreq-counters to get more counters.\n"
+#endif
+ ;
+ break;
}
- } else if (report_per_thread_) {
- // We monitor each thread on each cpu.
- LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
- << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
- << COUNTER_MULTIPLEX_INFO;
- } else {
- // We monitor some threads on each cpu.
- LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
- << "threads. If percentage sum < 99%, or report for an event is missing,\n"
- << COUNTER_MULTIPLEX_INFO;
}
- return true;
}
} // namespace
diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h
index d2cd0c15..14863caa 100644
--- a/simpleperf/event_type.h
+++ b/simpleperf/event_type.h
@@ -24,6 +24,8 @@
#include <string>
#include <vector>
+#include "perf_event.h"
+
namespace simpleperf {
inline const std::string kETMEventName = "cs-etm";
@@ -50,6 +52,9 @@ struct EventType {
bool IsPmuEvent() const { return name.find('/') != std::string::npos; }
bool IsEtmEvent() const { return name == kETMEventName; }
+ bool IsHardwareEvent() const {
+ return type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE || type == PERF_TYPE_RAW;
+ }
std::vector<int> GetPmuCpumask();