summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorandroid-build-team Robot <android-build-team-robot@google.com>2020-03-28 23:06:18 +0000
committerandroid-build-team Robot <android-build-team-robot@google.com>2020-03-28 23:06:18 +0000
commit252a6c753950ab417ed22e4374b187dd690850b9 (patch)
tree83793d4913bc15b232a80fe1b14214caeb13a6ea
parentce065bacaa86146009a4c2c35e4609152b894b3f (diff)
parentbf06df7d2d361ccca839813c54e2c9073c0c828e (diff)
downloadextras-252a6c753950ab417ed22e4374b187dd690850b9.tar.gz
Snap for 6344033 from bf06df7d2d361ccca839813c54e2c9073c0c828e to rvc-d1-release
Change-Id: Ieec7288bf4cd912dcfb9deb2a91922946b400c7d
-rw-r--r--simpleperf/cmd_stat.cpp232
-rw-r--r--simpleperf/cmd_stat_impl.h246
-rw-r--r--simpleperf/cmd_stat_test.cpp146
-rw-r--r--simpleperf/doc/executable_commands_reference.md16
4 files changed, 449 insertions, 191 deletions
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index adc4f0a2..c8712df7 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -32,6 +32,7 @@
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
+#include "cmd_stat_impl.h"
#include "command.h"
#include "environment.h"
#include "event_attr.h"
@@ -42,6 +43,8 @@
#include "utils.h"
#include "workload.h"
+using namespace simpleperf;
+
namespace {
static std::vector<std::string> default_measured_event_types{
@@ -50,95 +53,6 @@ static std::vector<std::string> default_measured_event_types{
"task-clock", "context-switches", "page-faults",
};
-struct CounterSum {
- uint64_t value = 0;
- uint64_t time_enabled = 0;
- uint64_t time_running = 0;
-};
-
-struct ThreadInfo {
- pid_t tid;
- pid_t pid;
- std::string name;
-};
-
-struct CounterSummary {
- std::string type_name;
- std::string modifier;
- uint32_t group_id;
- const ThreadInfo* thread;
- int cpu; // -1 represents all cpus
- uint64_t count;
- double scale;
- std::string readable_count;
- std::string comment;
- bool auto_generated;
-
- CounterSummary(const std::string& type_name, const std::string& modifier, uint32_t group_id,
- const ThreadInfo* thread, int cpu, uint64_t count, double scale,
- bool auto_generated, bool csv)
- : type_name(type_name),
- modifier(modifier),
- group_id(group_id),
- thread(thread),
- cpu(cpu),
- count(count),
- scale(scale),
- auto_generated(auto_generated) {
- readable_count = ReadableCountValue(csv);
- }
-
- bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
- // Two summaries are monitored at the same time if they are in the same
- // group or are monitored all the time.
- if (group_id == other.group_id) {
- return true;
- }
- return IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime();
- }
-
- std::string Name() const {
- if (modifier.empty()) {
- return type_name;
- }
- return type_name + ":" + modifier;
- }
-
- bool IsMonitoredAllTheTime() const {
- // If an event runs all the time it is enabled (by not sharing hardware
- // counters with other events), the scale of its summary is usually within
- // [1, 1 + 1e-5]. By setting SCALE_ERROR_LIMIT to 1e-5, We can identify
- // events monitored all the time in most cases while keeping the report
- // error rate <= 1e-5.
- constexpr double SCALE_ERROR_LIMIT = 1e-5;
- return (fabs(scale - 1.0) < SCALE_ERROR_LIMIT);
- }
-
- private:
- std::string ReadableCountValue(bool csv) {
- if (type_name == "cpu-clock" || type_name == "task-clock") {
- // Convert nanoseconds to milliseconds.
- double value = count / 1e6;
- return android::base::StringPrintf("%lf(ms)", value);
- } else {
- // Convert big numbers to human friendly mode. For example,
- // 1000000 will be converted to 1,000,000.
- std::string s = android::base::StringPrintf("%" PRIu64, count);
- if (csv) {
- return s;
- } else {
- for (size_t i = s.size() - 1, j = 1; i > 0; --i, ++j) {
- if (j == 3) {
- s.insert(s.begin() + i, ',');
- j = 0;
- }
- }
- return s;
- }
- }
- }
-};
-
static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
COMMON_EVENT_RATE_MAP = {
{"cache-misses", {"cache-references", "miss rate"}},
@@ -178,8 +92,9 @@ static const std::unordered_map<std::string_view, std::pair<std::string_view, st
class CounterSummaries {
public:
- explicit CounterSummaries(bool csv) : csv_(csv) {}
- std::vector<CounterSummary>& Summaries() { return summaries_; }
+ explicit CounterSummaries(std::vector<CounterSummary>&& summaries, bool csv)
+ : summaries_(std::move(summaries)), csv_(csv) {}
+ const std::vector<CounterSummary>& Summaries() { return summaries_; }
const CounterSummary* FindSummary(const std::string& type_name, const std::string& modifier,
const ThreadInfo* thread, int cpu) {
@@ -204,8 +119,8 @@ class CounterSummaries {
const CounterSummary* other = FindSummary(s.type_name, "k", s.thread, s.cpu);
if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
if (FindSummary(s.type_name, "", s.thread, s.cpu) == nullptr) {
- Summaries().emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu,
- s.count + other->count, s.scale, true, csv_);
+ summaries_.emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu,
+ s.count + other->count, s.scale, true, csv_);
}
}
}
@@ -257,7 +172,8 @@ class CounterSummaries {
w = std::max(w, size);
};
- for (size_t i = 0; i < titles.size(); i++) {
+ // The last title is too long. Don't include it for width adjustment.
+ for (size_t i = 0; i + 1 < titles.size(); i++) {
adjust_width(width[i], titles[i].size());
}
@@ -903,16 +819,11 @@ void StatCommand::AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters
}
for (size_t j = 0; j < counters_per_event.size(); j++) {
PerfCounter& counter = counters_per_event[j].counter;
- CounterSum& sum = last_sum[j];
- uint64_t tmp = counter.value;
- counter.value -= sum.value;
- sum.value = tmp;
- tmp = counter.time_enabled;
- counter.time_enabled -= sum.time_enabled;
- sum.time_enabled = tmp;
- tmp = counter.time_running;
- counter.time_running -= sum.time_running;
- sum.time_running = tmp;
+ CounterSum new_sum;
+ new_sum.FromCounter(counter);
+ CounterSum delta = new_sum - last_sum[j];
+ delta.ToCounter(counter);
+ last_sum[j] = new_sum;
}
}
}
@@ -948,81 +859,11 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
}
}
- bool counters_always_available = true;
- CounterSummaries summaries(csv_);
-
- auto add_summary = [&](const CountersInfo& info, pid_t tid, int cpu, const CounterSum& sum) {
- double scale = 1.0;
- if (sum.time_running < sum.time_enabled && sum.time_running != 0) {
- scale = static_cast<double>(sum.time_enabled) / sum.time_running;
- }
- if (system_wide_collection_ && report_per_thread_ && sum.time_running == 0) {
- // No need to report threads not running in system wide per thread report.
- return;
- }
- ThreadInfo* thread = nullptr;
- if (report_per_thread_) {
- auto it = thread_info_.find(tid);
- CHECK(it != thread_info_.end());
- thread = &it->second;
- }
- summaries.Summaries().emplace_back(info.event_name, info.event_modifier, info.group_id,
- thread, cpu, sum.value, scale, false, csv_);
- counters_always_available &= summaries.Summaries().back().IsMonitoredAllTheTime();
- };
-
- auto sort_summaries = [&](std::vector<CounterSummary>::iterator begin,
- std::vector<CounterSummary>::iterator end) {
- if (report_per_thread_ && report_per_core_) {
- // First sort by event count for all cpus in a thread, then sort by event count of each cpu.
- std::unordered_map<pid_t, uint64_t> count_per_thread;
- for (auto it = begin; it != end; ++it) {
- count_per_thread[it->thread->tid] += it->count;
- }
- std::sort(begin, end, [&](const CounterSummary& s1, const CounterSummary& s2) {
- pid_t tid1 = s1.thread->tid;
- pid_t tid2 = s2.thread->tid;
- if (tid1 != tid2) {
- if (count_per_thread[tid1] != count_per_thread[tid2]) {
- return count_per_thread[tid1] > count_per_thread[tid2];
- }
- return tid1 < tid2;
- }
- return s1.count > s2.count;
- });
- } else {
- std::sort(begin, end, [](const CounterSummary& s1, const CounterSummary& s2) {
- return s1.count > s2.count;
- });
- }
- };
-
+ CounterSummaryBuilder builder(report_per_thread_, report_per_core_, csv_, thread_info_);
for (const auto& info : counters) {
- std::unordered_map<uint64_t, CounterSum> sum_map;
- for (auto& counter : info.counters) {
- uint64_t key = 0;
- if (report_per_thread_) {
- key |= counter.tid;
- }
- if (report_per_core_) {
- key |= static_cast<uint64_t>(counter.cpu) << 32;
- }
- CounterSum& sum = sum_map[key];
- sum.value += counter.counter.value;
- sum.time_enabled = counter.counter.time_enabled;
- sum.time_running = counter.counter.time_running;
- }
- size_t pre_sum_count = summaries.Summaries().size();
- for (const auto& pair : sum_map) {
- pid_t tid = report_per_thread_ ? static_cast<pid_t>(pair.first & UINT32_MAX) : 0;
- int cpu = report_per_core_ ? static_cast<int>(pair.first >> 32) : -1;
- const CounterSum& sum = pair.second;
- add_summary(info, tid, cpu, sum);
- }
- if (report_per_thread_ || report_per_core_) {
- sort_summaries(summaries.Summaries().begin() + pre_sum_count, summaries.Summaries().end());
- }
+ builder.AddCountersForOneEventType(info);
}
+ CounterSummaries summaries(builder.Build(), csv_);
summaries.AutoGenerateSummaries();
summaries.GenerateComments(duration_in_sec);
summaries.Show(fp);
@@ -1032,26 +873,35 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
else
fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
+ const char* COUNTER_MULTIPLEX_INFO =
+ "probably caused by hardware counter multiplexing (less counters than events).\n"
+ "Try --use-devfreq-counters if on a rooted device.";
+
if (cpus_ == std::vector<int>(1, -1) ||
event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
// We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
// if percentages < 100%, probably it is caused by hardware counter multiplexing.
- if (!counters_always_available) {
- LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time.\n"
- << "Probably because there are less hardware counters available than events.\n"
- << "Try --use-devfreq-counters if on a rooted device.";
+ bool counters_always_available = true;
+ for (const auto& summary : summaries.Summaries()) {
+ if (!summary.IsMonitoredAllTheTime()) {
+ counters_always_available = false;
+ break;
+ }
}
+ if (!counters_always_available) {
+ LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
+ << COUNTER_MULTIPLEX_INFO;
+ }
+ } else if (report_per_thread_) {
+ // We monitor each thread on each cpu.
+ LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
+ << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
+ << COUNTER_MULTIPLEX_INFO;
} else {
- // We monitor a thread on a cpu. A percentage represents
- // runtime_of_a_thread_on_a_cpu / runtime_of_a_thread_on_all_cpus. If percentage sum of a
- // thread < 100%, or total event count for a running thread is 0, probably it is caused by
- // hardware counter multiplexing. It is hard to detect the second case, so always print below
- // info.
- LOG(INFO) << "A percentage represents runtime_of_a_thread_on_a_cpu / "
- "runtime_of_a_thread_on_all_cpus.\n"
- << "If percentage sum of a thread < 100%, or total event count for a running\n"
- << "thread is 0, probably because there are less hardware counters available than\n"
- << "events. Try --use-devfreq-counters if on a rooted device.";
+ // We monitor some threads on each cpu.
+ LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
+ << "threads. If percentage sum < 99%, or report for an event is missing,\n"
+ << COUNTER_MULTIPLEX_INFO;
}
return true;
}
diff --git a/simpleperf/cmd_stat_impl.h b/simpleperf/cmd_stat_impl.h
new file mode 100644
index 00000000..16dd091d
--- /dev/null
+++ b/simpleperf/cmd_stat_impl.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <math.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <android-base/stringprintf.h>
+
+#include "event_selection_set.h"
+
+namespace simpleperf {
+
+// Accumulated reading of a perf counter: the event count plus the time_enabled and time_running
+// values that come with it.
+struct CounterSum {
+  uint64_t value = 0;
+  uint64_t time_enabled = 0;
+  uint64_t time_running = 0;
+
+  // Overwrites all three fields with the ones read from |counter|.
+  void FromCounter(const PerfCounter& counter) {
+    time_running = counter.time_running;
+    time_enabled = counter.time_enabled;
+    value = counter.value;
+  }
+
+  // Stores all three fields into |counter|; other members of |counter| are left untouched.
+  void ToCounter(PerfCounter& counter) const {
+    counter.time_running = time_running;
+    counter.time_enabled = time_enabled;
+    counter.value = value;
+  }
+
+  // Field-wise sum of two readings.
+  CounterSum operator+(const CounterSum& other) const {
+    CounterSum total = *this;
+    total.value += other.value;
+    total.time_enabled += other.time_enabled;
+    total.time_running += other.time_running;
+    return total;
+  }
+
+  // Field-wise difference of two readings. The fields are unsigned, so a field of |other| that is
+  // larger than ours wraps around.
+  CounterSum operator-(const CounterSum& other) const {
+    CounterSum delta = *this;
+    delta.value -= other.value;
+    delta.time_enabled -= other.time_enabled;
+    delta.time_running -= other.time_running;
+    return delta;
+  }
+};
+
+struct ThreadInfo {  // Identity of a thread that a CounterSummary can point to.
+  pid_t tid;  // thread id; per-thread summaries are looked up and sorted by it
+  pid_t pid;  // presumably the id of the owning process -- not read in this header
+  std::string name;  // thread name; not read in this header
+};
+
+// One row of the stat report: the count of one event type, optionally restricted to one thread
+// and/or one cpu, together with the strings derived from it.
+struct CounterSummary {
+  std::string type_name;
+  std::string modifier;
+  uint32_t group_id;
+  const ThreadInfo* thread;
+  int cpu;  // -1 represents all cpus
+  uint64_t count;
+  double scale;
+  std::string readable_count;
+  std::string comment;
+  bool auto_generated;
+
+  // Stores the arguments and precomputes readable_count. |csv| is not stored; it only selects
+  // how readable_count is formatted.
+  CounterSummary(const std::string& type_name, const std::string& modifier, uint32_t group_id,
+                 const ThreadInfo* thread, int cpu, uint64_t count, double scale,
+                 bool auto_generated, bool csv)
+      : type_name(type_name),
+        modifier(modifier),
+        group_id(group_id),
+        thread(thread),
+        cpu(cpu),
+        count(count),
+        scale(scale),
+        auto_generated(auto_generated) {
+    readable_count = ReadableCountValue(csv);
+  }
+
+  // Two summaries count as monitored at the same time when they share an event group, or when
+  // both of them were monitored all the time.
+  bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
+    const bool same_group = (group_id == other.group_id);
+    return same_group || (IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime());
+  }
+
+  // Returns "type_name" or, when a modifier is set, "type_name:modifier".
+  std::string Name() const {
+    return modifier.empty() ? type_name : type_name + ":" + modifier;
+  }
+
+  // An event that never shares hardware counters with other events runs whenever it is enabled,
+  // and the scale of its summary is then usually within [1, 1 + 1e-5]. Using 1e-5 as
+  // SCALE_ERROR_LIMIT identifies such events in most cases while keeping the report error
+  // rate <= 1e-5.
+  bool IsMonitoredAllTheTime() const {
+    constexpr double SCALE_ERROR_LIMIT = 1e-5;
+    const double deviation = fabs(scale - 1.0);
+    return deviation < SCALE_ERROR_LIMIT;
+  }
+
+ private:
+  // Formats |count| for display: clock events in milliseconds, everything else as an integer
+  // that gets thousands separators unless |csv| is set.
+  std::string ReadableCountValue(bool csv) {
+    const bool is_clock_event = (type_name == "cpu-clock" || type_name == "task-clock");
+    if (is_clock_event) {
+      // The count is in nanoseconds; report milliseconds.
+      const double milliseconds = count / 1e6;
+      return android::base::StringPrintf("%lf(ms)", milliseconds);
+    }
+    std::string digits = android::base::StringPrintf("%" PRIu64, count);
+    if (csv) {
+      return digits;
+    }
+    // Make big numbers human friendly, e.g. 1000000 becomes 1,000,000: walk from the right and
+    // put a comma in front of every complete group of three digits except the leading one.
+    for (size_t pos = digits.size(); pos > 3;) {
+      pos -= 3;
+      digits.insert(pos, 1, ',');
+    }
+    return digits;
+  }
+};
+
+// Builds the vector of CounterSummary shown by the stat command. Counters are fed in one event
+// type at a time and are either aggregated over all threads and cpus, or kept separate per thread
+// and/or per cpu, depending on the flags given at construction.
+class CounterSummaryBuilder {
+ public:
+  // |thread_map| is kept by reference, so it must outlive the builder. It is only consulted when
+  // |report_per_thread| is true, and it must then contain every tid that is fed in.
+  CounterSummaryBuilder(bool report_per_thread, bool report_per_core, bool csv,
+                        const std::unordered_map<pid_t, ThreadInfo>& thread_map)
+      : report_per_thread_(report_per_thread),
+        report_per_core_(report_per_core),
+        csv_(csv),
+        thread_map_(thread_map) {}
+
+  // Aggregates the counters of one event type and appends the resulting summaries. When reporting
+  // per thread and/or per core, the summaries appended by this call are sorted among themselves
+  // (see SortSummaries); summaries of previously added event types keep their position.
+  void AddCountersForOneEventType(const CountersInfo& info) {
+    // Aggregation key: tid in the low 32 bits, cpu in the high 32 bits. A dimension that is not
+    // reported separately contributes 0, so all of its counters land in the same bucket.
+    std::unordered_map<uint64_t, CounterSum> sum_map;
+    for (const auto& counter : info.counters) {
+      uint64_t key = 0;
+      if (report_per_thread_) {
+        // Go through uint32_t so that a negative tid cannot sign-extend into the cpu half of the
+        // key; this mirrors the UINT32_MAX mask used when the key is decoded below.
+        key |= static_cast<uint32_t>(counter.tid);
+      }
+      if (report_per_core_) {
+        key |= static_cast<uint64_t>(counter.cpu) << 32;
+      }
+      CounterSum& sum = sum_map[key];
+      CounterSum add;
+      add.FromCounter(counter.counter);
+      sum = sum + add;
+    }
+    size_t pre_sum_count = summaries_.size();
+    for (const auto& pair : sum_map) {
+      pid_t tid = report_per_thread_ ? static_cast<pid_t>(pair.first & UINT32_MAX) : 0;
+      int cpu = report_per_core_ ? static_cast<int>(pair.first >> 32) : -1;
+      const CounterSum& sum = pair.second;
+      AddSummary(info, tid, cpu, sum);
+    }
+    if (report_per_thread_ || report_per_core_) {
+      SortSummaries(summaries_.begin() + pre_sum_count, summaries_.end());
+    }
+  }
+
+  // Returns the summaries collected so far and leaves the builder empty.
+  std::vector<CounterSummary> Build() {
+    std::vector<CounterSummary> res = std::move(summaries_);
+    summaries_.clear();
+    return res;
+  }
+
+ private:
+  // Appends one summary for the aggregated |sum|. scale is time_enabled / time_running when the
+  // event ran for only part of its enabled time, and 1.0 otherwise.
+  void AddSummary(const CountersInfo& info, pid_t tid, int cpu, const CounterSum& sum) {
+    double scale = 1.0;
+    if (sum.time_running < sum.time_enabled && sum.time_running != 0) {
+      scale = static_cast<double>(sum.time_enabled) / sum.time_running;
+    }
+    if ((report_per_thread_ || report_per_core_) && sum.time_running == 0) {
+      // No need to report threads or cpus not running.
+      return;
+    }
+    const ThreadInfo* thread = nullptr;
+    if (report_per_thread_) {
+      auto it = thread_map_.find(tid);
+      CHECK(it != thread_map_.end());
+      thread = &it->second;
+    }
+    summaries_.emplace_back(info.event_name, info.event_modifier, info.group_id, thread, cpu,
+                            sum.value, scale, false, csv_);
+  }
+
+  // Sorts [begin, end) by descending event count. When reporting per thread and per core, threads
+  // are ordered by their total count over all cpus first (ties broken by ascending tid), and the
+  // cpus of one thread by descending count.
+  void SortSummaries(std::vector<CounterSummary>::iterator begin,
+                     std::vector<CounterSummary>::iterator end) {
+    if (report_per_thread_ && report_per_core_) {
+      // First sort by event count for all cpus in a thread, then sort by event count of each cpu.
+      std::unordered_map<pid_t, uint64_t> count_per_thread;
+      for (auto it = begin; it != end; ++it) {
+        count_per_thread[it->thread->tid] += it->count;
+      }
+      std::sort(begin, end, [&](const CounterSummary& s1, const CounterSummary& s2) {
+        pid_t tid1 = s1.thread->tid;
+        pid_t tid2 = s2.thread->tid;
+        if (tid1 != tid2) {
+          if (count_per_thread[tid1] != count_per_thread[tid2]) {
+            return count_per_thread[tid1] > count_per_thread[tid2];
+          }
+          return tid1 < tid2;
+        }
+        return s1.count > s2.count;
+      });
+    } else {
+      std::sort(begin, end, [](const CounterSummary& s1, const CounterSummary& s2) {
+        return s1.count > s2.count;
+      });
+    }
+  }
+
+  const bool report_per_thread_;
+  const bool report_per_core_;
+  const bool csv_;
+  const std::unordered_map<pid_t, ThreadInfo>& thread_map_;
+  std::vector<CounterSummary> summaries_;
+};
+
+} // namespace simpleperf \ No newline at end of file
diff --git a/simpleperf/cmd_stat_test.cpp b/simpleperf/cmd_stat_test.cpp
index bbeb4b66..8bc4e4ff 100644
--- a/simpleperf/cmd_stat_test.cpp
+++ b/simpleperf/cmd_stat_test.cpp
@@ -22,12 +22,15 @@
#include <thread>
+#include "cmd_stat_impl.h"
#include "command.h"
#include "environment.h"
#include "event_selection_set.h"
#include "get_test_data.h"
#include "test_util.h"
+using namespace simpleperf;
+
static std::unique_ptr<Command> StatCmd() {
return CreateCommandInstance("stat");
}
@@ -341,3 +344,146 @@ TEST(stat_cmd, per_core_option) {
ASSERT_TRUE(StatCmd()->Run({"--per-core", "sleep", "0.1"}));
TEST_IN_ROOT(StatCmd()->Run({"--per-core", "-a", "--duration", "0.1"}));
}
+
+TEST(stat_cmd, counter_sum) {  // CounterSum <-> PerfCounter conversion and +/- arithmetic.
+  PerfCounter counter;
+  counter.value = 1;
+  counter.time_enabled = 2;
+  counter.time_running = 3;
+  CounterSum a;
+  a.FromCounter(counter);  // copies value, time_enabled and time_running
+  ASSERT_EQ(a.value, 1);
+  ASSERT_EQ(a.time_enabled, 2);
+  ASSERT_EQ(a.time_running, 3);
+  CounterSum b = a + a;  // field-wise sum
+  ASSERT_EQ(b.value, 2);
+  ASSERT_EQ(b.time_enabled, 4);
+  ASSERT_EQ(b.time_running, 6);
+  CounterSum c = a - a;  // field-wise difference
+  ASSERT_EQ(c.value, 0);
+  ASSERT_EQ(c.time_enabled, 0);
+  ASSERT_EQ(c.time_running, 0);
+  b.ToCounter(counter);  // writes b's fields back into counter
+  ASSERT_EQ(counter.value, 2);
+  ASSERT_EQ(counter.time_enabled, 4);
+  ASSERT_EQ(counter.time_running, 6);
+}
+
+// Fixture that collects fake counter readings and runs them through CounterSummaryBuilder.
+class StatCmdSummaryBuilderTest : public ::testing::Test {
+ protected:
+  // Records one counter reading for event |event_id| of thread |tid| on |cpu|. The thread (named
+  // "thread<tid>", with pid == tid) and the event (named "event<event_id>", group 0) are created
+  // on first use.
+  void AddCounter(int event_id, pid_t tid, int cpu, int value, int time_enabled, int time_running) {
+    if (thread_map_.count(tid) == 0) {
+      ThreadInfo& thread = thread_map_[tid];
+      thread.pid = thread.tid = tid;
+      thread.name = "thread" + std::to_string(tid);
+    }
+    // Compare as size_t to avoid a signed/unsigned comparison.
+    if (static_cast<size_t>(event_id) >= counters_.size()) {
+      counters_.resize(event_id + 1);
+      counters_[event_id].group_id = 0;
+      counters_[event_id].event_name = "event" + std::to_string(event_id);
+    }
+    CountersInfo& info = counters_[event_id];
+    info.counters.resize(info.counters.size() + 1);
+    CounterInfo& counter = info.counters.back();
+    counter.tid = tid;
+    counter.cpu = cpu;
+    counter.counter.id = 0;
+    counter.counter.value = value;
+    counter.counter.time_enabled = time_enabled;
+    counter.counter.time_running = time_running;
+  }
+
+  // Feeds every recorded event type to a fresh builder (csv disabled) and returns its summaries.
+  std::vector<CounterSummary> BuildSummary(bool report_per_thread, bool report_per_core) {
+    CounterSummaryBuilder builder(report_per_thread, report_per_core, false, thread_map_);
+    for (auto& info : counters_) {
+      builder.AddCountersForOneEventType(info);
+    }
+    return builder.Build();
+  }
+
+  std::unordered_map<pid_t, ThreadInfo> thread_map_;
+  std::vector<CountersInfo> counters_;
+};
+
+TEST_F(StatCmdSummaryBuilderTest, multiple_events) {  // one summary per event type
+  AddCounter(0, 0, 0, 1, 1, 1);  // args: event_id, tid, cpu, value, time_enabled, time_running
+  AddCounter(1, 0, 0, 2, 2, 2);
+  std::vector<CounterSummary> summaries = BuildSummary(false, false);
+  ASSERT_EQ(summaries.size(), 2);
+  ASSERT_EQ(summaries[0].type_name, "event0");
+  ASSERT_EQ(summaries[0].count, 1);
+  ASSERT_NEAR(summaries[0].scale, 1.0, 1e-5);  // time_running == time_enabled
+  ASSERT_EQ(summaries[1].type_name, "event1");
+  ASSERT_EQ(summaries[1].count, 2);
+  ASSERT_NEAR(summaries[1].scale, 1.0, 1e-5);
+}
+
+TEST_F(StatCmdSummaryBuilderTest, default_aggregate) {  // all threads and cpus merged
+  AddCounter(0, 0, 0, 1, 1, 1);  // args: event_id, tid, cpu, value, time_enabled, time_running
+  AddCounter(0, 0, 1, 1, 1, 1);
+  AddCounter(0, 1, 0, 1, 1, 1);
+  AddCounter(0, 1, 1, 2, 2, 1);  // enabled 2 but running 1
+  std::vector<CounterSummary> summaries = BuildSummary(false, false);
+  ASSERT_EQ(summaries.size(), 1);
+  ASSERT_EQ(summaries[0].count, 5);  // 1 + 1 + 1 + 2
+  ASSERT_NEAR(summaries[0].scale, 1.25, 1e-5);  // time_enabled 5 / time_running 4
+}
+
+// With per-thread reporting, the counters of each thread are summed over all cpus and the
+// summaries are ordered by descending count.
+TEST_F(StatCmdSummaryBuilderTest, per_thread_aggregate) {
+  // Arguments: event_id, tid, cpu, value, time_enabled, time_running.
+  AddCounter(0, 0, 0, 1, 1, 1);
+  AddCounter(0, 0, 1, 1, 1, 1);
+  AddCounter(0, 1, 0, 1, 1, 1);
+  AddCounter(0, 1, 1, 2, 2, 1);
+  std::vector<CounterSummary> summaries = BuildSummary(true, false);
+  ASSERT_EQ(summaries.size(), 2);
+  // Thread 1: count 1 + 2, time_enabled 3 / time_running 2.
+  ASSERT_EQ(summaries[0].thread->tid, 1);
+  ASSERT_EQ(summaries[0].cpu, -1);
+  ASSERT_EQ(summaries[0].count, 3);
+  ASSERT_NEAR(summaries[0].scale, 1.5, 1e-5);
+  // Thread 0: count 1 + 1, time_enabled 2 / time_running 2.
+  ASSERT_EQ(summaries[1].thread->tid, 0);
+  // Check the second summary's cpu (the first one's is already checked above).
+  ASSERT_EQ(summaries[1].cpu, -1);
+  ASSERT_EQ(summaries[1].count, 2);
+  ASSERT_NEAR(summaries[1].scale, 1.0, 1e-5);
+}
+
+TEST_F(StatCmdSummaryBuilderTest, per_core_aggregate) {  // threads merged, one summary per cpu
+  AddCounter(0, 0, 0, 1, 1, 1);  // args: event_id, tid, cpu, value, time_enabled, time_running
+  AddCounter(0, 0, 1, 1, 1, 1);
+  AddCounter(0, 1, 0, 1, 1, 1);
+  AddCounter(0, 1, 1, 2, 2, 1);
+  std::vector<CounterSummary> summaries = BuildSummary(false, true);
+  ASSERT_EQ(summaries.size(), 2);  // sorted by descending count
+  ASSERT_TRUE(summaries[0].thread == nullptr);  // no thread when not reporting per thread
+  ASSERT_EQ(summaries[0].cpu, 1);
+  ASSERT_EQ(summaries[0].count, 3);  // 1 + 2
+  ASSERT_NEAR(summaries[0].scale, 1.5, 1e-5);  // time_enabled 3 / time_running 2
+  ASSERT_TRUE(summaries[1].thread == nullptr);
+  ASSERT_EQ(summaries[1].cpu, 0);
+  ASSERT_EQ(summaries[1].count, 2);  // 1 + 1
+  ASSERT_NEAR(summaries[1].scale, 1.0, 1e-5);
+}
+
+TEST_F(StatCmdSummaryBuilderTest, per_thread_core_aggregate) {  // one summary per (thread, cpu)
+  AddCounter(0, 0, 0, 1, 1, 1);  // args: event_id, tid, cpu, value, time_enabled, time_running
+  AddCounter(0, 0, 1, 2, 1, 1);
+  AddCounter(0, 1, 0, 3, 1, 1);
+  AddCounter(0, 1, 1, 4, 2, 1);
+  std::vector<CounterSummary> summaries = BuildSummary(true, true);
+  ASSERT_EQ(summaries.size(), 4);
+  ASSERT_EQ(summaries[0].thread->tid, 1);  // thread 1 first: its total count 7 beats thread 0's 3
+  ASSERT_EQ(summaries[0].cpu, 1);
+  ASSERT_EQ(summaries[0].count, 4);
+  ASSERT_NEAR(summaries[0].scale, 2.0, 1e-5);  // time_enabled 2 / time_running 1
+  ASSERT_EQ(summaries[1].thread->tid, 1);
+  ASSERT_EQ(summaries[1].cpu, 0);
+  ASSERT_EQ(summaries[1].count, 3);
+  ASSERT_NEAR(summaries[1].scale, 1.0, 1e-5);
+  ASSERT_EQ(summaries[2].thread->tid, 0);  // within a thread, cpus are in descending count order
+  ASSERT_EQ(summaries[2].cpu, 1);
+  ASSERT_EQ(summaries[2].count, 2);
+  ASSERT_NEAR(summaries[2].scale, 1.0, 1e-5);
+  ASSERT_EQ(summaries[3].thread->tid, 0);
+  ASSERT_EQ(summaries[3].cpu, 0);
+  ASSERT_EQ(summaries[3].count, 1);
+  ASSERT_NEAR(summaries[3].scale, 1.0, 1e-5);
+}
diff --git a/simpleperf/doc/executable_commands_reference.md b/simpleperf/doc/executable_commands_reference.md
index 22ed52fe..5d51a158 100644
--- a/simpleperf/doc/executable_commands_reference.md
+++ b/simpleperf/doc/executable_commands_reference.md
@@ -294,10 +294,26 @@ $ su 0 simpleperf stat --per-thread -a --interval 1000 --interval-only-values --
By default, stat cmd outputs an event count sum for all monitored cpu cores. But when `--per-core`
option is used, stat cmd outputs an event count for each core. It can be used to see how events
are distributed on different cores.
+When running stat in non-system-wide mode with the `--per-core` option, simpleperf creates a perf
+event for each monitored thread on each core. While a thread is running, its perf events on all
+cores are enabled, but only the perf event on the core running the thread is in the running state.
+So the percentage comment shows runtime_on_a_core / runtime_on_all_cores. Note that the percentage
+is still affected by hardware counter multiplexing. Check the simpleperf log output for ways to
+distinguish the two.
```sh
# Print event counts for each cpu running threads in process 11904.
+# A percentage shows runtime_on_a_cpu / runtime_on_all_cpus.
$ simpleperf stat --per-core -p 11904 --duration 1
+Performance counter statistics:
+
+# cpu count event_name # percentage = event_run_time / enabled_time
+ 7 56,552,838 cpu-cycles # (60%)
+ 3 25,958,605 cpu-cycles # (20%)
+ 0 22,822,698 cpu-cycles # (15%)
+ 1 6,661,495 cpu-cycles # (5%)
+ 4 1,519,093 cpu-cycles # (0%)
+
+Total test time: 1.001082 seconds.
# Print event counts for each cpu system wide.
$ su 0 simpleperf stat --per-core -a --duration 1