summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin Cui <yabinc@google.com>2020-03-24 14:28:16 -0700
committerYabin Cui <yabinc@google.com>2020-03-27 14:46:58 -0700
commit8c1fb92b54b18b9cd0e6f4d39648c0069822c893 (patch)
treebc5532e6faa11595f1cc265e8dfded6731c28298
parentf76271cb2d5579ee4b4e77d694cef837e4075382 (diff)
downloadextras-8c1fb92b54b18b9cd0e6f4d39648c0069822c893.tar.gz
simpleperf: update doc for stat --per-core option.
Improve document reported by issue https://github.com/android/ndk/issues/1214. Also clean up stat ouptut: 1. Skip output for threads and cpus not running. 2. Tiny fix in column width adjustment. Bug: 152518722 Test: run simpleperf_unit_test. Change-Id: Ic032a12daa50bfb4c1552c94d8e7e7822c2f4ad8 (cherry picked from commit 3ef0328b7613f97a516163438869d435515a496a)
-rw-r--r--simpleperf/cmd_stat.cpp35
-rw-r--r--simpleperf/doc/executable_commands_reference.md16
2 files changed, 35 insertions, 16 deletions
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index adc4f0a2..0f8f6150 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -257,7 +257,8 @@ class CounterSummaries {
w = std::max(w, size);
};
- for (size_t i = 0; i < titles.size(); i++) {
+ // The last title is too long. Don't include it for width adjustment.
+ for (size_t i = 0; i + 1 < titles.size(); i++) {
adjust_width(width[i], titles[i].size());
}
@@ -956,8 +957,8 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
if (sum.time_running < sum.time_enabled && sum.time_running != 0) {
scale = static_cast<double>(sum.time_enabled) / sum.time_running;
}
- if (system_wide_collection_ && report_per_thread_ && sum.time_running == 0) {
- // No need to report threads not running in system wide per thread report.
+ if ((report_per_thread_ || report_per_core_) && sum.time_running == 0) {
+ // No need to report threads or cpus not running.
return;
}
ThreadInfo* thread = nullptr;
@@ -1032,26 +1033,28 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
else
fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
+ const char* COUNTER_MULTIPLEX_INFO =
+ "probably caused by hardware counter multiplexing (less counters than events).\n"
+ "Try --use-devfreq-counters if on a rooted device.";
+
if (cpus_ == std::vector<int>(1, -1) ||
event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
// We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
// if percentages < 100%, probably it is caused by hardware counter multiplexing.
if (!counters_always_available) {
- LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time.\n"
- << "Probably because there are less hardware counters available than events.\n"
- << "Try --use-devfreq-counters if on a rooted device.";
+ LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
+ << COUNTER_MULTIPLEX_INFO;
}
+ } else if (report_per_thread_) {
+ // We monitor each thread on each cpu.
+ LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
+ << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
+ << COUNTER_MULTIPLEX_INFO;
} else {
- // We monitor a thread on a cpu. A percentage represents
- // runtime_of_a_thread_on_a_cpu / runtime_of_a_thread_on_all_cpus. If percentage sum of a
- // thread < 100%, or total event count for a running thread is 0, probably it is caused by
- // hardware counter multiplexing. It is hard to detect the second case, so always print below
- // info.
- LOG(INFO) << "A percentage represents runtime_of_a_thread_on_a_cpu / "
- "runtime_of_a_thread_on_all_cpus.\n"
- << "If percentage sum of a thread < 100%, or total event count for a running\n"
- << "thread is 0, probably because there are less hardware counters available than\n"
- << "events. Try --use-devfreq-counters if on a rooted device.";
+ // We monitor some threads on each cpu.
+ LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
+ << "threads. If percentage sum < 99%, or report for an event is missing,\n"
+ << COUNTER_MULTIPLEX_INFO;
}
return true;
}
diff --git a/simpleperf/doc/executable_commands_reference.md b/simpleperf/doc/executable_commands_reference.md
index 22ed52fe..5d51a158 100644
--- a/simpleperf/doc/executable_commands_reference.md
+++ b/simpleperf/doc/executable_commands_reference.md
@@ -294,10 +294,26 @@ $ su 0 simpleperf stat --per-thread -a --interval 1000 --interval-only-values --
By default, stat cmd outputs an event count sum for all monitored cpu cores. But when `--per-core`
option is used, stat cmd outputs an event count for each core. It can be used to see how events
are distributed on different cores.
+When stating non-system wide with `--per-core` option, simpleperf creates a perf event for each
+monitored thread on each core. When a thread is in running state, perf events on all cores are
+enabled, but only the perf event on the core running the thread is in running state. So the
+percentage comment shows runtime_on_a_core / runtime_on_all_cores. Note that, percentage is still
+affected by hardware counter multiplexing. Check simpleperf log output for ways to distinguish it.
```sh
# Print event counts for each cpu running threads in process 11904.
+# A percentage shows runtime_on_a_cpu / runtime_on_all_cpus.
$ simpleperf stat --per-core -p 11904 --duration 1
+Performance counter statistics:
+
+# cpu count event_name # percentage = event_run_time / enabled_time
+ 7 56,552,838 cpu-cycles # (60%)
+ 3 25,958,605 cpu-cycles # (20%)
+ 0 22,822,698 cpu-cycles # (15%)
+ 1 6,661,495 cpu-cycles # (5%)
+ 4 1,519,093 cpu-cycles # (0%)
+
+Total test time: 1.001082 seconds.
# Print event counts for each cpu system wide.
$ su 0 simpleperf stat --per-core -a --duration 1