diff options
author | Yabin Cui <yabinc@google.com> | 2020-03-24 14:28:16 -0700 |
---|---|---|
committer | Yabin Cui <yabinc@google.com> | 2020-03-27 14:46:58 -0700 |
commit | 8c1fb92b54b18b9cd0e6f4d39648c0069822c893 (patch) | |
tree | bc5532e6faa11595f1cc265e8dfded6731c28298 | |
parent | f76271cb2d5579ee4b4e77d694cef837e4075382 (diff) | |
download | extras-8c1fb92b54b18b9cd0e6f4d39648c0069822c893.tar.gz |
simpleperf: update doc for stat --per-core option.
Improve document reported by issue
https://github.com/android/ndk/issues/1214.
Also clean up stat ouptut:
1. Skip output for threads and cpus not running.
2. Tiny fix in column width adjustment.
Bug: 152518722
Test: run simpleperf_unit_test.
Change-Id: Ic032a12daa50bfb4c1552c94d8e7e7822c2f4ad8
(cherry picked from commit 3ef0328b7613f97a516163438869d435515a496a)
-rw-r--r-- | simpleperf/cmd_stat.cpp | 35 | ||||
-rw-r--r-- | simpleperf/doc/executable_commands_reference.md | 16 |
2 files changed, 35 insertions, 16 deletions
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index adc4f0a2..0f8f6150 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -257,7 +257,8 @@ class CounterSummaries { w = std::max(w, size); }; - for (size_t i = 0; i < titles.size(); i++) { + // The last title is too long. Don't include it for width adjustment. + for (size_t i = 0; i + 1 < titles.size(); i++) { adjust_width(width[i], titles[i].size()); } @@ -956,8 +957,8 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, if (sum.time_running < sum.time_enabled && sum.time_running != 0) { scale = static_cast<double>(sum.time_enabled) / sum.time_running; } - if (system_wide_collection_ && report_per_thread_ && sum.time_running == 0) { - // No need to report threads not running in system wide per thread report. + if ((report_per_thread_ || report_per_core_) && sum.time_running == 0) { + // No need to report threads or cpus not running. return; } ThreadInfo* thread = nullptr; @@ -1032,26 +1033,28 @@ bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, else fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec); + const char* COUNTER_MULTIPLEX_INFO = + "probably caused by hardware counter multiplexing (less counters than events).\n" + "Try --use-devfreq-counters if on a rooted device."; + if (cpus_ == std::vector<int>(1, -1) || event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) { // We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases, // if percentages < 100%, probably it is caused by hardware counter multiplexing. if (!counters_always_available) { - LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time.\n" - << "Probably because there are less hardware counters available than events.\n" - << "Try --use-devfreq-counters if on a rooted device."; + LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n" + << COUNTER_MULTIPLEX_INFO; } + } else if (report_per_thread_) { + // We monitor each thread on each cpu. + LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n" + << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n" + << COUNTER_MULTIPLEX_INFO; } else { - // We monitor a thread on a cpu. A percentage represents - // runtime_of_a_thread_on_a_cpu / runtime_of_a_thread_on_all_cpus. If percentage sum of a - // thread < 100%, or total event count for a running thread is 0, probably it is caused by - // hardware counter multiplexing. It is hard to detect the second case, so always print below - // info. - LOG(INFO) << "A percentage represents runtime_of_a_thread_on_a_cpu / " - "runtime_of_a_thread_on_all_cpus.\n" - << "If percentage sum of a thread < 100%, or total event count for a running\n" - << "thread is 0, probably because there are less hardware counters available than\n" - << "events. Try --use-devfreq-counters if on a rooted device."; + // We monitor some threads on each cpu. + LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n" + << "threads. If percentage sum < 99%, or report for an event is missing,\n" + << COUNTER_MULTIPLEX_INFO; } return true; } diff --git a/simpleperf/doc/executable_commands_reference.md b/simpleperf/doc/executable_commands_reference.md index 22ed52fe..5d51a158 100644 --- a/simpleperf/doc/executable_commands_reference.md +++ b/simpleperf/doc/executable_commands_reference.md @@ -294,10 +294,26 @@ $ su 0 simpleperf stat --per-thread -a --interval 1000 --interval-only-values -- By default, stat cmd outputs an event count sum for all monitored cpu cores. But when `--per-core` option is used, stat cmd outputs an event count for each core. It can be used to see how events are distributed on different cores. +When stating non-system wide with `--per-core` option, simpleperf creates a perf event for each +monitored thread on each core. When a thread is in running state, perf events on all cores are +enabled, but only the perf event on the core running the thread is in running state. So the +percentage comment shows runtime_on_a_core / runtime_on_all_cores. Note that, percentage is still +affected by hardware counter multiplexing. Check simpleperf log output for ways to distinguish it. ```sh # Print event counts for each cpu running threads in process 11904. +# A percentage shows runtime_on_a_cpu / runtime_on_all_cpus. $ simpleperf stat --per-core -p 11904 --duration 1 +Performance counter statistics: + +# cpu count event_name # percentage = event_run_time / enabled_time + 7 56,552,838 cpu-cycles # (60%) + 3 25,958,605 cpu-cycles # (20%) + 0 22,822,698 cpu-cycles # (15%) + 1 6,661,495 cpu-cycles # (5%) + 4 1,519,093 cpu-cycles # (0%) + +Total test time: 1.001082 seconds. # Print event counts for each cpu system wide. $ su 0 simpleperf stat --per-core -a --duration 1 |