summary refs log tree commit diff
diff options
context:
space:
mode:
author Yabin Cui <yabinc@google.com> 2017-02-04 01:25:50 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2017-02-04 01:25:50 +0000
commit a355220f81348774ed391c77c4e7be042635ba66 (patch)
tree a406ee4ef9195583f008953e9733a2367cf1e1f0
parent 420c7c361b5cf89ea3a8b478c5029e663981e5e6 (diff)
parent 26968e6c48dea2eaa217991ade5a04e801f1be8f (diff)
download extras-a355220f81348774ed391c77c4e7be042635ba66.tar.gz
Merge "simpleperf: add inplace-sampler event type."
-rw-r--r-- simpleperf/Android.mk 1
-rw-r--r-- simpleperf/IOEventLoop.cpp 1
-rw-r--r-- simpleperf/InplaceSamplerClient.cpp 93
-rw-r--r-- simpleperf/InplaceSamplerClient.h 53
-rw-r--r-- simpleperf/cmd_list.cpp 3
-rw-r--r-- simpleperf/cmd_record.cpp 24
-rw-r--r-- simpleperf/cmd_stat.cpp 21
-rw-r--r-- simpleperf/event_fd.cpp 9
-rw-r--r-- simpleperf/event_fd.h 2
-rw-r--r-- simpleperf/event_selection_set.cpp 161
-rw-r--r-- simpleperf/event_selection_set.h 18
-rw-r--r-- simpleperf/event_type.h 8
-rw-r--r-- simpleperf/event_type_table.h 2
-rwxr-xr-x simpleperf/generate_event_type_table.py 7
-rw-r--r-- simpleperf/record.cpp 16
-rw-r--r-- simpleperf/runtest/comm_change.cpp 13
-rw-r--r-- simpleperf/runtest/function_fork.cpp 16
-rw-r--r-- simpleperf/runtest/function_indirect_recursive.cpp 4
-rw-r--r-- simpleperf/runtest/function_pthread.cpp 24
-rw-r--r-- simpleperf/runtest/function_recursive.cpp 4
-rw-r--r-- simpleperf/runtest/one_function.cpp 4
-rw-r--r-- simpleperf/runtest/runtest.py 110
-rw-r--r-- simpleperf/runtest/two_functions.cpp 6
-rw-r--r-- simpleperf/workload.h 3
24 files changed, 485 insertions, 118 deletions
diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk
index 67eb0a04..a677de4a 100644
--- a/simpleperf/Android.mk
+++ b/simpleperf/Android.mk
@@ -110,6 +110,7 @@ libsimpleperf_src_files_linux := \
environment.cpp \
event_fd.cpp \
event_selection_set.cpp \
+ InplaceSamplerClient.cpp \
IOEventLoop.cpp \
perf_clock.cpp \
record_file_writer.cpp \
diff --git a/simpleperf/IOEventLoop.cpp b/simpleperf/IOEventLoop.cpp
index 44de2896..ce259280 100644
--- a/simpleperf/IOEventLoop.cpp
+++ b/simpleperf/IOEventLoop.cpp
@@ -40,6 +40,7 @@ struct IOEvent {
IOEventLoop::IOEventLoop() : ebase_(nullptr), has_error_(false) {}
IOEventLoop::~IOEventLoop() {
+ events_.clear();
if (ebase_ != nullptr) {
event_base_free(ebase_);
}
diff --git a/simpleperf/InplaceSamplerClient.cpp b/simpleperf/InplaceSamplerClient.cpp
new file mode 100644
index 00000000..5a788613
--- /dev/null
+++ b/simpleperf/InplaceSamplerClient.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InplaceSamplerClient.h"
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "environment.h"
+#include "utils.h"
+
+static constexpr uint64_t EVENT_ID_FOR_INPLACE_SAMPLER = UINT64_MAX;  // Full 64-bit sentinel from <stdint.h>; ULONG_MAX is only 32 bits wide on ILP32 builds.
+
+std::unique_ptr<InplaceSamplerClient> InplaceSamplerClient::Create(const perf_event_attr& attr,
+ pid_t pid,
+ const std::set<pid_t>& tids) {  // Factory: returns nullptr if any step below fails.
+ if (pid == -1) {  // pid -1 (system wide) is rejected up front.
+ LOG(ERROR) << "inplace-sampler can't monitor system wide events.";
+ return nullptr;
+ }
+ std::unique_ptr<InplaceSamplerClient> sampler(new InplaceSamplerClient(attr, pid, tids));
+ if (!sampler->ConnectServer()) {  // On failure the half-built client is destroyed by unique_ptr.
+ return nullptr;
+ }
+ if (!sampler->StartProfiling()) {
+ return nullptr;
+ }
+ return sampler;  // Connected and started; ownership passes to the caller.
+}
+
+InplaceSamplerClient::InplaceSamplerClient(const perf_event_attr& attr, pid_t pid,
+ const std::set<pid_t>& tids)
+ : attr_(attr), pid_(pid), tids_(tids), closed_(false) {  // Copies the arguments; the client starts out not closed.
+}
+
+uint64_t InplaceSamplerClient::Id() const {  // Every client reports the same fixed event id.
+ return EVENT_ID_FOR_INPLACE_SAMPLER;
+}
+
+bool InplaceSamplerClient::ConnectServer() {  // Placeholder: no connection is attempted here yet.
+ return true;  // Always reports success.
+}
+
+bool InplaceSamplerClient::StartProfiling() {  // Placeholder: nothing is started here yet.
+ return true;  // Always reports success.
+}
+
+bool InplaceSamplerClient::StartPolling(IOEventLoop& loop,
+ const std::function<bool(Record*)>& record_callback,
+ const std::function<bool()>& close_callback) {  // Registers a periodic event on loop that emits fake records.
+ record_callback_ = record_callback;  // Copies are kept so the timer lambda can invoke them later.
+ close_callback_ = close_callback;
+ auto callback = [this]() {  // Captures this, so the client must outlive the registered event.
+ // Fake records for testing.
+ uint64_t time = GetSystemClock();
+ CommRecord comm_r(attr_, pid_, pid_, "fake_comm", Id(), time);  // pid_ is passed twice (presumably as pid and tid).
+ if (!record_callback_(&comm_r)) {
+ return false;
+ }
+ MmapRecord mmap_r(attr_, false, pid_, pid_, 0x1000, 0x1000, 0x0, "fake_elf", Id(), time);
+ if (!record_callback_(&mmap_r)) {
+ return false;
+ }
+ std::vector<uint64_t> ips(1, 0x1000);  // A single ip, 0x1000, which is also the first address argument of the fake mmap above.
+ SampleRecord r(attr_, Id(), ips[0], pid_, pid_, time, 0, 1, ips);
+ if (!record_callback_(&r)) {
+ return false;
+ }
+ closed_ = true;  // Set once all three fake records were accepted.
+ return close_callback_();
+ };
+ timeval duration;
+ duration.tv_sec = 0;
+ duration.tv_usec = 1000;  // 1000 microseconds, i.e. a 1 ms period.
+ return loop.AddPeriodicEvent(duration, callback);
+}
diff --git a/simpleperf/InplaceSamplerClient.h b/simpleperf/InplaceSamplerClient.h
new file mode 100644
index 00000000..0c606bb3
--- /dev/null
+++ b/simpleperf/InplaceSamplerClient.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SIMPLE_PERF_INPLACE_SAMPLER_CLIENT_H_
+#define SIMPLE_PERF_INPLACE_SAMPLER_CLIENT_H_
+
+#include <memory>
+#include <set>
+#include <vector>
+
+#include "event_attr.h"
+#include "record.h"
+#include "UnixSocket.h"
+
+class InplaceSamplerClient {  // Client for the "inplace-sampler" user-space event type; built only through Create().
+ public:
+ static std::unique_ptr<InplaceSamplerClient> Create(const perf_event_attr& attr, pid_t pid,
+ const std::set<pid_t>& tids);  // Returns nullptr on failure.
+ uint64_t Id() const;  // Event id attached to the records this client produces.
+ bool IsClosed() const {  // const: a pure accessor, callable through const references too.
+ return closed_;
+ }
+ bool StartPolling(IOEventLoop& loop, const std::function<bool(Record*)>& record_callback,
+ const std::function<bool()>& close_callback);  // Both callbacks are copied and invoked from the loop.
+ bool StopProfiling();  // NOTE(review): declared here, but InplaceSamplerClient.cpp in this change has no definition.
+
+ private:
+ InplaceSamplerClient(const perf_event_attr& attr, pid_t pid, const std::set<pid_t>& tids);
+ bool ConnectServer();
+ bool StartProfiling();
+
+ const perf_event_attr attr_;  // Copy of the attr passed to Create().
+ const pid_t pid_;  // Target process; Create() rejects -1.
+ const std::set<pid_t> tids_;  // Thread ids passed to Create().
+ std::function<bool(Record*)> record_callback_;  // Set by StartPolling().
+ std::function<bool()> close_callback_;  // Set by StartPolling().
+ bool closed_;  // False at construction; set once the polling callback has delivered its records.
+};
+
+#endif // SIMPLE_PERF_INPLACE_SAMPLER_CLIENT_H_
diff --git a/simpleperf/cmd_list.cpp b/simpleperf/cmd_list.cpp
index 273a8037..0248aa97 100644
--- a/simpleperf/cmd_list.cpp
+++ b/simpleperf/cmd_list.cpp
@@ -36,7 +36,7 @@ static void PrintEventTypesOfType(uint32_t type, const std::string& type_name,
// Exclude kernel to list supported events even when
// /proc/sys/kernel/perf_event_paranoid is 2.
attr.exclude_kernel = 1;
- if (IsEventAttrSupportedByKernel(attr)) {
+ if (IsEventAttrSupported(attr)) {
printf(" %s\n", event_type.name.c_str());
}
}
@@ -65,6 +65,7 @@ bool ListCommand::Run(const std::vector<std::string>& args) {
{"sw", {PERF_TYPE_SOFTWARE, "software events"}},
{"cache", {PERF_TYPE_HW_CACHE, "hw-cache events"}},
{"tracepoint", {PERF_TYPE_TRACEPOINT, "tracepoint events"}},
+ {"user-space-sampler", {SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS, "user-space samplers"}},
};
std::vector<std::string> names;
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index b9dfff02..1cbb86cd 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -259,6 +259,13 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
if (workload != nullptr) {
event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
event_selection_set_.SetEnableOnExec(true);
+ if (event_selection_set_.HasInplaceSampler()) {
+ // Start worker early, because the worker process has to setup inplace-sampler server
+ // before we try to connect it.
+ if (!workload->Start()) {
+ return false;
+ }
+ }
} else {
LOG(ERROR)
<< "No threads to monitor. Try `simpleperf help record` for help";
@@ -282,9 +289,7 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
- // 5. Create IOEventLoop and add read/signal/periodic Events.
- IOEventLoop loop;
- event_selection_set_.SetIOEventLoop(loop);
+ // 5. Add read/signal/periodic Events.
auto callback =
std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
@@ -296,13 +301,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
return false;
}
- if (!loop.AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
- [&]() { return loop.ExitLoop(); })) {
+ IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
+ if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
+ [&]() { return loop->ExitLoop(); })) {
return false;
}
if (duration_in_sec_ != 0) {
- if (!loop.AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
- [&]() { return loop.ExitLoop(); })) {
+ if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
+ [&]() { return loop->ExitLoop(); })) {
return false;
}
}
@@ -312,10 +318,10 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
start_sampling_time_in_ns_ = GetPerfClock();
LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_
<< " ns";
- if (workload != nullptr && !workload->Start()) {
+ if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
return false;
}
- if (!loop.RunLoop()) {
+ if (!loop->RunLoop()) {
return false;
}
if (!event_selection_set_.FinishReadMmapEventData()) {
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index 423fbffb..cdd5593a 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -383,9 +383,7 @@ bool StatCommand::Run(const std::vector<std::string>& args) {
fp = fp_holder.get();
}
- // 4. Create IOEventLoop and add signal/periodic Events.
- IOEventLoop loop;
- event_selection_set_.SetIOEventLoop(loop);
+ // 4. Add signal/periodic Events.
std::chrono::time_point<std::chrono::steady_clock> start_time;
std::vector<CountersInfo> counters;
if (system_wide_collection_ || (!cpus_.empty() && cpus_[0] != -1)) {
@@ -396,13 +394,14 @@ bool StatCommand::Run(const std::vector<std::string>& args) {
if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
return false;
}
- if (!loop.AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
- [&]() { return loop.ExitLoop(); })) {
+ IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
+ if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
+ [&]() { return loop->ExitLoop(); })) {
return false;
}
if (duration_in_sec_ != 0) {
- if (!loop.AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
- [&]() { return loop.ExitLoop(); })) {
+ if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
+ [&]() { return loop->ExitLoop(); })) {
return false;
}
}
@@ -422,8 +421,8 @@ bool StatCommand::Run(const std::vector<std::string>& args) {
};
if (interval_in_ms_ != 0) {
- if (!loop.AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
- print_counters)) {
+ if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
+ print_counters)) {
return false;
}
}
@@ -433,7 +432,7 @@ bool StatCommand::Run(const std::vector<std::string>& args) {
if (workload != nullptr && !workload->Start()) {
return false;
}
- if (!loop.RunLoop()) {
+ if (!loop->RunLoop()) {
return false;
}
@@ -548,7 +547,7 @@ bool StatCommand::AddDefaultMeasuredEventTypes() {
// supported by the kernel.
const EventType* type = FindEventTypeByName(name);
if (type != nullptr &&
- IsEventAttrSupportedByKernel(CreateDefaultPerfEventAttr(*type))) {
+ IsEventAttrSupported(CreateDefaultPerfEventAttr(*type))) {
if (!event_selection_set_.AddEventType(name)) {
return false;
}
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
index 08cf98ff..b78da4c0 100644
--- a/simpleperf/event_fd.cpp
+++ b/simpleperf/event_fd.cpp
@@ -260,7 +260,12 @@ bool EventFd::StartPolling(IOEventLoop& loop,
bool EventFd::StopPolling() { return IOEventLoop::DelEvent(ioevent_ref_); }
-bool IsEventAttrSupportedByKernel(perf_event_attr attr) {
- auto event_fd = EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false);
+bool IsEventAttrSupported(const perf_event_attr& attr) {
+ if (attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS &&
+ attr.config == SIMPLEPERF_CONFIG_INPLACE_SAMPLER) {
+ // User space samplers don't need kernel support.
+ return true;
+ }
+ std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false);
return event_fd != nullptr;
}
diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h
index aaba0ef2..f1ddb551 100644
--- a/simpleperf/event_fd.h
+++ b/simpleperf/event_fd.h
@@ -126,6 +126,6 @@ class EventFd {
DISALLOW_COPY_AND_ASSIGN(EventFd);
};
-bool IsEventAttrSupportedByKernel(perf_event_attr attr);
+bool IsEventAttrSupported(const perf_event_attr& attr);
#endif // SIMPLE_PERF_EVENT_FD_H_
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 038997fb..71c6c723 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -36,7 +36,7 @@ bool IsBranchSamplingSupported() {
perf_event_attr attr = CreateDefaultPerfEventAttr(*type);
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
- return IsEventAttrSupportedByKernel(attr);
+ return IsEventAttrSupported(attr);
}
bool IsDwarfCallChainSamplingSupported() {
@@ -50,7 +50,7 @@ bool IsDwarfCallChainSamplingSupported() {
attr.exclude_callchain_user = 1;
attr.sample_regs_user = GetSupportedRegMask(GetBuildArch());
attr.sample_stack_user = 8192;
- return IsEventAttrSupportedByKernel(attr);
+ return IsEventAttrSupported(attr);
}
bool EventSelectionSet::BuildAndCheckEventSelection(
@@ -78,9 +78,9 @@ bool EventSelectionSet::BuildAndCheckEventSelection(
selection->event_attr.exclude_host = event_type->exclude_host;
selection->event_attr.exclude_guest = event_type->exclude_guest;
selection->event_attr.precise_ip = event_type->precise_ip;
- if (!IsEventAttrSupportedByKernel(selection->event_attr)) {
+ if (!IsEventAttrSupported(selection->event_attr)) {
LOG(ERROR) << "Event type '" << event_type->name
- << "' is not supported by the kernel";
+ << "' is not supported on the device";
return false;
}
selection->event_fds.clear();
@@ -129,6 +129,18 @@ std::vector<const EventType*> EventSelectionSet::GetTracepointEvents() const {
return result;
}
+bool EventSelectionSet::HasInplaceSampler() const {  // True if any selection in any group is the inplace-sampler event.
+ for (const auto& group : groups_) {
+ for (const auto& sel : group) {
+ if (sel.event_attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS &&
+ sel.event_attr.config == SIMPLEPERF_CONFIG_INPLACE_SAMPLER) {  // Both type and config must match.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
std::vector<EventAttrWithId> EventSelectionSet::GetEventAttrWithId() const {
std::vector<EventAttrWithId> result;
for (const auto& group : groups_) {
@@ -138,6 +150,9 @@ std::vector<EventAttrWithId> EventSelectionSet::GetEventAttrWithId() const {
for (const auto& fd : selection.event_fds) {
attr_id.ids.push_back(fd->Id());
}
+ if (!selection.inplace_samplers.empty()) {
+ attr_id.ids.push_back(selection.inplace_samplers[0]->Id());
+ }
result.push_back(attr_id);
}
}
@@ -347,12 +362,24 @@ bool EventSelectionSet::OpenEventFilesOnGroup(EventSelectionGroup& group,
return true;
}
-static std::set<pid_t> PrepareThreads(const std::set<pid_t>& processes,
- const std::set<pid_t>& threads) {
- std::set<pid_t> result = threads;
- for (const auto& pid : processes) {
+static std::map<pid_t, std::set<pid_t>> PrepareThreads(const std::set<pid_t>& processes,
+ const std::set<pid_t>& threads) {
+ std::map<pid_t, std::set<pid_t>> result;  // Maps each pid to the set of thread ids to monitor in it.
+ for (auto& pid : processes) {
std::vector<pid_t> tids = GetThreadsInProcess(pid);
- result.insert(tids.begin(), tids.end());
+ std::set<pid_t>& threads_in_process = result[pid];  // operator[] creates the entry even when tids is empty.
+ threads_in_process.insert(tids.begin(), tids.end());
+ }
+ for (auto& tid : threads) {
+ // tid = -1 means monitoring all threads.
+ if (tid == -1) {
+ result[-1].insert(-1);  // Stored under the pseudo-pid -1.
+ } else {
+ pid_t pid;
+ if (GetProcessForThread(tid, &pid)) {  // A tid whose lookup fails is dropped without any log message.
+ result[pid].insert(tid);
+ }
+ }
}
return result;
}
@@ -367,26 +394,56 @@ bool EventSelectionSet::OpenEventFiles(const std::vector<int>& on_cpus) {
} else {
cpus = GetOnlineCpus();
}
- std::set<pid_t> threads = PrepareThreads(processes_, threads_);
+ std::map<pid_t, std::set<pid_t>> process_map = PrepareThreads(processes_, threads_);
for (auto& group : groups_) {
- for (const auto& tid : threads) {
- size_t success_cpu_count = 0;
- std::string failed_event_type;
- for (const auto& cpu : cpus) {
- if (OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) {
- success_cpu_count++;
+ if (IsUserSpaceSamplerGroup(group)) {
+ if (!OpenUserSpaceSamplersOnGroup(group, process_map)) {
+ return false;
+ }
+ } else {
+ for (const auto& pair : process_map) {
+ for (const auto& tid : pair.second) {
+ size_t success_cpu_count = 0;
+ std::string failed_event_type;
+ for (const auto& cpu : cpus) {
+ if (OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) {
+ success_cpu_count++;
+ }
+ }
+ // As the online cpus can be enabled or disabled at runtime, we may not
+ // open event file for all cpus successfully. But we should open at
+ // least one cpu successfully.
+ if (success_cpu_count == 0) {
+ PLOG(ERROR) << "failed to open perf event file for event_type "
+ << failed_event_type << " for "
+ << (tid == -1 ? "all threads" : "thread " + std::to_string(tid))
+ << " on all cpus";
+ return false;
+ }
}
}
- // As the online cpus can be enabled or disabled at runtime, we may not
- // open event file for all cpus successfully. But we should open at
- // least one cpu successfully.
- if (success_cpu_count == 0) {
- PLOG(ERROR) << "failed to open perf event file for event_type "
- << failed_event_type << " for "
- << (tid == -1 ? "all threads"
- : "thread " + std::to_string(tid))
- << " on all cpus";
- return false;
+ }
+ }
+ return true;
+}
+
+bool EventSelectionSet::IsUserSpaceSamplerGroup(EventSelectionGroup& group) {  // Only single-selection groups qualify.
+ return group.size() == 1 && group[0].event_attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS;  // The size check short-circuits before group[0] is read.
+}
+
+bool EventSelectionSet::OpenUserSpaceSamplersOnGroup(EventSelectionGroup& group,
+ const std::map<pid_t, std::set<pid_t>>& process_map) {
+ CHECK_EQ(group.size(), 1u);
+ for (auto& selection : group) {
+ if (selection.event_attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS &&
+ selection.event_attr.config == SIMPLEPERF_CONFIG_INPLACE_SAMPLER) {
+ for (auto& pair : process_map) {
+ std::unique_ptr<InplaceSamplerClient> sampler = InplaceSamplerClient::Create(
+ selection.event_attr, pair.first, pair.second);
+ if (sampler == nullptr) {
+ return false;
+ }
+ selection.inplace_samplers.push_back(std::move(sampler));
}
}
}
@@ -479,6 +536,12 @@ bool EventSelectionSet::PrepareToReadMmapEventData(const std::function<bool(Reco
}
}
}
+ for (auto& sampler : selection.inplace_samplers) {
+ if (!sampler->StartPolling(*loop_, callback,
+ [&] { return CheckMonitoredTargets(); })) {
+ return false;
+ }
+ }
}
}
@@ -518,6 +581,9 @@ bool EventSelectionSet::ReadMmapEventData() {
}
}
+ if (head_size == 0) {
+ return true;
+ }
if (head_size == 1) {
// Only one buffer has data, process it directly.
std::vector<std::unique_ptr<Record>> records =
@@ -645,17 +711,21 @@ bool EventSelectionSet::HandleCpuOfflineEvent(int cpu) {
bool EventSelectionSet::HandleCpuOnlineEvent(int cpu) {
// We need to start profiling when opening new event files.
SetEnableOnExec(false);
- std::set<pid_t> threads = PrepareThreads(processes_, threads_);
+ std::map<pid_t, std::set<pid_t>> process_map = PrepareThreads(processes_, threads_);
for (auto& group : groups_) {
- for (const auto& tid : threads) {
- std::string failed_event_type;
- if (!OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) {
- // If failed to open event files, maybe the cpu has been offlined.
- PLOG(WARNING) << "failed to open perf event file for event_type "
- << failed_event_type << " for "
- << (tid == -1 ? "all threads"
- : "thread " + std::to_string(tid))
- << " on cpu " << cpu;
+ if (IsUserSpaceSamplerGroup(group)) {
+ continue;
+ }
+ for (const auto& pair : process_map) {
+ for (const auto& tid : pair.second) {
+ std::string failed_event_type;
+ if (!OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) {
+ // If failed to open event files, maybe the cpu has been offlined.
+ PLOG(WARNING) << "failed to open perf event file for event_type "
+ << failed_event_type << " for "
+ << (tid == -1 ? "all threads" : "thread " + std::to_string(tid))
+ << " on cpu " << cpu;
+ }
}
}
}
@@ -723,6 +793,9 @@ bool EventSelectionSet::StopWhenNoMoreTargets(double check_interval_in_sec) {
}
bool EventSelectionSet::CheckMonitoredTargets() {
+ if (!HasSampler()) {
+ return loop_->ExitLoop();
+ }
for (const auto& tid : threads_) {
if (IsThreadAlive(tid)) {
return true;
@@ -735,3 +808,19 @@ bool EventSelectionSet::CheckMonitoredTargets() {
}
return loop_->ExitLoop();
}
+
+bool EventSelectionSet::HasSampler() {  // True while any open event file or unclosed in-place sampler remains.
+ for (auto& group : groups_) {
+ for (auto& sel : group) {
+ if (!sel.event_fds.empty()) {
+ return true;  // Open kernel event files are still a source of samples.
+ }
+ for (auto& sampler : sel.inplace_samplers) {
+ if (!sampler->IsClosed()) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;  // Nothing left to sample; CheckMonitoredTargets() exits the loop in this case.
+}
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index 8aca7840..97ad7e59 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -28,6 +28,8 @@
#include "event_attr.h"
#include "event_fd.h"
#include "event_type.h"
+#include "InplaceSamplerClient.h"
+#include "IOEventLoop.h"
#include "perf_event.h"
#include "record.h"
@@ -47,8 +49,6 @@ struct CountersInfo {
std::vector<CounterInfo> counters;
};
-class IOEventLoop;
-
// EventSelectionSet helps to monitor events. It is used in following steps:
// 1. Create an EventSelectionSet, and add event types to monitor by calling
// AddEventType() or AddEventGroup().
@@ -67,13 +67,14 @@ class IOEventLoop;
class EventSelectionSet {
public:
EventSelectionSet(bool for_stat_cmd)
- : for_stat_cmd_(for_stat_cmd), mmap_pages_(0), loop_(nullptr) {}
+ : for_stat_cmd_(for_stat_cmd), mmap_pages_(0), loop_(new IOEventLoop) {}
bool empty() const { return groups_.empty(); }
bool AddEventType(const std::string& event_name);
bool AddEventGroup(const std::vector<std::string>& event_names);
std::vector<const EventType*> GetTracepointEvents() const;
+ bool HasInplaceSampler() const;
std::vector<EventAttrWithId> GetEventAttrWithId() const;
void SetEnableOnExec(bool enable);
@@ -104,8 +105,8 @@ class EventSelectionSet {
return !processes_.empty() || !threads_.empty();
}
- void SetIOEventLoop(IOEventLoop& loop) {
- loop_ = &loop;
+ IOEventLoop* GetIOEventLoop() {
+ return loop_.get();
}
bool OpenEventFiles(const std::vector<int>& on_cpus);
@@ -128,6 +129,7 @@ class EventSelectionSet {
EventTypeAndModifier event_type_modifier;
perf_event_attr event_attr;
std::vector<std::unique_ptr<EventFd>> event_fds;
+ std::vector<std::unique_ptr<InplaceSamplerClient>> inplace_samplers;
// counters for event files closed for cpu hotplug events
std::vector<CounterInfo> hotplugged_counters;
};
@@ -136,6 +138,9 @@ class EventSelectionSet {
bool BuildAndCheckEventSelection(const std::string& event_name,
EventSelection* selection);
void UnionSampleType();
+ bool IsUserSpaceSamplerGroup(EventSelectionGroup& group);
+ bool OpenUserSpaceSamplersOnGroup(EventSelectionGroup& group,
+ const std::map<pid_t, std::set<pid_t>>& process_map);
bool OpenEventFilesOnGroup(EventSelectionGroup& group, pid_t tid, int cpu,
std::string* failed_event_type);
@@ -147,6 +152,7 @@ class EventSelectionSet {
bool HandleCpuOfflineEvent(int cpu);
bool CreateMappedBufferForCpu(int cpu);
bool CheckMonitoredTargets();
+ bool HasSampler();
const bool for_stat_cmd_;
@@ -155,7 +161,7 @@ class EventSelectionSet {
std::set<pid_t> threads_;
size_t mmap_pages_;
- IOEventLoop* loop_;
+ std::unique_ptr<IOEventLoop> loop_;
std::function<bool(Record*)> record_callback_;
std::set<int> monitored_cpus_;
diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h
index 12d83b3c..a1e401f4 100644
--- a/simpleperf/event_type.h
+++ b/simpleperf/event_type.h
@@ -22,6 +22,14 @@
#include <string>
#include <vector>
+// A uint32_t value far from 0 is picked, so it is unlikely to conflict with further
+// PERF_TYPE_* events.
+static constexpr uint32_t SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS = 32768;
+
+enum {
+ SIMPLEPERF_CONFIG_INPLACE_SAMPLER,
+};
+
// EventType represents one type of event, like cpu_cycle_event, cache_misses_event.
// The user knows one event type by its name, and the kernel knows one event type by its
// (type, config) pair. EventType connects the two representations, and tells the user if
diff --git a/simpleperf/event_type_table.h b/simpleperf/event_type_table.h
index a77be0af..123216c9 100644
--- a/simpleperf/event_type_table.h
+++ b/simpleperf/event_type_table.h
@@ -63,3 +63,5 @@
{"node-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))},
{"node-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))},
+{"inplace-sampler", SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS, SIMPLEPERF_CONFIG_INPLACE_SAMPLER},
+
diff --git a/simpleperf/generate_event_type_table.py b/simpleperf/generate_event_type_table.py
index ff60c236..eaffd60d 100755
--- a/simpleperf/generate_event_type_table.py
+++ b/simpleperf/generate_event_type_table.py
@@ -106,11 +106,18 @@ def gen_hw_cache_events():
return generated_str
+def gen_user_space_events():  # Returns the table entry string for the single user-space event.
+ generated_str = gen_event_type_entry_str("inplace-sampler",
+ "SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS",
+ "SIMPLEPERF_CONFIG_INPLACE_SAMPLER")  # Arguments are: name, type symbol, config symbol.
+ return generated_str
+
def gen_events():
generated_str = "// This file is auto-generated by generate-event_table.py.\n\n"
generated_str += gen_hardware_events() + '\n'
generated_str += gen_software_events() + '\n'
generated_str += gen_hw_cache_events() + '\n'
+ generated_str += gen_user_space_events() + '\n'
return generated_str
generated_str = gen_events()
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index 86da0657..ba04daff 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -464,7 +464,8 @@ SampleRecord::SampleRecord(const perf_event_attr& attr, uint64_t id,
sample_type = attr.sample_type;
CHECK_EQ(0u, sample_type & ~(PERF_SAMPLE_IP | PERF_SAMPLE_TID
| PERF_SAMPLE_TIME | PERF_SAMPLE_ID | PERF_SAMPLE_CPU
- | PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN));
+ | PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER
+ | PERF_SAMPLE_STACK_USER));
ip_data.ip = ip;
tid_data.pid = pid;
tid_data.tid = tid;
@@ -502,6 +503,13 @@ SampleRecord::SampleRecord(const perf_event_attr& attr, uint64_t id,
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
size += sizeof(uint64_t) * (ips.size() + 1);
}
+ if (sample_type & PERF_SAMPLE_REGS_USER) {
+ size += sizeof(uint64_t);
+ }
+ if (sample_type & PERF_SAMPLE_STACK_USER) {
+ size += sizeof(uint64_t);
+ }
+
SetSize(size);
char* new_binary = new char[size];
char* p = new_binary;
@@ -529,6 +537,12 @@ SampleRecord::SampleRecord(const perf_event_attr& attr, uint64_t id,
callchain_data.ips = reinterpret_cast<uint64_t*>(p);
MoveToBinaryFormat(ips.data(), ips.size(), p);
}
+ if (sample_type & PERF_SAMPLE_REGS_USER) {
+ MoveToBinaryFormat(regs_user_data.abi, p);
+ }
+ if (sample_type & PERF_SAMPLE_STACK_USER) {
+ MoveToBinaryFormat(stack_user_data.size, p);
+ }
CHECK_EQ(p, new_binary + size);
UpdateBinary(new_binary);
}
diff --git a/simpleperf/runtest/comm_change.cpp b/simpleperf/runtest/comm_change.cpp
index 12d64fa2..cdcb2bf4 100644
--- a/simpleperf/runtest/comm_change.cpp
+++ b/simpleperf/runtest/comm_change.cpp
@@ -8,9 +8,14 @@ void Function1() {
}
int main() {
- prctl(PR_SET_NAME, reinterpret_cast<unsigned long>("RUN_COMM1"), 0, 0, 0); // NOLINT
- Function1();
- prctl(PR_SET_NAME, reinterpret_cast<unsigned long>("RUN_COMM2"), 0, 0, 0); // NOLINT
- Function1();
+ // Run the test in an infinite loop, so if we profile the test manually, the process
+ // doesn't exit before we attach to it. This scheme also allows simpleperf to control
+ // how long to profile.
+ while (true) {
+ prctl(PR_SET_NAME, reinterpret_cast<unsigned long>("RUN_COMM1"), 0, 0, 0); // NOLINT
+ Function1();
+ prctl(PR_SET_NAME, reinterpret_cast<unsigned long>("RUN_COMM2"), 0, 0, 0); // NOLINT
+ Function1();
+ }  // The loop has no exit, so the return below is never reached.
return 0;
}
diff --git a/simpleperf/runtest/function_fork.cpp b/simpleperf/runtest/function_fork.cpp
index b1477a6a..8551927d 100644
--- a/simpleperf/runtest/function_fork.cpp
+++ b/simpleperf/runtest/function_fork.cpp
@@ -1,4 +1,5 @@
#include <stdlib.h>
+#include <sys/wait.h>
#include <unistd.h>
constexpr int LOOP_COUNT = 100000000;
@@ -19,12 +20,15 @@ void ChildFunction() {
}
int main() {
- pid_t pid = fork();
- if (pid == 0) {
- ChildFunction();
- return 0;
- } else {
- ParentFunction();
+ while (true) {  // The parent never leaves this loop; only a child returns from main.
+ pid_t pid = fork();
+ if (pid == 0) {
+ ChildFunction();
+ return 0;  // The child exits after one round instead of looping and forking itself.
+ } else {
+ ParentFunction();  // NOTE(review): a failed fork (pid == -1) also takes this branch.
+ waitpid(pid, nullptr, 0);  // Wait for this round's child before forking the next one.
+ }
}
return 0;
}
diff --git a/simpleperf/runtest/function_indirect_recursive.cpp b/simpleperf/runtest/function_indirect_recursive.cpp
index 5e70fd32..70645a1b 100644
--- a/simpleperf/runtest/function_indirect_recursive.cpp
+++ b/simpleperf/runtest/function_indirect_recursive.cpp
@@ -19,6 +19,8 @@ void FunctionRecursiveTwo(int loop) {
}
int main() {
- FunctionRecursiveOne(10);
+ while (true) {  // Repeats forever; presumably so the profiler decides when the run ends -- TODO confirm.
+ FunctionRecursiveOne(10);
+ }  // The loop has no exit, so the return below is never reached.
return 0;
}
diff --git a/simpleperf/runtest/function_pthread.cpp b/simpleperf/runtest/function_pthread.cpp
index 02fc0a5f..c80fb3f2 100644
--- a/simpleperf/runtest/function_pthread.cpp
+++ b/simpleperf/runtest/function_pthread.cpp
@@ -17,17 +17,19 @@ void MainThreadFunction() {
}
int main() {
- pthread_t thread;
- int ret = pthread_create(&thread, nullptr, ChildThreadFunction, nullptr);
- if (ret != 0) {
- fprintf(stderr, "pthread_create failed: %s\n", strerror(ret));
- exit(1);
- }
- MainThreadFunction();
- ret = pthread_join(thread, nullptr);
- if (ret != 0) {
- fprintf(stderr, "pthread_join failed: %s\n", strerror(ret));
- exit(1);
+ while (true) {  // Each round creates one child thread and joins it; only a pthread error ends the process.
+ pthread_t thread;
+ int ret = pthread_create(&thread, nullptr, ChildThreadFunction, nullptr);
+ if (ret != 0) {
+ fprintf(stderr, "pthread_create failed: %s\n", strerror(ret));
+ exit(1);
+ }
+ MainThreadFunction();  // Runs on the main thread after the child thread has been created.
+ ret = pthread_join(thread, nullptr);  // The child is joined before the next round creates a new one.
+ if (ret != 0) {
+ fprintf(stderr, "pthread_join failed: %s\n", strerror(ret));
+ exit(1);
+ }
}
return 0;
}
diff --git a/simpleperf/runtest/function_recursive.cpp b/simpleperf/runtest/function_recursive.cpp
index d8d28bcc..bf60668b 100644
--- a/simpleperf/runtest/function_recursive.cpp
+++ b/simpleperf/runtest/function_recursive.cpp
@@ -11,6 +11,8 @@ void FunctionRecursive(int loop) {
}
int main() {
- FunctionRecursive(10);
+ while (true) {  // Repeats forever; presumably so the profiler decides when the run ends -- TODO confirm.
+ FunctionRecursive(10);
+ }  // The loop has no exit, so the return below is never reached.
return 0;
}
diff --git a/simpleperf/runtest/one_function.cpp b/simpleperf/runtest/one_function.cpp
index 49090aca..561bb5a5 100644
--- a/simpleperf/runtest/one_function.cpp
+++ b/simpleperf/runtest/one_function.cpp
@@ -6,6 +6,8 @@ void Function1() {
}
int main() {
- Function1();
+ while (true) {  // loop forever; runtest.py bounds the recording with --duration
+ Function1();
+ }
return 0;
}
diff --git a/simpleperf/runtest/runtest.py b/simpleperf/runtest/runtest.py
index bbfdc48a..77fc5669 100644
--- a/simpleperf/runtest/runtest.py
+++ b/simpleperf/runtest/runtest.py
@@ -277,22 +277,29 @@ def load_symbol_relation_requirement(symbol_item):
class Runner(object):
- def __init__(self, perf_path):
+ def __init__(self, target, perf_path):
+ self.target = target  # 'host' or 'device' as passed by the subclasses; printed in the result line
self.perf_path = perf_path
+ self.use_callgraph = False  # when True, record() adds '-f 1000 -g' and report() adds '-g callee'
+ self.sampler = 'cpu-cycles'  # event type; record() passes it to -e as '<sampler>:u'
def record(self, test_executable_name, record_file, additional_options=[]):
- call_args = [self.perf_path,
- 'record'] + additional_options + ['-e',
- 'cpu-cycles:u',
- '-o',
- record_file,
- test_executable_name]
+ call_args = [self.perf_path, 'record']
+ call_args += ['--duration', '1']  # bound the recording; presumably seconds - confirm against simpleperf
+ call_args += ['-e', '%s:u' % self.sampler]  # ':u' suffix kept from the old hard-coded 'cpu-cycles:u'
+ if self.use_callgraph:
+ call_args += ['-f', '1000', '-g']
+ call_args += ['-o', record_file]
+ call_args += additional_options  # caller-supplied options now follow the fixed ones
+ call_args += [test_executable_name]  # the program to profile goes last
self._call(call_args)
def report(self, record_file, report_file, additional_options=[]):
- call_args = [self.perf_path,
- 'report'] + additional_options + ['-i',
- record_file]
+ call_args = [self.perf_path, 'report']
+ call_args += ['-i', record_file]
+ if self.use_callgraph:
+ call_args += ['-g', 'callee']  # same flag that makes record() pass -g
+ call_args += additional_options  # caller-supplied options are appended last
self._call(call_args, report_file)
def _call(self, args, output_file=None):
@@ -303,6 +310,9 @@ class HostRunner(Runner):
"""Run perf test on host."""
+ def __init__(self, perf_path):
+ super(HostRunner, self).__init__('host', perf_path)
+
def _call(self, args, output_file=None):
output_fh = None
if output_file is not None:
@@ -318,8 +328,8 @@ class DeviceRunner(Runner):
def __init__(self, perf_path):
self.tmpdir = '/data/local/tmp/'
+ super(DeviceRunner, self).__init__('device', self.tmpdir + perf_path)  # base class stores the target label and the on-device perf path
self._download(os.environ['OUT'] + '/system/xbin/' + perf_path, self.tmpdir)
- self.perf_path = self.tmpdir + perf_path
def _call(self, args, output_file=None):
output_fh = None
@@ -518,7 +528,7 @@ class ReportAnalyzer(object):
return result
-def runtest(host, device, normal, callgraph, selected_tests):
+def runtest(host, device, normal, callgraph, use_inplace_sampler, selected_tests):
tests = load_config_file(os.path.dirname(os.path.realpath(__file__)) + \
'/runtest.conf')
host_runner = HostRunner('simpleperf')
@@ -581,26 +591,76 @@ def runtest(host, device, normal, callgraph, selected_tests):
if not result:
exit(1)
+
+def build_runner(target, use_callgraph, sampler):
+ if target == 'host':
+ runner = HostRunner('simpleperf')
+ else:
+ runner = DeviceRunner('simpleperf')
+ runner.use_callgraph = use_callgraph
+ runner.sampler = sampler
+ return runner
+
+
+def test_with_runner(runner, tests):
+ report_analyzer = ReportAnalyzer()
+ for test in tests:
+ runner.record(test.executable_name, 'perf.data')
+ if runner.sampler == 'inplace-sampler':
+ # TODO: fix this when inplace-sampler actually works.
+ runner.report('perf.data', 'perf.report')
+ symbols = report_analyzer._read_report_file('perf.report', runner.use_callgraph)
+ result = False
+ if len(symbols) == 1 and symbols[0].name == 'fake_elf[+0]':
+ result = True
+ else:
+ runner.report('perf.data', 'perf.report', additional_options = test.report_options)
+ result = report_analyzer.check_report_file(test, 'perf.report', runner.use_callgraph)
+ str = 'test %s on %s ' % (test.test_name, runner.target)
+ if runner.use_callgraph:
+ str += 'with call graph '
+ str += 'using %s ' % runner.sampler
+ str += ' Succeeded' if result else 'Failed'
+ print str
+ if not result:
+ exit(1)
+
+
+def runtest(target_options, use_callgraph_options, sampler_options, selected_tests):
+ tests = load_config_file(os.path.dirname(os.path.realpath(__file__)) + \
+ '/runtest.conf')
+ if selected_tests is not None:
+ new_tests = []
+ for test in tests:
+ if test.test_name in selected_tests:
+ new_tests.append(test)
+ tests = new_tests
+ for target in target_options:
+ for use_callgraph in use_callgraph_options:
+ for sampler in sampler_options:
+ runner = build_runner(target, use_callgraph, sampler)
+ test_with_runner(runner, tests)
+
+
def main():
- host = True
- device = True
- normal = True
- callgraph = True
+ target_options = ['host', 'target']
+ use_callgraph_options = [False, True]
+ sampler_options = ['cpu-cycles', 'inplace-sampler']
selected_tests = None
i = 1
while i < len(sys.argv):
if sys.argv[i] == '--host':
- host = True
- device = False
+ use_callgraph_options = ['host']
elif sys.argv[i] == '--device':
- host = False
- device = True
+ use_callgraph_options = ['device']
elif sys.argv[i] == '--normal':
- normal = True
- callgraph = False
+ use_callgraph_options = [False]
elif sys.argv[i] == '--callgraph':
- normal = False
- callgraph = True
+ use_callgraph_options = [True]
+ elif sys.argv[i] == '--no-inplace-sampler':
+ sampler_options = ['cpu-cycles']
+ elif sys.argv[i] == '--inplace-sampler':
+ sampler_options = ['inplace-sampler']
elif sys.argv[i] == '--test':
if i < len(sys.argv):
i += 1
@@ -609,7 +669,7 @@ def main():
selected_tests = {}
selected_tests[test] = True
i += 1
- runtest(host, device, normal, callgraph, selected_tests)
+ runtest(target_options, use_callgraph_options, sampler_options, selected_tests)
if __name__ == '__main__':
main()
diff --git a/simpleperf/runtest/two_functions.cpp b/simpleperf/runtest/two_functions.cpp
index 1d3e3893..b74c1538 100644
--- a/simpleperf/runtest/two_functions.cpp
+++ b/simpleperf/runtest/two_functions.cpp
@@ -18,7 +18,9 @@ void Function2() {
}
int main() {
- Function1();
- Function2();
+ while (true) {  // loop forever; runtest.py bounds the recording with --duration
+ Function1();
+ Function2();
+ }
return 0;
}
diff --git a/simpleperf/workload.h b/simpleperf/workload.h
index 2141830f..9d9d5952 100644
--- a/simpleperf/workload.h
+++ b/simpleperf/workload.h
@@ -40,6 +40,9 @@ class Workload {
~Workload();
bool Start();
+  bool IsStarted() const {  // const: only compares work_state_ with Started
+    return work_state_ == Started;
+  }
pid_t GetPid() {
return work_pid_;
}