summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin Cui <yabinc@google.com>2015-10-23 00:17:29 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2015-10-23 00:17:29 +0000
commite7182b0cc45b447943b51e36aa1feb2d17257875 (patch)
treef0bbf50c5816e3afc84064d232b7e4a684530c69
parentb5ff8a9895b21da46a7d0c6b082db363e1f9929d (diff)
parentf469c3d7f4466a2a31c3e837969c43c1e3bc4c90 (diff)
downloadextras-e7182b0cc45b447943b51e36aa1feb2d17257875.tar.gz
Merge "Simpleperf: do stack unwinding while recording."
-rw-r--r--simpleperf/cmd_record.cpp131
-rw-r--r--simpleperf/cmd_record_test.cpp12
-rw-r--r--simpleperf/event_fd.cpp34
-rw-r--r--simpleperf/event_fd.h14
-rw-r--r--simpleperf/event_selection_set.cpp1
-rw-r--r--simpleperf/record.cpp91
-rw-r--r--simpleperf/record.h38
-rw-r--r--simpleperf/record_test.cpp46
8 files changed, 304 insertions, 63 deletions
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 04c6d7d4..ccc499c9 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -97,6 +97,11 @@ class RecordCommand : public Command {
" -o record_file_name Set record file name, default is perf.data.\n"
" -p pid1,pid2,...\n"
" Record events on existing processes. Mutually exclusive with -a.\n"
+ " --post-unwind\n"
+ " If `--call-graph dwarf` option is used, then the user's stack will\n"
+ " be unwound while recording by default. But it may lose records as\n"
+ " stacking unwinding can be time consuming. Use this option to unwind\n"
+ " the user's stack after recording.\n"
" -t tid1,tid2,...\n"
" Record events on existing threads. Mutually exclusive with -a.\n"),
use_sample_freq_(true),
@@ -107,6 +112,7 @@ class RecordCommand : public Command {
dwarf_callchain_sampling_(false),
dump_stack_size_in_dwarf_sampling_(8192),
unwind_dwarf_callchain_(true),
+ post_unwind_(false),
child_inherit_(true),
perf_mmap_pages_(256),
record_filename_("perf.data") {
@@ -124,10 +130,12 @@ class RecordCommand : public Command {
bool AddMeasuredEventType(const std::string& event_type_name);
bool SetEventSelection();
bool CreateRecordFile();
- bool WriteData(const char* data, size_t size);
bool DumpKernelAndModuleMmaps();
bool DumpThreadCommAndMmaps(bool all_threads, const std::vector<pid_t>& selected_threads);
- bool UnwindDwarfCallChain();
+ bool CollectRecordsFromKernel(const char* data, size_t size);
+ bool ProcessRecord(Record* record);
+ void UnwindRecord(Record* record);
+ bool PostUnwind();
bool DumpAdditionalFeatures(const std::vector<std::string>& args);
bool DumpBuildIdFeature();
bool GetHitFiles(std::set<std::string>* kernel_modules, std::set<std::string>* user_files);
@@ -142,6 +150,7 @@ class RecordCommand : public Command {
bool dwarf_callchain_sampling_;
uint32_t dump_stack_size_in_dwarf_sampling_;
bool unwind_dwarf_callchain_;
+ bool post_unwind_;
bool child_inherit_;
std::vector<pid_t> monitored_threads_;
std::vector<EventTypeAndModifier> measured_event_types_;
@@ -150,6 +159,8 @@ class RecordCommand : public Command {
// mmap pages used by each perf event file, should be power of 2.
const size_t perf_mmap_pages_;
+ std::unique_ptr<RecordCache> record_cache_;
+ ThreadTree thread_tree_;
std::string record_filename_;
std::unique_ptr<RecordFileWriter> record_file_writer_;
@@ -220,8 +231,10 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
if (workload != nullptr && !workload->Start()) {
return false;
}
- auto callback =
- std::bind(&RecordCommand::WriteData, this, std::placeholders::_1, std::placeholders::_2);
+ record_cache_.reset(new RecordCache(
+ *event_selection_set_.FindEventAttrByType(measured_event_types_[0]), 1000u, 1000000u));
+ auto callback = std::bind(&RecordCommand::CollectRecordsFromKernel, this, std::placeholders::_1,
+ std::placeholders::_2);
while (true) {
if (!event_selection_set_.ReadMmapEventData(callback)) {
return false;
@@ -231,10 +244,16 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
}
poll(&pollfds[0], pollfds.size(), -1);
}
+ std::vector<std::unique_ptr<Record>> records = record_cache_->PopAll();
+ for (auto& r : records) {
+ if (!ProcessRecord(r.get())) {
+ return false;
+ }
+ }
// 6. Unwind dwarf callchain.
- if (unwind_dwarf_callchain_) {
- if (!UnwindDwarfCallChain()) {
+ if (post_unwind_) {
+ if (!PostUnwind()) {
return false;
}
}
@@ -350,6 +369,8 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
if (!GetValidThreadsFromProcessString(args[i], &tid_set)) {
return false;
}
+ } else if (args[i] == "--post-unwind") {
+ post_unwind_ = true;
} else if (args[i] == "-t") {
if (!NextArgumentOrError(args, &i)) {
return false;
@@ -370,6 +391,16 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
}
unwind_dwarf_callchain_ = false;
}
+ if (post_unwind_) {
+ if (!dwarf_callchain_sampling_) {
+ LOG(ERROR) << "--post-unwind is only used with `--call-graph dwarf` option.";
+ return false;
+ }
+ if (!unwind_dwarf_callchain_) {
+ LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
+ return false;
+ }
+ }
monitored_threads_.insert(monitored_threads_.end(), tid_set.begin(), tid_set.end());
if (system_wide_collection_ && !monitored_threads_.empty()) {
@@ -454,10 +485,6 @@ bool RecordCommand::CreateRecordFile() {
return true;
}
-bool RecordCommand::WriteData(const char* data, size_t size) {
- return record_file_writer_->WriteData(data, size);
-}
-
bool RecordCommand::DumpKernelAndModuleMmaps() {
KernelMmap kernel_mmap;
std::vector<ModuleMmap> module_mmaps;
@@ -468,7 +495,7 @@ bool RecordCommand::DumpKernelAndModuleMmaps() {
CHECK(attr != nullptr);
MmapRecord mmap_record = CreateMmapRecord(*attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
kernel_mmap.len, kernel_mmap.pgoff, kernel_mmap.name);
- if (!record_file_writer_->WriteData(mmap_record.BinaryFormat())) {
+ if (!ProcessRecord(&mmap_record)) {
return false;
}
for (auto& module_mmap : module_mmaps) {
@@ -478,7 +505,7 @@ bool RecordCommand::DumpKernelAndModuleMmaps() {
}
MmapRecord mmap_record = CreateMmapRecord(*attr, true, UINT_MAX, 0, module_mmap.start_addr,
module_mmap.len, 0, filename);
- if (!record_file_writer_->WriteData(mmap_record.BinaryFormat())) {
+ if (!ProcessRecord(&mmap_record)) {
return false;
}
}
@@ -515,7 +542,7 @@ bool RecordCommand::DumpThreadCommAndMmaps(bool all_threads,
continue;
}
CommRecord record = CreateCommRecord(*attr, thread.pid, thread.tid, thread.comm);
- if (!record_file_writer_->WriteData(record.BinaryFormat())) {
+ if (!ProcessRecord(&record)) {
return false;
}
std::vector<ThreadMmap> thread_mmaps;
@@ -530,7 +557,7 @@ bool RecordCommand::DumpThreadCommAndMmaps(bool all_threads,
MmapRecord record =
CreateMmapRecord(*attr, false, thread.pid, thread.tid, thread_mmap.start_addr,
thread_mmap.len, thread_mmap.pgoff, thread_mmap.name);
- if (!record_file_writer_->WriteData(record.BinaryFormat())) {
+ if (!ProcessRecord(&record)) {
return false;
}
}
@@ -545,45 +572,69 @@ bool RecordCommand::DumpThreadCommAndMmaps(bool all_threads,
continue;
}
ForkRecord fork_record = CreateForkRecord(*attr, thread.pid, thread.tid, thread.pid, thread.pid);
- if (!record_file_writer_->WriteData(fork_record.BinaryFormat())) {
+ if (!ProcessRecord(&fork_record)) {
return false;
}
CommRecord comm_record = CreateCommRecord(*attr, thread.pid, thread.tid, thread.comm);
- if (!record_file_writer_->WriteData(comm_record.BinaryFormat())) {
+ if (!ProcessRecord(&comm_record)) {
return false;
}
}
return true;
}
-bool RecordCommand::UnwindDwarfCallChain() {
+bool RecordCommand::CollectRecordsFromKernel(const char* data, size_t size) {  // Callback bound in Run() and passed to ReadMmapEventData(): caches the raw record data, then processes every record the cache is ready to release.
+ record_cache_->Push(data, size);  // The cache parses the buffer into records and queues them.
+ while (true) {
+ std::unique_ptr<Record> r = record_cache_->Pop();
+ if (r == nullptr) {  // nullptr means the cache will not release another record yet.
+ break;
+ }
+ if (!ProcessRecord(r.get())) {  // Stop and report failure as soon as one record cannot be processed.
+ return false;
+ }
+ }
+ return true;
+}
+
+bool RecordCommand::ProcessRecord(Record* record) {  // Optionally unwinds the record in place, then writes its binary form to the record file; returns the result of that write.
+ if (unwind_dwarf_callchain_ && !post_unwind_) {  // Unwind while recording only if unwinding is enabled and --post-unwind has not deferred it.
+ UnwindRecord(record);
+ }
+ bool result = record_file_writer_->WriteData(record->BinaryFormat());
+ return result;
+}
+
+void RecordCommand::UnwindRecord(Record* record) {  // Feeds the record to BuildThreadTree() and, for sample records with user regs and stack data, replaces that raw data with an unwound user callchain.
+ BuildThreadTree(*record, &thread_tree_);  // Called for every record type, not only samples.
+ if (record->header.type == PERF_RECORD_SAMPLE) {
+ SampleRecord& r = *static_cast<SampleRecord*>(record);
+ if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
+ (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
+ (!r.stack_user_data.data.empty())) {  // Unwind only when the sample has a callchain field plus non-empty user register and user stack data.
+ ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
+ RegSet regs = CreateRegSet(r.regs_user_data.reg_mask, r.regs_user_data.regs);
+ std::vector<char>& stack = r.stack_user_data.data;
+ std::vector<uint64_t> unwind_ips = UnwindCallChain(*thread, regs, stack);
+ r.callchain_data.ips.push_back(PERF_CONTEXT_USER);  // Append the PERF_CONTEXT_USER marker, then the unwound ips after it.
+ r.callchain_data.ips.insert(r.callchain_data.ips.end(), unwind_ips.begin(), unwind_ips.end());
+ r.regs_user_data.abi = 0;  // From here on, drop the raw register and stack data from the record.
+ r.regs_user_data.reg_mask = 0;
+ r.regs_user_data.regs.clear();
+ r.stack_user_data.data.clear();
+ r.stack_user_data.dyn_size = 0;
+ r.AdjustSizeBasedOnData();  // presumably recomputes the record size after the fields above changed — confirm in SampleRecord.
+ }
+ }
+}
+
+bool RecordCommand::PostUnwind() {  // Reads all records back from the data section, unwinds each one, and rewrites the data section; returns false if reading or writing fails.
std::vector<std::unique_ptr<Record>> records;
if (!record_file_writer_->ReadDataSection(&records)) {
return false;
}
- ThreadTree thread_tree;
- for (auto& record : records) {
- BuildThreadTree(*record, &thread_tree);
- if (record->header.type == PERF_RECORD_SAMPLE) {
- SampleRecord& r = *static_cast<SampleRecord*>(record.get());
- if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
- (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
- (!r.stack_user_data.data.empty())) {
- ThreadEntry* thread = thread_tree.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
- RegSet regs = CreateRegSet(r.regs_user_data.reg_mask, r.regs_user_data.regs);
- std::vector<char>& stack = r.stack_user_data.data;
- std::vector<uint64_t> unwind_ips = UnwindCallChain(*thread, regs, stack);
- r.callchain_data.ips.push_back(PERF_CONTEXT_USER);
- r.callchain_data.ips.insert(r.callchain_data.ips.end(), unwind_ips.begin(),
- unwind_ips.end());
- r.regs_user_data.abi = 0;
- r.regs_user_data.reg_mask = 0;
- r.regs_user_data.regs.clear();
- r.stack_user_data.data.clear();
- r.stack_user_data.dyn_size = 0;
- r.AdjustSizeBasedOnData();
- }
- }
+ for (auto& r : records) {  // Same per-record unwinding as the while-recording path, via the shared UnwindRecord() helper.
+ UnwindRecord(r.get());
}
return record_file_writer_->WriteDataSection(records);
}
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 29ddf765..a9466ac9 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -131,6 +131,18 @@ TEST(record_cmd, no_unwind_option) {
ASSERT_FALSE(RecordCmd()->Run({"--no-unwind", "sleep", "1"}));
}
+TEST(record_cmd, post_unwind_option) {  // Checks that --post-unwind is accepted together with `--call-graph dwarf` and rejected in the two invalid combinations.
+ if (IsDwarfCallChainSamplingSupported()) {
+ ASSERT_TRUE(RecordCmd()->Run({"--call-graph", "dwarf", "--post-unwind", "sleep", "1"}));
+ } else {  // NOTE(review): despite the log text, the two ASSERT_FALSE checks below still run in this case.
+ GTEST_LOG_(INFO)
+ << "This test does nothing as dwarf callchain sampling is not supported on this device.";
+ }
+ ASSERT_FALSE(RecordCmd()->Run({"--post-unwind", "sleep", "1"}));  // --post-unwind without `--call-graph dwarf` is rejected by option parsing.
+ ASSERT_FALSE(
+ RecordCmd()->Run({"--call-graph", "dwarf", "--no-unwind", "--post-unwind", "sleep", "1"}));  // --post-unwind combined with --no-unwind is rejected as well.
+}
+
TEST(record_cmd, existing_processes) {
std::vector<std::unique_ptr<Workload>> workloads;
CreateProcesses(2, &workloads);
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
index 29be60fa..6f9da385 100644
--- a/simpleperf/event_fd.cpp
+++ b/simpleperf/event_fd.cpp
@@ -18,6 +18,7 @@
#include <fcntl.h>
#include <stdio.h>
+#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
@@ -33,6 +34,8 @@
#include "perf_event.h"
#include "utils.h"
+std::vector<char> EventFd::data_process_buffer_;
+
static int perf_event_open(perf_event_attr* attr, pid_t pid, int cpu, int group_fd,
unsigned long flags) {
return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
@@ -133,6 +136,9 @@ bool EventFd::MmapContent(size_t mmap_pages) {
mmap_metadata_page_ = reinterpret_cast<perf_event_mmap_page*>(mmap_addr_);
mmap_data_buffer_ = reinterpret_cast<char*>(mmap_addr_) + page_size;
mmap_data_buffer_size_ = mmap_len_ - page_size;
+ if (data_process_buffer_.size() < mmap_data_buffer_size_) {
+ data_process_buffer_.resize(mmap_data_buffer_size_);
+ }
return true;
}
@@ -148,9 +154,9 @@ size_t EventFd::GetAvailableMmapData(char** pdata) {
// in [write_head, read_head). The kernel is responsible for updating write_head, and the user
// is responsible for updating read_head.
- uint64_t buf_mask = mmap_data_buffer_size_ - 1;
- uint64_t write_head = mmap_metadata_page_->data_head & buf_mask;
- uint64_t read_head = mmap_metadata_page_->data_tail & buf_mask;
+ size_t buf_mask = mmap_data_buffer_size_ - 1;
+ size_t write_head = static_cast<size_t>(mmap_metadata_page_->data_head & buf_mask);
+ size_t read_head = static_cast<size_t>(mmap_metadata_page_->data_tail & buf_mask);
if (read_head == write_head) {
// No available data.
@@ -160,12 +166,28 @@ size_t EventFd::GetAvailableMmapData(char** pdata) {
// Make sure we can see the data after the fence.
std::atomic_thread_fence(std::memory_order_acquire);
- *pdata = mmap_data_buffer_ + read_head;
+ // Copy records from mapped buffer to data_process_buffer. Note that records can be wrapped
+ // at the end of the mapped buffer.
+ char* to = data_process_buffer_.data();
if (read_head < write_head) {
- return write_head - read_head;
+ char* from = mmap_data_buffer_ + read_head;
+ size_t n = write_head - read_head;
+ memcpy(to, from, n);
+ to += n;
} else {
- return mmap_data_buffer_size_ - read_head;
+ char* from = mmap_data_buffer_ + read_head;
+ size_t n = mmap_data_buffer_size_ - read_head;
+ memcpy(to, from, n);
+ to += n;
+ from = mmap_data_buffer_;
+ n = write_head;
+ memcpy(to, from, n);
+ to += n;
}
+ size_t read_bytes = to - data_process_buffer_.data();
+ *pdata = data_process_buffer_.data();
+ DiscardMmapData(read_bytes);
+ return read_bytes;
}
void EventFd::DiscardMmapData(size_t discard_size) {
diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h
index 12427912..4e123fbe 100644
--- a/simpleperf/event_fd.h
+++ b/simpleperf/event_fd.h
@@ -22,6 +22,7 @@
#include <memory>
#include <string>
+#include <vector>
#include <base/macros.h>
@@ -68,10 +69,6 @@ class EventFd {
// the start address and size of the data.
size_t GetAvailableMmapData(char** pdata);
- // Discard how much data we have read, so the kernel can reuse this part of mapped area to store
- // new data.
- void DiscardMmapData(size_t discard_size);
-
// Prepare pollfd for poll() to wait on available mmap_data.
void PreparePollForMmapData(pollfd* poll_fd);
@@ -89,6 +86,10 @@ class EventFd {
// Give information about this perf_event_file, like (event_name, tid, cpu).
std::string Name() const;
+ // Discard how much data we have read, so the kernel can reuse this part of mapped area to store
+ // new data.
+ void DiscardMmapData(size_t discard_size);
+
int perf_event_fd_;
mutable uint64_t id_;
const std::string event_name_;
@@ -102,6 +103,11 @@ class EventFd {
// by the kernel.
size_t mmap_data_buffer_size_;
+ // As mmap_data_buffer is a ring buffer, it is possible that one record is wrapped at the
+ // end of the buffer. So we need to copy records from mmap_data_buffer to data_process_buffer
+ // before processing them.
+ static std::vector<char> data_process_buffer_;
+
DISALLOW_COPY_AND_ASSIGN(EventFd);
};
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index b7fcd581..e42b89a4 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -272,7 +272,6 @@ static bool ReadMmapEventDataForFd(std::unique_ptr<EventFd>& event_fd,
return false;
}
*have_data = true;
- event_fd->DiscardMmapData(size);
}
return true;
}
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index 3ebd325c..d7a350bf 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -176,6 +176,10 @@ void Record::Dump(size_t indent) const {
sample_id.Dump(indent + 1);
}
+uint64_t Record::Timestamp() const {  // Default implementation: the time stored in the record's sample_id data.
+ return sample_id.time_data.time;
+}
+
MmapRecord::MmapRecord(const perf_event_attr& attr, const perf_event_header* pheader)
: Record(pheader) {
const char* p = reinterpret_cast<const char*>(pheader + 1);
@@ -509,6 +513,10 @@ void SampleRecord::DumpData(size_t indent) const {
}
}
+uint64_t SampleRecord::Timestamp() const {  // Sample records return their own time_data field instead of the sample_id time used by the base class.
+ return time_data.time;
+}
+
BuildIdRecord::BuildIdRecord(const perf_event_header* pheader) : Record(pheader) {
const char* p = reinterpret_cast<const char*>(pheader + 1);
const char* end = reinterpret_cast<const char*>(pheader) + pheader->size;
@@ -575,14 +583,11 @@ static std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr,
}
}
-static bool IsRecordHappensBefore(const std::unique_ptr<Record>& r1,
- const std::unique_ptr<Record>& r2) {
+static bool IsRecordHappensBefore(const Record* r1, const Record* r2) {
bool is_r1_sample = (r1->header.type == PERF_RECORD_SAMPLE);
bool is_r2_sample = (r2->header.type == PERF_RECORD_SAMPLE);
- uint64_t time1 = (is_r1_sample ? static_cast<const SampleRecord*>(r1.get())->time_data.time
- : r1->sample_id.time_data.time);
- uint64_t time2 = (is_r2_sample ? static_cast<const SampleRecord*>(r2.get())->time_data.time
- : r2->sample_id.time_data.time);
+ uint64_t time1 = r1->Timestamp();
+ uint64_t time2 = r2->Timestamp();
// The record with smaller time happens first.
if (time1 != time2) {
return time1 < time2;
@@ -596,20 +601,29 @@ static bool IsRecordHappensBefore(const std::unique_ptr<Record>& r1,
return false;
}
-std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(const perf_event_attr& attr,
- const char* buf, size_t buf_size) {
+static std::vector<std::unique_ptr<Record>> ReadUnsortedRecordsFromBuffer(
+ const perf_event_attr& attr, const char* buf, size_t buf_size) {  // Parses consecutive records from [buf, buf + buf_size) and returns them in buffer order, without sorting.
std::vector<std::unique_ptr<Record>> result;
const char* p = buf;
const char* end = buf + buf_size;
while (p < end) {
const perf_event_header* header = reinterpret_cast<const perf_event_header*>(p);
- if (p + header->size <= end) {
- result.push_back(ReadRecordFromBuffer(attr, header));
- }
+ CHECK_LE(p + header->size, end);  // A record must not extend past the end of the buffer.
+ CHECK_NE(0u, header->size);  // A zero size would never advance p, so the loop could not terminate.
+ result.push_back(ReadRecordFromBuffer(attr, header));
p += header->size;
}
+ return result;
+}
+
+std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(const perf_event_attr& attr,
+ const char* buf, size_t buf_size) {  // Parses all records in the buffer and, when the attr provides timestamps, returns them sorted by IsRecordHappensBefore().
+ std::vector<std::unique_ptr<Record>> result = ReadUnsortedRecordsFromBuffer(attr, buf, buf_size);  // Parse in buffer order first.
if ((attr.sample_type & PERF_SAMPLE_TIME) && attr.sample_id_all) {
- std::sort(result.begin(), result.end(), IsRecordHappensBefore);
+ std::sort(result.begin(), result.end(),  // The lambda adapts the unique_ptr elements to the raw-pointer comparison function.
+ [](const std::unique_ptr<Record>& r1, const std::unique_ptr<Record>& r2) {
+ return IsRecordHappensBefore(r1.get(), r2.get());
+ });
}
return result;
}
@@ -673,3 +687,56 @@ BuildIdRecord CreateBuildIdRecord(bool in_kernel, pid_t pid, const BuildId& buil
ALIGN(record.build_id.Size(), 8) + ALIGN(filename.size() + 1, 64);
return record;
}
+
+bool RecordCache::RecordComparator::operator()(const Record* r1, const Record* r2) {  // Ordering used by the priority queue holding cached records.
+ return IsRecordHappensBefore(r2, r1);  // Arguments are swapped so that the queue's top() is the record that happens first.
+}
+
+RecordCache::RecordCache(const perf_event_attr& attr, size_t min_cache_size,
+ uint64_t min_time_diff_in_ns)  // Stores a copy of attr and the two release thresholds; the cache starts empty.
+ : attr_(attr),
+ has_timestamp_(attr.sample_id_all && (attr.sample_type & PERF_SAMPLE_TIME)),  // Same condition ReadRecordsFromBuffer() uses to decide whether to sort by time.
+ min_cache_size_(min_cache_size),
+ min_time_diff_in_ns_(min_time_diff_in_ns),
+ last_time_(0),  // Largest timestamp pushed so far; nothing pushed yet.
+ queue_(RecordComparator()) {
+}
+
+RecordCache::~RecordCache() {  // Frees any records still held in the queue.
+ PopAll();  // PopAll() wraps each raw pointer in a unique_ptr; discarding the returned vector deletes the records.
+}
+
+void RecordCache::Push(const char* data, size_t size) {  // Parses the records contained in [data, data + size) and adds them to the queue.
+ std::vector<std::unique_ptr<Record>> records = ReadUnsortedRecordsFromBuffer(attr_, data, size);
+ if (has_timestamp_) {
+ for (const auto& r : records) {
+ last_time_ = std::max(last_time_, r->Timestamp());  // Track the latest timestamp seen; Pop() compares against it.
+ }
+ }
+ for (auto& r : records) {
+ queue_.push(r.release());  // The queue stores raw pointers, so ownership moves from the unique_ptr to the cache.
+ }
+}
+
+std::unique_ptr<Record> RecordCache::Pop() {  // Returns the earliest cached record if both release checks pass, otherwise nullptr.
+ if (queue_.size() < min_cache_size_) {  // Check 1: keep at least min_cache_size_ records cached before releasing any.
+ return nullptr;
+ }
+ Record* r = queue_.top();
+ if (has_timestamp_) {
+ if (r->Timestamp() + min_time_diff_in_ns_ > last_time_) {  // Check 2: the record must be at least min_time_diff_in_ns_ older than the latest timestamp seen.
+ return nullptr;
+ }
+ }
+ queue_.pop();
+ return std::unique_ptr<Record>(r);  // Ownership of the record passes to the caller.
+}
+
+std::vector<std::unique_ptr<Record>> RecordCache::PopAll() {  // Empties the cache unconditionally, returning every record in the order top() yields them.
+ std::vector<std::unique_ptr<Record>> result;
+ while (!queue_.empty()) {
+ result.emplace_back(queue_.top());  // Constructs a unique_ptr from the raw pointer, taking ownership.
+ queue_.pop();
+ }
+ return result;
+}
diff --git a/simpleperf/record.h b/simpleperf/record.h
index d7dfe19f..dd242484 100644
--- a/simpleperf/record.h
+++ b/simpleperf/record.h
@@ -19,6 +19,7 @@
#include <sys/types.h>
+#include <queue>
#include <string>
#include <vector>
@@ -142,6 +143,7 @@ struct Record {
void Dump(size_t indent = 0) const;
virtual std::vector<char> BinaryFormat() const = 0;
+ virtual uint64_t Timestamp() const;
protected:
virtual void DumpData(size_t) const = 0;
@@ -257,6 +259,7 @@ struct SampleRecord : public Record {
SampleRecord(const perf_event_attr& attr, const perf_event_header* pheader);
std::vector<char> BinaryFormat() const override;
void AdjustSizeBasedOnData();
+ uint64_t Timestamp() const override;
protected:
void DumpData(size_t indent) const override;
@@ -290,6 +293,41 @@ struct UnknownRecord : public Record {
void DumpData(size_t indent) const override;
};
+// RecordCache is a cache used when receiving records from the kernel.
+// It sorts received records based on type and timestamp, and pops records
+// in sorted order. Records from the kernel need to be sorted because
+// records may come from different cpus at the same time, and it is affected
+// by the order in which we collect records from different cpus.
+// RecordCache pushes records and pops sorted records online. It uses two checks to help
+// ensure that records are popped in order. Each time we pop a record A, it is the earliest record
+// among all records in the cache. In addition, we have checks for min_cache_size and
+// min_time_diff. For min_cache_size check, we check if the cache size >= min_cache_size,
+// which is based on the assumption that if we have received (min_cache_size - 1) records
+// after record A, we are not likely to receive a record earlier than A. For min_time_diff
+// check, we check if record A is generated min_time_diff ns earlier than the latest
+// record, which is based on the assumption that if we have received a record for time t,
+// we are not likely to receive a record for time (t - min_time_diff) or earlier.
+class RecordCache {  // NOTE(review): the class owns raw Record pointers and frees them in its destructor, but copying is not disabled here — a copy would delete the same records twice.
+ public:
+ RecordCache(const perf_event_attr& attr, size_t min_cache_size, uint64_t min_time_diff_in_ns);
+ ~RecordCache();  // Deletes any records still cached.
+ void Push(const char* data, size_t size);  // Parses the records in [data, data + size) and caches them.
+ std::unique_ptr<Record> Pop();  // Returns the earliest cached record, or nullptr if either check described above fails.
+ std::vector<std::unique_ptr<Record>> PopAll();  // Returns all cached records in sorted order, ignoring both checks.
+
+ private:
+ struct RecordComparator {
+ bool operator()(const Record* r1, const Record* r2);  // NOTE(review): not declared const; consider making it const for use as a comparator.
+ };
+
+ const perf_event_attr attr_;  // Copy of the attr used to parse pushed data.
+ bool has_timestamp_;  // Whether the min_time_diff check is applied.
+ size_t min_cache_size_;
+ uint64_t min_time_diff_in_ns_;
+ uint64_t last_time_;  // Largest timestamp among all records pushed so far.
+ std::priority_queue<Record*, std::vector<Record*>, RecordComparator> queue_;  // Owning raw pointers; top() is the earliest record.
+};
+
std::vector<std::unique_ptr<Record>> ReadRecordsFromBuffer(const perf_event_attr& attr,
const char* buf, size_t buf_size);
MmapRecord CreateMmapRecord(const perf_event_attr& attr, bool in_kernel, uint32_t pid, uint32_t tid,
diff --git a/simpleperf/record_test.cpp b/simpleperf/record_test.cpp
index 27edc529..6e4ed745 100644
--- a/simpleperf/record_test.cpp
+++ b/simpleperf/record_test.cpp
@@ -54,3 +54,49 @@ TEST_F(RecordTest, CommRecordMatchBinary) {
CommRecord record = CreateCommRecord(event_attr, 1, 2, "CommRecord");
CheckRecordMatchBinary(record);
}
+
+TEST_F(RecordTest, RecordCache_smoke) {  // Walks a RecordCache through pushes and pops to check both release conditions and the pop order.
+ event_attr.sample_id_all = 1;  // Together with PERF_SAMPLE_TIME below, this enables the cache's timestamp check.
+ event_attr.sample_type |= PERF_SAMPLE_TIME;
+ RecordCache cache(event_attr, 2, 2);  // min_cache_size = 2, min_time_diff_in_ns = 2.
+ MmapRecord r1 = CreateMmapRecord(event_attr, true, 1, 1, 0x100, 0x200, 0x300, "mmap_record1");
+ MmapRecord r2 = r1;
+ MmapRecord r3 = r1;
+ MmapRecord r4 = r1;
+ r1.sample_id.time_data.time = 3;  // The four records differ only in timestamp: r2(1) < r1(3) < r3(4) < r4(6).
+ r2.sample_id.time_data.time = 1;
+ r3.sample_id.time_data.time = 4;
+ r4.sample_id.time_data.time = 6;
+ std::vector<char> buf1 = r1.BinaryFormat();
+ std::vector<char> buf2 = r2.BinaryFormat();
+ std::vector<char> buf3 = r3.BinaryFormat();
+ std::vector<char> buf4 = r4.BinaryFormat();
+ // Push r1.
+ cache.Push(buf1.data(), buf1.size());
+ ASSERT_EQ(nullptr, cache.Pop());  // Only one record cached, below min_cache_size.
+ // Push r2.
+ cache.Push(buf2.data(), buf2.size());
+ // Pop r2.
+ std::unique_ptr<Record> popped_r = cache.Pop();  // r2 (time 1) is 2 ns older than the latest time (3), so it is released.
+ ASSERT_TRUE(popped_r != nullptr);
+ CheckRecordEqual(r2, *popped_r);
+ ASSERT_EQ(nullptr, cache.Pop());  // Only r1 is left, below min_cache_size.
+ // Push r3.
+ cache.Push(buf3.data(), buf3.size());
+ ASSERT_EQ(nullptr, cache.Pop());  // r1 (time 3) is only 1 ns older than the latest time (4).
+ // Push r4.
+ cache.Push(buf4.data(), buf4.size());
+ // Pop r1.
+ popped_r = cache.Pop();  // Latest time is now 6, so r1 (time 3) passes the time check.
+ ASSERT_TRUE(popped_r != nullptr);
+ CheckRecordEqual(r1, *popped_r);
+ // Pop r3.
+ popped_r = cache.Pop();  // r3 (time 4) is exactly 2 ns older than the latest time (6).
+ ASSERT_TRUE(popped_r != nullptr);
+ CheckRecordEqual(r3, *popped_r);
+ ASSERT_EQ(nullptr, cache.Pop());  // Only r4 is left, below min_cache_size.
+ // Pop r4.
+ std::vector<std::unique_ptr<Record>> last_records = cache.PopAll();  // PopAll() ignores both checks.
+ ASSERT_EQ(1u, last_records.size());
+ CheckRecordEqual(r4, *last_records[0]);
+}