summaryrefslogtreecommitdiff
path: root/simpleperf/cmd_record.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'simpleperf/cmd_record.cpp')
-rw-r--r--simpleperf/cmd_record.cpp389
1 files changed, 269 insertions, 120 deletions
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index e69355d4..d60e2d03 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -49,6 +49,7 @@
#include <unwindstack/Error.h>
#include "CallChainJoiner.h"
+#include "ETMBranchListFile.h"
#include "ETMRecorder.h"
#include "IOEventLoop.h"
#include "JITDebugReader.h"
@@ -98,23 +99,17 @@ static std::unordered_map<std::string, int> clockid_map = {
// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528.
-constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;
+static constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;
// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
-constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
+static constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
// Cache size used by CallChainJoiner to cache call chains in memory.
-constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;
+static constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * kMegabyte;
-// Currently, the record buffer size in user-space is set to match the kernel buffer size on a
-// 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
-// For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
-static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024;
-static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024;
-
-static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024;
+static constexpr size_t kDefaultAuxBufferSize = 4 * kMegabyte;
// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data.
// So make default period to 100ms.
@@ -128,6 +123,30 @@ struct TimeStat {
uint64_t post_process_time = 0;
};
+std::optional<size_t> GetDefaultRecordBufferSize(bool system_wide_recording) {
+ // Currently, the record buffer size in user-space is set to match the kernel buffer size on a
+ // 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
+ // For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
+ // But on devices with memory >= 4GB, we increase buffer size to 256MB. This reduces the chance
+ // of cutting samples, which can cause broken callchains.
+ static constexpr size_t kLowMemoryRecordBufferSize = 64 * kMegabyte;
+ static constexpr size_t kHighMemoryRecordBufferSize = 256 * kMegabyte;
+ static constexpr size_t kSystemWideRecordBufferSize = 256 * kMegabyte;
+ // Ideally we can use >= 4GB here. But the memory size shown in /proc/meminfo is likely to be 3.x GB
+ // on a device with 4GB memory. So we have to use <= 3GB.
+ static constexpr uint64_t kLowMemoryLimit = 3 * kGigabyte;
+
+ if (system_wide_recording) {
+ return kSystemWideRecordBufferSize;
+ }
+ auto device_memory = GetMemorySize();
+ if (!device_memory.has_value()) {
+ return std::nullopt;
+ }
+ return device_memory.value() <= kLowMemoryLimit ? kLowMemoryRecordBufferSize
+ : kHighMemoryRecordBufferSize;
+}
+
class RecordCommand : public Command {
public:
RecordCommand()
@@ -146,8 +165,9 @@ class RecordCommand : public Command {
" On non-rooted devices, the app must be debuggable,\n"
" because we use run-as to switch to the app's context.\n"
#endif
-"-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n"
-" with -a.\n"
+"-p pid_or_process_name_regex1,pid_or_process_name_regex2,...\n"
+" Record events on existing processes. Processes are searched either by pid\n"
+" or process name regex. Mutually exclusive with -a.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
@@ -215,32 +235,14 @@ class RecordCommand : public Command {
" This option requires at least one branch type among any, any_call,\n"
" any_ret, ind_call.\n"
"-b Enable taken branch stack sampling. Same as '-j any'.\n"
-"-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
-" the kernel. It should be a power of 2. If not set, the max\n"
-" possible value <= 1024 will be used.\n"
-"--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n"
-" Need to be power of 2 and page size aligned.\n"
-" Used memory size is (buffer_size * (cpu_count + 1).\n"
-" Default is 4M.\n"
+"-m mmap_pages Set pages used in the kernel to cache sample data for each cpu.\n"
+" It should be a power of 2. If not set, the max possible value <= 1024\n"
+" will be used.\n"
+"--user-buffer-size <buffer_size> Set buffer size in userspace to cache sample data.\n"
+" By default, it is %s.\n"
"--no-inherit Don't record created child threads/processes.\n"
"--cpu-percent <percent> Set the max percent of cpu time used for recording.\n"
" percent is in range [1-100], default is 25.\n"
-"--addr-filter filter_str1,filter_str2,...\n"
-" Provide address filters for cs-etm instruction tracing.\n"
-" filter_str accepts below formats:\n"
-" 'filter <addr-range>' -- trace instructions in a range\n"
-" 'start <addr>' -- start tracing when ip is <addr>\n"
-" 'stop <addr>' -- stop tracing when ip is <addr>\n"
-" <addr-range> accepts below formats:\n"
-" <file_path> -- code sections in a binary file\n"
-" <vaddr_start>-<vaddr_end>@<file_path> -- part of a binary file\n"
-" <kernel_addr_start>-<kernel_addr_end> -- part of kernel space\n"
-" <addr> accepts below formats:\n"
-" <vaddr>@<file_path> -- virtual addr in a binary file\n"
-" <kernel_addr> -- a kernel address\n"
-" Examples:\n"
-" 'filter 0x456-0x480@/system/lib/libc.so'\n"
-" 'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
"\n"
"--tp-filter filter_string Set filter_string for the previous tracepoint event.\n"
" Format is in Documentation/trace/events.rst in the kernel.\n"
@@ -287,6 +289,31 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
" debug information, which are used for unwinding and dumping symbols.\n"
"--add-meta-info key=value Add extra meta info, which will be stored in the recording file.\n"
"\n"
+"ETM recording options:\n"
+"--addr-filter filter_str1,filter_str2,...\n"
+" Provide address filters for cs-etm instruction tracing.\n"
+" filter_str accepts below formats:\n"
+" 'filter <addr-range>' -- trace instructions in a range\n"
+" 'start <addr>' -- start tracing when ip is <addr>\n"
+" 'stop <addr>' -- stop tracing when ip is <addr>\n"
+" <addr-range> accepts below formats:\n"
+" <file_path> -- code sections in a binary file\n"
+" <vaddr_start>-<vaddr_end>@<file_path> -- part of a binary file\n"
+" <kernel_addr_start>-<kernel_addr_end> -- part of kernel space\n"
+" <addr> accepts below formats:\n"
+" <vaddr>@<file_path> -- virtual addr in a binary file\n"
+" <kernel_addr> -- a kernel address\n"
+" Examples:\n"
+" 'filter 0x456-0x480@/system/lib/libc.so'\n"
+" 'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
+"--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n"
+" Need to be power of 2 and page size aligned.\n"
+" Used memory size is (buffer_size * (cpu_count + 1).\n"
+" Default is 4M.\n"
+"--decode-etm Convert ETM data into branch lists while recording.\n"
+"--binary binary_name Used with --decode-etm to only generate data for binaries\n"
+" matching binary_name regex.\n"
+"\n"
"Other options:\n"
"--exit-with-parent Stop recording when the thread starting simpleperf dies.\n"
"--use-cmd-exit-code Exit with the same exit code as the monitored cmdline.\n"
@@ -319,7 +346,6 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
record_filename_("perf.data"),
sample_record_count_(0),
- lost_record_count_(0),
in_app_context_(false),
trace_offcpu_(false),
exclude_kernel_callchain_(false),
@@ -334,6 +360,7 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
signal(SIGPIPE, SIG_IGN);
}
+ std::string LongHelpString() const override;
void Run(const std::vector<std::string>& args, int* exit_code) override;
bool Run(const std::vector<std::string>& args) override {
int exit_code;
@@ -352,8 +379,8 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
bool TraceOffCpu();
bool SetEventSelectionFlags();
bool CreateAndInitRecordFile();
- std::unique_ptr<RecordFileWriter> CreateRecordFile(
- const std::string& filename, const std::vector<EventAttrWithId>& override_attrs);
+ std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename,
+ const EventAttrIds& attrs);
bool DumpKernelSymbol();
bool DumpTracingData();
bool DumpMaps();
@@ -384,6 +411,7 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
bool DumpMetaInfoFeature(bool kernel_symbols_available);
bool DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set);
void CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set);
+ bool DumpETMBranchListFeature();
std::unique_ptr<SampleSpeed> sample_speed_;
bool system_wide_collection_;
@@ -405,6 +433,7 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
EventSelectionSet event_selection_set_;
std::pair<size_t, size_t> mmap_page_range_;
+ std::optional<size_t> user_buffer_size_;
size_t aux_buffer_size_ = kDefaultAuxBufferSize;
ThreadTree thread_tree_;
@@ -414,7 +443,6 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
android::base::unique_fd stop_signal_fd_;
uint64_t sample_record_count_;
- uint64_t lost_record_count_;
android::base::unique_fd start_profiling_fd_;
bool stdio_controls_profiling_ = false;
@@ -447,8 +475,32 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
std::unordered_map<std::string, std::string> extra_meta_info_;
bool use_cmd_exit_code_ = false;
std::vector<std::string> add_counters_;
+
+ std::unique_ptr<ETMBranchListGenerator> etm_branch_list_generator_;
+ std::unique_ptr<RegEx> binary_name_regex_;
};
+std::string RecordCommand::LongHelpString() const {
+ uint64_t process_buffer_size = 0;
+ uint64_t system_wide_buffer_size = 0;
+ if (auto size = GetDefaultRecordBufferSize(false); size) {
+ process_buffer_size = size.value() / kMegabyte;
+ }
+ if (auto size = GetDefaultRecordBufferSize(true); size) {
+ system_wide_buffer_size = size.value() / kMegabyte;
+ }
+ std::string buffer_size_str;
+ if (process_buffer_size == system_wide_buffer_size) {
+ buffer_size_str = android::base::StringPrintf("%" PRIu64 "M", process_buffer_size);
+ } else {
+ buffer_size_str =
+ android::base::StringPrintf("%" PRIu64 "M for process recording and %" PRIu64
+ "M\n for system wide recording",
+ process_buffer_size, system_wide_buffer_size);
+ }
+ return android::base::StringPrintf(long_help_string_.c_str(), buffer_size_str.c_str());
+}
+
void RecordCommand::Run(const std::vector<std::string>& args, int* exit_code) {
*exit_code = 1;
time_stat_.prepare_recording_time = GetSystemClock();
@@ -607,8 +659,16 @@ bool RecordCommand::PrepareRecording(Workload* workload) {
if (!event_selection_set_.OpenEventFiles(cpus_)) {
return false;
}
- size_t record_buffer_size =
- system_wide_collection_ ? kSystemWideRecordBufferSize : kRecordBufferSize;
+ size_t record_buffer_size = 0;
+ if (user_buffer_size_.has_value()) {
+ record_buffer_size = user_buffer_size_.value();
+ } else {
+ auto default_size = GetDefaultRecordBufferSize(system_wide_collection_);
+ if (!default_size.has_value()) {
+ return false;
+ }
+ record_buffer_size = default_size.value();
+ }
if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
aux_buffer_size_, record_buffer_size,
allow_cutting_samples_, exclude_perf_)) {
@@ -697,6 +757,15 @@ bool RecordCommand::PrepareRecording(Workload* workload) {
if (!loop->AddPeriodicEvent(SecondToTimeval(kDefaultEtmDataFlushPeriodInSec), etm_flush)) {
return false;
}
+
+ if (etm_branch_list_generator_) {
+ if (exclude_perf_) {
+ etm_branch_list_generator_->SetExcludePid(getpid());
+ }
+ if (binary_name_regex_) {
+ etm_branch_list_generator_->SetBinaryFilter(binary_name_regex_.get());
+ }
+ }
}
return true;
}
@@ -798,26 +867,59 @@ bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
if (event_selection_set_.HasAuxTrace()) {
LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size;
if (record_stat.lost_aux_data_size != 0) {
- LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size;
+ LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size
+ << ", consider increasing userspace buffer size(--user-buffer-size).";
}
} else {
- std::string cut_samples;
- if (record_stat.cut_stack_samples > 0) {
- cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples);
- }
- lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples;
- LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples
- << ". Samples lost: " << lost_record_count_ << ".";
- LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, "
- << record_stat.lost_non_samples << " non samples, cut stack of "
- << record_stat.cut_stack_samples << " samples.";
- if (sample_record_count_ + lost_record_count_ != 0) {
- double lost_percent =
- static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_);
- constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
- if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
- LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
- << "consider increasing mmap_pages(-m), "
+ // Here we report all lost records as samples. This isn't accurate, because records like
+ // MmapRecords are not samples. But it's easier for users to understand.
+ size_t userspace_lost_samples =
+ record_stat.userspace_lost_samples + record_stat.userspace_lost_non_samples;
+ size_t lost_samples = record_stat.kernelspace_lost_records + userspace_lost_samples;
+
+ std::stringstream os;
+ os << "Samples recorded: " << sample_record_count_;
+ if (record_stat.userspace_cut_stack_samples > 0) {
+ os << " (cut " << record_stat.userspace_cut_stack_samples << ")";
+ }
+ os << ". Samples lost: " << lost_samples;
+ if (lost_samples != 0) {
+ os << " (kernelspace: " << record_stat.kernelspace_lost_records
+ << ", userspace: " << userspace_lost_samples << ")";
+ }
+ os << ".";
+ LOG(INFO) << os.str();
+
+ LOG(DEBUG) << "Record stat: kernelspace_lost_records=" << record_stat.kernelspace_lost_records
+ << ", userspace_lost_samples=" << record_stat.userspace_lost_samples
+ << ", userspace_lost_non_samples=" << record_stat.userspace_lost_non_samples
+ << ", userspace_cut_stack_samples=" << record_stat.userspace_cut_stack_samples;
+
+ if (sample_record_count_ + record_stat.kernelspace_lost_records != 0) {
+ double kernelspace_lost_percent =
+ static_cast<double>(record_stat.kernelspace_lost_records) /
+ (record_stat.kernelspace_lost_records + sample_record_count_);
+ constexpr double KERNELSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
+ if (kernelspace_lost_percent >= KERNELSPACE_LOST_PERCENT_WARNING_BAR) {
+ LOG(WARNING) << "Lost " << (kernelspace_lost_percent * 100)
+ << "% of samples in kernel space, "
+ << "consider increasing kernel buffer size(-m), "
+ << "or decreasing sample frequency(-f), "
+ << "or increasing sample period(-c).";
+ }
+ }
+ size_t userspace_lost_cut_samples =
+ userspace_lost_samples + record_stat.userspace_cut_stack_samples;
+ size_t userspace_complete_samples =
+ sample_record_count_ - record_stat.userspace_cut_stack_samples;
+ if (userspace_complete_samples + userspace_lost_cut_samples != 0) {
+ double userspace_lost_percent = static_cast<double>(userspace_lost_cut_samples) /
+ (userspace_complete_samples + userspace_lost_cut_samples);
+ constexpr double USERSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
+ if (userspace_lost_percent >= USERSPACE_LOST_PERCENT_WARNING_BAR) {
+ LOG(WARNING) << "Lost/Cut " << (userspace_lost_percent * 100)
+ << "% of samples in user space, "
+ << "consider increasing userspace buffer size(--user-buffer-size), "
<< "or decreasing sample frequency(-f), "
<< "or increasing sample period(-c).";
}
@@ -890,6 +992,13 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
branch_sampling_ = branch_sampling_type_map["any"];
}
+ if (auto value = options.PullValue("--binary"); value) {
+ binary_name_regex_ = RegEx::Create(*value->str_value);
+ if (binary_name_regex_ == nullptr) {
+ return false;
+ }
+ }
+
if (!options.PullUintValue("--callchain-joiner-min-matching-nodes",
&callchain_joiner_min_matching_nodes_, 1)) {
return false;
@@ -921,6 +1030,10 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
return false;
}
+ if (options.PullBoolValue("--decode-etm")) {
+ etm_branch_list_generator_ = ETMBranchListGenerator::Create(system_wide_collection_);
+ }
+
if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
return false;
}
@@ -992,8 +1105,8 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
out_fd_.reset(static_cast<int>(value->uint_value));
}
- for (const OptionValue& value : options.PullValues("-p")) {
- if (auto pids = GetTidsFromString(*value.str_value, true); pids) {
+ if (auto strs = options.PullStringValues("-p"); !strs.empty()) {
+ if (auto pids = GetPidsFromStrings(strs, true, true); pids) {
event_selection_set_.AddMonitoredProcesses(pids.value());
} else {
return false;
@@ -1011,6 +1124,15 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
post_unwind_ = false;
}
+ if (auto value = options.PullValue("--user-buffer-size"); value) {
+ uint64_t v = value->uint_value;
+ if (v > std::numeric_limits<size_t>::max() || v == 0) {
+ LOG(ERROR) << "invalid user buffer size: " << v;
+ return false;
+ }
+ user_buffer_size_ = static_cast<size_t>(v);
+ }
+
if (!options.PullUintValue("--size-limit", &size_limit_in_bytes_, 1)) {
return false;
}
@@ -1281,32 +1403,35 @@ bool RecordCommand::SetEventSelectionFlags() {
}
bool RecordCommand::CreateAndInitRecordFile() {
- record_file_writer_ =
- CreateRecordFile(record_filename_, event_selection_set_.GetEventAttrWithId());
+ EventAttrIds attrs = event_selection_set_.GetEventAttrWithId();
+ bool remove_regs_and_stacks = unwind_dwarf_callchain_ && !post_unwind_;
+ if (remove_regs_and_stacks) {
+ for (auto& attr : attrs) {
+ ReplaceRegAndStackWithCallChain(attr.attr);
+ }
+ }
+ record_file_writer_ = CreateRecordFile(record_filename_, attrs);
if (record_file_writer_ == nullptr) {
return false;
}
// Use first perf_event_attr and first event id to dump mmap and comm records.
- dumping_attr_id_ = event_selection_set_.GetEventAttrWithId()[0];
+ CHECK(!attrs.empty());
+ dumping_attr_id_ = attrs[0];
CHECK(!dumping_attr_id_.ids.empty());
- map_record_reader_.emplace(*dumping_attr_id_.attr, dumping_attr_id_.ids[0],
+ map_record_reader_.emplace(dumping_attr_id_.attr, dumping_attr_id_.ids[0],
event_selection_set_.RecordNotExecutableMaps());
map_record_reader_->SetCallback([this](Record* r) { return ProcessRecord(r); });
return DumpKernelSymbol() && DumpTracingData() && DumpMaps() && DumpAuxTraceInfo();
}
-std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
- const std::string& filename, const std::vector<EventAttrWithId>& attrs) {
+std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename,
+ const EventAttrIds& attrs) {
std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
- if (writer == nullptr) {
- return nullptr;
- }
-
- if (!writer->WriteAttrSection(attrs)) {
- return nullptr;
+ if (writer != nullptr && writer->WriteAttrSection(attrs)) {
+ return writer;
}
- return writer;
+ return nullptr;
}
bool RecordCommand::DumpKernelSymbol() {
@@ -1348,9 +1473,11 @@ bool RecordCommand::DumpMaps() {
// For system wide recording:
// If not aux tracing, only dump kernel maps. Maps of a process is dumped when needed (the
// first time a sample hits that process).
- // If aux tracing, we don't know which maps will be needed, so dump all process maps. To
- // reduce pre recording time, we dump process maps in map record thread while recording.
- if (event_selection_set_.HasAuxTrace()) {
+ // If aux tracing with decoding etm data, the maps are dumped by etm_branch_list_generator.
+ // If aux tracing without decoding etm data, we don't know which maps will be needed, so dump
+ // all process maps. To reduce pre recording time, we dump process maps in map record thread
+ // while recording.
+ if (event_selection_set_.HasAuxTrace() && !etm_branch_list_generator_) {
map_record_thread_.emplace(*map_record_reader_);
return true;
}
@@ -1409,6 +1536,15 @@ bool RecordCommand::ProcessRecord(Record* record) {
return true;
}
}
+ if (etm_branch_list_generator_) {
+ bool consumed = false;
+ if (!etm_branch_list_generator_->ProcessRecord(*record, consumed)) {
+ return false;
+ }
+ if (consumed) {
+ return true;
+ }
+ }
if (unwind_dwarf_callchain_) {
if (post_unwind_) {
return SaveRecordForPostUnwinding(record);
@@ -1487,8 +1623,6 @@ bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
return true;
}
sample_record_count_++;
- } else if (record->type() == PERF_RECORD_LOST) {
- lost_record_count_ += static_cast<LostRecord*>(record)->lost;
} else {
thread_tree_.Update(*record);
}
@@ -1506,8 +1640,6 @@ bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
return true;
}
sample_record_count_++;
- } else if (record->type() == PERF_RECORD_LOST) {
- lost_record_count_ += static_cast<LostRecord*>(record)->lost;
}
return record_file_writer_->WriteRecord(*record);
}
@@ -1518,7 +1650,7 @@ bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_i
if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
uint64_t timestamp =
jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
- Mmap2Record record(*dumping_attr_id_.attr, false, info.pid, info.pid, info.jit_code_addr,
+ Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, info.jit_code_addr,
info.jit_code_len, info.file_offset, map_flags::PROT_JIT_SYMFILE_MAP,
info.file_path, dumping_attr_id_.ids[0], timestamp);
if (!ProcessRecord(&record)) {
@@ -1529,7 +1661,7 @@ bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_i
ThreadMmap& map = *info.extracted_dex_file_map;
uint64_t timestamp =
jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
- Mmap2Record record(*dumping_attr_id_.attr, false, info.pid, info.pid, map.start_addr,
+ Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, map.start_addr,
map.len, map.pgoff, map.prot, map.name, dumping_attr_id_.ids[0],
timestamp);
if (!ProcessRecord(&record)) {
@@ -1637,9 +1769,11 @@ void RecordCommand::UpdateRecord(Record* record) {
}
bool RecordCommand::UnwindRecord(SampleRecord& r) {
- if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
- (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
- (r.GetValidStackSize() > 0)) {
+ if (!(r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
+ (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER)) {
+ return true;
+ }
+ if (r.GetValidStackSize() > 0) {
ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
std::vector<uint64_t> ips;
@@ -1668,6 +1802,9 @@ bool RecordCommand::UnwindRecord(SampleRecord& r) {
CallChainJoiner::ORIGINAL_OFFLINE, ips, sps)) {
return false;
}
+ } else {
+ // For kernel samples, we still need to remove user stack and register fields.
+ r.ReplaceRegAndStackWithCallChain({});
}
return true;
}
@@ -1692,11 +1829,15 @@ std::unique_ptr<RecordFileReader> RecordCommand::MoveRecordFile(const std::strin
return nullptr;
}
record_file_writer_.reset();
- {
- std::error_code ec;
- std::filesystem::rename(record_filename_, old_filename, ec);
- if (ec) {
- LOG(ERROR) << "Failed to rename: " << ec.message();
+ std::error_code ec;
+ std::filesystem::rename(record_filename_, old_filename, ec);
+ if (ec) {
+ LOG(DEBUG) << "Failed to rename: " << ec.message();
+ // rename() fails on Android N x86 emulator, which uses kernel 3.10, because rename() in bionic
+ // uses the renameat2 syscall, which isn't supported on kernel < 3.15. So add a fallback to the mv
+ // command. The mv command can also work in other situations where rename() doesn't work,
+ // so we'd like to keep it as a fallback to rename().
+ if (!Workload::RunCmd({"mv", record_filename_, old_filename})) {
return nullptr;
}
}
@@ -1755,8 +1896,16 @@ bool RecordCommand::PostUnwindRecords() {
if (!reader) {
return false;
}
+ // Write new event attrs without regs and stacks fields.
+ EventAttrIds attrs = reader->AttrSection();
+ for (auto& attr : attrs) {
+ ReplaceRegAndStackWithCallChain(attr.attr);
+ }
+ if (!record_file_writer_->WriteAttrSection(attrs)) {
+ return false;
+ }
+
sample_record_count_ = 0;
- lost_record_count_ = 0;
auto callback = [this](std::unique_ptr<Record> record) {
return SaveRecordAfterUnwinding(record.get());
};
@@ -1867,6 +2016,9 @@ bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args)
if (keep_failed_unwinding_debug_info_) {
feature_count += 2;
}
+ if (etm_branch_list_generator_) {
+ feature_count++;
+ }
if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
return false;
}
@@ -1909,6 +2061,9 @@ bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args)
if (keep_failed_unwinding_debug_info_ && !DumpDebugUnwindFeature(debug_unwinding_files)) {
return false;
}
+ if (etm_branch_list_generator_ && !DumpETMBranchListFeature()) {
+ return false;
+ }
if (!record_file_writer_->EndWriteFeatures()) {
return false;
@@ -1926,34 +2081,9 @@ bool RecordCommand::DumpBuildIdFeature() {
if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
continue;
}
- if (dso->type() == DSO_KERNEL) {
- if (!GetKernelBuildId(&build_id)) {
- continue;
- }
- build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
- } else if (dso->type() == DSO_KERNEL_MODULE) {
- bool has_build_id = false;
- if (android::base::EndsWith(dso->Path(), ".ko")) {
- has_build_id = GetBuildIdFromDsoPath(dso->Path(), &build_id);
- } else if (const std::string& path = dso->Path();
- path.size() > 2 && path[0] == '[' && path.back() == ']') {
- // For kernel modules that we can't find the corresponding file, read build id from /sysfs.
- has_build_id = GetModuleBuildId(path.substr(1, path.size() - 2), &build_id);
- }
- if (has_build_id) {
- build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
- } else {
- LOG(DEBUG) << "Can't read build_id for module " << dso->Path();
- }
- } else if (dso->type() == DSO_ELF_FILE) {
- if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP || dso->IsForJavaMethod()) {
- continue;
- }
- if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) {
- LOG(DEBUG) << "Can't read build_id from file " << dso->Path();
- continue;
- }
- build_id_records.push_back(BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
+ if (GetBuildId(*dso, build_id)) {
+ bool in_kernel = dso->type() == DSO_KERNEL || dso->type() == DSO_KERNEL_MODULE;
+ build_id_records.emplace_back(in_kernel, UINT_MAX, build_id, dso->Path());
}
}
if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
@@ -2013,6 +2143,15 @@ bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
if (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_) {
OfflineUnwinder::CollectMetaInfo(&info_map);
}
+ auto record_stat = event_selection_set_.GetRecordStat();
+ info_map["record_stat"] = android::base::StringPrintf(
+ "sample_record_count=%" PRIu64
+ ",kernelspace_lost_records=%zu,userspace_lost_samples=%zu,"
+ "userspace_lost_non_samples=%zu,userspace_cut_stack_samples=%zu",
+ sample_record_count_, record_stat.kernelspace_lost_records,
+ record_stat.userspace_lost_samples, record_stat.userspace_lost_non_samples,
+ record_stat.userspace_cut_stack_samples);
+
return record_file_writer_->WriteMetaInfoFeature(info_map);
}
@@ -2064,6 +2203,16 @@ void RecordCommand::CollectHitFileInfo(const SampleRecord& r, std::unordered_set
}
}
+bool RecordCommand::DumpETMBranchListFeature() {
+ BranchListBinaryMap binary_map = etm_branch_list_generator_->GetBranchListBinaryMap();
+ std::string s;
+ if (!BranchListBinaryMapToString(binary_map, s)) {
+ return false;
+ }
+ return record_file_writer_->WriteFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST, s.data(),
+ s.size());
+}
+
} // namespace
static bool ConsumeStr(const char*& p, const char* s) {