diff options
Diffstat (limited to 'simpleperf/cmd_record.cpp')
-rw-r--r-- | simpleperf/cmd_record.cpp | 161 |
1 files changed, 33 insertions, 128 deletions
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp index 2208503e..ff01d808 100644 --- a/simpleperf/cmd_record.cpp +++ b/simpleperf/cmd_record.cpp @@ -40,7 +40,6 @@ #include "CallChainJoiner.h" #include "command.h" #include "environment.h" -#include "ETMRecorder.h" #include "event_selection_set.h" #include "event_type.h" #include "IOEventLoop.h" @@ -93,12 +92,6 @@ constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024; static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024; static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024; -static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024; - -// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M ETM data. -// So make default period to 100ms. -static constexpr double kDefaultEtmDataFlushPeriodInSec = 0.1; - struct TimeStat { uint64_t prepare_recording_time = 0; uint64_t start_recording_time = 0; @@ -186,16 +179,9 @@ class RecordCommand : public Command { "-m mmap_pages Set the size of the buffer used to receiving sample data from\n" " the kernel. It should be a power of 2. If not set, the max\n" " possible value <= 1024 will be used.\n" -"--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n" -" Need to be power of 2 and page size aligned.\n" -" Used memory size is (buffer_size * (cpu_count + 1).\n" -" Default is 4M.\n" "--no-inherit Don't record created child threads/processes.\n" "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n" " percent is in range [1-100], default is 25.\n" -"--include-filter binary1,binary2,...\n" -" Trace only selected binaries in cs-etm instruction tracing.\n" -" Each entry is a binary path.\n" "\n" "Dwarf unwinding options:\n" "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n" @@ -212,11 +198,6 @@ class RecordCommand : public Command { "--callchain-joiner-min-matching-nodes count\n" " When callchain joiner is used, set the matched nodes needed to join\n" " callchains. The count should be >= 1. By default it is 1.\n" -"--no-cut-samples Simpleperf uses a record buffer to cache records received from the kernel.\n" -" When the available space in the buffer reaches low level, it cuts part of\n" -" the stack data in samples. When the available space reaches critical level,\n" -" it drops all samples. This option makes simpleperf not cut samples when the\n" -" available space reaches low level.\n" "\n" "Recording file options:\n" "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n" @@ -296,7 +277,6 @@ class RecordCommand : public Command { bool DumpKernelMaps(); bool DumpUserSpaceMaps(); bool DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids); - bool DumpAuxTraceInfo(); bool ProcessRecord(Record* record); bool ShouldOmitRecord(Record* record); bool DumpMapsForRecord(Record* record); @@ -334,7 +314,6 @@ class RecordCommand : public Command { EventSelectionSet event_selection_set_; std::pair<size_t, size_t> mmap_page_range_; - size_t aux_buffer_size_ = kDefaultAuxBufferSize; ThreadTree thread_tree_; std::string record_filename_; @@ -359,7 +338,6 @@ class RecordCommand : public Command { bool allow_callchain_joiner_; size_t callchain_joiner_min_matching_nodes_; std::unique_ptr<CallChainJoiner> callchain_joiner_; - bool allow_cutting_samples_ = true; std::unique_ptr<JITDebugReader> jit_debug_reader_; uint64_t last_record_timestamp_; // used to insert Mmap2Records for JIT debug info @@ -435,7 +413,7 @@ bool RecordCommand::PrepareRecording(Workload* workload) { return false; } if (unwind_dwarf_callchain_) { - offline_unwinder_ = OfflineUnwinder::Create(false); + offline_unwinder_.reset(new OfflineUnwinder(false)); } if (unwind_dwarf_callchain_ && allow_callchain_joiner_) { callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE, @@ -473,8 +451,7 @@ bool RecordCommand::PrepareRecording(Workload* workload) { need_to_check_targets = true; } // Profiling JITed/interpreted Java code is supported starting from Android P. - // Also support profiling art interpreter on host. - if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) { + if (GetAndroidVersion() >= kAndroidVersionP) { // JIT symfiles are stored in temporary files, and are deleted after recording. But if // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in // the debug-unwind cmd. @@ -492,8 +469,7 @@ bool RecordCommand::PrepareRecording(Workload* workload) { size_t record_buffer_size = system_wide_collection_ ? kSystemWideRecordBufferSize : kRecordBufferSize; if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second, - aux_buffer_size_, record_buffer_size, - allow_cutting_samples_)) { + record_buffer_size)) { return false; } auto callback = @@ -538,7 +514,7 @@ bool RecordCommand::PrepareRecording(Workload* workload) { } } if (stdio_controls_profiling_) { - if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) { + if (!loop->AddReadEvent(0, [&]() { return ProcessControlCmd(loop); })) { return false; } } @@ -549,7 +525,7 @@ bool RecordCommand::PrepareRecording(Workload* workload) { if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) { return false; } - if (!system_wide_collection_) { + if (!app_package_name_.empty()) { std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses(); for (pid_t tid : event_selection_set_.GetMonitoredThreads()) { pid_t pid; @@ -567,21 +543,6 @@ bool RecordCommand::PrepareRecording(Workload* workload) { } } } - if (event_selection_set_.HasAuxTrace()) { - // ETM data is dumped to kernel buffer only when there is no thread traced by ETM. It happens - // either when all monitored threads are scheduled off cpu, or when all etm perf events are - // disabled. - // If ETM data isn't dumped to kernel buffer in time, overflow parts will be dropped. This - // makes less than expected data, especially in system wide recording. So add a periodic event - // to flush etm data by temporarily disable all perf events. - auto etm_flush = [this]() { - return event_selection_set_.SetEnableEvents(false) && - event_selection_set_.SetEnableEvents(true); - }; - if (!loop->AddPeriodicEvent(SecondToTimeval(kDefaultEtmDataFlushPeriodInSec), etm_flush)) { - return false; - } - } return true; } @@ -662,38 +623,33 @@ bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) { time_stat_.post_process_time = GetSystemClock(); // 4. Show brief record result. - auto record_stat = event_selection_set_.GetRecordStat(); - if (event_selection_set_.HasAuxTrace()) { - LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size; - if (record_stat.lost_aux_data_size != 0) { - LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size; - } - } else { - std::string cut_samples; - if (record_stat.cut_stack_samples > 0) { - cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples); - } - lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples; - LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples - << ". Samples lost: " << lost_record_count_ << "."; - LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, " - << record_stat.lost_non_samples << " non samples, cut stack of " - << record_stat.cut_stack_samples << " samples."; - if (sample_record_count_ + lost_record_count_ != 0) { - double lost_percent = - static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_); - constexpr double LOST_PERCENT_WARNING_BAR = 0.1; - if (lost_percent >= LOST_PERCENT_WARNING_BAR) { - LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, " - << "consider increasing mmap_pages(-m), " - << "or decreasing sample frequency(-f), " - << "or increasing sample period(-c)."; - } - } - if (callchain_joiner_) { - callchain_joiner_->DumpStat(); + size_t lost_samples; + size_t lost_non_samples; + size_t cut_stack_samples; + event_selection_set_.GetLostRecords(&lost_samples, &lost_non_samples, &cut_stack_samples); + std::string cut_samples; + if (cut_stack_samples > 0) { + cut_samples = android::base::StringPrintf(" (cut %zu)", cut_stack_samples); + } + lost_record_count_ += lost_samples + lost_non_samples; + LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples + << ". Samples lost: " << lost_record_count_ << "."; + LOG(DEBUG) << "In user space, dropped " << lost_samples << " samples, " << lost_non_samples + << " non samples, cut stack of " << cut_stack_samples << " samples."; + if (sample_record_count_ + lost_record_count_ != 0) { + double lost_percent = static_cast<double>(lost_record_count_) / + (lost_record_count_ + sample_record_count_); + constexpr double LOST_PERCENT_WARNING_BAR = 0.1; + if (lost_percent >= LOST_PERCENT_WARNING_BAR) { + LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, " + << "consider increasing mmap_pages(-m), " + << "or decreasing sample frequency(-f), " + << "or increasing sample period(-c)."; } } + if (callchain_joiner_) { + callchain_joiner_->DumpStat(); + } LOG(DEBUG) << "Prepare recording time " << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6 << " ms, recording time " @@ -717,15 +673,6 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, return false; } app_package_name_ = args[i]; - } else if (args[i] == "--aux-buffer-size") { - if (!GetUintOption(args, &i, &aux_buffer_size_, 0, std::numeric_limits<size_t>::max(), - true)) { - return false; - } - if (!IsPowerOfTwo(aux_buffer_size_) || aux_buffer_size_ % sysconf(_SC_PAGE_SIZE)) { - LOG(ERROR) << "invalid aux buffer size: " << args[i]; - return false; - } } else if (args[i] == "-b") { branch_sampling_ = branch_sampling_type_map["any"]; } else if (args[i] == "-c" || args[i] == "-f") { @@ -848,11 +795,6 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, } } else if (args[i] == "--in-app") { in_app_context_ = true; - } else if (args[i] == "--include-filter") { - if (!NextArgumentOrError(args, &i)) { - return false; - } - event_selection_set_.SetIncludeFilters(android::base::Split(args[i], ",")); } else if (args[i] == "-j") { if (!NextArgumentOrError(args, &i)) { return false; @@ -891,8 +833,6 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, if (!GetUintOption(args, &i, &callchain_joiner_min_matching_nodes_, 1)) { return false; } - } else if (args[i] == "--no-cut-samples") { - allow_cutting_samples_ = false; } else if (args[i] == "-o") { if (!NextArgumentOrError(args, &i)) { return false; @@ -1037,11 +977,7 @@ bool RecordCommand::AdjustPerfEventLimit() { set_prop = true; } // 3. Adjust perf_event_mlock_kb. - long cpus = sysconf(_SC_NPROCESSORS_CONF); - uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4; - if (event_selection_set_.HasAuxTrace()) { - mlock_kb += cpus * aux_buffer_size_ / 1024; - } + uint64_t mlock_kb = sysconf(_SC_NPROCESSORS_CONF) * (mmap_page_range_.second + 1) * 4; uint64_t cur_mlock_kb; if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb && !SetPerfEventMlockKb(mlock_kb)) { @@ -1100,8 +1036,7 @@ bool RecordCommand::CreateAndInitRecordFile() { } // Use first perf_event_attr and first event id to dump mmap and comm records. dumping_attr_id_ = event_selection_set_.GetEventAttrWithId()[0]; - return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps() && - DumpAuxTraceInfo(); + return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps(); } std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile( @@ -1267,14 +1202,6 @@ bool RecordCommand::ProcessRecord(Record* record) { return SaveRecordWithoutUnwinding(record); } -bool RecordCommand::DumpAuxTraceInfo() { - if (event_selection_set_.HasAuxTrace()) { - AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord(); - return ProcessRecord(&auxtrace_info); - } - return true; -} - template <typename MmapRecordType> bool MapOnlyExistInMemory(MmapRecordType* record) { return !record->InKernel() && MappedFileOnlyExistInMemory(record->filename); @@ -1377,16 +1304,6 @@ bool RecordCommand::ProcessJITDebugInfo(const std::vector<JITDebugInfo>& debug_i return false; } } else { - if (info.extracted_dex_file_map) { - ThreadMmap& map = *info.extracted_dex_file_map; - uint64_t timestamp = jit_debug_reader_->SyncWithRecords() ? info.timestamp - : last_record_timestamp_; - Mmap2Record record(*attr_id.attr, false, info.pid, info.pid, map.start_addr, map.len, - map.pgoff, map.prot, map.name, attr_id.ids[0], timestamp); - if (!ProcessRecord(&record)) { - return false; - } - } thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset); } } @@ -1607,14 +1524,10 @@ bool RecordCommand::DumpAdditionalFeatures( Dso::ReadKernelSymbolsFromProc(); kernel_symbols_available = true; } - std::vector<uint64_t> auxtrace_offset; auto callback = [&](const Record* r) { thread_tree_.Update(*r); if (r->type() == PERF_RECORD_SAMPLE) { CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r)); - } else if (r->type() == PERF_RECORD_AUXTRACE) { - auto auxtrace = static_cast<const AuxTraceRecord*>(r); - auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size()); } }; if (!record_file_writer_->ReadDataSection(callback)) { @@ -1625,9 +1538,6 @@ bool RecordCommand::DumpAdditionalFeatures( if (branch_sampling_) { feature_count++; } - if (!auxtrace_offset.empty()) { - feature_count++; - } if (!record_file_writer_->BeginWriteFeatures(feature_count)) { return false; } @@ -1667,9 +1577,6 @@ bool RecordCommand::DumpAdditionalFeatures( if (!DumpMetaInfoFeature(kernel_symbols_available)) { return false; } - if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) { - return false; - } if (!record_file_writer_->EndWriteFeatures()) { return false; @@ -1682,9 +1589,7 @@ bool RecordCommand::DumpBuildIdFeature() { BuildId build_id; std::vector<Dso*> dso_v = thread_tree_.GetAllDsos(); for (Dso* dso : dso_v) { - // For aux tracing, we don't know which binaries are traced. - // So dump build ids for all binaries. - if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) { + if (!dso->HasDumpId()) { continue; } if (dso->type() == DSO_KERNEL) { |