diff options
57 files changed, 1605 insertions, 488 deletions
diff --git a/libfscrypt/fscrypt_init_extensions.cpp b/libfscrypt/fscrypt_init_extensions.cpp index 9781267c..f132f8a0 100644 --- a/libfscrypt/fscrypt_init_extensions.cpp +++ b/libfscrypt/fscrypt_init_extensions.cpp @@ -37,8 +37,6 @@ #define TAG "fscrypt" -static const std::string arbitrary_sequence_number = "42"; - static int set_system_de_policy_on(char const* dir); int fscrypt_install_keyring() diff --git a/memory_replay/Android.bp b/memory_replay/Android.bp index 753b8add..a21a6265 100644 --- a/memory_replay/Android.bp +++ b/memory_replay/Android.bp @@ -47,6 +47,13 @@ cc_defaults { ], shared_libs: ["libbase"], +} + +cc_binary { + name: "memory_replay", + defaults: ["memory_replay_defaults"], + + srcs: ["main.cpp"], multilib: { lib32: { @@ -56,19 +63,12 @@ cc_defaults { suffix: "64", }, }, - -} - -cc_binary { - name: "memory_replay", - defaults: ["memory_replay_defaults"], - - srcs: ["main.cpp"], } cc_test { name: "memory_replay_tests", defaults: ["memory_replay_defaults"], + isolated: true, srcs: [ "tests/ActionTest.cpp", diff --git a/memory_replay/NativeInfo.cpp b/memory_replay/NativeInfo.cpp index 18c832bb..2db653cb 100644 --- a/memory_replay/NativeInfo.cpp +++ b/memory_replay/NativeInfo.cpp @@ -32,18 +32,18 @@ // This function is not re-entrant since it uses a static buffer for // the line data. -void GetNativeInfo(int smaps_fd, size_t* pss_bytes, size_t* va_bytes) { +void GetNativeInfo(int smaps_fd, size_t* rss_bytes, size_t* va_bytes) { static char map_buffer[65535]; LineBuffer line_buf(smaps_fd, map_buffer, sizeof(map_buffer)); char* line; - size_t total_pss_bytes = 0; + size_t total_rss_bytes = 0; size_t total_va_bytes = 0; size_t line_len; bool native_map = false; while (line_buf.GetLine(&line, &line_len)) { uintptr_t start, end; int name_pos; - size_t native_pss_kB; + size_t native_rss_kB; if (sscanf(line, "%" SCNxPTR "-%" SCNxPTR " %*4s %*x %*x:%*x %*d %n", &start, &end, &name_pos) == 2) { if (strcmp(line + name_pos, "[anon:libc_malloc]") == 0 || @@ -53,16 +53,16 @@ void GetNativeInfo(int smaps_fd, size_t* pss_bytes, size_t* va_bytes) { } else { native_map = false; } - } else if (native_map && sscanf(line, "Pss: %zu", &native_pss_kB) == 1) { - total_pss_bytes += native_pss_kB * 1024; + } else if (native_map && sscanf(line, "Rss: %zu", &native_rss_kB) == 1) { + total_rss_bytes += native_rss_kB * 1024; } } - *pss_bytes = total_pss_bytes; + *rss_bytes = total_rss_bytes; *va_bytes = total_va_bytes; } void PrintNativeInfo(const char* preamble) { - size_t pss_bytes; + size_t rss_bytes; size_t va_bytes; android::base::unique_fd smaps_fd(open("/proc/self/smaps", O_RDONLY)); @@ -70,8 +70,8 @@ void PrintNativeInfo(const char* preamble) { err(1, "Cannot open /proc/self/smaps: %s\n", strerror(errno)); } - GetNativeInfo(smaps_fd, &pss_bytes, &va_bytes); - printf("%sNative PSS: %zu bytes %0.2fMB\n", preamble, pss_bytes, pss_bytes/(1024*1024.0)); + GetNativeInfo(smaps_fd, &rss_bytes, &va_bytes); + printf("%sNative RSS: %zu bytes %0.2fMB\n", preamble, rss_bytes, rss_bytes/(1024*1024.0)); printf("%sNative VA Space: %zu bytes %0.2fMB\n", preamble, va_bytes, va_bytes/(1024*1024.0)); fflush(stdout); } diff --git a/memory_replay/NativeInfo.h b/memory_replay/NativeInfo.h index 59536954..40a16f2d 100644 --- a/memory_replay/NativeInfo.h +++ b/memory_replay/NativeInfo.h @@ -18,7 +18,7 @@ #define _MEMORY_REPLAY_NATIVE_INFO_H // This function is not re-entrant. -void GetNativeInfo(int smaps_fd, size_t* pss_bytes, size_t* va_bytes); +void GetNativeInfo(int smaps_fd, size_t* rss_bytes, size_t* va_bytes); // This function is not re-entrant. void PrintNativeInfo(const char* preamble); diff --git a/memory_replay/Pointers.cpp b/memory_replay/Pointers.cpp index b9604f06..e9eebadb 100644 --- a/memory_replay/Pointers.cpp +++ b/memory_replay/Pointers.cpp @@ -37,7 +37,7 @@ Pointers::Pointers(size_t max_allocs) { if (memory == MAP_FAILED) { err(1, "Unable to allocate data for pointer hash: %zu total_allocs\n", max_allocs); } - // Make sure that all of the PSS for this is counted right away. + // Make sure that all of the RSS for this is counted right away. memset(memory, 0, pointers_size_); pointers_ = reinterpret_cast<pointer_data*>(memory); } diff --git a/memory_replay/tests/NativeInfoTest.cpp b/memory_replay/tests/NativeInfoTest.cpp index e0dea509..44c87704 100644 --- a/memory_replay/tests/NativeInfoTest.cpp +++ b/memory_replay/tests/NativeInfoTest.cpp @@ -41,8 +41,8 @@ TEST_F(NativeInfoTest, no_matching) { std::string smaps_data = "b6f1a000-b6f1c000 rw-p 00000000 00:00 0 [anon:thread signal stack]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 12 kB\n" + "Rss: 12 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -59,10 +59,10 @@ TEST_F(NativeInfoTest, no_matching) { write(tmp_file_->fd, smaps_data.c_str(), smaps_data.size())) != -1); ASSERT_TRUE(lseek(tmp_file_->fd, 0, SEEK_SET) != off_t(-1)); - size_t pss_bytes = 1; + size_t rss_bytes = 1; size_t va_bytes = 1; - GetNativeInfo(tmp_file_->fd, &pss_bytes, &va_bytes); - ASSERT_EQ(0U, pss_bytes); + GetNativeInfo(tmp_file_->fd, &rss_bytes, &va_bytes); + ASSERT_EQ(0U, rss_bytes); ASSERT_EQ(0U, va_bytes); } @@ -70,8 +70,8 @@ TEST_F(NativeInfoTest, multiple_anons) { std::string smaps_data = "b6f1a000-b6f1c000 rw-p 00000000 00:00 0 [anon:libc_malloc]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 12 kB\n" + "Rss: 12 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -86,8 +86,8 @@ TEST_F(NativeInfoTest, multiple_anons) { "Name: [anon:libc_malloc]\n" "b6f1e000-b6f1f000 rw-p 00000000 00:00 0 [anon:libc_malloc]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 20 kB\n" + "Rss: 20 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -102,8 +102,8 @@ TEST_F(NativeInfoTest, multiple_anons) { "Name: [anon:libc_malloc]\n" "b6f2e000-b6f2f000 rw-p 00000000 00:00 0\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 24 kB\n" + "Rss: 24 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -120,10 +120,10 @@ TEST_F(NativeInfoTest, multiple_anons) { write(tmp_file_->fd, smaps_data.c_str(), smaps_data.size())) != -1); ASSERT_TRUE(lseek(tmp_file_->fd, 0, SEEK_SET) != off_t(-1)); - size_t pss_bytes = 1; + size_t rss_bytes = 1; size_t va_bytes = 1; - GetNativeInfo(tmp_file_->fd, &pss_bytes, &va_bytes); - ASSERT_EQ(32768U, pss_bytes); + GetNativeInfo(tmp_file_->fd, &rss_bytes, &va_bytes); + ASSERT_EQ(32768U, rss_bytes); ASSERT_EQ(12288U, va_bytes); } @@ -131,8 +131,8 @@ TEST_F(NativeInfoTest, multiple_heaps) { std::string smaps_data = "b6f1a000-b6f1c000 rw-p 00000000 00:00 0 [heap]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 24 kB\n" + "Rss: 24 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -147,8 +147,8 @@ TEST_F(NativeInfoTest, multiple_heaps) { "Name: [heap]\n" "b6f1e000-b6f1f000 rw-p 00000000 00:00 0 [heap]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 20 kB\n" + "Rss: 20 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -163,8 +163,8 @@ TEST_F(NativeInfoTest, multiple_heaps) { "Name: [heap]\n" "b6f2e000-b6f2f000 rw-p 00000000 00:00 0\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 24 kB\n" + "Rss: 24 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -181,10 +181,10 @@ TEST_F(NativeInfoTest, multiple_heaps) { write(tmp_file_->fd, smaps_data.c_str(), smaps_data.size())) != -1); ASSERT_TRUE(lseek(tmp_file_->fd, 0, SEEK_SET) != off_t(-1)); - size_t pss_bytes = 1; + size_t rss_bytes = 1; size_t va_bytes = 1; - GetNativeInfo(tmp_file_->fd, &pss_bytes, &va_bytes); - ASSERT_EQ(45056U, pss_bytes); + GetNativeInfo(tmp_file_->fd, &rss_bytes, &va_bytes); + ASSERT_EQ(45056U, rss_bytes); ASSERT_EQ(12288U, va_bytes); } @@ -192,8 +192,8 @@ TEST_F(NativeInfoTest, mix_heap_anon) { std::string smaps_data = "b6f1a000-b6f1c000 rw-p 00000000 00:00 0 [heap]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 32 kB\n" + "Rss: 32 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -208,8 +208,8 @@ TEST_F(NativeInfoTest, mix_heap_anon) { "Name: [heap]\n" "b6f1e000-b6f1f000 rw-p 00000000 00:00 0 [anon:skip]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 32 kB\n" + "Rss: 32 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -224,8 +224,8 @@ TEST_F(NativeInfoTest, mix_heap_anon) { "Name: [anon:skip]\n" "b6f2e000-b6f2f000 rw-p 00000000 00:00 0 [anon:libc_malloc]\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 40 kB\n" + "Rss: 40 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -240,8 +240,8 @@ TEST_F(NativeInfoTest, mix_heap_anon) { "Name: [anon:libc_malloc]\n" "b6f3e000-b6f3f000 rw-p 00000000 00:00 0\n" "Size: 8 kB\n" - "Rss: 0 kB\n" - "Pss: 24 kB\n" + "Rss: 24 kB\n" + "Pss: 0 kB\n" "Shared_Clean: 0 kB\n" "Shared_Dirty: 0 kB\n" "Private_Clean: 0 kB\n" @@ -258,9 +258,9 @@ TEST_F(NativeInfoTest, mix_heap_anon) { write(tmp_file_->fd, smaps_data.c_str(), smaps_data.size())) != -1); ASSERT_TRUE(lseek(tmp_file_->fd, 0, SEEK_SET) != off_t(-1)); - size_t pss_bytes = 1; + size_t rss_bytes = 1; size_t va_bytes = 1; - GetNativeInfo(tmp_file_->fd, &pss_bytes, &va_bytes); - ASSERT_EQ(73728U, pss_bytes); + GetNativeInfo(tmp_file_->fd, &rss_bytes, &va_bytes); + ASSERT_EQ(73728U, rss_bytes); ASSERT_EQ(12288U, va_bytes); } diff --git a/simpleperf/Android.bp b/simpleperf/Android.bp index be134209..df43a59a 100644 --- a/simpleperf/Android.bp +++ b/simpleperf/Android.bp @@ -285,6 +285,7 @@ cc_defaults { "cmd_stat.cpp", "cmd_trace_sched.cpp", "environment.cpp", + "ETMRecorder.cpp", "event_fd.cpp", "event_selection_set.cpp", "InplaceSamplerClient.cpp", diff --git a/simpleperf/ETMRecorder.cpp b/simpleperf/ETMRecorder.cpp new file mode 100644 index 00000000..45d18f26 --- /dev/null +++ b/simpleperf/ETMRecorder.cpp @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ETMRecorder.h" + +#include <stdio.h> +#include <sys/sysinfo.h> + +#include <memory> +#include <limits> +#include <string> + +#include <android-base/file.h> +#include <android-base/logging.h> +#include <android-base/parseint.h> +#include <android-base/strings.h> + +#include "utils.h" + +namespace simpleperf { + +static constexpr bool ETM_RECORD_TIMESTAMP = false; + +// Config bits from include/linux/coresight-pmu.h in the kernel +// For etm_event_config: +static constexpr int ETM_OPT_CTXTID = 14; +static constexpr int ETM_OPT_TS = 28; +// For etm_config_reg: +static constexpr int ETM4_CFG_BIT_CTXTID = 6; +static constexpr int ETM4_CFG_BIT_TS = 11; + +static const std::string ETM_DIR = "/sys/bus/event_source/devices/cs_etm/"; + +template <typename T> +static bool ReadValueInEtmDir(const std::string& file, T* value, bool report_error = true) { + std::string s; + uint64_t v; + if (!android::base::ReadFileToString(ETM_DIR + file, &s) || + !android::base::ParseUint(android::base::Trim(s), &v)) { + if (report_error) { + LOG(ERROR) << "failed to read " << ETM_DIR << file; + } + return false; + } + *value = static_cast<T>(v); + return true; +} + +static uint32_t GetBits(uint32_t value, int start, int end) { + return (value >> start) & ((1U << (end - start + 1)) - 1); +} + +int ETMPerCpu::GetMajorVersion() const { + return GetBits(trcidr1, 8, 11); +} + +bool ETMPerCpu::IsContextIDSupported() const { + return GetBits(trcidr2, 5, 9) >= 4; +} + +bool ETMPerCpu::IsTimestampSupported() const { + return GetBits(trcidr0, 24, 28) > 0; +} + +ETMRecorder& ETMRecorder::GetInstance() { + static ETMRecorder etm; + return etm; +} + +int ETMRecorder::GetEtmEventType() { + if (event_type_ == 0) { + if (!IsDir(ETM_DIR) || !ReadValueInEtmDir("type", &event_type_, false)) { + event_type_ = -1; + } + } + return event_type_; +} + +std::unique_ptr<EventType> ETMRecorder::BuildEventType() { + int etm_event_type = GetEtmEventType(); + if (etm_event_type == -1) { + return nullptr; + } + return std::make_unique<EventType>( + "cs-etm", etm_event_type, 0, "CoreSight ETM instruction tracing", "arm"); +} + +bool ETMRecorder::CheckEtmSupport() { + if (GetEtmEventType() == -1) { + LOG(ERROR) << "etm event type isn't supported on device"; + return false; + } + if (!ReadEtmInfo()) { + LOG(ERROR) << "etm devices are not available"; + return false; + } + for (const auto& p : etm_info_) { + if (p.second.GetMajorVersion() < 4) { + LOG(ERROR) << "etm device version is less than 4.0"; + return false; + } + if (!p.second.IsContextIDSupported()) { + LOG(ERROR) << "etm device doesn't support contextID"; + return false; + } + } + if (!FindSinkConfig()) { + LOG(ERROR) << "can't find etr device, which moves etm data to memory"; + return false; + } + etm_supported_ = true; + return true; +} + +bool ETMRecorder::ReadEtmInfo() { + int cpu_count = get_nprocs_conf(); + for (const auto &name : GetEntriesInDir(ETM_DIR)) { + int cpu; + if (sscanf(name.c_str(), "cpu%d", &cpu) == 1) { + ETMPerCpu &cpu_info = etm_info_[cpu]; + bool success = + ReadValueInEtmDir(name + "/trcidr/trcidr0", &cpu_info.trcidr0) && + ReadValueInEtmDir(name + "/trcidr/trcidr1", &cpu_info.trcidr1) && + ReadValueInEtmDir(name + "/trcidr/trcidr2", &cpu_info.trcidr2) && + ReadValueInEtmDir(name + "/trcidr/trcidr8", &cpu_info.trcidr8) && + ReadValueInEtmDir(name + "/mgmt/trcauthstatus", &cpu_info.trcauthstatus) && + ReadValueInEtmDir(name + "/mgmt/trctraceid", &cpu_info.trctraceid); + if (!success) { + return false; + } + } + } + return (etm_info_.size() == cpu_count); +} + +bool ETMRecorder::FindSinkConfig() { + for (const auto &name : GetEntriesInDir(ETM_DIR + "sinks")) { + if (name.find("etr") != -1) { + if (ReadValueInEtmDir("sinks/" + name, &sink_config_)) { + return true; + } + } + } + return false; +} + +void ETMRecorder::SetEtmPerfEventAttr(perf_event_attr* attr) { + CHECK(etm_supported_); + BuildEtmConfig(); + attr->config = etm_event_config_; + attr->config2 = sink_config_; +} + +void ETMRecorder::BuildEtmConfig() { + if (etm_event_config_ == 0) { + etm_event_config_ |= 1ULL << ETM_OPT_CTXTID; + etm_config_reg_ |= 1U << ETM4_CFG_BIT_CTXTID; + + if (ETM_RECORD_TIMESTAMP) { + bool ts_supported = true; + for (auto& p : etm_info_) { + ts_supported &= p.second.IsTimestampSupported(); + } + if (ts_supported) { + etm_event_config_ |= 1ULL << ETM_OPT_TS; + etm_config_reg_ |= 1U << ETM4_CFG_BIT_TS; + } + } + } +} + +AuxTraceInfoRecord ETMRecorder::CreateAuxTraceInfoRecord() { + AuxTraceInfoRecord::DataType data; + memset(&data, 0, sizeof(data)); + data.aux_type = AuxTraceInfoRecord::AUX_TYPE_ETM; + data.nr_cpu = etm_info_.size(); + data.pmu_type = GetEtmEventType(); + std::vector<AuxTraceInfoRecord::ETM4Info> etm4_v(etm_info_.size()); + size_t pos = 0; + for (auto& p : etm_info_) { + auto& e = etm4_v[pos++]; + e.magic = AuxTraceInfoRecord::MAGIC_ETM4; + e.cpu = p.first; + e.trcconfigr = etm_config_reg_; + e.trctraceidr = p.second.trctraceid; + e.trcidr0 = p.second.trcidr0; + e.trcidr1 = p.second.trcidr1; + e.trcidr2 = p.second.trcidr2; + e.trcidr8 = p.second.trcidr8; + e.trcauthstatus = p.second.trcauthstatus; + } + return AuxTraceInfoRecord(data, etm4_v); +} + +} // namespace simpleperf
\ No newline at end of file diff --git a/simpleperf/ETMRecorder.h b/simpleperf/ETMRecorder.h new file mode 100644 index 00000000..ba31d83a --- /dev/null +++ b/simpleperf/ETMRecorder.h @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <inttypes.h> + +#include <map> +#include <memory> + +#include "event_type.h" +#include "record.h" +#include "perf_event.h" + +namespace simpleperf { + +struct ETMPerCpu { + uint32_t trcidr0; + uint32_t trcidr1; + uint32_t trcidr2; + uint32_t trcidr8; + uint32_t trcauthstatus; + uint32_t trctraceid; + + int GetMajorVersion() const; + bool IsContextIDSupported() const; + bool IsTimestampSupported() const; +}; + +// Help recording Coresight ETM data on ARM devices. +// 1. Get etm event type on device. +// 2. Get sink config, which selects the ETR device moving etm data to memory. +// 3. Get etm info on each cpu. +// The etm event type and sink config are used to build perf_event_attr for etm data tracing. +// The etm info is kept in perf.data to help etm decoding. +class ETMRecorder { + public: + static ETMRecorder& GetInstance(); + + // If not found, return -1. + int GetEtmEventType(); + std::unique_ptr<EventType> BuildEventType(); + bool CheckEtmSupport(); + void SetEtmPerfEventAttr(perf_event_attr* attr); + AuxTraceInfoRecord CreateAuxTraceInfoRecord(); + + private: + bool ReadEtmInfo(); + bool FindSinkConfig(); + void BuildEtmConfig(); + + int event_type_ = 0; + bool etm_supported_ = false; + // select ETR device, setting in perf_event_attr->config2 + uint32_t sink_config_ = 0; + // select etm options (timestamp, context_id, ...), setting in perf_event_attr->config + uint64_t etm_event_config_ = 0; + // record etm options in AuxTraceInfoRecord + uint32_t etm_config_reg_ = 0; + std::map<int, ETMPerCpu> etm_info_; +}; + +} // namespace simpleperf
\ No newline at end of file diff --git a/simpleperf/OfflineUnwinder.cpp b/simpleperf/OfflineUnwinder.cpp index b98b8de7..07eda11f 100644 --- a/simpleperf/OfflineUnwinder.cpp +++ b/simpleperf/OfflineUnwinder.cpp @@ -18,6 +18,8 @@ #include <sys/mman.h> +#include <unordered_map> + #include <android-base/logging.h> #include <unwindstack/MachineArm.h> #include <unwindstack/MachineArm64.h> @@ -133,6 +135,15 @@ static unwindstack::MapInfo* CreateMapInfo(const MapEntry* entry) { PROT_READ | entry->flags, name); } +class UnwindMaps : public unwindstack::Maps { + public: + void UpdateMaps(const MapSet& map_set); + + private: + uint64_t version_ = 0u; + std::vector<const MapEntry*> entries_; +}; + void UnwindMaps::UpdateMaps(const MapSet& map_set) { if (version_ == map_set.version) { return; @@ -182,13 +193,24 @@ void UnwindMaps::UpdateMaps(const MapSet& map_set) { } } -OfflineUnwinder::OfflineUnwinder(bool collect_stat) : collect_stat_(collect_stat) { - unwindstack::Elf::SetCachingEnabled(true); -} +class OfflineUnwinderImpl : public OfflineUnwinder { + public: + OfflineUnwinderImpl(bool collect_stat) : collect_stat_(collect_stat) { + unwindstack::Elf::SetCachingEnabled(true); + } -bool OfflineUnwinder::UnwindCallChain(const ThreadEntry& thread, const RegSet& regs, - const char* stack, size_t stack_size, - std::vector<uint64_t>* ips, std::vector<uint64_t>* sps) { + bool UnwindCallChain(const ThreadEntry& thread, const RegSet& regs, const char* stack, + size_t stack_size, std::vector<uint64_t>* ips, + std::vector<uint64_t>* sps) override; + + private: + bool collect_stat_; + std::unordered_map<pid_t, UnwindMaps> cached_maps_; +}; + +bool OfflineUnwinderImpl::UnwindCallChain(const ThreadEntry& thread, const RegSet& regs, + const char* stack, size_t stack_size, + std::vector<uint64_t>* ips, std::vector<uint64_t>* sps) { uint64_t start_time; if (collect_stat_) { start_time = GetSystemClock(); @@ -285,4 +307,8 @@ bool OfflineUnwinder::UnwindCallChain(const ThreadEntry& thread, const RegSet& r return true; } +std::unique_ptr<OfflineUnwinder> OfflineUnwinder::Create(bool collect_stat) { + return std::unique_ptr<OfflineUnwinder>(new OfflineUnwinderImpl(collect_stat)); +} + } // namespace simpleperf diff --git a/simpleperf/OfflineUnwinder.h b/simpleperf/OfflineUnwinder.h index e5091167..b0ff7ac5 100644 --- a/simpleperf/OfflineUnwinder.h +++ b/simpleperf/OfflineUnwinder.h @@ -19,15 +19,10 @@ #include <memory> #include <vector> -#include <unordered_map> #include "perf_regs.h" #include "thread_tree.h" -#if defined(__linux__) -#include <unwindstack/Maps.h> -#endif - namespace simpleperf { struct ThreadEntry; @@ -55,26 +50,14 @@ struct UnwindingResult { uint64_t stack_end; }; -#if defined(__linux__) -class UnwindMaps : public unwindstack::Maps { - public: - void UpdateMaps(const MapSet& map_set); - - private: - uint64_t version_ = 0u; - std::vector<const MapEntry*> entries_; -}; - class OfflineUnwinder { public: - OfflineUnwinder(bool collect_stat); - - bool UnwindCallChain(const ThreadEntry& thread, const RegSet& regs, const char* stack, - size_t stack_size, std::vector<uint64_t>* ips, std::vector<uint64_t>* sps); + static std::unique_ptr<OfflineUnwinder> Create(bool collect_stat); + virtual ~OfflineUnwinder() {} - bool HasStat() const { - return collect_stat_; - } + virtual bool UnwindCallChain(const ThreadEntry& thread, const RegSet& regs, const char* stack, + size_t stack_size, std::vector<uint64_t>* ips, + std::vector<uint64_t>* sps) = 0; const UnwindingResult& GetUnwindingResult() const { return unwinding_result_; @@ -84,27 +67,13 @@ class OfflineUnwinder { return is_callchain_broken_for_incomplete_jit_debug_info_; } - private: - bool collect_stat_; - UnwindingResult unwinding_result_; - bool is_callchain_broken_for_incomplete_jit_debug_info_; - - std::unordered_map<pid_t, UnwindMaps> cached_maps_; -}; - -#else // defined(__linux__) + protected: + OfflineUnwinder() {} -class OfflineUnwinder { - public: - OfflineUnwinder(bool) {} - bool UnwindCallChain(const ThreadEntry&, const RegSet&, const char*, size_t, - std::vector<uint64_t>*, std::vector<uint64_t>*) { - return false; - } + UnwindingResult unwinding_result_; + bool is_callchain_broken_for_incomplete_jit_debug_info_ = false; }; -#endif // !defined(__linux__) - } // namespace simpleperf #endif // SIMPLE_PERF_OFFLINE_UNWINDER_H_ diff --git a/simpleperf/RecordReadThread.cpp b/simpleperf/RecordReadThread.cpp index c6049466..b89066b9 100644 --- a/simpleperf/RecordReadThread.cpp +++ b/simpleperf/RecordReadThread.cpp @@ -23,7 +23,9 @@ #include <unordered_map> #include "environment.h" +#include "event_type.h" #include "record.h" +#include "utils.h" namespace simpleperf { @@ -205,9 +207,13 @@ bool KernelRecordReader::MoveToNextRecord(const RecordParser& parser) { RecordReadThread::RecordReadThread(size_t record_buffer_size, const perf_event_attr& attr, size_t min_mmap_pages, size_t max_mmap_pages, - bool allow_cutting_samples) - : record_buffer_(record_buffer_size), record_parser_(attr), attr_(attr), - min_mmap_pages_(min_mmap_pages), max_mmap_pages_(max_mmap_pages) { + size_t aux_buffer_size, bool allow_cutting_samples) + : record_buffer_(record_buffer_size), + record_parser_(attr), + attr_(attr), + min_mmap_pages_(min_mmap_pages), + max_mmap_pages_(max_mmap_pages), + aux_buffer_size_(aux_buffer_size) { if (attr.sample_type & PERF_SAMPLE_STACK_USER) { stack_size_in_sample_record_ = attr.sample_stack_user; } @@ -287,7 +293,13 @@ std::unique_ptr<Record> RecordReadThread::GetRecord() { record_buffer_.MoveToNextRecord(); char* p = record_buffer_.GetCurrentRecord(); if (p != nullptr) { - return ReadRecordFromBuffer(attr_, p); + std::unique_ptr<Record> r = ReadRecordFromBuffer(attr_, p); + if (r->type() == PERF_RECORD_AUXTRACE) { + auto auxtrace = static_cast<AuxTraceRecord*>(r.get()); + record_buffer_.AddCurrentRecordSize(auxtrace->data->aux_size); + auxtrace->location.addr = r->Binary() + r->size(); + } + return r; } if (has_data_notification_) { char dummy; @@ -355,13 +367,21 @@ bool RecordReadThread::HandleAddEventFds(IOEventLoop& loop, std::unordered_map<int, EventFd*> cpu_map; for (size_t pages = max_mmap_pages_; pages >= min_mmap_pages_; pages >>= 1) { bool success = true; + bool report_error = pages == min_mmap_pages_; for (EventFd* fd : event_fds) { auto it = cpu_map.find(fd->Cpu()); if (it == cpu_map.end()) { - if (!fd->CreateMappedBuffer(pages, pages == min_mmap_pages_)) { + if (!fd->CreateMappedBuffer(pages, report_error)) { success = false; break; } + if (IsEtmEventType(fd->attr().type)) { + if (!fd->CreateAuxBuffer(aux_buffer_size_, report_error)) { + fd->DestroyMappedBuffer(); + success = false; + break; + } + } cpu_map[fd->Cpu()] = fd; } else { if (!fd->ShareMappedBuffer(*(it->second), pages == min_mmap_pages_)) { @@ -376,6 +396,7 @@ bool RecordReadThread::HandleAddEventFds(IOEventLoop& loop, } for (auto& pair : cpu_map) { pair.second->DestroyMappedBuffer(); + pair.second->DestroyAuxBuffer(); } cpu_map.clear(); } @@ -402,6 +423,7 @@ bool RecordReadThread::HandleRemoveEventFds(const std::vector<EventFd*>& event_f kernel_record_readers_.erase(it); event_fd->StopPolling(); event_fd->DestroyMappedBuffer(); + event_fd->DestroyAuxBuffer(); } } } @@ -423,34 +445,39 @@ bool RecordReadThread::ReadRecordsFromKernelBuffer() { readers.push_back(&reader); } } - if (readers.empty()) { - break; - } - if (readers.size() == 1u) { - // Only one buffer has data, process it directly. - while (readers[0]->MoveToNextRecord(record_parser_)) { - PushRecordToRecordBuffer(readers[0]); - } - } else { - // Use a binary heap to merge records from different buffers. As records from the same buffer - // are already ordered by time, we only need to merge the first record from all buffers. And - // each time a record is popped from the heap, we put the next record from its buffer into - // the heap. - for (auto& reader : readers) { - reader->MoveToNextRecord(record_parser_); - } - std::make_heap(readers.begin(), readers.end(), CompareRecordTime); - size_t size = readers.size(); - while (size > 0) { - std::pop_heap(readers.begin(), readers.begin() + size, CompareRecordTime); - PushRecordToRecordBuffer(readers[size - 1]); - if (readers[size - 1]->MoveToNextRecord(record_parser_)) { - std::push_heap(readers.begin(), readers.begin() + size, CompareRecordTime); - } else { - size--; + bool has_data = false; + if (!readers.empty()) { + has_data = true; + if (readers.size() == 1u) { + // Only one buffer has data, process it directly. + while (readers[0]->MoveToNextRecord(record_parser_)) { + PushRecordToRecordBuffer(readers[0]); + } + } else { + // Use a binary heap to merge records from different buffers. As records from the same + // buffer are already ordered by time, we only need to merge the first record from all + // buffers. And each time a record is popped from the heap, we put the next record from its + // buffer into the heap. + for (auto& reader : readers) { + reader->MoveToNextRecord(record_parser_); + } + std::make_heap(readers.begin(), readers.end(), CompareRecordTime); + size_t size = readers.size(); + while (size > 0) { + std::pop_heap(readers.begin(), readers.begin() + size, CompareRecordTime); + PushRecordToRecordBuffer(readers[size - 1]); + if (readers[size - 1]->MoveToNextRecord(record_parser_)) { + std::push_heap(readers.begin(), readers.begin() + size, CompareRecordTime); + } else { + size--; + } } } } + ReadAuxDataFromKernelBuffer(&has_data); + if (!has_data) { + break; + } if (!SendDataNotificationToMainThread()) { return false; } @@ -466,7 +493,7 @@ void RecordReadThread::PushRecordToRecordBuffer(KernelRecordReader* kernel_recor if (free_size < record_buffer_critical_level_) { // When the free size in record buffer is below critical level, drop sample records to save // space for more important records (like mmap or fork records). - lost_samples_++; + stat_.lost_samples++; return; } size_t stack_size_limit = stack_size_in_sample_record_; @@ -513,10 +540,10 @@ void RecordReadThread::PushRecordToRecordBuffer(KernelRecordReader* kernel_recor memcpy(p + pos + new_stack_size, &new_stack_size, sizeof(uint64_t)); record_buffer_.FinishWrite(); if (new_stack_size < dyn_stack_size) { - cut_stack_samples_++; + stat_.cut_stack_samples++; } } else { - lost_samples_++; + stat_.lost_samples++; } return; } @@ -528,9 +555,47 @@ void RecordReadThread::PushRecordToRecordBuffer(KernelRecordReader* kernel_recor record_buffer_.FinishWrite(); } else { if (header.type == PERF_RECORD_SAMPLE) { - lost_samples_++; + stat_.lost_samples++; } else { - lost_non_samples_++; + stat_.lost_non_samples++; + } + } +} + +void RecordReadThread::ReadAuxDataFromKernelBuffer(bool* has_data) { + for (auto& reader : kernel_record_readers_) { + EventFd* event_fd = reader.GetEventFd(); + if (event_fd->HasAuxBuffer()) { + char* buf[2]; + size_t size[2]; + uint64_t offset = event_fd->GetAvailableAuxData(&buf[0], &size[0], &buf[1], &size[1]); + size_t aux_size = size[0] + size[1]; + if (aux_size == 0) { + continue; + } + *has_data = true; + AuxTraceRecord auxtrace(Align(aux_size, 8), offset, event_fd->Cpu(), 0, event_fd->Cpu()); + size_t alloc_size = auxtrace.size() + auxtrace.data->aux_size; + if (record_buffer_.GetFreeSize() < alloc_size + record_buffer_critical_level_) { + stat_.lost_aux_data_size += aux_size; + } else { + char* p = record_buffer_.AllocWriteSpace(alloc_size); + CHECK(p != nullptr); + MoveToBinaryFormat(auxtrace.Binary(), auxtrace.size(), p); + MoveToBinaryFormat(buf[0], size[0], p); + if (size[1] != 0) { + MoveToBinaryFormat(buf[1], size[1], p); + } + size_t pad_size = auxtrace.data->aux_size - aux_size; + if (pad_size != 0) { + uint64_t pad = 0; + memcpy(p, &pad, pad_size); + } + record_buffer_.FinishWrite(); + stat_.aux_data_size += aux_size; + LOG(DEBUG) << "record aux data " << aux_size << " bytes"; + } + event_fd->DiscardAuxData(aux_size); } } } diff --git a/simpleperf/RecordReadThread.h b/simpleperf/RecordReadThread.h index 703de619..4e93f97a 100644 --- a/simpleperf/RecordReadThread.h +++ b/simpleperf/RecordReadThread.h @@ -50,6 +50,7 @@ class RecordBuffer { // Get data of the current record. Return nullptr if there is no records in the buffer. char* GetCurrentRecord(); + void AddCurrentRecordSize(size_t size) { cur_read_record_size_ += size; } // Called after reading a record, the space of the record will be writable. void MoveToNextRecord(); @@ -82,6 +83,14 @@ class RecordParser { size_t callchain_pos_in_sample_records_ = 0; }; +struct RecordStat { + size_t lost_samples = 0; + size_t lost_non_samples = 0; + size_t cut_stack_samples = 0; + uint64_t aux_data_size = 0; + uint64_t lost_aux_data_size = 0; +}; + // Read records from the kernel buffer belong to an event_fd. class KernelRecordReader { public: @@ -115,7 +124,8 @@ class KernelRecordReader { class RecordReadThread { public: RecordReadThread(size_t record_buffer_size, const perf_event_attr& attr, size_t min_mmap_pages, - size_t max_mmap_pages, bool allow_cutting_samples = true); + size_t max_mmap_pages, size_t aux_buffer_size, + bool allow_cutting_samples = true); ~RecordReadThread(); void SetBufferLevels(size_t record_buffer_low_level, size_t record_buffer_critical_level) { record_buffer_low_level_ = record_buffer_low_level; @@ -137,11 +147,8 @@ class RecordReadThread { // If available, return the next record in the RecordBuffer, otherwise return nullptr. std::unique_ptr<Record> GetRecord(); - void GetLostRecords(size_t* lost_samples, size_t* lost_non_samples, size_t* cut_stack_samples) { - *lost_samples = lost_samples_; - *lost_non_samples = lost_non_samples_; - *cut_stack_samples = cut_stack_samples_; - } + + const RecordStat& GetStat() const { return stat_; } private: enum Cmd { @@ -164,6 +171,7 @@ class RecordReadThread { bool HandleRemoveEventFds(const std::vector<EventFd*>& event_fds); bool ReadRecordsFromKernelBuffer(); void PushRecordToRecordBuffer(KernelRecordReader* kernel_record_reader); + void ReadAuxDataFromKernelBuffer(bool* has_data); bool SendDataNotificationToMainThread(); RecordBuffer record_buffer_; @@ -177,6 +185,7 @@ class RecordReadThread { size_t stack_size_in_sample_record_ = 0; size_t min_mmap_pages_; size_t max_mmap_pages_; + size_t aux_buffer_size_; // Used to pass command notification from the main thread to the read thread. android::base::unique_fd write_cmd_fd_; @@ -195,9 +204,7 @@ class RecordReadThread { std::unique_ptr<std::thread> read_thread_; std::vector<KernelRecordReader> kernel_record_readers_; - size_t lost_samples_ = 0; - size_t lost_non_samples_ = 0; - size_t cut_stack_samples_ = 0; + RecordStat stat_; }; } // namespace simpleperf diff --git a/simpleperf/RecordReadThread_test.cpp b/simpleperf/RecordReadThread_test.cpp index 74b352d1..eae58345 100644 --- a/simpleperf/RecordReadThread_test.cpp +++ b/simpleperf/RecordReadThread_test.cpp @@ -106,10 +106,14 @@ TEST(RecordParser, smoke) { } struct MockEventFd : public EventFd { - MockEventFd(const perf_event_attr& attr, int cpu, char* buffer, size_t buffer_size) + MockEventFd(const perf_event_attr& attr, int cpu, char* buffer, size_t buffer_size, + bool mock_aux_buffer) : EventFd(attr, -1, "", 0, cpu) { mmap_data_buffer_ = buffer; mmap_data_buffer_size_ = buffer_size; + if (mock_aux_buffer) { + aux_buffer_size_ = 1; // Make HasAuxBuffer() return true. + } } MOCK_METHOD2(CreateMappedBuffer, bool(size_t, bool)); @@ -118,6 +122,11 @@ struct MockEventFd : public EventFd { MOCK_METHOD0(StopPolling, bool()); MOCK_METHOD1(GetAvailableMmapDataSize, size_t(size_t&)); MOCK_METHOD1(DiscardMmapData, void(size_t)); + + MOCK_METHOD2(CreateAuxBuffer, bool(size_t, bool)); + MOCK_METHOD0(DestroyAuxBuffer, void()); + MOCK_METHOD4(GetAvailableAuxData, uint64_t(char**, size_t*, char**, size_t*)); + MOCK_METHOD1(DiscardAuxData, void(size_t)); }; static perf_event_attr CreateFakeEventAttr() { @@ -170,7 +179,7 @@ TEST(KernelRecordReader, smoke) { pos += records[i]->size(); } // Read records using KernelRecordReader. - MockEventFd event_fd(attr, 0, buffer.data(), buffer.size()); + MockEventFd event_fd(attr, 0, buffer.data(), buffer.size(), false); EXPECT_CALL(event_fd, GetAvailableMmapDataSize(Truly(SetArg(data_pos)))) .Times(1).WillOnce(Return(data_size)); @@ -208,7 +217,7 @@ class RecordReadThreadTest : public ::testing::Test { } event_fds_.resize(event_fd_count); for (size_t i = 0; i < event_fd_count; ++i) { - event_fds_[i].reset(new MockEventFd(attr, i, buffers_[i].data(), buffer_size)); + event_fds_[i].reset(new MockEventFd(attr, i, buffers_[i].data(), buffer_size, false)); EXPECT_CALL(*event_fds_[i], CreateMappedBuffer(_, _)).Times(1).WillOnce(Return(true)); EXPECT_CALL(*event_fds_[i], StartPolling(_, _)).Times(1).WillOnce(Return(true)); EXPECT_CALL(*event_fds_[i], GetAvailableMmapDataSize(Truly(SetArg(0)))).Times(1) @@ -216,6 +225,7 @@ class RecordReadThreadTest : public ::testing::Test { EXPECT_CALL(*event_fds_[i], DiscardMmapData(Eq(data_size))).Times(1); EXPECT_CALL(*event_fds_[i], StopPolling()).Times(1).WillOnce(Return(true)); EXPECT_CALL(*event_fds_[i], DestroyMappedBuffer()).Times(1); + EXPECT_CALL(*event_fds_[i], DestroyAuxBuffer()).Times(1); } std::vector<EventFd*> result; for (auto& fd : event_fds_) { @@ -233,7 +243,7 @@ TEST_F(RecordReadThreadTest, handle_cmds) { perf_event_attr attr = CreateFakeEventAttr(); records_ = CreateFakeRecords(attr, 2, 0, 0); std::vector<EventFd*> event_fds = CreateFakeEventFds(attr, 2); - RecordReadThread thread(128 * 1024, event_fds[0]->attr(), 1, 1); + RecordReadThread thread(128 * 1024, event_fds[0]->attr(), 1, 1, 0); IOEventLoop loop; bool has_notify = false; auto callback = [&]() { @@ -252,7 +262,7 @@ TEST_F(RecordReadThreadTest, handle_cmds) { TEST_F(RecordReadThreadTest, read_records) { perf_event_attr attr = CreateFakeEventAttr(); - RecordReadThread thread(128 * 1024, attr, 1, 1); + RecordReadThread thread(128 * 1024, attr, 1, 1, 0); IOEventLoop loop; size_t record_index; auto callback = [&]() { @@ -287,7 +297,7 @@ TEST_F(RecordReadThreadTest, process_sample_record) { attr.sample_type |= PERF_SAMPLE_STACK_USER; attr.sample_stack_user = 64 * 1024; size_t record_buffer_size = 128 * 1024; - RecordReadThread thread(record_buffer_size, attr, 1, 1); + RecordReadThread thread(record_buffer_size, attr, 1, 1, 0); IOEventLoop loop; ASSERT_TRUE(thread.RegisterDataCallback(loop, []() { return true; })); @@ -329,20 +339,16 @@ TEST_F(RecordReadThreadTest, process_sample_record) { thread.SetBufferLevels(record_buffer_size, record_buffer_size); read_record(r); ASSERT_FALSE(r); - size_t lost_samples; - size_t lost_non_samples; - size_t cut_stack_samples; - thread.GetLostRecords(&lost_samples, &lost_non_samples, &cut_stack_samples); - ASSERT_EQ(lost_samples, 1u); - ASSERT_EQ(lost_non_samples, 0u); - ASSERT_EQ(cut_stack_samples, 1u); + ASSERT_EQ(thread.GetStat().lost_samples, 1u); + ASSERT_EQ(thread.GetStat().lost_non_samples, 0u); + ASSERT_EQ(thread.GetStat().cut_stack_samples, 1u); } // Test that the data notification exists until the RecordBuffer is empty. So we can read all // records even if reading one record at a time. TEST_F(RecordReadThreadTest, has_data_notification_until_buffer_empty) { perf_event_attr attr = CreateFakeEventAttr(); - RecordReadThread thread(128 * 1024, attr, 1, 1); + RecordReadThread thread(128 * 1024, attr, 1, 1, 0); IOEventLoop loop; size_t record_index = 0; auto read_one_record = [&]() { @@ -370,7 +376,7 @@ TEST_F(RecordReadThreadTest, no_cut_samples) { perf_event_attr attr = CreateFakeEventAttr(); attr.sample_type |= PERF_SAMPLE_STACK_USER; attr.sample_stack_user = 64 * 1024; - RecordReadThread thread(128 * 1024, attr, 1, 1, false); + RecordReadThread thread(128 * 1024, attr, 1, 1, 0, false); IOEventLoop loop; ASSERT_TRUE(thread.RegisterDataCallback(loop, []() { return true; })); const size_t total_samples = 100; @@ -383,12 +389,113 @@ TEST_F(RecordReadThreadTest, no_cut_samples) { while (thread.GetRecord()) { received_samples++; } - size_t lost_samples; - size_t lost_non_samples; - size_t cut_stack_samples; - thread.GetLostRecords(&lost_samples, &lost_non_samples, &cut_stack_samples); ASSERT_GT(received_samples, 0u); - ASSERT_GT(lost_samples, 0u); - ASSERT_EQ(lost_samples, total_samples - received_samples); - ASSERT_EQ(cut_stack_samples, 0u); + ASSERT_GT(thread.GetStat().lost_samples, 0u); + ASSERT_EQ(thread.GetStat().lost_samples, total_samples - received_samples); + ASSERT_EQ(thread.GetStat().cut_stack_samples, 0u); } + +struct FakeAuxData { + std::vector<char> buf1; + std::vector<char> buf2; + std::vector<char> pad; + bool lost; + + FakeAuxData(size_t buf1_size, size_t buf2_size, char c, size_t pad_size, bool lost) + : buf1(buf1_size, c), buf2(buf2_size, c), pad(pad_size, 0), lost(lost) {} +}; + +TEST_F(RecordReadThreadTest, read_aux_data) { + const EventType* type = FindEventTypeByName("cs-etm"); + if (type == nullptr) { + GTEST_LOG_(INFO) << "Omit this test as cs-etm event type isn't available"; + return; + } + std::vector<FakeAuxData> aux_data; + aux_data.emplace_back(40, 0, '0', 0, false); // one buffer + aux_data.emplace_back(40, 40, '1', 0, false); // two buffers + aux_data.emplace_back(36, 0, '2', 4, false); // one buffer needs padding to 8 bytes alignment + aux_data.emplace_back(1024, 0, '3', 0, true); // one buffer too big to fit into RecordReadThread + size_t test_index = 0; + + auto SetBuf1 = [&](char** buf1) { + *buf1 = aux_data[test_index].buf1.data(); + return true; + }; + auto SetSize1 = [&](size_t* size1) { + *size1 = aux_data[test_index].buf1.size(); + return true; + }; + auto SetBuf2 = [&](char** buf2) { + *buf2 = aux_data[test_index].buf2.data(); + return true; + }; + auto SetSize2 = [&](size_t* size2) { + *size2 = aux_data[test_index].buf2.size(); + return true; + }; + auto CheckDiscardSize = [&](size_t size) { + return size == aux_data[test_index].buf1.size() + aux_data[test_index].buf2.size(); + }; + + const size_t AUX_BUFFER_SIZE = 4096; + + perf_event_attr attr = CreateDefaultPerfEventAttr(*type); + MockEventFd fd(attr, 0, nullptr, 1, true); + EXPECT_CALL(fd, CreateMappedBuffer(_, _)).Times(1).WillOnce(Return(true)); + EXPECT_CALL(fd, CreateAuxBuffer(Eq(AUX_BUFFER_SIZE), _)).Times(1).WillOnce(Return(true)); + EXPECT_CALL(fd, StartPolling(_, _)).Times(1).WillOnce(Return(true)); + EXPECT_CALL(fd, GetAvailableMmapDataSize(_)).Times(aux_data.size()).WillRepeatedly(Return(0)); + EXPECT_CALL(fd, + GetAvailableAuxData(Truly(SetBuf1), Truly(SetSize1), Truly(SetBuf2), Truly(SetSize2))) + .Times(aux_data.size()); + EXPECT_CALL(fd, DiscardAuxData(Truly(CheckDiscardSize))).Times(aux_data.size()); + EXPECT_CALL(fd, StopPolling()).Times(1).WillOnce(Return(true)); + EXPECT_CALL(fd, DestroyMappedBuffer()).Times(1); + EXPECT_CALL(fd, DestroyAuxBuffer()).Times(1); + + RecordReadThread thread(1024, attr, 1, 1, AUX_BUFFER_SIZE); + IOEventLoop loop; + ASSERT_TRUE(thread.RegisterDataCallback(loop, []() { return true; })); + ASSERT_TRUE(thread.AddEventFds({&fd})); + for (; test_index < aux_data.size(); ++test_index) { + ASSERT_TRUE(thread.SyncKernelBuffer()); + std::unique_ptr<Record> r = thread.GetRecord(); + if (aux_data[test_index].lost) { + ASSERT_TRUE(r == nullptr); + continue; + } + ASSERT_TRUE(r); + ASSERT_EQ(r->type(), PERF_RECORD_AUXTRACE); + auto auxtrace = static_cast<AuxTraceRecord*>(r.get()); + auto& expected = aux_data[test_index]; + ASSERT_EQ(auxtrace->data->aux_size, + expected.buf1.size() + expected.buf2.size() + expected.pad.size()); + const char* p = auxtrace->location.addr; + ASSERT_TRUE(p != nullptr); + if (!expected.buf1.empty()) { + ASSERT_EQ(memcmp(p, expected.buf1.data(), expected.buf1.size()), 0); + p += expected.buf1.size(); + } + if (!expected.buf2.empty()) { + ASSERT_EQ(memcmp(p, expected.buf2.data(), expected.buf2.size()), 0); + p += expected.buf2.size(); + } + if (!expected.pad.empty()) { + ASSERT_EQ(memcmp(p, expected.pad.data(), expected.pad.size()), 0); + } + } + ASSERT_TRUE(thread.GetRecord() == nullptr); + ASSERT_TRUE(thread.RemoveEventFds({&fd})); + size_t aux_data_size = 0; + size_t lost_aux_data_size = 0; + for (auto& aux : aux_data) { + if (aux.lost) { + lost_aux_data_size += aux.buf1.size() + aux.buf2.size(); + } else { + aux_data_size += aux.buf1.size() + aux.buf2.size(); + } + } + ASSERT_EQ(aux_data_size, thread.GetStat().aux_data_size); + ASSERT_EQ(lost_aux_data_size, thread.GetStat().lost_aux_data_size); +}
\ No newline at end of file diff --git a/simpleperf/cmd_debug_unwind.cpp b/simpleperf/cmd_debug_unwind.cpp index c2019bd3..b5df2da3 100644 --- a/simpleperf/cmd_debug_unwind.cpp +++ b/simpleperf/cmd_debug_unwind.cpp @@ -94,7 +94,7 @@ class DebugUnwindCommand : public Command { ), input_filename_("perf.data"), output_filename_("perf.data.debug"), - offline_unwinder_(true), + offline_unwinder_(OfflineUnwinder::Create(true)), callchain_joiner_(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE, 1, true), selected_time_(0) { } @@ -126,7 +126,7 @@ class DebugUnwindCommand : public Command { std::unique_ptr<RecordFileReader> reader_; std::unique_ptr<RecordFileWriter> writer_; ThreadTree thread_tree_; - OfflineUnwinder offline_unwinder_; + std::unique_ptr<OfflineUnwinder> offline_unwinder_; CallChainJoiner callchain_joiner_; Stat stat_; uint64_t selected_time_; @@ -236,12 +236,12 @@ bool DebugUnwindCommand::ProcessRecord(Record* record) { RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs); std::vector<uint64_t> ips; std::vector<uint64_t> sps; - if (!offline_unwinder_.UnwindCallChain(*thread, regs, r.stack_user_data.data, + if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data, r.GetValidStackSize(), &ips, &sps)) { return false; } - const UnwindingResult& unwinding_result = offline_unwinder_.GetUnwindingResult(); + const UnwindingResult& unwinding_result = offline_unwinder_->GetUnwindingResult(); stat_.unwinding_sample_count++; stat_.total_unwinding_time_in_ns += unwinding_result.used_time; stat_.max_unwinding_time_in_ns = std::max(stat_.max_unwinding_time_in_ns, @@ -371,9 +371,7 @@ bool DebugUnwindCommand::WriteFeatureSections() { // Write meta_info section. std::unordered_map<std::string, std::string> info_map; if (it != features.end() && it->first == PerfFileFormat::FEAT_META_INFO) { - if (!reader_->ReadMetaInfoFeature(&info_map)) { - return false; - } + info_map = reader_->GetMetaInfoFeature(); ++it; } info_map["debug_unwind"] = "true"; diff --git a/simpleperf/cmd_debug_unwind_test.cpp b/simpleperf/cmd_debug_unwind_test.cpp index 8110ed06..20a441ec 100644 --- a/simpleperf/cmd_debug_unwind_test.cpp +++ b/simpleperf/cmd_debug_unwind_test.cpp @@ -60,9 +60,8 @@ TEST(cmd_debug_unwind, symfs_option) { const std::map<int, PerfFileFormat::SectionDesc>& features = reader->FeatureSectionDescriptors(); ASSERT_NE(features.find(PerfFileFormat::FEAT_FILE), features.end()); ASSERT_NE(features.find(PerfFileFormat::FEAT_META_INFO), features.end()); - std::unordered_map<std::string, std::string> info_map; - ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map)); - ASSERT_EQ(info_map["debug_unwind"], "true"); + auto meta_info = reader->GetMetaInfoFeature(); + ASSERT_EQ(meta_info["debug_unwind"], "true"); } TEST(cmd_debug_unwind, unwind_with_ip_zero_in_callchain) { diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp index 6f8762f8..be81b6a1 100644 --- a/simpleperf/cmd_dumprecord.cpp +++ b/simpleperf/cmd_dumprecord.cpp @@ -40,7 +40,7 @@ class DumpRecordCommand : public Command { : Command("dump", "dump perf record file", "Usage: simpleperf dumprecord [options] [perf_record_file]\n" " Dump different parts of a perf record file. Default file is perf.data.\n"), - record_filename_("perf.data"), record_file_arch_(GetBuildArch()) { + record_filename_("perf.data") { } bool Run(const std::vector<std::string>& args); @@ -50,11 +50,11 @@ class DumpRecordCommand : public Command { void DumpFileHeader(); void DumpAttrSection(); bool DumpDataSection(); + bool DumpAuxData(const AuxRecord& aux); bool DumpFeatureSection(); std::string record_filename_; std::unique_ptr<RecordFileReader> record_file_reader_; - ArchType record_file_arch_; }; bool DumpRecordCommand::Run(const std::vector<std::string>& args) { @@ -65,25 +65,6 @@ bool DumpRecordCommand::Run(const std::vector<std::string>& args) { if (record_file_reader_ == nullptr) { return false; } - std::string arch = record_file_reader_->ReadFeatureString(FEAT_ARCH); - if (!arch.empty()) { - record_file_arch_ = GetArchType(arch); - if (record_file_arch_ == ARCH_UNSUPPORTED) { - return false; - } - } - ScopedCurrentArch scoped_arch(record_file_arch_); - std::unique_ptr<ScopedEventTypes> scoped_event_types; - if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) { - std::unordered_map<std::string, std::string> meta_info; - if (!record_file_reader_->ReadMetaInfoFeature(&meta_info)) { - return false; - } - auto it = meta_info.find("event_type_info"); - if (it != meta_info.end()) { - scoped_event_types.reset(new ScopedEventTypes(it->second)); - } - } DumpFileHeader(); DumpAttrSection(); if (!DumpDataSection()) { @@ -212,12 +193,33 @@ bool DumpRecordCommand::DumpDataSection() { PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(), vaddr_in_file); } + } else if (r->type() == PERF_RECORD_AUX) { + return DumpAuxData(*static_cast<AuxRecord*>(r.get())); } return true; }; return record_file_reader_->ReadDataSection(record_callback); } +bool DumpRecordCommand::DumpAuxData(const AuxRecord& aux) { + size_t size = aux.data->aux_size; + if (size > 0) { + std::unique_ptr<uint8_t[]> data(new uint8_t[size]); + if (!record_file_reader_->ReadAuxData(aux.Cpu(), aux.data->aux_offset, data.get(), size)) { + return false; + } + PrintIndented(1, "aux_data:\n"); + for (size_t i = 0; i < size; i += 16) { + PrintIndented(2, ""); + for (size_t j = i; j < std::min(i + 16, size); j++) { + printf("%02x", data[j]); + } + printf("\n"); + } + } + return true; +} + bool DumpRecordCommand::DumpFeatureSection() { std::map<int, SectionDesc> section_map = record_file_reader_->FeatureSectionDescriptors(); for (const auto& pair : section_map) { @@ -268,14 +270,15 @@ bool DumpRecordCommand::DumpFeatureSection() { } } } else if (feature == FEAT_META_INFO) { - std::unordered_map<std::string, std::string> info_map; - if (!record_file_reader_->ReadMetaInfoFeature(&info_map)) { - return false; - } PrintIndented(1, "meta_info:\n"); - for (auto& pair : info_map) { + for (auto& pair : record_file_reader_->GetMetaInfoFeature()) { PrintIndented(2, "%s = %s\n", pair.first.c_str(), pair.second.c_str()); } + } else if (feature == FEAT_AUXTRACE) { + PrintIndented(1, "file_offsets_of_auxtrace_records:\n"); + for (auto offset : record_file_reader_->ReadAuxTraceFeature()) { + PrintIndented(2, "%" PRIu64 "\n", offset); + } } } return true; diff --git a/simpleperf/cmd_dumprecord_test.cpp b/simpleperf/cmd_dumprecord_test.cpp index a2a50cee..12aadb10 100644 --- a/simpleperf/cmd_dumprecord_test.cpp +++ b/simpleperf/cmd_dumprecord_test.cpp @@ -44,3 +44,13 @@ TEST(cmd_dump, dump_callchain_of_sample_records) { ASSERT_NE(data.find("[kernel.kallsyms][+ffffffc000086b4a]"), std::string::npos); ASSERT_NE(data.find("__ioctl (/system/lib64/libc.so[+70b6c])"), std::string::npos); } + +TEST(cmd_dump, etm_data) { + CaptureStdout capture; + ASSERT_TRUE(capture.Start()); + ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_ETM_TEST_LOOP)})); + std::string data = capture.Finish(); + ASSERT_NE(data.find("record aux:"), std::string::npos); + ASSERT_NE(data.find("aux_data:"), std::string::npos); + ASSERT_NE(data.find("feature section for auxtrace:"), std::string::npos); +} diff --git a/simpleperf/cmd_list.cpp b/simpleperf/cmd_list.cpp index d9fdf9c8..46bee30d 100644 --- a/simpleperf/cmd_list.cpp +++ b/simpleperf/cmd_list.cpp @@ -24,11 +24,14 @@ #include "command.h" #include "environment.h" +#include "ETMRecorder.h" #include "event_attr.h" #include "event_fd.h" #include "event_selection_set.h" #include "event_type.h" +using namespace simpleperf; + static bool IsEventTypeSupported(const EventType& event_type) { if (event_type.type != PERF_TYPE_RAW) { perf_event_attr attr = CreateDefaultPerfEventAttr(event_type); @@ -71,11 +74,19 @@ static bool IsEventTypeSupported(const EventType& event_type) { static void PrintEventTypesOfType(uint32_t type, const std::string& type_name, const std::set<EventType>& event_types) { printf("List of %s:\n", type_name.c_str()); - if (type == PERF_TYPE_RAW && (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64)) { - printf(" # Please refer to \"PMU common architectural and microarchitectural event numbers\"\n" - " # and \"ARM recommendations for IMPLEMENTATION DEFINED event numbers\" listed in\n" - " # ARMv8 manual for details.\n" - " # A possible link is https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile.\n"); + if (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64) { + if (type == PERF_TYPE_RAW) { + printf( + // clang-format off +" # Please refer to \"PMU common architectural and microarchitectural event numbers\"\n" +" # and \"ARM recommendations for IMPLEMENTATION DEFINED event numbers\" listed in\n" +" # ARMv8 manual for details.\n" +" # A possible link is https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile.\n" + // clang-format on + ); + } else if (type == PERF_TYPE_HW_CACHE) { + printf(" # More cache events are available in `simpleperf list raw`.\n"); + } } for (auto& event_type : event_types) { if (event_type.type == type) { @@ -109,8 +120,9 @@ class ListCommand : public Command { " hw hardware events\n" " sw software events\n" " cache hardware cache events\n" -" raw raw pmu events\n" +" raw raw cpu pmu events\n" " tracepoint tracepoint events\n" +" cs-etm coresight etm instruction tracing events\n" "Options:\n" "--show-features Show features supported on the device, including:\n" " dwarf-based-call-graph\n" @@ -137,6 +149,7 @@ bool ListCommand::Run(const std::vector<std::string>& args) { {"raw", {PERF_TYPE_RAW, "raw events provided by cpu pmu"}}, {"tracepoint", {PERF_TYPE_TRACEPOINT, "tracepoint events"}}, {"user-space-sampler", {SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS, "user-space samplers"}}, + {"cs-etm", {-1, "coresight etm events"}}, }; std::vector<std::string> names; @@ -162,6 +175,9 @@ bool ListCommand::Run(const std::vector<std::string>& args) { for (auto& name : names) { auto it = type_map.find(name); + if (name == "cs-etm") { + it->second.first = ETMRecorder::GetInstance().GetEtmEventType(); + } PrintEventTypesOfType(it->second.first, it->second.second, event_types); } return true; diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp index de4ac42a..7641c40e 100644 --- a/simpleperf/cmd_record.cpp +++ b/simpleperf/cmd_record.cpp @@ -40,6 +40,7 @@ #include "CallChainJoiner.h" #include "command.h" #include "environment.h" +#include "ETMRecorder.h" #include "event_selection_set.h" #include "event_type.h" #include "IOEventLoop.h" @@ -92,6 +93,8 @@ constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024; static constexpr size_t kRecordBufferSize = 64 * 1024 * 1024; static constexpr size_t kSystemWideRecordBufferSize = 256 * 1024 * 1024; +static constexpr size_t kDefaultAuxBufferSize = 4 * 1024 * 1024; + struct TimeStat { uint64_t prepare_recording_time = 0; uint64_t start_recording_time = 0; @@ -179,6 +182,10 @@ class RecordCommand : public Command { "-m mmap_pages Set the size of the buffer used to receiving sample data from\n" " the kernel. It should be a power of 2. If not set, the max\n" " possible value <= 1024 will be used.\n" +"--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n" +" Need to be power of 2 and page size aligned.\n" +" Used memory size is (buffer_size * (cpu_count + 1).\n" +" Default is 4M.\n" "--no-inherit Don't record created child threads/processes.\n" "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n" " percent is in range [1-100], default is 25.\n" @@ -282,6 +289,7 @@ class RecordCommand : public Command { bool DumpKernelMaps(); bool DumpUserSpaceMaps(); bool DumpProcessMaps(pid_t pid, const std::unordered_set<pid_t>& tids); + bool DumpAuxTraceInfo(); bool ProcessRecord(Record* record); bool ShouldOmitRecord(Record* record); bool DumpMapsForRecord(Record* record); @@ -319,6 +327,7 @@ class RecordCommand : public Command { EventSelectionSet event_selection_set_; std::pair<size_t, size_t> mmap_page_range_; + size_t aux_buffer_size_ = kDefaultAuxBufferSize; ThreadTree thread_tree_; std::string record_filename_; @@ -419,7 +428,7 @@ bool RecordCommand::PrepareRecording(Workload* workload) { return false; } if (unwind_dwarf_callchain_) { - offline_unwinder_.reset(new OfflineUnwinder(false)); + offline_unwinder_ = OfflineUnwinder::Create(false); } if (unwind_dwarf_callchain_ && allow_callchain_joiner_) { callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE, @@ -475,7 +484,8 @@ bool RecordCommand::PrepareRecording(Workload* workload) { size_t record_buffer_size = system_wide_collection_ ? kSystemWideRecordBufferSize : kRecordBufferSize; if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second, - record_buffer_size, allow_cutting_samples_)) { + aux_buffer_size_, record_buffer_size, + allow_cutting_samples_)) { return false; } auto callback = @@ -629,32 +639,37 @@ bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) { time_stat_.post_process_time = GetSystemClock(); // 4. Show brief record result. - size_t lost_samples; - size_t lost_non_samples; - size_t cut_stack_samples; - event_selection_set_.GetLostRecords(&lost_samples, &lost_non_samples, &cut_stack_samples); - std::string cut_samples; - if (cut_stack_samples > 0) { - cut_samples = android::base::StringPrintf(" (cut %zu)", cut_stack_samples); - } - lost_record_count_ += lost_samples + lost_non_samples; - LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples - << ". Samples lost: " << lost_record_count_ << "."; - LOG(DEBUG) << "In user space, dropped " << lost_samples << " samples, " << lost_non_samples - << " non samples, cut stack of " << cut_stack_samples << " samples."; - if (sample_record_count_ + lost_record_count_ != 0) { - double lost_percent = static_cast<double>(lost_record_count_) / - (lost_record_count_ + sample_record_count_); - constexpr double LOST_PERCENT_WARNING_BAR = 0.1; - if (lost_percent >= LOST_PERCENT_WARNING_BAR) { - LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, " - << "consider increasing mmap_pages(-m), " - << "or decreasing sample frequency(-f), " - << "or increasing sample period(-c)."; + auto record_stat = event_selection_set_.GetRecordStat(); + if (event_selection_set_.HasAuxTrace()) { + LOG(INFO) << "Aux data traced: " << record_stat.aux_data_size; + if (record_stat.lost_aux_data_size != 0) { + LOG(INFO) << "Aux data lost in user space: " << record_stat.lost_aux_data_size; + } + } else { + std::string cut_samples; + if (record_stat.cut_stack_samples > 0) { + cut_samples = android::base::StringPrintf(" (cut %zu)", record_stat.cut_stack_samples); + } + lost_record_count_ += record_stat.lost_samples + record_stat.lost_non_samples; + LOG(INFO) << "Samples recorded: " << sample_record_count_ << cut_samples + << ". Samples lost: " << lost_record_count_ << "."; + LOG(DEBUG) << "In user space, dropped " << record_stat.lost_samples << " samples, " + << record_stat.lost_non_samples << " non samples, cut stack of " + << record_stat.cut_stack_samples << " samples."; + if (sample_record_count_ + lost_record_count_ != 0) { + double lost_percent = + static_cast<double>(lost_record_count_) / (lost_record_count_ + sample_record_count_); + constexpr double LOST_PERCENT_WARNING_BAR = 0.1; + if (lost_percent >= LOST_PERCENT_WARNING_BAR) { + LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, " + << "consider increasing mmap_pages(-m), " + << "or decreasing sample frequency(-f), " + << "or increasing sample period(-c)."; + } + } + if (callchain_joiner_) { + callchain_joiner_->DumpStat(); } - } - if (callchain_joiner_) { - callchain_joiner_->DumpStat(); } LOG(DEBUG) << "Prepare recording time " << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e6 @@ -679,6 +694,15 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, return false; } app_package_name_ = args[i]; + } else if (args[i] == "--aux-buffer-size") { + if (!GetUintOption(args, &i, &aux_buffer_size_, 0, std::numeric_limits<size_t>::max(), + true)) { + return false; + } + if (!IsPowerOfTwo(aux_buffer_size_) || aux_buffer_size_ % sysconf(_SC_PAGE_SIZE)) { + LOG(ERROR) << "invalid aux buffer size: " << args[i]; + return false; + } } else if (args[i] == "-b") { branch_sampling_ = branch_sampling_type_map["any"]; } else if (args[i] == "-c" || args[i] == "-f") { @@ -985,7 +1009,11 @@ bool RecordCommand::AdjustPerfEventLimit() { set_prop = true; } // 3. Adjust perf_event_mlock_kb. - uint64_t mlock_kb = sysconf(_SC_NPROCESSORS_CONF) * (mmap_page_range_.second + 1) * 4; + long cpus = sysconf(_SC_NPROCESSORS_CONF); + uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4; + if (event_selection_set_.HasAuxTrace()) { + mlock_kb += cpus * aux_buffer_size_ / 1024; + } uint64_t cur_mlock_kb; if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb && !SetPerfEventMlockKb(mlock_kb)) { @@ -1044,7 +1072,8 @@ bool RecordCommand::CreateAndInitRecordFile() { } // Use first perf_event_attr and first event id to dump mmap and comm records. dumping_attr_id_ = event_selection_set_.GetEventAttrWithId()[0]; - return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps(); + return DumpKernelSymbol() && DumpTracingData() && DumpKernelMaps() && DumpUserSpaceMaps() && + DumpAuxTraceInfo(); } std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile( @@ -1210,6 +1239,14 @@ bool RecordCommand::ProcessRecord(Record* record) { return SaveRecordWithoutUnwinding(record); } +bool RecordCommand::DumpAuxTraceInfo() { + if (event_selection_set_.HasAuxTrace()) { + AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord(); + return ProcessRecord(&auxtrace_info); + } + return true; +} + template <typename MmapRecordType> bool MapOnlyExistInMemory(MmapRecordType* record) { return !record->InKernel() && MappedFileOnlyExistInMemory(record->filename); @@ -1542,10 +1579,14 @@ bool RecordCommand::DumpAdditionalFeatures( Dso::ReadKernelSymbolsFromProc(); kernel_symbols_available = true; } + std::vector<uint64_t> auxtrace_offset; auto callback = [&](const Record* r) { thread_tree_.Update(*r); if (r->type() == PERF_RECORD_SAMPLE) { CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r)); + } else if (r->type() == PERF_RECORD_AUXTRACE) { + auto auxtrace = static_cast<const AuxTraceRecord*>(r); + auxtrace_offset.emplace_back(auxtrace->location.file_offset - auxtrace->size()); } }; if (!record_file_writer_->ReadDataSection(callback)) { @@ -1556,6 +1597,9 @@ bool RecordCommand::DumpAdditionalFeatures( if (branch_sampling_) { feature_count++; } + if (!auxtrace_offset.empty()) { + feature_count++; + } if (!record_file_writer_->BeginWriteFeatures(feature_count)) { return false; } @@ -1595,6 +1639,9 @@ bool RecordCommand::DumpAdditionalFeatures( if (!DumpMetaInfoFeature(kernel_symbols_available)) { return false; } + if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) { + return false; + } if (!record_file_writer_->EndWriteFeatures()) { return false; diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp index 1f0f8d3c..1821913e 100644 --- a/simpleperf/cmd_record_test.cpp +++ b/simpleperf/cmd_record_test.cpp @@ -31,6 +31,7 @@ #include "command.h" #include "environment.h" +#include "ETMRecorder.h" #include "event_selection_set.h" #include "get_test_data.h" #include "record.h" @@ -38,6 +39,7 @@ #include "test_util.h" #include "thread_tree.h" +using namespace simpleperf; using namespace PerfFileFormat; static std::unique_ptr<Command> RecordCmd() { @@ -563,8 +565,7 @@ TEST(record_cmd, record_meta_info_feature) { ASSERT_TRUE(RunRecordCmd({}, tmpfile.path)); std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path); ASSERT_TRUE(reader); - std::unordered_map<std::string, std::string> info_map; - ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map)); + auto& info_map = reader->GetMetaInfoFeature(); ASSERT_NE(info_map.find("simpleperf_version"), info_map.end()); ASSERT_NE(info_map.find("timestamp"), info_map.end()); #if defined(__ANDROID__) @@ -602,8 +603,7 @@ TEST(record_cmd, trace_offcpu_option) { ASSERT_TRUE(RunRecordCmd({"--trace-offcpu", "-f", "1000"}, tmpfile.path)); std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path); ASSERT_TRUE(reader); - std::unordered_map<std::string, std::string> info_map; - ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map)); + auto info_map = reader->GetMetaInfoFeature(); ASSERT_EQ(info_map["trace_offcpu"], "true"); CheckEventType(tmpfile.path, "sched:sched_switch", 1u, 0u); } @@ -622,8 +622,7 @@ TEST(record_cmd, clockid_option) { ASSERT_TRUE(RunRecordCmd({"--clockid", "monotonic"}, tmpfile.path)); std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path); ASSERT_TRUE(reader); - std::unordered_map<std::string, std::string> info_map; - ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map)); + auto info_map = reader->GetMetaInfoFeature(); ASSERT_EQ(info_map["clockid"], "monotonic"); } } @@ -803,3 +802,42 @@ TEST(record_cmd, no_cut_samples_option) { TEST_REQUIRE_HW_COUNTER(); ASSERT_TRUE(RunRecordCmd({"--no-cut-samples"})); } + +TEST(record_cmd, cs_etm_event) { + if (!ETMRecorder::GetInstance().CheckEtmSupport()) { + GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device"; + return; + } + TemporaryFile tmpfile; + ASSERT_TRUE(RunRecordCmd({"-e", "cs-etm"}, tmpfile.path)); + std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path); + ASSERT_TRUE(reader); + bool has_auxtrace_info = false; + bool has_auxtrace = false; + bool has_aux = false; + ASSERT_TRUE(reader->ReadDataSection([&](std::unique_ptr<Record> r) { + if (r->type() == PERF_RECORD_AUXTRACE_INFO) { + has_auxtrace_info = true; + } else if (r->type() == PERF_RECORD_AUXTRACE) { + has_auxtrace = true; + } else if (r->type() == PERF_RECORD_AUX) { + has_aux = true; + } + return true; + })); + ASSERT_TRUE(has_auxtrace_info); + ASSERT_TRUE(has_auxtrace); + ASSERT_TRUE(has_aux); +} + +TEST(record_cmd, aux_buffer_size_option) { + if (!ETMRecorder::GetInstance().CheckEtmSupport()) { + GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device"; + return; + } + ASSERT_TRUE(RunRecordCmd({"-e", "cs-etm", "--aux-buffer-size", "1m"})); + // not page size aligned + ASSERT_FALSE(RunRecordCmd({"-e", "cs-etm", "--aux-buffer-size", "1024"})); + // not power of two + ASSERT_FALSE(RunRecordCmd({"-e", "cs-etm", "--aux-buffer-size", "12k"})); +}
\ No newline at end of file diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp index f7aeb281..8e67f5e0 100644 --- a/simpleperf/cmd_report.cpp +++ b/simpleperf/cmd_report.cpp @@ -432,7 +432,7 @@ class ReportCommand : public Command { private: bool ParseOptions(const std::vector<std::string>& args); - bool ReadMetaInfoFromRecordFile(); + void ReadMetaInfoFromRecordFile(); bool ReadEventAttrFromRecordFile(); bool ReadFeaturesFromRecordFile(); bool ReadSampleTreeFromRecordFile(); @@ -468,8 +468,6 @@ class ReportCommand : public Command { size_t sched_switch_attr_id_; std::string report_filename_; - std::unordered_map<std::string, std::string> meta_info_; - std::unique_ptr<ScopedEventTypes> scoped_event_types_; }; bool ReportCommand::Run(const std::vector<std::string>& args) { @@ -483,9 +481,7 @@ bool ReportCommand::Run(const std::vector<std::string>& args) { if (record_file_reader_ == nullptr) { return false; } - if (!ReadMetaInfoFromRecordFile()) { - return false; - } + ReadMetaInfoFromRecordFile(); if (!ReadEventAttrFromRecordFile()) { return false; } @@ -735,25 +731,14 @@ bool ReportCommand::ParseOptions(const std::vector<std::string>& args) { return true; } -bool ReportCommand::ReadMetaInfoFromRecordFile() { - if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) { - if (!record_file_reader_->ReadMetaInfoFeature(&meta_info_)) { - return false; - } - auto it = meta_info_.find("system_wide_collection"); - if (it != meta_info_.end()) { - system_wide_collection_ = it->second == "true"; - } - it = meta_info_.find("trace_offcpu"); - if (it != meta_info_.end()) { - trace_offcpu_ = it->second == "true"; - } - it = meta_info_.find("event_type_info"); - if (it != meta_info_.end()) { - scoped_event_types_.reset(new ScopedEventTypes(it->second)); - } +void ReportCommand::ReadMetaInfoFromRecordFile() { + auto& meta_info = record_file_reader_->GetMetaInfoFeature(); + if (auto it = meta_info.find("system_wide_collection"); it != meta_info.end()) { + system_wide_collection_ = it->second == "true"; + } + if (auto it = meta_info.find("trace_offcpu"); it != meta_info.end()) { + trace_offcpu_ = it->second == "true"; } - return true; } bool ReportCommand::ReadEventAttrFromRecordFile() { @@ -806,7 +791,7 @@ bool ReportCommand::ReadFeaturesFromRecordFile() { std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature(); if (!cmdline.empty()) { record_cmdline_ = android::base::Join(cmdline, ' '); - if (meta_info_.find("system_wide_collection") == meta_info_.end()) { + if (record_file_reader_->GetMetaInfoFeature().count("system_wide_collection")) { // TODO: the code to detect system wide collection option is fragile, remove // it once we can do cross unwinding. for (size_t i = 0; i < cmdline.size(); i++) { diff --git a/simpleperf/cmd_report_sample.cpp b/simpleperf/cmd_report_sample.cpp index 51793255..e133fb28 100644 --- a/simpleperf/cmd_report_sample.cpp +++ b/simpleperf/cmd_report_sample.cpp @@ -138,9 +138,7 @@ class ReportSampleCommand : public Command { size_t sample_count_; size_t lost_count_; bool trace_offcpu_; - std::unique_ptr<ScopedEventTypes> scoped_event_types_; std::vector<std::string> event_types_; - std::unordered_map<std::string, std::string> meta_info_; bool remove_unknown_kernel_symbols_; bool kernel_symbols_available_; bool show_art_frames_; @@ -425,22 +423,12 @@ bool ReportSampleCommand::OpenRecordFile() { return false; } record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_); - if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) { - if (!record_file_reader_->ReadMetaInfoFeature(&meta_info_)) { - return false; - } - auto it = meta_info_.find("event_type_info"); - if (it != meta_info_.end()) { - scoped_event_types_.reset(new ScopedEventTypes(it->second)); - } - it = meta_info_.find("trace_offcpu"); - if (it != meta_info_.end()) { - trace_offcpu_ = it->second == "true"; - } - it = meta_info_.find("kernel_symbols_available"); - if (it != meta_info_.end()) { - kernel_symbols_available_ = it->second == "true"; - } + auto& meta_info = record_file_reader_->GetMetaInfoFeature(); + if (auto it = meta_info.find("trace_offcpu"); it != meta_info.end()) { + trace_offcpu_ = it->second == "true"; + } + if (auto it = meta_info.find("kernel_symbols_available"); it != meta_info.end()) { + kernel_symbols_available_ = it->second == "true"; } for (EventAttrWithId& attr : record_file_reader_->AttrSection()) { event_types_.push_back(GetEventNameByAttr(*attr.attr)); @@ -449,8 +437,9 @@ bool ReportSampleCommand::OpenRecordFile() { } bool ReportSampleCommand::PrintMetaInfo() { - auto it = meta_info_.find("app_package_name"); - std::string app_package_name = it != meta_info_.end() ? it->second : ""; + auto& meta_info = record_file_reader_->GetMetaInfoFeature(); + auto it = meta_info.find("app_package_name"); + std::string app_package_name = it != meta_info.end() ? it->second : ""; if (use_protobuf_) { proto::Record proto_record; proto::MetaInfo* meta_info = proto_record.mutable_meta_info(); diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index 31ab2a39..bc047cb8 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -24,6 +24,7 @@ #include <chrono> #include <set> #include <string> +#include <string_view> #include <vector> #include <android-base/file.h> @@ -128,6 +129,43 @@ struct CounterSummary { } }; +static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>> + COMMON_EVENT_RATE_MAP = { + {"cache-misses", {"cache-references", "miss rate"}}, + {"branch-misses", {"branch-instructions", "miss rate"}}, +}; + +static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>> + ARM_EVENT_RATE_MAP = { + // Refer to "D6.10.5 Meaningful ratios between common microarchitectural events" in ARMv8 + // specification. + {"raw-l1i-cache-refill", {"raw-l1i-cache", "level 1 instruction cache refill rate"}}, + {"raw-l1i-tlb-refill", {"raw-l1i-tlb", "level 1 instruction TLB refill rate"}}, + {"raw-l1d-cache-refill", {"raw-l1d-cache", "level 1 data or unified cache refill rate"}}, + {"raw-l1d-tlb-refill", {"raw-l1d-tlb", "level 1 data or unified TLB refill rate"}}, + {"raw-l2d-cache-refill", {"raw-l2d-cache", "level 2 data or unified cache refill rate"}}, + {"raw-l2i-cache-refill", {"raw-l2i-cache", "level 2 instruction cache refill rate"}}, + {"raw-l3d-cache-refill", {"raw-l3d-cache", "level 3 data or unified cache refill rate"}}, + {"raw-l2d-tlb-refill", {"raw-l2d-tlb", "level 2 data or unified TLB refill rate"}}, + {"raw-l2i-tlb-refill", {"raw-l2i-tlb", "level 2 instruction TLB refill rate"}}, + {"raw-bus-access", {"raw-bus-cycles", "bus accesses per cycle"}}, + {"raw-ll-cache-miss", {"raw-ll-cache", "last level data or unified cache refill rate"}}, + {"raw-dtlb-walk", {"raw-l1d-tlb", "data TLB miss rate"}}, + {"raw-itlb-walk", {"raw-l1i-tlb", "instruction TLB miss rate"}}, + {"raw-ll-cache-miss-rd", {"raw-ll-cache-rd", "memory read operation miss rate"}}, + {"raw-remote-access-rd", + {"raw-remote-access", "read accesses to another socket in a multi-socket system"}}, + // Refer to "Table K3-2 Relationship between REFILL events and associated access events" in + // ARMv8 specification. + {"raw-l1d-cache-refill-rd", {"raw-l1d-cache-rd", "level 1 cache refill rate, read"}}, + {"raw-l1d-cache-refill-wr", {"raw-l1d-cache-wr", "level 1 cache refill rate, write"}}, + {"raw-l1d-tlb-refill-rd", {"raw-l1d-tlb-rd", "level 1 TLB refill rate, read"}}, + {"raw-l1d-tlb-refill-wr", {"raw-l1d-tlb-wr", "level 1 TLB refill rate, write"}}, + {"raw-l2d-cache-refill-rd", {"raw-l2d-cache-rd", "level 2 data cache refill rate, read"}}, + {"raw-l2d-cache-refill-wr", {"raw-l2d-cache-wr", "level 2 data cache refill rate, write"}}, + {"raw-l2d-tlb-refill-rd", {"raw-l2d-tlb-rd", "level 2 data TLB refill rate, read"}}, +}; + class CounterSummaries { public: explicit CounterSummaries(bool csv) : csv_(csv) {} @@ -231,31 +269,9 @@ class CounterSummaries { sap_mid); } } - if (android::base::EndsWith(s.type_name, "-misses")) { - std::string other_name; - if (s.type_name == "cache-misses") { - other_name = "cache-references"; - } else if (s.type_name == "branch-misses") { - other_name = "branch-instructions"; - } else { - other_name = - s.type_name.substr(0, s.type_name.size() - strlen("-misses")) + "s"; - } - const CounterSummary* other = FindSummary(other_name, s.modifier); - if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && - other->count != 0) { - double miss_rate = static_cast<double>(s.count) / other->count; - return android::base::StringPrintf("%lf%%%cmiss rate", miss_rate * 100, - sap_mid); - } - } - if (android::base::EndsWith(s.type_name, "-refill")) { - std::string other_name = s.type_name.substr(0, s.type_name.size() - strlen("-refill")); - const CounterSummary* other = FindSummary(other_name, s.modifier); - if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) { - double miss_rate = static_cast<double>(s.count) / other->count; - return android::base::StringPrintf("%f%%%cmiss rate", miss_rate * 100, sap_mid); - } + std::string rate_comment = GetRateComment(s, sap_mid); + if (!rate_comment.empty()) { + return rate_comment; } double running_time_in_sec; if (!FindRunningTimeForSummary(s, &running_time_in_sec)) { @@ -274,6 +290,34 @@ class CounterSummaries { return android::base::StringPrintf("%.3lf%c/sec", rate, sap_mid); } + std::string GetRateComment(const CounterSummary& s, char sep) { + std::string_view miss_event_name = s.type_name; + std::string event_name; + std::string rate_desc; + if (auto it = COMMON_EVENT_RATE_MAP.find(miss_event_name); it != COMMON_EVENT_RATE_MAP.end()) { + event_name = it->second.first; + rate_desc = it->second.second; + } + if (event_name.empty() && (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64)) { + if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) { + event_name = it->second.first; + rate_desc = it->second.second; + } + } + if (event_name.empty() && android::base::ConsumeSuffix(&miss_event_name, "-misses")) { + event_name = std::string(miss_event_name) + "s"; + rate_desc = "miss rate"; + } + if (!event_name.empty()) { + const CounterSummary* other = FindSummary(event_name, s.modifier); + if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) { + double miss_rate = static_cast<double>(s.count) / other->count; + return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str()); + } + } + return ""; + } + bool FindRunningTimeForSummary(const CounterSummary& summary, double* running_time_in_sec) { for (auto& s : summaries_) { if ((s.type_name == "task-clock" || s.type_name == "cpu-clock") && diff --git a/simpleperf/cmd_trace_sched.cpp b/simpleperf/cmd_trace_sched.cpp index 34c6318f..a5961ec9 100644 --- a/simpleperf/cmd_trace_sched.cpp +++ b/simpleperf/cmd_trace_sched.cpp @@ -187,17 +187,6 @@ bool TraceSchedCommand::ParseSchedEvents(const std::string& record_file_path) { if (!reader) { return false; } - std::unique_ptr<ScopedEventTypes> scoped_event_types; - if (reader->HasFeature(PerfFileFormat::FEAT_META_INFO)) { - std::unordered_map<std::string, std::string> meta_info; - if (!reader->ReadMetaInfoFeature(&meta_info)) { - return false; - } - auto it = meta_info.find("event_type_info"); - if (it != meta_info.end()) { - scoped_event_types.reset(new ScopedEventTypes(it->second)); - } - } const EventType* event = FindEventTypeByName("sched:sched_stat_runtime"); std::vector<EventAttrWithId> attrs = reader->AttrSection(); if (attrs.size() != 1u || attrs[0].attr->type != event->type || diff --git a/simpleperf/environment.cpp b/simpleperf/environment.cpp index 3591626e..86766921 100644 --- a/simpleperf/environment.cpp +++ b/simpleperf/environment.cpp @@ -342,10 +342,6 @@ bool CheckPerfEventLimit() { // enough permission to create inherited tracepoint events, write -1 to perf_event_paranoid. // See http://b/62230699. if (IsRoot()) { - char* env = getenv("PERFPROFD_DISABLE_PERF_EVENT_PARANOID_CHANGE"); - if (env != nullptr && strcmp(env, "1") == 0) { - return true; - } return android::base::WriteStringToFile("-1", "/proc/sys/kernel/perf_event_paranoid"); } int limit_level; diff --git a/simpleperf/event_attr.cpp b/simpleperf/event_attr.cpp index 09efa31b..ecc08439 100644 --- a/simpleperf/event_attr.cpp +++ b/simpleperf/event_attr.cpp @@ -133,6 +133,7 @@ void DumpPerfEventAttr(const perf_event_attr& attr, size_t indent) { PrintIndented(indent + 1, "sample_id_all %u, exclude_host %u, exclude_guest %u\n", attr.sample_id_all, attr.exclude_host, attr.exclude_guest); + PrintIndented(indent + 1, "config2 0x%llx\n", attr.config2); PrintIndented(indent + 1, "branch_sample_type 0x%" PRIx64 "\n", attr.branch_sample_type); PrintIndented(indent + 1, "exclude_callchain_kernel %u, exclude_callchain_user %u\n", attr.exclude_callchain_kernel, attr.exclude_callchain_user); @@ -229,7 +230,10 @@ bool IsCpuSupported(const perf_event_attr& attr) { std::string GetEventNameByAttr(const perf_event_attr& attr) { for (const auto& event_type : GetAllEventTypes()) { - if (event_type.type == attr.type && event_type.config == attr.config) { + // An event type uses both type and config value to define itself. But etm event type + // only uses type value (whose config value is used to set etm options). + if (event_type.type == attr.type && + (event_type.config == attr.config || IsEtmEventType(event_type.type))) { std::string name = event_type.name; if (attr.exclude_user && !attr.exclude_kernel) { name += ":k"; diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp index f93b694c..5bda379f 100644 --- a/simpleperf/event_fd.cpp +++ b/simpleperf/event_fd.cpp @@ -95,6 +95,7 @@ std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr, EventFd::~EventFd() { DestroyMappedBuffer(); + DestroyAuxBuffer(); close(perf_event_fd_); } @@ -253,6 +254,66 @@ void EventFd::DiscardMmapData(size_t discard_size) { mmap_metadata_page_->data_tail += discard_size; } +bool EventFd::CreateAuxBuffer(size_t aux_buffer_size, bool report_error) { + CHECK(HasMappedBuffer()); + CHECK(IsPowerOfTwo(aux_buffer_size)); + mmap_metadata_page_->aux_offset = mmap_len_; + mmap_metadata_page_->aux_size = aux_buffer_size; + mmap_metadata_page_->aux_head = 0; + mmap_metadata_page_->aux_tail = 0; + void* mmap_addr = mmap(nullptr, aux_buffer_size, PROT_READ | PROT_WRITE, MAP_SHARED, + perf_event_fd_, mmap_metadata_page_->aux_offset); + if (mmap_addr == MAP_FAILED) { + if (report_error) { + PLOG(ERROR) << "failed to mmap aux buffer of size " << aux_buffer_size << " for " << Name(); + } else { + PLOG(DEBUG) << "failed to mmap aux buffer of size " << aux_buffer_size << " for " << Name(); + } + return false; + } + aux_buffer_ = static_cast<char*>(mmap_addr); + aux_buffer_size_ = aux_buffer_size; + return true; +} + +void EventFd::DestroyAuxBuffer() { + if (HasAuxBuffer()) { + munmap(aux_buffer_, aux_buffer_size_); + aux_buffer_ = nullptr; + aux_buffer_size_ = 0; + } +} + +uint64_t EventFd::GetAvailableAuxData(char** buf1, size_t* size1, char** buf2, size_t* size2) { + // Aux buffer is similar to mapped_data_buffer. See comments in GetAvailableMmapData(). + uint64_t write_head = mmap_metadata_page_->aux_head; + uint64_t read_head = mmap_metadata_page_->aux_tail; + if (write_head <= read_head) { + *size1 = *size2 = 0; + return 0; // No available data. + } + // rmb() used to ensure reading data after reading aux_head. + __sync_synchronize(); + size_t data_pos = read_head & (aux_buffer_size_ - 1); + size_t data_size = write_head - read_head; + *buf1 = aux_buffer_ + data_pos; + if (data_size <= aux_buffer_size_ - data_pos) { + *size1 = data_size; + *size2 = 0; + } else { + *size1 = aux_buffer_size_ - data_pos; + *buf2 = aux_buffer_; + *size2 = data_size - *size1; + } + return read_head; +} + +void EventFd::DiscardAuxData(size_t discard_size) { + // mb() used to ensure finish reading data before writing aux_tail. + __sync_synchronize(); + mmap_metadata_page_->aux_tail += discard_size; +} + bool EventFd::StartPolling(IOEventLoop& loop, const std::function<bool()>& callback) { ioevent_ref_ = loop.AddReadEvent(perf_event_fd_, callback); diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h index e4e20ce4..0de94a99 100644 --- a/simpleperf/event_fd.h +++ b/simpleperf/event_fd.h @@ -86,6 +86,21 @@ class EventFd { // Discard the size of the data we have read, so the kernel can reuse the space for new data. virtual void DiscardMmapData(size_t discard_size); + // Manage the aux buffer, which receive auxiliary data sent by the kernel. + // aux_buffer_size: should be power of two, and mod PAGE_SIZE is zero. + virtual bool CreateAuxBuffer(size_t aux_buffer_size, bool report_error); + bool HasAuxBuffer() const { return aux_buffer_size_ != 0; } + virtual void DestroyAuxBuffer(); + + // Get available aux data, which can appear in one or two continuous buffers. + // buf1: return pointer to the first buffer + // size1: return data size in the first buffer + // buf2: return pointer to the second buffer + // size2: return data size in the second buffer + // Return value: return how many bytes of aux data has been read before. + virtual uint64_t GetAvailableAuxData(char** buf1, size_t* size1, char** buf2, size_t* size2); + virtual void DiscardAuxData(size_t discard_size); + // [callback] is called when there is data available in the mapped buffer. virtual bool StartPolling(IOEventLoop& loop, const std::function<bool()>& callback); virtual bool StopPolling(); @@ -118,10 +133,14 @@ class EventFd { void* mmap_addr_; size_t mmap_len_; - perf_event_mmap_page* mmap_metadata_page_; // The first page of mmap_area. - char* mmap_data_buffer_; // Starting from the second page of mmap_area, - // containing records written by then kernel. + // the first page of mapped area, whose content can be changed by the kernel at any time + volatile perf_event_mmap_page* mmap_metadata_page_; + // starting from the second page of mapped area, containing records written by the kernel + char* mmap_data_buffer_; size_t mmap_data_buffer_size_; + // receiving auxiliary data (like instruction tracing data generated by etm) from the kernel + char* aux_buffer_ = nullptr; + size_t aux_buffer_size_ = 0; IOEventRef ioevent_ref_; diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp index 44085bc5..1e7f790d 100644 --- a/simpleperf/event_selection_set.cpp +++ b/simpleperf/event_selection_set.cpp @@ -23,6 +23,7 @@ #include <android-base/logging.h> #include "environment.h" +#include "ETMRecorder.h" #include "event_attr.h" #include "event_type.h" #include "IOEventLoop.h" @@ -30,6 +31,8 @@ #include "utils.h" #include "RecordReadThread.h" +using namespace simpleperf; + bool IsBranchSamplingSupported() { const EventType* type = FindEventTypeByName("cpu-cycles"); if (type == nullptr) { @@ -159,6 +162,13 @@ bool EventSelectionSet::BuildAndCheckEventSelection(const std::string& event_nam selection->event_attr.exclude_host = event_type->exclude_host; selection->event_attr.exclude_guest = event_type->exclude_guest; selection->event_attr.precise_ip = event_type->precise_ip; + if (IsEtmEventType(event_type->event_type.type)) { + auto& etm_recorder = ETMRecorder::GetInstance(); + if (!etm_recorder.CheckEtmSupport()) { + return false; + } + ETMRecorder::GetInstance().SetEtmPerfEventAttr(&selection->event_attr); + } bool set_default_sample_freq = false; if (!for_stat_cmd_) { if (event_type->event_type.type == PERF_TYPE_TRACEPOINT) { @@ -218,6 +228,9 @@ bool EventSelectionSet::AddEventGroup( if (!BuildAndCheckEventSelection(event_name, first_event, &selection)) { return false; } + if (IsEtmEventType(selection.event_attr.type)) { + has_aux_trace_ = true; + } first_event = false; group.push_back(std::move(selection)); } @@ -610,10 +623,11 @@ bool EventSelectionSet::ReadCounters(std::vector<CountersInfo>* counters) { } bool EventSelectionSet::MmapEventFiles(size_t min_mmap_pages, size_t max_mmap_pages, - size_t record_buffer_size, bool allow_cutting_samples) { - record_read_thread_.reset(new simpleperf::RecordReadThread( - record_buffer_size, groups_[0][0].event_attr, min_mmap_pages, max_mmap_pages, - allow_cutting_samples)); + size_t aux_buffer_size, size_t record_buffer_size, + bool allow_cutting_samples) { + record_read_thread_.reset( + new simpleperf::RecordReadThread(record_buffer_size, groups_[0][0].event_attr, min_mmap_pages, + max_mmap_pages, aux_buffer_size, allow_cutting_samples)); return true; } @@ -705,11 +719,6 @@ bool EventSelectionSet::FinishReadMmapEventData() { return loop_->RunLoop(); } -void EventSelectionSet::GetLostRecords(size_t* lost_samples, size_t* lost_non_samples, - size_t* cut_stack_samples) { - record_read_thread_->GetLostRecords(lost_samples, lost_non_samples, cut_stack_samples); -} - bool EventSelectionSet::HandleCpuHotplugEvents(const std::vector<int>& monitored_cpus, double check_interval_in_sec) { monitored_cpus_.insert(monitored_cpus.begin(), monitored_cpus.end()); diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h index 08b7665f..426af2f2 100644 --- a/simpleperf/event_selection_set.h +++ b/simpleperf/event_selection_set.h @@ -32,10 +32,7 @@ #include "IOEventLoop.h" #include "perf_event.h" #include "record.h" - -namespace simpleperf { - class RecordReadThread; -} +#include "RecordReadThread.h" constexpr double DEFAULT_PERIOD_TO_DETECT_CPU_HOTPLUG_EVENTS_IN_SEC = 0.5; constexpr double DEFAULT_PERIOD_TO_CHECK_MONITORED_TARGETS_IN_SEC = 1; @@ -96,6 +93,7 @@ class EventSelectionSet { std::vector<const EventType*> GetEvents() const; std::vector<const EventType*> GetTracepointEvents() const; bool ExcludeKernel() const; + bool HasAuxTrace() const { return has_aux_trace_; } bool HasInplaceSampler() const; std::vector<EventAttrWithId> GetEventAttrWithId() const; @@ -134,12 +132,15 @@ class EventSelectionSet { bool OpenEventFiles(const std::vector<int>& on_cpus); bool ReadCounters(std::vector<CountersInfo>* counters); - bool MmapEventFiles(size_t min_mmap_pages, size_t max_mmap_pages, size_t record_buffer_size, - bool allow_cutting_samples); + bool MmapEventFiles(size_t min_mmap_pages, size_t max_mmap_pages, size_t aux_buffer_size, + size_t record_buffer_size, bool allow_cutting_samples); bool PrepareToReadMmapEventData(const std::function<bool(Record*)>& callback); bool SyncKernelBuffer(); bool FinishReadMmapEventData(); - void GetLostRecords(size_t* lost_samples, size_t* lost_non_samples, size_t* cut_stack_samples); + + const simpleperf::RecordStat& GetRecordStat() { + return record_read_thread_->GetStat(); + } // If monitored_cpus is empty, monitor all cpus. bool HandleCpuHotplugEvents(const std::vector<int>& monitored_cpus, @@ -194,6 +195,8 @@ class EventSelectionSet { std::unique_ptr<simpleperf::RecordReadThread> record_read_thread_; + bool has_aux_trace_ = false; + DISALLOW_COPY_AND_ASSIGN(EventSelectionSet); }; diff --git a/simpleperf/event_type.cpp b/simpleperf/event_type.cpp index ed323791..33be5c62 100644 --- a/simpleperf/event_type.cpp +++ b/simpleperf/event_type.cpp @@ -28,9 +28,12 @@ #include <android-base/stringprintf.h> #include <android-base/strings.h> +#include "ETMRecorder.h" #include "event_attr.h" #include "utils.h" +using namespace simpleperf; + #define EVENT_TYPE_TABLE_ENTRY(name, type, config, description, limited_arch) \ {name, type, config, description, limited_arch}, @@ -39,6 +42,8 @@ static const std::vector<EventType> static_event_type_array = { }; static std::string tracepoint_events; +static std::set<EventType> g_event_types; +static uint32_t g_etm_event_type; bool SetTracepointEventsFilePath(const std::string& filepath) { if (!android::base::ReadFileToString(filepath, &tracepoint_events)) { @@ -110,8 +115,6 @@ static std::vector<EventType> GetTracepointEventTypes() { return result; } -static std::set<EventType> g_event_types; - std::string ScopedEventTypes::BuildString(const std::vector<const EventType*>& event_types) { std::string result; for (auto type : event_types) { @@ -132,6 +135,9 @@ ScopedEventTypes::ScopedEventTypes(const std::string& event_type_str) { uint32_t type; uint64_t config; sscanf(s.c_str() + name.size(), ",%u,%" PRIu64, &type, &config); + if (name == "cs-etm") { + g_etm_event_type = type; + } g_event_types.emplace(name, type, config, "", ""); } } @@ -145,6 +151,13 @@ const std::set<EventType>& GetAllEventTypes() { g_event_types.insert(static_event_type_array.begin(), static_event_type_array.end()); std::vector<EventType> tracepoint_array = GetTracepointEventTypes(); g_event_types.insert(tracepoint_array.begin(), tracepoint_array.end()); +#if defined(__linux__) + std::unique_ptr<EventType> etm_type = ETMRecorder::GetInstance().BuildEventType(); + if (etm_type) { + g_etm_event_type = etm_type->type; + g_event_types.emplace(std::move(*etm_type)); + } +#endif } return g_event_types; } @@ -244,3 +257,7 @@ std::unique_ptr<EventTypeAndModifier> ParseEventType(const std::string& event_ty event_type_modifier->modifier = modifier; return event_type_modifier; } + +bool IsEtmEventType(uint32_t type) { + return g_etm_event_type != 0 && type == g_etm_event_type; +}
\ No newline at end of file diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h index d432cb32..184b4e75 100644 --- a/simpleperf/event_type.h +++ b/simpleperf/event_type.h @@ -98,5 +98,6 @@ struct EventTypeAndModifier { }; std::unique_ptr<EventTypeAndModifier> ParseEventType(const std::string& event_type_str); +bool IsEtmEventType(uint32_t type); #endif // SIMPLE_PERF_EVENT_H_ diff --git a/simpleperf/event_type_table.h b/simpleperf/event_type_table.h index 2da0b99b..cd1f9d37 100644 --- a/simpleperf/event_type_table.h +++ b/simpleperf/event_type_table.h @@ -66,11 +66,11 @@ EVENT_TYPE_TABLE_ENTRY("node-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_ EVENT_TYPE_TABLE_ENTRY("inplace-sampler", SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS, SIMPLEPERF_CONFIG_INPLACE_SAMPLER, "", "") EVENT_TYPE_TABLE_ENTRY("raw-sw-incr", PERF_TYPE_RAW, 0x0, "Instruction architecturally executed, Condition code check pass, software increment", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1i-cache-refilla", PERF_TYPE_RAW, 0x1, "Level 1 instruction cache refill", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1i-tlb-refilla", PERF_TYPE_RAW, 0x2, "Attributable Level 1 instruction TLB refill", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refilla", PERF_TYPE_RAW, 0x3, "Level 1 data cache refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1i-cache-refill", PERF_TYPE_RAW, 0x1, "Level 1 instruction cache refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1i-tlb-refill", PERF_TYPE_RAW, 0x2, "Attributable Level 1 instruction TLB refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill", PERF_TYPE_RAW, 0x3, "Level 1 data cache refill", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache", PERF_TYPE_RAW, 0x4, "Level 1 data cache access", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-refilla", PERF_TYPE_RAW, 0x5, "Attributable Level 1 data TLB refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-refill", PERF_TYPE_RAW, 0x5, "Attributable Level 1 data TLB refill", "arm") EVENT_TYPE_TABLE_ENTRY("raw-ld-retired", PERF_TYPE_RAW, 0x6, "Instruction architecturally executed, Condition code check pass, load", "arm") EVENT_TYPE_TABLE_ENTRY("raw-st-retired", PERF_TYPE_RAW, 0x7, "Instruction architecturally executed, Condition code check pass, store", "arm") EVENT_TYPE_TABLE_ENTRY("raw-inst-retired", PERF_TYPE_RAW, 0x8, "Instruction architecturally executed", "arm") @@ -88,7 +88,7 @@ EVENT_TYPE_TABLE_ENTRY("raw-mem-access", PERF_TYPE_RAW, 0x13, "Data memory acces EVENT_TYPE_TABLE_ENTRY("raw-l1i-cache", PERF_TYPE_RAW, 0x14, "Attributable Level 1 instruction cache access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-wb", PERF_TYPE_RAW, 0x15, "Attributable Level 1 data cache write-back", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache", PERF_TYPE_RAW, 0x16, "Level 2 data cache access", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-refilla", PERF_TYPE_RAW, 0x17, "Level 2 data cache refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-refill", PERF_TYPE_RAW, 0x17, "Level 2 data cache refill", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-wb", PERF_TYPE_RAW, 0x18, "Attributable Level 2 data cache write-back", "arm") EVENT_TYPE_TABLE_ENTRY("raw-bus-access", PERF_TYPE_RAW, 0x19, "Bus access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-memory-error", PERF_TYPE_RAW, 0x1a, "Local memory error", "arm") @@ -105,23 +105,23 @@ EVENT_TYPE_TABLE_ENTRY("raw-stall-backend", PERF_TYPE_RAW, 0x24, "No operation i EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb", PERF_TYPE_RAW, 0x25, "Attributable Level 1 data or unified TLB access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1i-tlb", PERF_TYPE_RAW, 0x26, "Attributable Level 1 instruction TLB access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2i-cache", PERF_TYPE_RAW, 0x27, "Attributable Level 2 instruction cache access", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2i-cache-refilla", PERF_TYPE_RAW, 0x28, "Attributable Level 2 instruction cache refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2i-cache-refill", PERF_TYPE_RAW, 0x28, "Attributable Level 2 instruction cache refill", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-allocate", PERF_TYPE_RAW, 0x29, "Attributable Level 3 data or unified cache allocation without refill", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-refilla", PERF_TYPE_RAW, 0x2a, "Attributable Level 3 data cache refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-refill", PERF_TYPE_RAW, 0x2a, "Attributable Level 3 data cache refill", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache", PERF_TYPE_RAW, 0x2b, "Attributable Level 3 data cache access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-wb", PERF_TYPE_RAW, 0x2c, "Attributable Level 3 data or unified cache write-back", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-refilla", PERF_TYPE_RAW, 0x2d, "Attributable Level 2 data or unified TLB refill", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2i-tlb-refilla", PERF_TYPE_RAW, 0x2e, "Attributable Level 2 instruction TLB refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-refill", PERF_TYPE_RAW, 0x2d, "Attributable Level 2 data or unified TLB refill", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2i-tlb-refill", PERF_TYPE_RAW, 0x2e, "Attributable Level 2 instruction TLB refill", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb", PERF_TYPE_RAW, 0x2f, "Attributable Level 2 data or unified TLB access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2i-tlb", PERF_TYPE_RAW, 0x30, "Attributable Level 2 instruction TLB access", "arm") EVENT_TYPE_TABLE_ENTRY("raw-remote-access", PERF_TYPE_RAW, 0x31, "Attributable access to another socket in a multi-socket system", "arm") EVENT_TYPE_TABLE_ENTRY("raw-ll-cache", PERF_TYPE_RAW, 0x32, "Attributable Last Level data cache access", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-ll-cache-missa", PERF_TYPE_RAW, 0x33, "Attributable Last level data or unified cache miss", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-dtlb-walka", PERF_TYPE_RAW, 0x34, "Attributable data or unified TLB access with at least one translation table walk", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-itlb-walka", PERF_TYPE_RAW, 0x35, "Attributable instruction TLB access with at least one translation table walk", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-ll-cache-miss", PERF_TYPE_RAW, 0x33, "Attributable Last level data or unified cache miss", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-dtlb-walk", PERF_TYPE_RAW, 0x34, "Attributable data or unified TLB access with at least one translation table walk", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-itlb-walk", PERF_TYPE_RAW, 0x35, "Attributable instruction TLB access with at least one translation table walk", "arm") EVENT_TYPE_TABLE_ENTRY("raw-ll-cache-rd", PERF_TYPE_RAW, 0x36, "Attributable Last Level cache memory read", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-ll-cache-miss-rda", PERF_TYPE_RAW, 0x37, "Attributable Last Level cache memory read miss", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-remote-access-rda", PERF_TYPE_RAW, 0x38, "Attributable memory read access to another socket in a multi-socket system", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-ll-cache-miss-rd", PERF_TYPE_RAW, 0x37, "Attributable Last Level cache memory read miss", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-remote-access-rd", PERF_TYPE_RAW, 0x38, "Attributable memory read access to another socket in a multi-socket system", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-lmiss-rd", PERF_TYPE_RAW, 0x39, "Level 1 data cache long-latency read miss", "arm") EVENT_TYPE_TABLE_ENTRY("raw-op-retired", PERF_TYPE_RAW, 0x3a, "Micro-operation architecturally executed", "arm") EVENT_TYPE_TABLE_ENTRY("raw-op-spec", PERF_TYPE_RAW, 0x3b, "Micro-operation Speculatively executed", "arm") @@ -143,26 +143,26 @@ EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-lmiss-rd", PERF_TYPE_RAW, 0x400b, "Level 3 EVENT_TYPE_TABLE_ENTRY("raw-sve-inst-retired", PERF_TYPE_RAW, 0x8002, "SVE Instructions architecturally executed", "arm") EVENT_TYPE_TABLE_ENTRY("raw-sve-inst-spec", PERF_TYPE_RAW, 0x8006, "SVE Instructions speculatively executed", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-wr", PERF_TYPE_RAW, 0x41, "Attributable Level 1 data cache access, write", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill-rda", PERF_TYPE_RAW, 0x42, "Attributable Level 1 data cache refill, read", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill-wra", PERF_TYPE_RAW, 0x43, "Attributable Level 1 data cache refill, write", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill-rd", PERF_TYPE_RAW, 0x42, "Attributable Level 1 data cache refill, read", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill-wr", PERF_TYPE_RAW, 0x43, "Attributable Level 1 data cache refill, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill-inner", PERF_TYPE_RAW, 0x44, "Attributable Level 1 data cache refill, inner", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-refill-outer", PERF_TYPE_RAW, 0x45, "Attributable Level 1 data cache refill, outer", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-wb-victim", PERF_TYPE_RAW, 0x46, "Attributable Level 1 data cache Write-Back, victim", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-wb-clean", PERF_TYPE_RAW, 0x47, "Level 1 data cache Write-Back, cleaning and coherency", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-cache-inval", PERF_TYPE_RAW, 0x48, "Attributable Level 1 data cache invalidate", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-refill-rda", PERF_TYPE_RAW, 0x4c, "Attributable Level 1 data TLB refill, read", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-refill-wra", PERF_TYPE_RAW, 0x4d, "Attributable Level 1 data TLB refill, write", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-refill-rd", PERF_TYPE_RAW, 0x4c, "Attributable Level 1 data TLB refill, read", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-refill-wr", PERF_TYPE_RAW, 0x4d, "Attributable Level 1 data TLB refill, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-rd", PERF_TYPE_RAW, 0x4e, "Attributable Level 1 data or unified TLB access, read", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l1d-tlb-wr", PERF_TYPE_RAW, 0x4f, "Attributable Level 1 data or unified TLB access, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-rd", PERF_TYPE_RAW, 0x50, "Attributable Level 2 data cache access, read", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-wr", PERF_TYPE_RAW, 0x51, "Attributable Level 2 data cache access, write", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-refill-rda", PERF_TYPE_RAW, 0x52, "Attributable Level 2 data cache refill, read", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-refill-wra", PERF_TYPE_RAW, 0x53, "Attributable Level 2 data cache refill, write", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-refill-rd", PERF_TYPE_RAW, 0x52, "Attributable Level 2 data cache refill, read", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-refill-wr", PERF_TYPE_RAW, 0x53, "Attributable Level 2 data cache refill, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-wb-victim", PERF_TYPE_RAW, 0x56, "Attributable Level 2 data cache Write-Back, victim", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-wb-clean", PERF_TYPE_RAW, 0x57, "Level 2 data cache Write-Back, cleaning and coherency", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-cache-inval", PERF_TYPE_RAW, 0x58, "Attributable Level 2 data cache invalidate", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-refill-rda", PERF_TYPE_RAW, 0x5c, "Attributable Level 2 data or unified TLB refill, read", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-refill-wra", PERF_TYPE_RAW, 0x5d, "Attributable Level 2 data or unified TLB refill, write", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-refill-rd", PERF_TYPE_RAW, 0x5c, "Attributable Level 2 data or unified TLB refill, read", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-refill-wr", PERF_TYPE_RAW, 0x5d, "Attributable Level 2 data or unified TLB refill, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-rd", PERF_TYPE_RAW, 0x5e, "Attributable Level 2 data or unified TLB access, read", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l2d-tlb-wr", PERF_TYPE_RAW, 0x5f, "Attributable Level 2 data or unified TLB access, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-bus-access-rd", PERF_TYPE_RAW, 0x60, "Bus access, read", "arm") @@ -211,8 +211,8 @@ EVENT_TYPE_TABLE_ENTRY("raw-rc-ld-spec", PERF_TYPE_RAW, 0x90, "Release consisten EVENT_TYPE_TABLE_ENTRY("raw-rc-st-spec", PERF_TYPE_RAW, 0x91, "Release consistency operation speculatively executed, Store-Release", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-rd", PERF_TYPE_RAW, 0xa0, "Attributable Level 3 data or unified cache access, read", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-wr", PERF_TYPE_RAW, 0xa1, "Attributable Level 3 data or unified cache access, write", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-refill-rda", PERF_TYPE_RAW, 0xa2, "Attributable Level 3 data or unified cache refill, read", "arm") -EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-refill-wra", PERF_TYPE_RAW, 0xa3, "Attributable Level 3 data or unified cache refill, write", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-refill-rd", PERF_TYPE_RAW, 0xa2, "Attributable Level 3 data or unified cache refill, read", "arm") +EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-refill-wr", PERF_TYPE_RAW, 0xa3, "Attributable Level 3 data or unified cache refill, write", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-wb-victim", PERF_TYPE_RAW, 0xa6, "Attributable Level 3 data or unified cache Write-Back, victim", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-wb-clean", PERF_TYPE_RAW, 0xa7, "Attributable Level 3 data or unified cache Write-Back, cache clean", "arm") EVENT_TYPE_TABLE_ENTRY("raw-l3d-cache-inval", PERF_TYPE_RAW, 0xa8, "Attributable Level 3 data or unified cache access, invalidate", "arm") diff --git a/simpleperf/generate_event_type_table.py b/simpleperf/generate_event_type_table.py index f17f4dce..9ed50c50 100755 --- a/simpleperf/generate_event_type_table.py +++ b/simpleperf/generate_event_type_table.py @@ -121,11 +121,11 @@ def gen_arm_raw_events(): raw_types = [ # Refer to "Table D6-7 PMU common architectural and microarchitectural event numbers" in ARMv8 specification. [0x0000, "sw-incr", "Instruction architecturally executed, Condition code check pass, software increment"], - [0x0001, "l1i-cache-refilla", "Level 1 instruction cache refill"], - [0x0002, "l1i-tlb-refilla", "Attributable Level 1 instruction TLB refill"], - [0x0003, "l1d-cache-refilla", "Level 1 data cache refill"], + [0x0001, "l1i-cache-refill", "Level 1 instruction cache refill"], + [0x0002, "l1i-tlb-refill", "Attributable Level 1 instruction TLB refill"], + [0x0003, "l1d-cache-refill", "Level 1 data cache refill"], [0x0004, "l1d-cache", "Level 1 data cache access"], - [0x0005, "l1d-tlb-refilla", "Attributable Level 1 data TLB refill"], + [0x0005, "l1d-tlb-refill", "Attributable Level 1 data TLB refill"], [0x0006, "ld-retired", "Instruction architecturally executed, Condition code check pass, load"], [0x0007, "st-retired", "Instruction architecturally executed, Condition code check pass, store"], [0x0008, "inst-retired", "Instruction architecturally executed"], @@ -143,7 +143,7 @@ def gen_arm_raw_events(): [0x0014, "l1i-cache", "Attributable Level 1 instruction cache access"], [0x0015, "l1d-cache-wb", "Attributable Level 1 data cache write-back"], [0x0016, "l2d-cache", "Level 2 data cache access"], - [0x0017, "l2d-cache-refilla", "Level 2 data cache refill"], + [0x0017, "l2d-cache-refill", "Level 2 data cache refill"], [0x0018, "l2d-cache-wb", "Attributable Level 2 data cache write-back"], [0x0019, "bus-access", "Bus access"], [0x001A, "memory-error", "Local memory error"], @@ -160,23 +160,23 @@ def gen_arm_raw_events(): [0x0025, "l1d-tlb", "Attributable Level 1 data or unified TLB access"], [0x0026, "l1i-tlb", "Attributable Level 1 instruction TLB access"], [0x0027, "l2i-cache", "Attributable Level 2 instruction cache access"], - [0x0028, "l2i-cache-refilla", "Attributable Level 2 instruction cache refill"], + [0x0028, "l2i-cache-refill", "Attributable Level 2 instruction cache refill"], [0x0029, "l3d-cache-allocate", "Attributable Level 3 data or unified cache allocation without refill"], - [0x002A, "l3d-cache-refilla", "Attributable Level 3 data cache refill"], + [0x002A, "l3d-cache-refill", "Attributable Level 3 data cache refill"], [0x002B, "l3d-cache", "Attributable Level 3 data cache access"], [0x002C, "l3d-cache-wb", "Attributable Level 3 data or unified cache write-back"], - [0x002D, "l2d-tlb-refilla", "Attributable Level 2 data or unified TLB refill"], - [0x002E, "l2i-tlb-refilla", "Attributable Level 2 instruction TLB refill"], + [0x002D, "l2d-tlb-refill", "Attributable Level 2 data or unified TLB refill"], + [0x002E, "l2i-tlb-refill", "Attributable Level 2 instruction TLB refill"], [0x002F, "l2d-tlb", "Attributable Level 2 data or unified TLB access"], [0x0030, "l2i-tlb", "Attributable Level 2 instruction TLB access"], [0x0031, "remote-access", "Attributable access to another socket in a multi-socket system"], [0x0032, "ll-cache", "Attributable Last Level data cache access"], - [0x0033, "ll-cache-missa", "Attributable Last level data or unified cache miss"], - [0x0034, "dtlb-walka", "Attributable data or unified TLB access with at least one translation table walk"], - [0x0035, "itlb-walka", "Attributable instruction TLB access with at least one translation table walk"], + [0x0033, "ll-cache-miss", "Attributable Last level data or unified cache miss"], + [0x0034, "dtlb-walk", "Attributable data or unified TLB access with at least one translation table walk"], + [0x0035, "itlb-walk", "Attributable instruction TLB access with at least one translation table walk"], [0x0036, "ll-cache-rd", "Attributable Last Level cache memory read"], - [0x0037, "ll-cache-miss-rda", "Attributable Last Level cache memory read miss"], - [0x0038, "remote-access-rda", "Attributable memory read access to another socket in a multi-socket system"], + [0x0037, "ll-cache-miss-rd", "Attributable Last Level cache memory read miss"], + [0x0038, "remote-access-rd", "Attributable memory read access to another socket in a multi-socket system"], [0x0039, "l1d-cache-lmiss-rd", "Level 1 data cache long-latency read miss"], [0x003A, "op-retired", "Micro-operation architecturally executed"], [0x003B, "op-spec", "Micro-operation Speculatively executed"], @@ -201,29 +201,29 @@ def gen_arm_raw_events(): # Refer to "Table K3.1 ARM recommendations for IMPLEMENTATION DEFINED event numbers" in ARMv8 specification. #[0x0040, "l1d-cache-rd", "Attributable Level 1 data cache access, read"], [0x0041, "l1d-cache-wr", "Attributable Level 1 data cache access, write"], - [0x0042, "l1d-cache-refill-rda", "Attributable Level 1 data cache refill, read"], - [0x0043, "l1d-cache-refill-wra", "Attributable Level 1 data cache refill, write"], + [0x0042, "l1d-cache-refill-rd", "Attributable Level 1 data cache refill, read"], + [0x0043, "l1d-cache-refill-wr", "Attributable Level 1 data cache refill, write"], [0x0044, "l1d-cache-refill-inner", "Attributable Level 1 data cache refill, inner"], [0x0045, "l1d-cache-refill-outer", "Attributable Level 1 data cache refill, outer"], [0x0046, "l1d-cache-wb-victim", "Attributable Level 1 data cache Write-Back, victim"], [0x0047, "l1d-cache-wb-clean", "Level 1 data cache Write-Back, cleaning and coherency"], [0x0048, "l1d-cache-inval", "Attributable Level 1 data cache invalidate"], # 0x0049-0x004B - Reserved - [0x004C, "l1d-tlb-refill-rda", "Attributable Level 1 data TLB refill, read"], - [0x004D, "l1d-tlb-refill-wra", "Attributable Level 1 data TLB refill, write"], + [0x004C, "l1d-tlb-refill-rd", "Attributable Level 1 data TLB refill, read"], + [0x004D, "l1d-tlb-refill-wr", "Attributable Level 1 data TLB refill, write"], [0x004E, "l1d-tlb-rd", "Attributable Level 1 data or unified TLB access, read"], [0x004F, "l1d-tlb-wr", "Attributable Level 1 data or unified TLB access, write"], [0x0050, "l2d-cache-rd", "Attributable Level 2 data cache access, read"], [0x0051, "l2d-cache-wr", "Attributable Level 2 data cache access, write"], - [0x0052, "l2d-cache-refill-rda", "Attributable Level 2 data cache refill, read"], - [0x0053, "l2d-cache-refill-wra", "Attributable Level 2 data cache refill, write"], + [0x0052, "l2d-cache-refill-rd", "Attributable Level 2 data cache refill, read"], + [0x0053, "l2d-cache-refill-wr", "Attributable Level 2 data cache refill, write"], # 0x0054-0x0055 - Reserved [0x0056, "l2d-cache-wb-victim", "Attributable Level 2 data cache Write-Back, victim"], [0x0057, "l2d-cache-wb-clean", "Level 2 data cache Write-Back, cleaning and coherency"], [0x0058, "l2d-cache-inval", "Attributable Level 2 data cache invalidate"], # 0x0059-0x005B - Reserved - [0x005C, "l2d-tlb-refill-rda", "Attributable Level 2 data or unified TLB refill, read"], - [0x005D, "l2d-tlb-refill-wra", "Attributable Level 2 data or unified TLB refill, write"], + [0x005C, "l2d-tlb-refill-rd", "Attributable Level 2 data or unified TLB refill, read"], + [0x005D, "l2d-tlb-refill-wr", "Attributable Level 2 data or unified TLB refill, write"], [0x005E, "l2d-tlb-rd", "Attributable Level 2 data or unified TLB access, read"], [0x005F, "l2d-tlb-wr", "Attributable Level 2 data or unified TLB access, write"], [0x0060, "bus-access-rd", "Bus access, read"], @@ -278,8 +278,8 @@ def gen_arm_raw_events(): # 0x0092-0x009F - Reserved [0x00A0, "l3d-cache-rd", "Attributable Level 3 data or unified cache access, read"], [0x00A1, "l3d-cache-wr", "Attributable Level 3 data or unified cache access, write"], - [0x00A2, "l3d-cache-refill-rda", "Attributable Level 3 data or unified cache refill, read"], - [0x00A3, "l3d-cache-refill-wra", "Attributable Level 3 data or unified cache refill, write"], + [0x00A2, "l3d-cache-refill-rd", "Attributable Level 3 data or unified cache refill, read"], + [0x00A3, "l3d-cache-refill-wr", "Attributable Level 3 data or unified cache refill, write"], # 0x00A4-0x00A5 - Reserved [0x00A6, "l3d-cache-wb-victim", "Attributable Level 3 data or unified cache Write-Back, victim"], [0x00A7, "l3d-cache-wb-clean", "Attributable Level 3 data or unified cache Write-Back, cache clean"], diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h index 67bf0ea5..d9e0d988 100644 --- a/simpleperf/get_test_data.h +++ b/simpleperf/get_test_data.h @@ -137,4 +137,7 @@ static const std::string PERF_DATA_WITH_INTERPRETER_FRAMES = "perf_with_interpre static const std::string PERF_DATA_WITH_IP_ZERO_IN_CALLCHAIN = "perf_with_ip_zero_in_callchain.data"; +// generated by `simpleperf record -e cs-etm:u ./etm_test_loop` +static const std::string PERF_DATA_ETM_TEST_LOOP = "etm/perf.data"; + #endif // SIMPLE_PERF_GET_TEST_DATA_H_ diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp index 19a3cb11..1726b589 100644 --- a/simpleperf/nonlinux_support/nonlinux_support.cpp +++ b/simpleperf/nonlinux_support/nonlinux_support.cpp @@ -38,3 +38,19 @@ bool ReadSymbolsFromDexFile(const std::string&, const std::vector<uint64_t>&, std::vector<DexFileSymbol>*) { return true; } + +namespace simpleperf { + +class DummyOfflineUnwinder : public OfflineUnwinder { + public: + bool UnwindCallChain(const ThreadEntry&, const RegSet&, const char*, size_t, + std::vector<uint64_t>*, std::vector<uint64_t>*) override { + return false; + } +}; + +std::unique_ptr<OfflineUnwinder> OfflineUnwinder::Create(bool) { + return std::unique_ptr<OfflineUnwinder>(new DummyOfflineUnwinder); +} + +} // namespace simpleperf
\ No newline at end of file diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp index 2d684a8a..3ce99acc 100644 --- a/simpleperf/record.cpp +++ b/simpleperf/record.cpp @@ -44,7 +44,10 @@ static std::string RecordTypeToString(int record_type) { {PERF_RECORD_SAMPLE, "sample"}, {PERF_RECORD_BUILD_ID, "build_id"}, {PERF_RECORD_MMAP2, "mmap2"}, + {PERF_RECORD_AUX, "aux"}, {PERF_RECORD_TRACING_DATA, "tracing_data"}, + {PERF_RECORD_AUXTRACE_INFO, "auxtrace_info"}, + {PERF_RECORD_AUXTRACE, "auxtrace"}, {SIMPLE_PERF_RECORD_KERNEL_SYMBOL, "kernel_symbol"}, {SIMPLE_PERF_RECORD_DSO, "dso"}, {SIMPLE_PERF_RECORD_SYMBOL, "symbol"}, @@ -874,6 +877,20 @@ std::vector<uint64_t> SampleRecord::GetCallChain(size_t* kernel_ip_count) const return ips; } +AuxRecord::AuxRecord(const perf_event_attr& attr, char* p) : Record(p) { + const char* end = p + size(); + p += header_size(); + data = reinterpret_cast<DataType*>(p); + p += sizeof(DataType); + sample_id.ReadFromBinaryFormat(attr, p, end); +} + +void AuxRecord::DumpData(size_t indent) const { + PrintIndented(indent, "aux_offset %" PRIu64 "\n", data->aux_offset); + PrintIndented(indent, "aux_size %" PRIu64 "\n", data->aux_size); + PrintIndented(indent, "flags 0x%" PRIx64 "\n", data->flags); +} + BuildIdRecord::BuildIdRecord(char* p) : Record(p) { const char* end = p + size(); p += header_size(); @@ -910,6 +927,90 @@ BuildIdRecord::BuildIdRecord(bool in_kernel, pid_t pid, const BuildId& build_id, UpdateBinary(new_binary); } +AuxTraceInfoRecord::AuxTraceInfoRecord(char* p) : Record(p) { + const char* end = p + size(); + p += header_size(); + data = reinterpret_cast<DataType*>(p); + CHECK_EQ(data->aux_type, AUX_TYPE_ETM); + CHECK_EQ(data->version, 0); + for (uint32_t i = 0; i < data->nr_cpu; ++i) { + CHECK_EQ(data->etm4_info[i].magic, MAGIC_ETM4); + } + p += sizeof(DataType) + data->nr_cpu * sizeof(ETM4Info); + CHECK_EQ(p, end); +} + +AuxTraceInfoRecord::AuxTraceInfoRecord(const DataType& data, + const std::vector<ETM4Info>& etm4_info) { + SetTypeAndMisc(PERF_RECORD_AUXTRACE_INFO, 0); + SetSize(header_size() + sizeof(DataType) + sizeof(ETM4Info) * etm4_info.size()); + char* new_binary = new char[size()]; + char* p = new_binary; + MoveToBinaryFormat(header, p); + this->data = reinterpret_cast<DataType*>(p); + MoveToBinaryFormat(data, p); + for (auto& etm4 : etm4_info) { + MoveToBinaryFormat(etm4, p); + } + UpdateBinary(new_binary); +} + +void AuxTraceInfoRecord::DumpData(size_t indent) const { + PrintIndented(indent, "aux_type %u\n", data->aux_type); + PrintIndented(indent, "version %" PRIu64 "\n", data->version); + PrintIndented(indent, "nr_cpu %u\n", data->nr_cpu); + PrintIndented(indent, "pmu_type %u\n", data->pmu_type); + PrintIndented(indent, "snapshot %" PRIu64 "\n", data->snapshot); + indent++; + for (int i = 0; i < data->nr_cpu; i++) { + const ETM4Info& e = data->etm4_info[i]; + PrintIndented(indent, "magic 0x%" PRIx64 "\n", e.magic); + PrintIndented(indent, "cpu %" PRIu64 "\n", e.cpu); + PrintIndented(indent, "trcconfigr 0x%" PRIx64 "\n", e.trcconfigr); + PrintIndented(indent, "trctraceidr 0x%" PRIx64 "\n", e.trctraceidr); + PrintIndented(indent, "trcidr0 0x%" PRIx64 "\n", e.trcidr0); + PrintIndented(indent, "trcidr1 0x%" PRIx64 "\n", e.trcidr1); + PrintIndented(indent, "trcidr2 0x%" PRIx64 "\n", e.trcidr2); + PrintIndented(indent, "trcidr8 0x%" PRIx64 "\n", e.trcidr8); + PrintIndented(indent, "trcauthstatus 0x%" PRIx64 "\n", e.trcauthstatus); + } +} + +AuxTraceRecord::AuxTraceRecord(char* p) : Record(p) { + const char* end = p + header.size; + p += header_size(); + data = reinterpret_cast<DataType*>(p); + p += sizeof(DataType); + CHECK_EQ(p, end); +} + +AuxTraceRecord::AuxTraceRecord(uint64_t aux_size, uint64_t offset, uint32_t idx, uint32_t tid, + uint32_t cpu) { + SetTypeAndMisc(PERF_RECORD_AUXTRACE, 0); + SetSize(header_size() + sizeof(DataType)); + char* new_binary = new char[size()]; + char* p = new_binary; + MoveToBinaryFormat(header, p); + data = reinterpret_cast<DataType*>(p); + data->aux_size = aux_size; + data->offset = offset; + data->reserved0 = 0; + data->idx = idx; + data->tid = tid; + data->cpu = cpu; + data->reserved1 = 0; + UpdateBinary(new_binary); +} + +void AuxTraceRecord::DumpData(size_t indent) const { + PrintIndented(indent, "aux_size %" PRIu64 "\n", data->aux_size); + PrintIndented(indent, "offset %" PRIu64 "\n", data->offset); + PrintIndented(indent, "idx %u\n", data->idx); + PrintIndented(indent, "tid %u\n", data->tid); + PrintIndented(indent, "cpu %u\n", data->cpu); + PrintIndented(indent, "location.file_offset %" PRIu64 "\n", location.file_offset); +} + KernelSymbolRecord::KernelSymbolRecord(char* p) : Record(p) { const char* end = p + size(); p += header_size(); @@ -1209,8 +1310,14 @@ std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr, uint32 return std::unique_ptr<Record>(new LostRecord(attr, p)); case PERF_RECORD_SAMPLE: return std::unique_ptr<Record>(new SampleRecord(attr, p)); + case PERF_RECORD_AUX: + return std::unique_ptr<Record>(new AuxRecord(attr, p)); case PERF_RECORD_TRACING_DATA: return std::unique_ptr<Record>(new TracingDataRecord(p)); + case PERF_RECORD_AUXTRACE_INFO: + return std::unique_ptr<Record>(new AuxTraceInfoRecord(p)); + case PERF_RECORD_AUXTRACE: + return std::unique_ptr<Record>(new AuxTraceRecord(p)); case SIMPLE_PERF_RECORD_KERNEL_SYMBOL: return std::unique_ptr<Record>(new KernelSymbolRecord(p)); case SIMPLE_PERF_RECORD_DSO: diff --git a/simpleperf/record.h b/simpleperf/record.h index adde3f44..163d5208 100644 --- a/simpleperf/record.h +++ b/simpleperf/record.h @@ -40,6 +40,9 @@ enum user_record_type { PERF_RECORD_BUILD_ID, PERF_RECORD_FINISHED_ROUND, + PERF_RECORD_AUXTRACE_INFO = 70, + PERF_RECORD_AUXTRACE = 71, + SIMPLE_PERF_RECORD_TYPE_START = 32768, SIMPLE_PERF_RECORD_KERNEL_SYMBOL, // TODO: remove DsoRecord and SymbolRecord. @@ -422,6 +425,19 @@ struct SampleRecord : public Record { void DumpData(size_t indent) const override; }; +struct AuxRecord : public Record { + struct DataType { + uint64_t aux_offset; + uint64_t aux_size; + uint64_t flags; + }* data; + + AuxRecord(const perf_event_attr& attr, char* p); + + protected: + void DumpData(size_t indent) const override; +}; + // BuildIdRecord is defined in user-space, stored in BuildId feature section in // record file. struct BuildIdRecord : public Record { @@ -438,6 +454,66 @@ struct BuildIdRecord : public Record { void DumpData(size_t indent) const override; }; +struct AuxTraceInfoRecord : public Record { + // magic values to be compatible with linux perf + static const uint32_t AUX_TYPE_ETM = 3; + static const uint64_t MAGIC_ETM4 = 0x4040404040404040ULL; + + struct ETM4Info { + uint64_t magic; + uint64_t cpu; + uint64_t trcconfigr; + uint64_t trctraceidr; + uint64_t trcidr0; + uint64_t trcidr1; + uint64_t trcidr2; + uint64_t trcidr8; + uint64_t trcauthstatus; + }; + + struct DataType { + uint32_t aux_type; + uint32_t reserved; + uint64_t version; + uint32_t nr_cpu; + uint32_t pmu_type; + uint64_t snapshot; + ETM4Info etm4_info[0]; + }* data; + + explicit AuxTraceInfoRecord(char* p); + AuxTraceInfoRecord(const DataType& data, const std::vector<ETM4Info>& etm4_info); + + protected: + void DumpData(size_t indent) const override; +}; + +struct AuxTraceRecord : public Record { + struct DataType { + uint64_t aux_size; + uint64_t offset; + uint64_t reserved0; // reference + uint32_t idx; + uint32_t tid; + uint32_t cpu; + uint32_t reserved1; + } * data; + // AuxTraceRecord is followed by aux tracing data with size data->aux_size. + // The location of aux tracing data in memory or file is kept in location. + struct AuxDataLocation { + const char* addr = nullptr; + uint64_t file_offset = 0; + } location; + + explicit AuxTraceRecord(char* p); + AuxTraceRecord(uint64_t aux_size, uint64_t offset, uint32_t idx, uint32_t tid, uint32_t cpu); + + static size_t Size() { return sizeof(perf_event_header) + sizeof(DataType); } + + protected: + void DumpData(size_t indent) const override; +}; + struct KernelSymbolRecord : public Record { uint32_t kallsyms_size; const char* kallsyms; diff --git a/simpleperf/record_file.h b/simpleperf/record_file.h index 223c1081..a9fa1c32 100644 --- a/simpleperf/record_file.h +++ b/simpleperf/record_file.h @@ -30,6 +30,7 @@ #include "dso.h" #include "event_attr.h" +#include "event_type.h" #include "perf_event.h" #include "record.h" #include "record_file_format.h" @@ -55,6 +56,7 @@ class RecordFileWriter { bool WriteFeatureString(int feature, const std::string& s); bool WriteCmdlineFeature(const std::vector<std::string>& cmdline); bool WriteBranchStackFeature(); + bool WriteAuxTraceFeature(const std::vector<uint64_t>& auxtrace_offset); bool WriteFileFeatures(const std::vector<Dso*>& files); bool WriteMetaInfoFeature(const std::unordered_map<std::string, std::string>& info_map); bool WriteFeature(int feature, const std::vector<char>& data); @@ -143,6 +145,7 @@ class RecordFileReader { std::vector<std::string> ReadCmdlineFeature(); std::vector<BuildIdRecord> ReadBuildIdFeature(); std::string ReadFeatureString(int feature); + std::vector<uint64_t> ReadAuxTraceFeature(); // File feature section contains many file information. This function reads // one file information located at [read_pos]. [read_pos] is 0 at the first @@ -152,10 +155,13 @@ class RecordFileReader { bool ReadFileFeature(size_t& read_pos, std::string* file_path, uint32_t* file_type, uint64_t* min_vaddr, uint64_t* file_offset_of_min_vaddr, std::vector<Symbol>* symbols, std::vector<uint64_t>* dex_file_offsets); - bool ReadMetaInfoFeature(std::unordered_map<std::string, std::string>* info_map); + + const std::unordered_map<std::string, std::string>& GetMetaInfoFeature() { return meta_info_; } void LoadBuildIdAndFileFeatures(ThreadTree& thread_tree); + bool ReadAuxData(uint32_t cpu, uint64_t aux_offset, void* buf, size_t size); + bool Close(); // For testing only. @@ -167,9 +173,13 @@ class RecordFileReader { bool ReadAttrSection(); bool ReadIdsForAttr(const PerfFileFormat::FileAttr& attr, std::vector<uint64_t>* ids); bool ReadFeatureSectionDescriptors(); - std::unique_ptr<Record> ReadRecord(uint64_t* nbytes_read); + bool ReadMetaInfoFeature(); + void UseRecordingEnvironment(); + std::unique_ptr<Record> ReadRecord(); bool Read(void* buf, size_t len); + bool ReadAtOffset(uint64_t offset, void* buf, size_t len); void ProcessEventIdRecord(const EventIdRecord& r); + bool BuildAuxDataLocation(); const std::string filename_; FILE* record_fp_; @@ -185,6 +195,22 @@ class RecordFileReader { uint64_t read_record_size_; + std::unordered_map<std::string, std::string> meta_info_; + std::unique_ptr<ScopedCurrentArch> scoped_arch_; + std::unique_ptr<ScopedEventTypes> scoped_event_types_; + + struct AuxDataLocation { + uint64_t aux_offset; + uint64_t aux_size; + uint64_t file_offset; + + AuxDataLocation(uint64_t aux_offset, uint64_t aux_size, uint64_t file_offset) + : aux_offset(aux_offset), aux_size(aux_size), file_offset(file_offset) {} + }; + // It maps from a cpu id to the locations (file offsets in perf.data) of aux data received from + // that cpu's aux buffer. It is used to locate aux data in perf.data. + std::unordered_map<uint32_t, std::vector<AuxDataLocation>> aux_data_location_; + DISALLOW_COPY_AND_ASSIGN(RecordFileReader); }; diff --git a/simpleperf/record_file_format.h b/simpleperf/record_file_format.h index ead42758..4606abcc 100644 --- a/simpleperf/record_file_format.h +++ b/simpleperf/record_file_format.h @@ -89,6 +89,7 @@ enum { FEAT_BRANCH_STACK, FEAT_PMU_MAPPINGS, FEAT_GROUP_DESC, + FEAT_AUXTRACE, FEAT_LAST_FEATURE, FEAT_SIMPLEPERF_START = 128, diff --git a/simpleperf/record_file_reader.cpp b/simpleperf/record_file_reader.cpp index 4e246b91..b17086a3 100644 --- a/simpleperf/record_file_reader.cpp +++ b/simpleperf/record_file_reader.cpp @@ -49,6 +49,7 @@ static const std::map<int, std::string> feature_name_map = { {FEAT_BRANCH_STACK, "branch_stack"}, {FEAT_PMU_MAPPINGS, "pmu_mappings"}, {FEAT_GROUP_DESC, "group_desc"}, + {FEAT_AUXTRACE, "auxtrace"}, {FEAT_FILE, "file"}, {FEAT_META_INFO, "meta_info"}, }; @@ -78,9 +79,10 @@ std::unique_ptr<RecordFileReader> RecordFileReader::CreateInstance(const std::st } auto reader = std::unique_ptr<RecordFileReader>(new RecordFileReader(filename, fp)); if (!reader->ReadHeader() || !reader->ReadAttrSection() || - !reader->ReadFeatureSectionDescriptors()) { + !reader->ReadFeatureSectionDescriptors() || !reader->ReadMetaInfoFeature()) { return nullptr; } + reader->UseRecordingEnvironment(); return reader; } @@ -205,6 +207,17 @@ bool RecordFileReader::ReadIdsForAttr(const FileAttr& attr, std::vector<uint64_t return true; } +void RecordFileReader::UseRecordingEnvironment() { + std::string arch = ReadFeatureString(FEAT_ARCH); + if (!arch.empty()) { + scoped_arch_.reset(new ScopedCurrentArch(GetArchType(arch))); + } + auto& meta_info = GetMetaInfoFeature(); + if (auto it = meta_info.find("event_type_info"); it != meta_info.end()) { + scoped_event_types_.reset(new ScopedEventTypes(it->second)); + } +} + bool RecordFileReader::ReadDataSection( const std::function<bool(std::unique_ptr<Record>)>& callback) { std::unique_ptr<Record> record; @@ -228,7 +241,7 @@ bool RecordFileReader::ReadRecord(std::unique_ptr<Record>& record) { } record = nullptr; if (read_record_size_ < header_.data.size) { - record = ReadRecord(&read_record_size_); + record = ReadRecord(); if (record == nullptr) { return false; } @@ -239,7 +252,7 @@ bool RecordFileReader::ReadRecord(std::unique_ptr<Record>& record) { return true; } -std::unique_ptr<Record> RecordFileReader::ReadRecord(uint64_t* nbytes_read) { +std::unique_ptr<Record> RecordFileReader::ReadRecord() { char header_buf[Record::header_size()]; if (!Read(header_buf, Record::header_size())) { return nullptr; @@ -258,7 +271,7 @@ std::unique_ptr<Record> RecordFileReader::ReadRecord(uint64_t* nbytes_read) { return nullptr; } cur_size += bytes_to_read; - *nbytes_read += header.size; + read_record_size_ += header.size; if (!Read(header_buf, Record::header_size())) { return nullptr; } @@ -268,7 +281,7 @@ std::unique_ptr<Record> RecordFileReader::ReadRecord(uint64_t* nbytes_read) { LOG(ERROR) << "SPLIT records are not followed by a SPLIT_END record."; return nullptr; } - *nbytes_read += header.size; + read_record_size_ += header.size; header = RecordHeader(buf.data()); p.reset(new char[header.size]); memcpy(p.get(), buf.data(), buf.size()); @@ -280,7 +293,7 @@ std::unique_ptr<Record> RecordFileReader::ReadRecord(uint64_t* nbytes_read) { return nullptr; } } - *nbytes_read += header.size; + read_record_size_ += header.size; } const perf_event_attr* attr = &file_attrs_[0].attr; @@ -305,7 +318,17 @@ std::unique_ptr<Record> RecordFileReader::ReadRecord(uint64_t* nbytes_read) { } } } - return ReadRecordFromOwnedBuffer(*attr, header.type, p.release()); + auto r = ReadRecordFromOwnedBuffer(*attr, header.type, p.release()); + if (r->type() == PERF_RECORD_AUXTRACE) { + auto auxtrace = static_cast<AuxTraceRecord*>(r.get()); + auxtrace->location.file_offset = header_.data.offset + read_record_size_; + read_record_size_ += auxtrace->data->aux_size; + if (fseek(record_fp_, auxtrace->data->aux_size, SEEK_CUR) != 0) { + PLOG(ERROR) << "fseek() failed"; + return nullptr; + } + } + return r; } bool RecordFileReader::Read(void* buf, size_t len) { @@ -316,6 +339,14 @@ bool RecordFileReader::Read(void* buf, size_t len) { return true; } +bool RecordFileReader::ReadAtOffset(uint64_t offset, void* buf, size_t len) { + if (fseek(record_fp_, offset, SEEK_SET) != 0) { + PLOG(ERROR) << "failed to seek to " << offset; + return false; + } + return Read(buf, len); +} + void RecordFileReader::ProcessEventIdRecord(const EventIdRecord& r) { for (size_t i = 0; i < r.count; ++i) { event_ids_for_file_attrs_[r.data[i].attr_id].push_back(r.data[i].event_id); @@ -342,11 +373,7 @@ bool RecordFileReader::ReadFeatureSection(int feature, std::vector<char>* data) if (section.size == 0) { return true; } - if (fseek(record_fp_, section.offset, SEEK_SET) != 0) { - PLOG(ERROR) << "fseek() failed"; - return false; - } - if (!Read(data->data(), data->size())) { + if (!ReadAtOffset(section.offset, data->data(), data->size())) { return false; } return true; @@ -409,6 +436,25 @@ std::string RecordFileReader::ReadFeatureString(int feature) { return p; } +std::vector<uint64_t> RecordFileReader::ReadAuxTraceFeature() { + std::vector<char> buf; + if (!ReadFeatureSection(FEAT_AUXTRACE, &buf)) { + return {}; + } + std::vector<uint64_t> auxtrace_offset; + const char* p = buf.data(); + const char* end = buf.data() + buf.size(); + while (p < end) { + uint64_t offset; + uint64_t size; + MoveFromBinaryFormat(offset, p); + auxtrace_offset.push_back(offset); + MoveFromBinaryFormat(size, p); + CHECK_EQ(size, AuxTraceRecord::Size()); + } + return auxtrace_offset; +} + bool RecordFileReader::ReadFileFeature(size_t& read_pos, std::string* file_path, uint32_t* file_type, @@ -471,19 +517,21 @@ bool RecordFileReader::ReadFileFeature(size_t& read_pos, return true; } -bool RecordFileReader::ReadMetaInfoFeature(std::unordered_map<std::string, std::string>* info_map) { - std::vector<char> buf; - if (!ReadFeatureSection(FEAT_META_INFO, &buf)) { - return false; - } - const char* p = buf.data(); - const char* end = buf.data() + buf.size(); - while (p < end) { - const char* key = p; - const char* value = key + strlen(key) + 1; - CHECK(value < end); - (*info_map)[p] = value; - p = value + strlen(value) + 1; +bool RecordFileReader::ReadMetaInfoFeature() { + if (feature_section_descriptors_.count(FEAT_META_INFO)) { + std::vector<char> buf; + if (!ReadFeatureSection(FEAT_META_INFO, &buf)) { + return false; + } + const char* p = buf.data(); + const char* end = buf.data() + buf.size(); + while (p < end) { + const char* key = p; + const char* value = key + strlen(key) + 1; + CHECK(value < end); + meta_info_[p] = value; + p = value + strlen(value) + 1; + } } return true; } @@ -512,6 +560,62 @@ void RecordFileReader::LoadBuildIdAndFileFeatures(ThreadTree& thread_tree) { } } +bool RecordFileReader::ReadAuxData(uint32_t cpu, uint64_t aux_offset, void* buf, size_t size) { + long saved_pos = ftell(record_fp_); + if (saved_pos == -1) { + PLOG(ERROR) << "ftell() failed"; + return false; + } + if (aux_data_location_.empty() && !BuildAuxDataLocation()) { + return false; + } + AuxDataLocation* location = nullptr; + auto it = aux_data_location_.find(cpu); + if (it != aux_data_location_.end()) { + auto comp = [](uint64_t aux_offset, const AuxDataLocation& location) { + return aux_offset < location.aux_offset; + }; + auto location_it = std::upper_bound(it->second.begin(), it->second.end(), aux_offset, comp); + if (location_it != it->second.begin()) { + --location_it; + if (location_it->aux_offset + location_it->aux_size >= aux_offset + size) { + location = &*location_it; + } + } + } + if (location == nullptr) { + LOG(ERROR) << "failed to find file offset of aux data: cpu " << cpu << ", aux_offset " + << aux_offset << ", size " << size; + return false; + } + if (!ReadAtOffset(aux_offset - location->aux_offset + location->file_offset, buf, size)) { + return false; + } + if (fseek(record_fp_, saved_pos, SEEK_SET) != 0) { + PLOG(ERROR) << "fseek() failed"; + return false; + } + return true; +} + +bool RecordFileReader::BuildAuxDataLocation() { + std::vector<uint64_t> auxtrace_offset = ReadAuxTraceFeature(); + if (auxtrace_offset.empty()) { + LOG(ERROR) << "failed to read auxtrace feature section"; + return false; + } + std::unique_ptr<char[]> buf(new char[AuxTraceRecord::Size()]); + for (auto offset : auxtrace_offset) { + if (!ReadAtOffset(offset, buf.get(), AuxTraceRecord::Size())) { + return false; + } + AuxTraceRecord auxtrace(buf.get()); + aux_data_location_[auxtrace.data->cpu].emplace_back( + auxtrace.data->offset, auxtrace.data->aux_size, offset + auxtrace.size()); + } + return true; +} + std::vector<std::unique_ptr<Record>> RecordFileReader::DataSection() { std::vector<std::unique_ptr<Record>> records; ReadDataSection([&](std::unique_ptr<Record> record) { diff --git a/simpleperf/record_file_test.cpp b/simpleperf/record_file_test.cpp index 7eb1b6e5..76d15ec7 100644 --- a/simpleperf/record_file_test.cpp +++ b/simpleperf/record_file_test.cpp @@ -144,7 +144,5 @@ TEST_F(RecordFileTest, write_meta_info_feature_section) { // Read from a record file. std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile_.path); ASSERT_TRUE(reader != nullptr); - std::unordered_map<std::string, std::string> read_info_map; - ASSERT_TRUE(reader->ReadMetaInfoFeature(&read_info_map)); - ASSERT_EQ(read_info_map, info_map); + ASSERT_EQ(reader->GetMetaInfoFeature(), info_map); } diff --git a/simpleperf/record_file_writer.cpp b/simpleperf/record_file_writer.cpp index ecc94ee5..f718df49 100644 --- a/simpleperf/record_file_writer.cpp +++ b/simpleperf/record_file_writer.cpp @@ -129,8 +129,12 @@ bool RecordFileWriter::WriteRecord(const Record& record) { // RECORD_SPLIT records, followed by a RECORD_SPLIT_END record. constexpr uint32_t RECORD_SIZE_LIMIT = 65535; if (record.size() <= RECORD_SIZE_LIMIT) { - WriteData(record.Binary(), record.size()); - return true; + bool result = WriteData(record.Binary(), record.size()); + if (result && record.type() == PERF_RECORD_AUXTRACE) { + auto auxtrace = static_cast<const AuxTraceRecord*>(&record); + result = WriteData(auxtrace->location.addr, auxtrace->data->aux_size); + } + return result; } CHECK_GT(record.type(), SIMPLE_PERF_RECORD_TYPE_START); const char* p = record.Binary(); @@ -204,6 +208,15 @@ bool RecordFileWriter::ReadDataSection(const std::function<void(const Record*)>& } read_pos += header.size; std::unique_ptr<Record> r = ReadRecordFromBuffer(event_attr_, header.type, record_buf.data()); + if (r->type() == PERF_RECORD_AUXTRACE) { + auto auxtrace = static_cast<AuxTraceRecord*>(r.get()); + auxtrace->location.file_offset = data_section_offset_ + read_pos; + if (fseek(record_fp_, auxtrace->data->aux_size, SEEK_CUR) != 0) { + PLOG(ERROR) << "fseek() failed"; + return false; + } + read_pos += auxtrace->data->aux_size; + } callback(r.get()); } return true; @@ -297,6 +310,16 @@ bool RecordFileWriter::WriteBranchStackFeature() { return WriteFeatureEnd(FEAT_BRANCH_STACK); } +bool RecordFileWriter::WriteAuxTraceFeature(const std::vector<uint64_t>& auxtrace_offset) { + std::vector<uint64_t> data; + for (auto offset : auxtrace_offset) { + data.push_back(offset); + data.push_back(AuxTraceRecord::Size()); + } + return WriteFeatureBegin(FEAT_AUXTRACE) && Write(data.data(), data.size() * sizeof(uint64_t)) && + WriteFeatureEnd(FEAT_AUXTRACE); +} + bool RecordFileWriter::WriteFileFeatures(const std::vector<Dso*>& files) { for (Dso* dso : files) { // Always want to dump dex file offsets for DSO_DEX_FILE type. diff --git a/simpleperf/report_lib_interface.cpp b/simpleperf/report_lib_interface.cpp index 97a95943..f57ee896 100644 --- a/simpleperf/report_lib_interface.cpp +++ b/simpleperf/report_lib_interface.cpp @@ -187,7 +187,6 @@ class ReportLib { std::vector<CallChainEntry> callchain_entries_; std::string build_id_string_; std::vector<EventInfo> events_; - std::unique_ptr<ScopedEventTypes> scoped_event_types_; bool trace_offcpu_; std::unordered_map<pid_t, std::unique_ptr<SampleRecord>> next_sample_cache_; FeatureSection feature_section_; @@ -224,17 +223,8 @@ bool ReportLib::OpenRecordFileIfNecessary() { return false; } record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_); - std::unordered_map<std::string, std::string> meta_info_map; - if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO) && - !record_file_reader_->ReadMetaInfoFeature(&meta_info_map)) { - return false; - } - auto it = meta_info_map.find("event_type_info"); - if (it != meta_info_map.end()) { - scoped_event_types_.reset(new ScopedEventTypes(it->second)); - } - it = meta_info_map.find("trace_offcpu"); - if (it != meta_info_map.end()) { + auto& meta_info = record_file_reader_->GetMetaInfoFeature(); + if (auto it = meta_info.find("trace_offcpu"); it != meta_info.end()) { trace_offcpu_ = it->second == "true"; } } diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h index 8578ffd5..f57fe786 100644 --- a/simpleperf/sample_tree.h +++ b/simpleperf/sample_tree.h @@ -77,7 +77,7 @@ class SampleTreeBuilder { build_callchain_ = build_callchain; use_caller_as_callchain_root_ = use_caller_as_callchain_root; if (accumulate_callchain_) { - offline_unwinder_.reset(new OfflineUnwinder(false)); + offline_unwinder_ = OfflineUnwinder::Create(false); } } diff --git a/simpleperf/scripts/app_profiler.py b/simpleperf/scripts/app_profiler.py index b8f00cb6..0080b7f9 100755 --- a/simpleperf/scripts/app_profiler.py +++ b/simpleperf/scripts/app_profiler.py @@ -30,7 +30,7 @@ import sys import time from utils import AdbHelper, bytes_to_str, extant_dir, get_script_dir, get_target_binary_path -from utils import log_debug, log_info, log_exit, ReadElf, remove, str_to_bytes +from utils import log_debug, log_info, log_exit, ReadElf, remove, set_log_level, str_to_bytes NATIVE_LIBS_DIR_ON_DEVICE = '/data/local/tmp/native_libs/' @@ -203,11 +203,12 @@ class ProfilerBase(object): """Start simpleperf reocrd process on device.""" args = ['/data/local/tmp/simpleperf', 'record', '-o', '/data/local/tmp/perf.data', self.args.record_options] - if self.adb.run(['shell', 'ls', NATIVE_LIBS_DIR_ON_DEVICE]): + if self.adb.run(['shell', 'ls', NATIVE_LIBS_DIR_ON_DEVICE, '>/dev/null', '2>&1']): args += ['--symfs', NATIVE_LIBS_DIR_ON_DEVICE] + args += ['--log', self.args.log] args += target_args adb_args = [self.adb.adb_path, 'shell'] + args - log_debug('run adb cmd: %s' % adb_args) + log_info('run adb cmd: %s' % adb_args) self.record_subproc = subprocess.Popen(adb_args) def wait_profiling(self): @@ -244,7 +245,7 @@ class ProfilerBase(object): if not self.args.skip_collect_binaries: binary_cache_args = [sys.executable, os.path.join(get_script_dir(), 'binary_cache_builder.py')] - binary_cache_args += ['-i', self.args.perf_data_path] + binary_cache_args += ['-i', self.args.perf_data_path, '--log', self.args.log] if self.args.native_lib_dir: binary_cache_args += ['-lib', self.args.native_lib_dir] if self.args.disable_adb_root: @@ -425,12 +426,15 @@ def main(): help="""Force adb to run in non root mode. By default, app_profiler.py will try to switch to root mode to be able to profile released Android apps.""") + other_group.add_argument( + '--log', choices=['debug', 'info', 'warning'], default='info', help='set log level') def check_args(args): if (not args.app) and (args.compile_java_code or args.activity or args.test): log_exit('--compile_java_code, -a, -t can only be used when profiling an Android app.') args = parser.parse_args() + set_log_level(args.log) check_args(args) if args.app: profiler = AppProfiler(args) diff --git a/simpleperf/scripts/binary_cache_builder.py b/simpleperf/scripts/binary_cache_builder.py index 50fcc548..97d169c5 100755 --- a/simpleperf/scripts/binary_cache_builder.py +++ b/simpleperf/scripts/binary_cache_builder.py @@ -27,7 +27,7 @@ import shutil from simpleperf_report_lib import ReportLib from utils import AdbHelper, extant_dir, extant_file, flatten_arg_list, log_info, log_warning -from utils import ReadElf +from utils import ReadElf, set_log_level def is_jit_symfile(dso_name): return dso_name.split('/')[-1].startswith('TemporaryFile') @@ -209,7 +209,7 @@ class BinaryCacheBuilder(object): if os.path.isfile(file_path): os.remove(file_path) if self.adb.switch_to_root(): - self.adb.run(['shell', '"echo 0 >/proc/sys/kernel/kptr_restrict"']) + self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict']) self.adb.run(['pull', '/proc/kallsyms', file_path]) @@ -223,8 +223,10 @@ def main(): parser.add_argument('--disable_adb_root', action='store_true', help=""" Force adb to run in non root mode.""") parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.') + parser.add_argument( + '--log', choices=['debug', 'info', 'warning'], default='info', help='set log level') args = parser.parse_args() - + set_log_level(args.log) ndk_path = None if not args.ndk_path else args.ndk_path[0] builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root) symfs_dirs = flatten_arg_list(args.native_lib_dir) diff --git a/simpleperf/scripts/inferno/data_types.py b/simpleperf/scripts/inferno/data_types.py index deb9f515..35ef4c0b 100644 --- a/simpleperf/scripts/inferno/data_types.py +++ b/simpleperf/scripts/inferno/data_types.py @@ -113,15 +113,16 @@ class FlameGraphCallSite(object): self._get_next_callsite_id()) return child - def trim_callchain(self, min_num_events): + def trim_callchain(self, min_num_events, max_depth, depth=0): """ Remove call sites with num_events < min_num_events in the subtree. Remaining children are collected in a list. """ - for key in self.child_dict: - child = self.child_dict[key] - if child.num_events >= min_num_events: - child.trim_callchain(min_num_events) - self.children.append(child) + if depth <= max_depth: + for key in self.child_dict: + child = self.child_dict[key] + if child.num_events >= min_num_events: + child.trim_callchain(min_num_events, max_depth, depth + 1) + self.children.append(child) # Relese child_dict since it will not be used. self.child_dict = None diff --git a/simpleperf/scripts/inferno/inferno.py b/simpleperf/scripts/inferno/inferno.py index 78cc3437..12b9d904 100755 --- a/simpleperf/scripts/inferno/inferno.py +++ b/simpleperf/scripts/inferno/inferno.py @@ -40,7 +40,7 @@ import sys SCRIPTS_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) sys.path.append(SCRIPTS_PATH) from simpleperf_report_lib import ReportLib -from utils import log_exit, log_info, AdbHelper, open_report_in_browser +from utils import log_exit, log_fatal, log_info, AdbHelper, open_report_in_browser from data_types import Process from svg_renderer import get_proper_scaled_time_string, render_svg @@ -143,7 +143,7 @@ def parse_samples(process, args, sample_filter_fn): for thread in process.threads.values(): min_event_count = thread.num_events * args.min_callchain_percentage * 0.01 - thread.flamegraph.trim_callchain(min_event_count) + thread.flamegraph.trim_callchain(min_event_count, args.max_callchain_depth) log_info("Parsed %s callchains." % process.num_samples) @@ -289,6 +289,11 @@ def main(): It is used to limit nodes shown in the flamegraph. For example, when set to 0.01, only callchains taking >= 0.01%% of the event count of the owner thread are collected in the report.""") + report_group.add_argument('--max_callchain_depth', default=1000000000, type=int, help=""" + Set maximum depth of callchains shown in the report. It is used + to limit the nodes shown in the flamegraph and avoid processing + limits. For example, when set to 10, callstacks will be cut after + the tenth frame.""") report_group.add_argument('--no_browser', action='store_true', help="""Don't open report in browser.""") report_group.add_argument('-o', '--report_path', default='report.html', help="""Set report @@ -343,11 +348,16 @@ def main(): args.title = '' args.title += '(One Flamegraph)' - parse_samples(process, args, sample_filter_fn) - generate_threads_offsets(process) - report_path = output_report(process, args) - if not args.no_browser: - open_report_in_browser(report_path) + try: + parse_samples(process, args, sample_filter_fn) + generate_threads_offsets(process) + report_path = output_report(process, args) + if not args.no_browser: + open_report_in_browser(report_path) + except RuntimeError as r: + if 'maximum recursion depth' in r.__str__(): + log_fatal("Recursion limit exceeded (%s), try --max_callchain_depth." % r) + raise r log_info("Flamegraph generated at '%s'." % report_path) diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py index 35678b13..4c59cc56 100755 --- a/simpleperf/scripts/pprof_proto_generator.py +++ b/simpleperf/scripts/pprof_proto_generator.py @@ -30,8 +30,8 @@ import os import os.path from simpleperf_report_lib import ReportLib -from utils import Addr2Nearestline, bytes_to_str, extant_dir, find_tool_path, flatten_arg_list -from utils import log_info, log_exit, str_to_bytes +from utils import Addr2Nearestline, extant_dir, find_tool_path, flatten_arg_list +from utils import log_info, log_exit try: import profile_pb2 except ImportError: @@ -40,13 +40,13 @@ except ImportError: def load_pprof_profile(filename): profile = profile_pb2.Profile() with open(filename, "rb") as f: - profile.ParseFromString(bytes_to_str(f.read())) + profile.ParseFromString(f.read()) return profile def store_pprof_profile(filename, profile): with open(filename, 'wb') as f: - f.write(str_to_bytes(profile.SerializeToString())) + f.write(profile.SerializeToString()) class PprofProfilePrinter(object): @@ -268,6 +268,7 @@ class PprofProfileGenerator(object): else: self.tid_filter = None self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None + self.max_chain_length = config['max_chain_length'] self.profile = profile_pb2.Profile() self.profile.string_table.append('') self.string_table = {} @@ -302,7 +303,7 @@ class PprofProfileGenerator(object): if self._filter_symbol(symbol): location_id = self.get_location_id(symbol.vaddr_in_file, symbol) sample.add_location_id(location_id) - for i in range(callchain.nr): + for i in range(max(0, callchain.nr - self.max_chain_length), callchain.nr): entry = callchain.entries[i] if self._filter_symbol(symbol): location_id = self.get_location_id(entry.ip, entry.symbol) @@ -330,12 +331,12 @@ class PprofProfileGenerator(object): if self.comm_filter: if sample.thread_comm not in self.comm_filter: return False - if self.pid_filter: - if sample.pid not in self.pid_filter: - return False - if self.tid_filter: - if sample.tid not in self.tid_filter: - return False + if self.pid_filter: + if sample.pid not in self.pid_filter: + return False + if self.tid_filter: + if sample.tid not in self.tid_filter: + return False return True def _filter_symbol(self, symbol): @@ -562,6 +563,8 @@ def main(): Use samples only in threads with selected thread ids.""") parser.add_argument('--dso', nargs='+', action='append', help=""" Use samples only in selected binaries.""") + parser.add_argument('--max_chain_length', type=int, default=1000000000, help=""" + Maximum depth of samples to be converted.""") # Large value as infinity standin. parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.') parser.add_argument('--show_art_frames', action='store_true', help='Show frames of internal methods in the ART Java interpreter.') @@ -583,6 +586,7 @@ def main(): config['dso_filters'] = flatten_arg_list(args.dso) config['ndk_path'] = args.ndk_path config['show_art_frames'] = args.show_art_frames + config['max_chain_length'] = args.max_chain_length generator = PprofProfileGenerator(config) profile = generator.gen() store_pprof_profile(config['output_file'], profile) diff --git a/simpleperf/scripts/report_html.py b/simpleperf/scripts/report_html.py index d7fe0ecf..f2292720 100755 --- a/simpleperf/scripts/report_html.py +++ b/simpleperf/scripts/report_html.py @@ -642,6 +642,7 @@ class RecordData(object): hit_func_ids = set() for event in self.events.values(): min_limit = event.event_count * min_func_percent * 0.01 + to_del_processes = [] for process in event.processes.values(): to_del_threads = [] for thread in process.threads.values(): @@ -651,6 +652,10 @@ class RecordData(object): thread.limit_percents(min_limit, min_callchain_percent, hit_func_ids) for thread in to_del_threads: del process.threads[thread] + if not process.threads: + to_del_processes.append(process.pid) + for process in to_del_processes: + del event.processes[process] self.functions.trim_functions(hit_func_ids) def _get_event(self, event_name): diff --git a/simpleperf/scripts/script_testdata/two_process_perf.data b/simpleperf/scripts/script_testdata/two_process_perf.data Binary files differnew file mode 100644 index 00000000..c61d5916 --- /dev/null +++ b/simpleperf/scripts/script_testdata/two_process_perf.data diff --git a/simpleperf/scripts/test.py b/simpleperf/scripts/test.py index 8e905f12..24a04356 100755 --- a/simpleperf/scripts/test.py +++ b/simpleperf/scripts/test.py @@ -1285,6 +1285,21 @@ class TestReportHtml(TestBase): hit_count += 1 self.assertEqual(hit_count, len(event_count_for_thread_name)) + def test_no_empty_process(self): + """ Test not showing a process having no threads. """ + perf_data = os.path.join('testdata', 'two_process_perf.data') + self.run_cmd(['report_html.py', '-i', perf_data]) + record_data = self._load_record_data_in_html('report.html') + processes = record_data['sampleInfo'][0]['processes'] + self.assertEqual(len(processes), 2) + + # One process is removed because all its threads are removed for not + # reaching the min_func_percent limit. + self.run_cmd(['report_html.py', '-i', perf_data, '--min_func_percent', '20']) + record_data = self._load_record_data_in_html('report.html') + processes = record_data['sampleInfo'][0]['processes'] + self.assertEqual(len(processes), 1) + def _load_record_data_in_html(self, html_file): with open(html_file, 'r') as fh: data = fh.read() @@ -1411,22 +1426,58 @@ class TestApiProfiler(TestBase): class TestPprofProtoGenerator(TestBase): - def test_show_art_frames(self): + def setUp(self): if not HAS_GOOGLE_PROTOBUF: - log_info('Skip test for pprof_proto_generator because google.protobuf is missing') - return - testdata_path = os.path.join('testdata', 'perf_with_interpreter_frames.data') - art_frame_str = 'art::interpreter::DoCall' + raise unittest.SkipTest( + 'Skip test for pprof_proto_generator because google.protobuf is missing') - # By default, don't show art frames. - self.run_cmd(['pprof_proto_generator.py', '-i', testdata_path]) - output = self.run_cmd(['pprof_proto_generator.py', '--show'], return_output=True) - self.assertEqual(output.find(art_frame_str), -1, 'output: ' + output) + def run_generator(self, options=None, testdata_file='perf_with_interpreter_frames.data'): + testdata_path = os.path.join('testdata', testdata_file) + options = options or [] + self.run_cmd(['pprof_proto_generator.py', '-i', testdata_path] + options) + return self.run_cmd(['pprof_proto_generator.py', '--show'], return_output=True) + def test_show_art_frames(self): + art_frame_str = 'art::interpreter::DoCall' + # By default, don't show art frames. + self.assertNotIn(art_frame_str, self.run_generator()) # Use --show_art_frames to show art frames. - self.run_cmd(['pprof_proto_generator.py', '-i', testdata_path, '--show_art_frames']) - output = self.run_cmd(['pprof_proto_generator.py', '--show'], return_output=True) - self.assertNotEqual(output.find(art_frame_str), -1, 'output: ' + output) + self.assertIn(art_frame_str, self.run_generator(['--show_art_frames'])) + + def test_pid_filter(self): + key = 'PlayScene::DoFrame()' # function in process 10419 + self.assertIn(key, self.run_generator()) + self.assertIn(key, self.run_generator(['--pid', '10419'])) + self.assertIn(key, self.run_generator(['--pid', '10419', '10416'])) + self.assertNotIn(key, self.run_generator(['--pid', '10416'])) + + def test_tid_filter(self): + key1 = 'art::ProfileSaver::Run()' # function in thread 10459 + key2 = 'PlayScene::DoFrame()' # function in thread 10463 + for options in ([], ['--tid', '10459', '10463']): + output = self.run_generator(options) + self.assertIn(key1, output) + self.assertIn(key2, output) + output = self.run_generator(['--tid', '10459']) + self.assertIn(key1, output) + self.assertNotIn(key2, output) + output = self.run_generator(['--tid', '10463']) + self.assertNotIn(key1, output) + self.assertIn(key2, output) + + def test_comm_filter(self): + key1 = 'art::ProfileSaver::Run()' # function in thread 'Profile Saver' + key2 = 'PlayScene::DoFrame()' # function in thread 'e.sample.tunnel' + for options in ([], ['--comm', 'Profile Saver', 'e.sample.tunnel']): + output = self.run_generator(options) + self.assertIn(key1, output) + self.assertIn(key2, output) + output = self.run_generator(['--comm', 'Profile Saver']) + self.assertIn(key1, output) + self.assertNotIn(key2, output) + output = self.run_generator(['--comm', 'e.sample.tunnel']) + self.assertNotIn(key1, output) + self.assertIn(key2, output) def get_all_tests(): @@ -1458,7 +1509,7 @@ def main(): parser.add_argument('--test-from', nargs=1, help='Run left tests from the selected test.') parser.add_argument('--python-version', choices=['2', '3', 'both'], default='both', help=""" Run tests on which python versions.""") - parser.add_argument('--repeat', type=int, nargs=1, default=1, help='run test multiple times') + parser.add_argument('--repeat', type=int, nargs=1, default=[1], help='run test multiple times') parser.add_argument('pattern', nargs='*', help='Run tests matching the selected pattern.') args = parser.parse_args() tests = get_all_tests() diff --git a/simpleperf/scripts/utils.py b/simpleperf/scripts/utils.py index ea708c61..4b942d8c 100644 --- a/simpleperf/scripts/utils.py +++ b/simpleperf/scripts/utils.py @@ -70,6 +70,17 @@ def log_exit(msg): def disable_debug_log(): logging.getLogger().setLevel(logging.WARN) +def set_log_level(level_name): + if level_name == 'debug': + level = logging.DEBUG + elif level_name == 'info': + level = logging.INFO + elif level_name == 'warning': + level = logging.WARNING + else: + log_fatal('unknown log level: %s' % level_name) + logging.getLogger().setLevel(level) + def str_to_bytes(str_value): if not is_python3(): return str_value diff --git a/simpleperf/testdata/etm/etm_test_loop b/simpleperf/testdata/etm/etm_test_loop Binary files differnew file mode 100644 index 00000000..ec715f1d --- /dev/null +++ b/simpleperf/testdata/etm/etm_test_loop diff --git a/simpleperf/testdata/etm/perf.data b/simpleperf/testdata/etm/perf.data Binary files differnew file mode 100644 index 00000000..2cae4b21 --- /dev/null +++ b/simpleperf/testdata/etm/perf.data |