summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin Cui <yabinc@google.com>2020-03-31 11:59:47 -0700
committerYi Kong <yikong@google.com>2020-04-24 00:49:20 +0800
commit901a08a73484b0989530fb9fc7de0829d13d6757 (patch)
treef7ee8f39dfb19f5286e82cc66021b79cebeb32d4
parent10f527cb163b4bbd96d2f883f7cc5477a7c6f36c (diff)
downloadextras-901a08a73484b0989530fb9fc7de0829d13d6757.tar.gz
simpleperf: adjust based on opencsd change.
Currently, simpleperf gets branch_addr from instruction decoder. But OpenCSD is changed to cache InstrRange elements. Thus when simpleperf gets an InstrRange element, the instruction decoder may point to an instruction executed later. Fix it by using the start_addr of the next InstrRange element as the branch_addr of the current InstrRange element. Also add a test. Also use recorded binary path in output to help test. Bug: 153039105 Test: run simpleperf manually so the inject result doesn't Test: change for a 10s system wide recording data. Test: run simpleperf_unit_test. Change-Id: I6fdf32d3bac18ed3762c944c282ec881d09395b4 Merged-In: I6fdf32d3bac18ed3762c944c282ec881d09395b4 (cherry picked from commit 2c294914c1eeb30754a8a2449795b296bd2176e9)
-rw-r--r--simpleperf/ETMDecoder.cpp62
-rw-r--r--simpleperf/ETMDecoder.h1
-rw-r--r--simpleperf/cmd_inject.cpp35
-rw-r--r--simpleperf/cmd_inject_test.cpp5
-rw-r--r--simpleperf/testdata/etm/perf_inject.data24
5 files changed, 108 insertions, 19 deletions
diff --git a/simpleperf/ETMDecoder.cpp b/simpleperf/ETMDecoder.cpp
index 5c8d4570..136bbcdf 100644
--- a/simpleperf/ETMDecoder.cpp
+++ b/simpleperf/ETMDecoder.cpp
@@ -396,6 +396,12 @@ class DataDumper : public ElementCallback {
// It decodes each ETMV4IPacket into TraceElements, and generates ETMInstrRanges from TraceElements.
// Decoding each packet is slow, but ensures correctness.
class InstrRangeParser : public ElementCallback {
+ private:
+ struct TraceData {
+ ETMInstrRange instr_range;
+ bool wait_for_branch_to_addr_fix = false;
+ };
+
public:
InstrRangeParser(MapLocator& map_locator, const ETMDecoder::CallbackFn& callback)
: map_locator_(map_locator), callback_(callback) {}
@@ -404,33 +410,64 @@ class InstrRangeParser : public ElementCallback {
const OcsdTraceElement& elem,
const ocsd_instr_info* next_instr) override {
if (elem.getType() == OCSD_GEN_TRC_ELEM_INSTR_RANGE) {
+ TraceData& data = trace_data_[trace_id];
const MapEntry* map = map_locator_.FindMap(trace_id, elem.st_addr);
if (map == nullptr) {
+ FlushData(data);
return OCSD_RESP_CONT;
}
- instr_range_.dso = map->dso;
- instr_range_.start_addr = map->GetVaddrInFile(elem.st_addr);
- instr_range_.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz);
+ uint64_t start_addr = map->GetVaddrInFile(elem.st_addr);
+ auto& instr_range = data.instr_range;
+
+ if (data.wait_for_branch_to_addr_fix) {
+ // OpenCSD may cache a list of InstrRange elements, making it inaccurate to get branch to
+ // address from next_instr->branch_addr. So fix it by using the start address of the next
+ // InstrRange element.
+ instr_range.branch_to_addr = start_addr;
+ }
+ FlushData(data);
+ instr_range.dso = map->dso;
+ instr_range.start_addr = start_addr;
+ instr_range.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz);
bool end_with_branch =
elem.last_i_type == OCSD_INSTR_BR || elem.last_i_type == OCSD_INSTR_BR_INDIRECT;
bool branch_taken = end_with_branch && elem.last_instr_exec;
if (elem.last_i_type == OCSD_INSTR_BR && branch_taken) {
// It is based on the assumption that we only do immediate branch inside a binary,
// which may not be true for all cases. TODO: http://b/151665001.
- instr_range_.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr);
+ instr_range.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr);
+ data.wait_for_branch_to_addr_fix = true;
} else {
- instr_range_.branch_to_addr = 0;
+ instr_range.branch_to_addr = 0;
}
- instr_range_.branch_taken_count = branch_taken ? 1 : 0;
- instr_range_.branch_not_taken_count = branch_taken ? 0 : 1;
- callback_(instr_range_);
+ instr_range.branch_taken_count = branch_taken ? 1 : 0;
+ instr_range.branch_not_taken_count = branch_taken ? 0 : 1;
+
+ } else if (elem.getType() == OCSD_GEN_TRC_ELEM_TRACE_ON) {
+ // According to the ETM Specification, the Trace On element indicates a discontinuity in the
+ // instruction trace stream. So it cuts the connection between instr ranges.
+ FlushData(trace_data_[trace_id]);
}
return OCSD_RESP_CONT;
}
+ void FinishData() {
+ for (auto& pair : trace_data_) {
+ FlushData(pair.second);
+ }
+ }
+
private:
+ void FlushData(TraceData& data) {
+ if (data.instr_range.dso != nullptr) {
+ callback_(data.instr_range);
+ data.instr_range.dso = nullptr;
+ }
+ data.wait_for_branch_to_addr_fix = false;
+ }
+
MapLocator& map_locator_;
- ETMInstrRange instr_range_;
+ std::unordered_map<uint8_t, TraceData> trace_data_;
ETMDecoder::CallbackFn callback_;
};
@@ -522,6 +559,13 @@ class ETMDecoderImpl : public ETMDecoder {
return true;
}
+ bool FinishData() override {
+ if (instr_range_parser_) {
+ instr_range_parser_->FinishData();
+ }
+ return true;
+ }
+
private:
void InstallMapLocator() {
if (!map_locator_) {
diff --git a/simpleperf/ETMDecoder.h b/simpleperf/ETMDecoder.h
index 3ebbd7dd..b9493acd 100644
--- a/simpleperf/ETMDecoder.h
+++ b/simpleperf/ETMDecoder.h
@@ -61,6 +61,7 @@ class ETMDecoder {
virtual void RegisterCallback(const CallbackFn& callback) = 0;
virtual bool ProcessData(const uint8_t* data, size_t size) = 0;
+ virtual bool FinishData() = 0;
};
} // namespace simpleperf \ No newline at end of file
diff --git a/simpleperf/cmd_inject.cpp b/simpleperf/cmd_inject.cpp
index 0a6719d8..ee27b0b8 100644
--- a/simpleperf/cmd_inject.cpp
+++ b/simpleperf/cmd_inject.cpp
@@ -80,6 +80,9 @@ class InjectCommand : public Command {
if (!record_file_reader_->ReadDataSection([this](auto r) { return ProcessRecord(r.get()); })) {
return false;
}
+ if (etm_decoder_ && !etm_decoder_->FinishData()) {
+ return false;
+ }
PostProcess();
output_fp_.reset(nullptr);
return true;
@@ -164,7 +167,7 @@ class InjectCommand : public Command {
return;
}
- auto& binary = binary_map_[instr_range.dso->GetDebugFilePath()];
+ auto& binary = binary_map_[instr_range.dso];
binary.range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)] +=
instr_range.branch_taken_count + instr_range.branch_not_taken_count;
if (instr_range.branch_taken_count > 0) {
@@ -174,13 +177,23 @@ class InjectCommand : public Command {
}
void PostProcess() {
- for (const auto& pair : binary_map_) {
- const std::string& binary_path = pair.first;
- const BinaryInfo& binary = pair.second;
+ // binary_map is used to store instruction ranges, which can have a large amount. And it has
+ // a larger access time (instruction ranges * executed time). So it's better to use
+ // unorder_maps to speed up access time. But we also want a stable output here, to compare
+ // output changes result from code changes. So generate a sorted output here.
+ std::vector<Dso*> dso_v;
+ for (auto& p : binary_map_) {
+ dso_v.emplace_back(p.first);
+ }
+ std::sort(dso_v.begin(), dso_v.end(), [](Dso* d1, Dso* d2) { return d1->Path() < d2->Path(); });
+ for (auto dso : dso_v) {
+ const BinaryInfo& binary = binary_map_[dso];
// Write range_count_map.
- fprintf(output_fp_.get(), "%zu\n", binary.range_count_map.size());
- for (const auto& pair2 : binary.range_count_map) {
+ std::map<AddrPair, uint64_t> range_count_map(binary.range_count_map.begin(),
+ binary.range_count_map.end());
+ fprintf(output_fp_.get(), "%zu\n", range_count_map.size());
+ for (const auto& pair2 : range_count_map) {
const AddrPair& addr_range = pair2.first;
uint64_t count = pair2.second;
@@ -192,8 +205,10 @@ class InjectCommand : public Command {
fprintf(output_fp_.get(), "0\n");
// Write branch_count_map.
- fprintf(output_fp_.get(), "%zu\n", binary.branch_count_map.size());
- for (const auto& pair2 : binary.branch_count_map) {
+ std::map<AddrPair, uint64_t> branch_count_map(binary.branch_count_map.begin(),
+ binary.branch_count_map.end());
+ fprintf(output_fp_.get(), "%zu\n", branch_count_map.size());
+ for (const auto& pair2 : branch_count_map) {
const AddrPair& branch = pair2.first;
uint64_t count = pair2.second;
@@ -202,7 +217,7 @@ class InjectCommand : public Command {
}
// Write the binary path in comment.
- fprintf(output_fp_.get(), "// %s\n\n", binary_path.c_str());
+ fprintf(output_fp_.get(), "// %s\n\n", dso->Path().c_str());
}
}
@@ -217,7 +232,7 @@ class InjectCommand : public Command {
std::unique_ptr<FILE, decltype(&fclose)> output_fp_;
// Store results for AutoFDO.
- std::unordered_map<std::string, BinaryInfo> binary_map_;
+ std::unordered_map<Dso*, BinaryInfo> binary_map_;
};
} // namespace
diff --git a/simpleperf/cmd_inject_test.cpp b/simpleperf/cmd_inject_test.cpp
index 0d66d6cd..1f057cb0 100644
--- a/simpleperf/cmd_inject_test.cpp
+++ b/simpleperf/cmd_inject_test.cpp
@@ -20,6 +20,7 @@
#include "command.h"
#include "get_test_data.h"
#include "test_util.h"
+#include "utils.h"
static std::unique_ptr<Command> InjectCmd() { return CreateCommandInstance("inject"); }
@@ -31,6 +32,10 @@ TEST(cmd_inject, smoke) {
ASSERT_TRUE(android::base::ReadFileToString(tmpfile.path, &data));
// Test that we can find instr range in etm_test_loop binary.
ASSERT_NE(data.find("etm_test_loop"), std::string::npos);
+ std::string expected_data;
+ ASSERT_TRUE(android::base::ReadFileToString(
+ GetTestData(std::string("etm") + OS_PATH_SEPARATOR + "perf_inject.data"), &expected_data));
+ ASSERT_EQ(data, expected_data);
}
TEST(cmd_inject, binary_option) {
diff --git a/simpleperf/testdata/etm/perf_inject.data b/simpleperf/testdata/etm/perf_inject.data
new file mode 100644
index 00000000..4c121a32
--- /dev/null
+++ b/simpleperf/testdata/etm/perf_inject.data
@@ -0,0 +1,24 @@
+10
+1000-1004:1
+100c-1050:1
+1054-106c:1
+105c-106c:100
+1070-1070:1
+1074-1080:100
+1084-1088:1
+108c-10a0:1
+10a4-10b0:1
+10e0-10ec:1
+0
+9
+1004->100c:1
+1050->10e0:1
+106c->1074:100
+1070->1084:1
+1080->105c:100
+1088->0:1
+10a0->1054:1
+10b0->0:1
+10ec->0:1
+// /data/local/tmp/etm_test_loop
+